实现一个编译器
最终效果
动态类型, 内置int,str,list,dict(from python), function
function执行后可以保留内部的变量.
不想自己实现这么多类型,用python来实现
整个语言基于Auto变量,所以两句话就可以跑起编译器
# Load the program text into a root Auto named '__main__' and run it.
a = Auto(open("hello.pym").read(), name='__main__')
a.call()
call会执行几个过程:
1) tokenize:词法分析
2) stepize:产生字节码
3) step:执行字节码
若输入代码
fab = function() {
    value = [1,1];
    size = len(value);
    function get(pos) {
        pos -= 1;
        if (pos < 0) {
            return -1;
        } else if (pos < size) {
            return value[pos];
        } else {
            while (pos >= size) {
                value += [value[size-1]+value[size-2]];
                size += 1;
            }
            return value[pos];
        }
    }
};
fab();
print fab.get(10);
print fab.get(20);
会产生输出
55
6765
运行中产生的字节码:
[STEPIZE __main__] 0 PUSH_VAR (str)fab 1 PUSH_CONST (str)\n value = [1,1];\n size = len(value);\n function get(pos) {\n pos -= 1;\n if (pos < 0) {\n return -1;\n } else if (pos < size) {\n return value[pos];\n } else {\n while (pos >= size) {\n value += [value[size-1]+value[size-2]];\n size += 1;\n }\n return value[pos];\n }\n } 2 PUSH_CONST (list)[] 3 MAKE_FUNCTION (NoneType)None 4 ASSIGN (NoneType)None 5 PUSH_VAR (str)fab 6 CALL (int)0 7 POP_ALL (NoneType)None 8 PUSH_VAR (str)fab 9 PUSH_CONST (str)get 10 GET_METHOD (NoneType)None 11 PUSH_CONST (int)10 12 CALL (int)1 13 PRINT (NoneType)None 14 PUSH_VAR (str)fab 15 PUSH_CONST (str)get 16 GET_METHOD (NoneType)None 17 PUSH_CONST (int)20 18 CALL (int)1 19 PRINT (NoneType)None [STEPIZE fab] 0 PUSH_VAR (str)value 1 PUSH_CONST (int)1 2 PUSH_CONST (int)1 3 BUILD_LIST (int)2 4 ASSIGN (NoneType)None 5 PUSH_VAR (str)size 6 PUSH_VAR (str)len 7 PUSH_VAR (str)value 8 CALL (int)1 9 ASSIGN (NoneType)None 10 PUSH_VAR (str)get 11 PUSH_CONST (str)\n pos -= 1;\n if (pos < 0) {\n return -1;\n } else if (pos < size) {\n return value[pos];\n } else {\n while (pos >= size) {\n value += [value[size-1]+value[size-2]];\n size += 1;\n }\n return value[pos];\n } 12 PUSH_CONST (list)['pos'] 13 MAKE_FUNCTION (NoneType)None 14 ASSIGN (NoneType)None [STEPIZE get] 0 PUSH_VAR (str)pos 1 PUSH_VAR (str)pos 2 PUSH_CONST (int)1 3 SUB (NoneType)None 4 ASSIGN (NoneType)None 5 PUSH_VAR (str)pos 6 PUSH_CONST (int)0 7 LT (NoneType)None 8 JUMP_IF_FALSE (int)13 9 PUSH_CONST (int)1 10 NEG (NoneType)None 11 RETURN (NoneType)None 12 JUMP (int)52 13 PUSH_VAR (str)pos 14 PUSH_VAR (str)size 15 LT (NoneType)None 16 JUMP_IF_FALSE (int)22 17 PUSH_VAR (str)value 18 PUSH_VAR (str)pos 19 GET_ITEM (NoneType)None 20 RETURN (NoneType)None 21 JUMP (int)52 22 PUSH_VAR (str)pos 23 PUSH_VAR (str)size 24 GE (NoneType)None 25 JUMP_IF_FALSE (int)48 26 PUSH_VAR (str)value 27 PUSH_VAR (str)value 28 PUSH_VAR (str)value 29 PUSH_VAR (str)size 30 PUSH_CONST (int)1 31 SUB (NoneType)None 
32 GET_ITEM (NoneType)None 33 PUSH_VAR (str)value 34 PUSH_VAR (str)size 35 PUSH_CONST (int)2 36 SUB (NoneType)None 37 GET_ITEM (NoneType)None 38 ADD (NoneType)None 39 BUILD_LIST (int)1 40 ADD (NoneType)None 41 ASSIGN (NoneType)None 42 PUSH_VAR (str)size 43 PUSH_VAR (str)size 44 PUSH_CONST (int)1 45 ADD (NoneType)None 46 ASSIGN (NoneType)None 47 JUMP (int)22 48 PUSH_VAR (str)value 49 PUSH_VAR (str)pos 50 GET_ITEM (NoneType)None 51 RETURN (NoneType)None 55 [STEPIZE get] 0 PUSH_VAR (str)pos 1 PUSH_VAR (str)pos 2 PUSH_CONST (int)1 3 SUB (NoneType)None 4 ASSIGN (NoneType)None 5 PUSH_VAR (str)pos 6 PUSH_CONST (int)0 7 LT (NoneType)None 8 JUMP_IF_FALSE (int)13 9 PUSH_CONST (int)1 10 NEG (NoneType)None 11 RETURN (NoneType)None 12 JUMP (int)52 13 PUSH_VAR (str)pos 14 PUSH_VAR (str)size 15 LT (NoneType)None 16 JUMP_IF_FALSE (int)22 17 PUSH_VAR (str)value 18 PUSH_VAR (str)pos 19 GET_ITEM (NoneType)None 20 RETURN (NoneType)None 21 JUMP (int)52 22 PUSH_VAR (str)pos 23 PUSH_VAR (str)size 24 GE (NoneType)None 25 JUMP_IF_FALSE (int)48 26 PUSH_VAR (str)value 27 PUSH_VAR (str)value 28 PUSH_VAR (str)value 29 PUSH_VAR (str)size 30 PUSH_CONST (int)1 31 SUB (NoneType)None 32 GET_ITEM (NoneType)None 33 PUSH_VAR (str)value 34 PUSH_VAR (str)size 35 PUSH_CONST (int)2 36 SUB (NoneType)None 37 GET_ITEM (NoneType)None 38 ADD (NoneType)None 39 BUILD_LIST (int)1 40 ADD (NoneType)None 41 ASSIGN (NoneType)None 42 PUSH_VAR (str)size 43 PUSH_VAR (str)size 44 PUSH_CONST (int)1 45 ADD (NoneType)None 46 ASSIGN (NoneType)None 47 JUMP (int)22 48 PUSH_VAR (str)value 49 PUSH_VAR (str)pos 50 GET_ITEM (NoneType)None 51 RETURN (NoneType)None 6765
Auto的实现
每个Auto都有一个value,用来存储python内置类型
每个Auto都有一个namespace,用来存储内部名称空间(名称+值)
每个Auto都有一个belongto,用来表示属于哪个Auto的名称空间
若Auto被当做函数(call):
先看是不是buildin
若不是, 将value中的str作为函数体,argnames为参数名, 执行 stepize等等
若是, 执行buildin
可以任意使用/修改,上层或全局变量
其中产生的变量会加入namespace
固定返回一个值
class Auto(object):
    """A dynamically typed value of the language, also usable as a function.

    Attributes:
        value: the wrapped Python object.  For a user-defined function it is
            the source text of the function body.
        belongto: the Auto whose namespace owns this value, or None.
        argnames: parameter names bound when the value is called.
        buildin: optional Python callable taking the argument list; when set,
            call() delegates to it instead of interpreting ``value``.
        namespace: names visible inside this value; always contains 'self',
            plus '__name__' once the value has been given a name.
    """

    # Binary opcodes, each applied to the (left, right) Python values.
    _BINARY_OPS = {
        "GE": lambda left, right: left >= right,
        "GT": lambda left, right: left > right,
        "LE": lambda left, right: left <= right,
        "LT": lambda left, right: left < right,
        "EQ": lambda left, right: left == right,
        "NE": lambda left, right: left != right,
        "ADD": lambda left, right: left + right,
        "SUB": lambda left, right: left - right,
        "MUL": lambda left, right: left * right,
        # Integers keep floor division so results match on Python 2 and 3.
        "DIV": lambda left, right: (
            left // right
            if isinstance(left, int) and isinstance(right, int)
            else left / right),
        "AND": lambda left, right: left and right,
        "OR": lambda left, right: left or right,
    }

    def __init__(self, value=None, belongto=None, argnames=None, name=None,
                 buildin=None):
        self.value = value
        self.belongto = belongto
        self.argnames = argnames or []
        self.buildin = buildin
        if name:
            self.namespace = {'self': self, '__name__': Auto(name)}
        else:
            self.namespace = {'self': self}

    def _text(self):
        """Return str(value) with newlines shown as the two characters \\n."""
        return str(self.value).replace("\n", r"\n")

    def __str__(self):
        s = self._text()
        if output_short and len(s) > 15:
            return s[:10] + '...'
        return s

    def __repr__(self):
        s = self._text()
        if output_short and len(s) > 15:
            return "Auto(" + s[:10] + '...' + ")"
        return "Auto(" + s + ")"

    def call(self, args=None):
        """Run this value as a function and return the resulting Auto.

        A builtin is delegated to directly.  Otherwise ``value`` must be a
        string holding source text: it is tokenized, compiled to steps and
        executed.  Arguments are bound into ``namespace`` under ``argnames``
        (extra arguments or extra parameter names are ignored by zip).

        Returns the bottom stack entry left by the program, or Auto(None)
        when the stack is empty.

        Raises:
            Exception: "uncallable" when ``value`` is not a string.
        """
        if self.buildin is not None:
            return self.buildin(args)
        if not isinstance(self.value, str):
            raise Exception("uncallable")
        # The execution state lives on the instance, so a recursive call
        # would overwrite the caller's stack and program counter.  Save it
        # here and restore it on the way out.  The namespace is deliberately
        # NOT saved: variables persist between calls.
        saved = (getattr(self, 'stack', None), getattr(self, 'steps', None),
                 getattr(self, 'i', 0), getattr(self, 'l', 0))
        self.stack = []
        try:
            if args:
                for param, arg in zip(self.argnames, args):
                    arg.belongto = self
                    arg.namespace['__name__'] = Auto(param)
                    self.namespace[param] = arg
            funcname = self.namespace.get('__name__')
            if not funcname:
                funcname = ''
            else:
                funcname = str(funcname.value)
            if show_tokens:
                print("\n[TOKENIZE " + funcname + "]")
            tokens = tokenize(self.value)
            if show_steps:
                print("\n[STEPIZE " + funcname + "]")
            self.steps = stepize(tokens)
            if show_steps:
                for index, step in enumerate(self.steps):
                    print(" {0:3} {1}".format(index, step))
            # run steps
            if show_var:
                print("\n[CALL " + funcname + "]")
            self.l = len(self.steps)
            self.i = 0
            while self.i < self.l:
                self.step_once()
            if show_var:
                print("[END " + funcname + "]\n")
            result = self.stack[0] if self.stack else Auto(None)
        finally:
            self.stack, self.steps, self.i, self.l = saved
        return result

    def _pop_n(self, count):
        """Remove and return the top ``count`` stack entries, oldest first."""
        if count <= 0:
            # stack[-0:] would be the whole stack, so handle zero explicitly.
            return []
        taken = self.stack[-count:]
        del self.stack[-count:]
        return taken

    def step_once(self):
        """Execute the step at ``self.i`` and advance the program counter.

        Raises:
            Exception: for an opcode with no handler, or a GET_ITEM whose
                operand types are not supported.
        """
        step = self.steps[self.i]
        if show_var:
            print("{0} : {1}".format(self.i, step))
        self.i += 1
        op = step.type
        if op == "PUSH_VAR":
            # Look the name up here, then outwards through the owners.
            found = self.namespace.get(step.value)
            scope = self.belongto
            while found is None and scope is not None:
                found = scope.namespace.get(step.value)
                scope = scope.belongto
            if found is None:
                # Unknown name: push a named placeholder owned by this scope
                # so that a following ASSIGN knows where to store the value.
                found = Auto(None)
                found.namespace['__name__'] = Auto(step.value)
                found.belongto = self
            self.stack.append(found)
        elif op == "ASSIGN":
            value = self.stack.pop()
            target = self.stack.pop()
            name = target.namespace['__name__']
            owner = target.belongto if target.belongto is not None else self
            value.namespace['__name__'] = name
            value.belongto = owner
            owner.namespace[name.value] = value
        elif op == "PUSH_CONST":
            self.stack.append(Auto(step.value))
        elif op == "POP_ALL":
            self.stack = []
        elif op in self._BINARY_OPS:
            right = self.stack.pop()
            left = self.stack.pop()
            self.stack.append(
                Auto(self._BINARY_OPS[op](left.value, right.value)))
        elif op == "NOT":
            operand = self.stack.pop()
            self.stack.append(Auto(not operand.value))
        elif op == "NEG":
            operand = self.stack.pop()
            if isinstance(operand.value, str):
                # Negating a string reverses it.
                self.stack.append(Auto(operand.value[::-1]))
            else:
                self.stack.append(Auto(-operand.value))
        elif op == "JUMP_IF_FALSE":
            condition = self.stack.pop()
            if not condition.value:
                self.i = int(step.value)
        elif op == "JUMP":
            self.i = int(step.value)
        elif op == "PRINT":
            # Prints everything on the stack, space separated, on one line.
            print(" ".join(str(item) for item in self.stack))
            self.stack = []
        elif op == "GET_METHOD":
            attr = self.stack.pop()
            owner = self.stack.pop()
            member = owner.namespace.get(attr.value, Auto(None))
            member.belongto = owner
            self.stack.append(member)
        elif op == "CALL":
            args = self._pop_n(step.value)
            callee = self.stack.pop()
            result = callee.call(args)
            self.stack.append(result)
        elif op == "RETURN":
            result = self.stack.pop()
            self.stack = [result]
            self.i = self.l  # stop the run loop
        elif op == "MAKE_FUNCTION":
            names = self.stack.pop()
            body = self.stack.pop()
            if isinstance(body.value, str) and isinstance(names.value, list):
                self.stack.append(Auto(body.value, argnames=names.value))
            else:
                self.stack.append(Auto(None))
        elif op == 'BUILD_LIST':
            self.stack.append(Auto(self._pop_n(step.value)))
        elif op == 'BUILD_MAP':
            mapping = {}
            for _ in range(step.value):
                item = self.stack.pop()
                key = self.stack.pop()
                mapping[key.value] = item
            self.stack.append(Auto(mapping))
        elif op == 'GET_ITEM':
            index = self.stack.pop()
            container = self.stack.pop()
            key, target = index.value, container.value
            if isinstance(key, int) and isinstance(target, (list, str)):
                # Out-of-range indexes (either sign) yield Auto(None).
                if -len(target) <= key < len(target):
                    item = target[key]
                    if isinstance(target, str):
                        item = Auto(item)
                else:
                    item = Auto(None)
            elif isinstance(target, dict):
                item = target.get(key, Auto(None))
            else:
                raise Exception("error in getitem")
            item.belongto = container
            self.stack.append(item)
        else:
            raise Exception('canot step ' + op)
        if show_var:
            print(" " * 40 + " " + str(self.stack))
            print(" " * 40 + " " + str(self.namespace))

    def func_register(self, name, func):
        """Expose the Python callable ``func`` in this namespace as ``name``.

        ``func`` receives the list of argument Autos and must return an Auto.
        """
        self.namespace[name] = Auto("<buildin-function " + name + '>',
                                    buildin=func, name=name)
TOKENIZE
仍是暴力实现,不过有一个小问题,解析整数时不能解析-号,否则就会出错.
例如: x-1; 若 int 的模式是 -?\d+, 就会被切成 x 和 -1 两个token, 减号运算符丢失, 结果就不对,
所以int 应该是 \d+, 负号单独解析
STEPIZE
由于是手写的,所以就简单暴力点
<stmt> => 'print' ( <expr5> (',' <expr5>)* )? ';'
| if语句 | while语句| function语句 | name语句
| 'continue' ';'| 'break' ';' | 'return' <expr5>? ';' | <expr5> ';'
name语句 => 'name' ';' | 'name' 'assign' <expr5> ';' | 'name' ('inc' | 'dec') ';' | 'name' <name_tail> ';'
<expr5> => <expr4> //这是预留的一个优先级
<expr4> => <expr3> ( ('or' | 'and') <expr3> )*
<expr3> => <expr2> ( ('cmp') <expr2> )*
<expr2> => <expr1> ( ('+' | '-') <expr1> )*
<expr1> => <expr> ( ('*' | '/') <expr> )*
<expr> => ('not' | 'neg')* ( 'name' | 'str' | 'int' | '(' <expr5> ')' | function表达式 | list表达式 | dict表达式 ) <name_tail>
<name_tail> => '(' ( <expr5> (',' <expr5>)* )? ')' <name_tail>
| '[' <expr5> ']' <name_tail>
| '.' 'name' <name_tail>
| 空
所有代码:
"""A tiny dynamically typed language: tokenizer, bytecode compiler and VM.

Everything is built around the Auto class: a program is an Auto whose value
is source text, and Auto.call() runs tokenize -> stepize -> step_once.
"""
import re
from collections import namedtuple

# Debug switches read by tokenize() and Auto.
show_tokens = False
show_steps = True
show_var = False
output_short = False

try:
    _STRING_TYPES = (basestring,)  # Python 2: str and unicode
except NameError:
    _STRING_TYPES = (str,)

# (token type, regex) pairs tried in order; the first match wins, so longer
# operators and keywords must come before their prefixes and before 'name'.
# Group 1 of each regex is the token value.
patterns = [
    ('cmp', r'(==)'),
    ('cmp', r'(!=)'),
    ('cmp', r'(>=)'),
    ('cmp', r'(<=)'),
    ('cmp', r'(>)'),
    ('cmp', r'(<)'),

    # No leading '-': "x-1" must tokenize as name, '-', int.
    ('int', r'(\d+)'),
    ('str', r"'([^\n']*)'"),
    ('str', r'"([^\n"]*)"'),

    ('or', r'(\|\|)'),
    ('or', r'(\bor\b)'),
    ('and', r'(&&)'),
    ('and', r'(\band\b)'),

    ('inc', r'(\+\+)'),
    ('dec', r'(--)'),

    ('assign', r'(\+=)'),
    ('assign', r'(-=)'),
    ('assign', r'(\/=)'),
    ('assign', r'(\*=)'),
    ('assign', r'(=)'),

    ('+', r'(\+)'),
    ('-', r'(-)'),
    ('*', r'(\*)'),
    ('/', r'(\/)'),
    ('not', r'(!)'),
    ('not', r'\b(not)\b'),
    ('print', r'\b(print)\b'),

    (';', r'(;)'),
    (':', r'(:)'),
    (',', r'(,)'),
    ('.', r'(\.)'),
    ('(', r'(\()'),
    (')', r'(\))'),
    ('[', r'(\[)'),
    (']', r'(\])'),
    ('{', r'(\{)'),
    ('}', r'(\})'),

    ('if', r'(\bif\b)'),
    ('else', r'(\belse\b)'),
    ('for', r'(\bfor\b)'),
    ('while', r'(\bwhile\b)'),
    ('break', r'(\bbreak\b)'),
    ('continue', r'(\bcontinue\b)'),
    ('return', r'(\breturn\b)'),
    ('function', r'(\bfunction\b)'),
    ('True', r'(\bTrue\b)'),
    ('False', r'(\bFalse\b)'),

    ('name', r'([A-Za-z_][\w_]*)'),
]

# type: pattern name; value: regex group 1; str: preceding whitespace plus
# the full matched text (used to rebuild function bodies verbatim).
Token = namedtuple("Token", ["type", "value", "str"])


def _token_repr(self):
    return " {0:9} ==> {1:8} ({2})".format(
        self.type, self.value.replace("\n", r"\n"),
        self.str.replace("\n", r"\n"))


Token.__repr__ = _token_repr

# One bytecode instruction: opcode name plus an optional operand.
Step = namedtuple("Step", ["type", "value"])


def _step_repr(self):
    return " {0:14} ({2}){1:<12}".format(
        self.type, str(self.value).replace("\n", r"\n"),
        type(self.value).__name__)


Step.__repr__ = _step_repr


def tokenize(buf):
    """Split source text into a list of Token.

    Raises:
        Exception: "not a function" when ``buf`` is not a string, and
            "not match any pattern-" when no pattern matches the input.
    """
    if not isinstance(buf, _STRING_TYPES):
        raise Exception("not a function")
    tokens = []
    i = 0
    length = len(buf)
    while i < length:
        prestr = ""
        while i < length and buf[i] in " \r\n\t":
            prestr += buf[i]
            i += 1
        if i >= length:
            break
        for kind, pattern in patterns:
            m = re.match(pattern, buf[i:])
            if m:
                prestr += m.group(0)
                token = Token(kind, m.group(1), prestr)
                tokens.append(token)
                i += len(m.group(0))
                if show_tokens:
                    print(token)
                break
        else:
            raise Exception("not match any pattern-")
    return tokens


def stepize(tokens):
    """Compile a token list into a list of Step (recursive descent)."""

    class Trans(object):
        """Parser state: a cursor over the tokens plus the emitted steps."""

        # Compound assignment operator -> opcode applied before ASSIGN.
        ASSIGN_OPS = {'+=': "ADD", '*=': "MUL", '-=': "SUB", '/=': "DIV"}
        # Comparison operator -> opcode.
        CMP_OPS = {">=": "GE", "<=": "LE", "<": "LT", ">": "GT", "==": "EQ"}

        def __init__(self, tokens):
            self.i = 0
            self.tokens = tokens
            self.l = len(tokens)
            self.steps = []
            # One entry per enclosing while loop, holding the positions of
            # the break/continue JUMPs that still need a target.
            self.loops = []

        def pos(self):
            """Index the next emitted step will get."""
            return len(self.steps)

        def type(self):
            if self.i >= self.l:
                return None
            return self.tokens[self.i].type

        def prestr(self):
            if self.i >= self.l:
                return ""
            return self.tokens[self.i].str

        def reset(self, pos, value):
            """Back-patch the operand of the step at ``pos``."""
            self.steps[pos] = Step(self.steps[pos].type, value)

        def value(self):
            if self.i >= self.l:
                return None
            return self.tokens[self.i].value

        def push(self, t, v=None):
            self.steps.append(Step(t, v))

        def match(self, p):
            """Consume one token of type ``p`` or raise."""
            if self.i >= self.l:
                raise Exception("unexceptable end")
            if self.tokens[self.i].type != p:
                raise Exception("should be " + p)
            self.i += 1

        def function_def(self):
            """Parse 'function' [name] '(' params ')' '{' body '}'.

            The body is not compiled here; its source text is pushed as a
            constant and compiled when the function is called.  A named
            function is also assigned to its name.
            """
            self.i += 1  # the 'function' keyword
            name = ""
            if self.type() == 'name':
                name = self.value()
                self.push("PUSH_VAR", name)
                self.i += 1
            self.match('(')
            names = []
            while True:
                if self.type() == 'name':
                    names.append(self.value())
                    self.i += 1
                else:
                    self.match(')')
                    break
                if self.type() == ',':
                    self.i += 1
                elif self.type() == ')':
                    self.i += 1
                    break
                else:
                    raise Exception("bad function")
            self.match('{')
            depth = 1
            parts = []
            while True:
                kind = self.type()
                if kind is None:
                    # Unbalanced '{': stop instead of scanning forever.
                    raise Exception("unexceptable end")
                if kind == '{':
                    depth += 1
                elif kind == '}':
                    depth -= 1
                    if depth == 0:
                        self.i += 1
                        break
                parts.append(self.prestr())
                self.i += 1
            self.push('PUSH_CONST', "".join(parts))
            self.push('PUSH_CONST', names)
            self.push('MAKE_FUNCTION')
            if name:
                self.push("ASSIGN")

        def stmt(self):
            """Parse one statement."""
            kind = self.type()
            if kind == "print":
                self.i += 1
                while True:
                    if self.type() == ';':
                        self.i += 1
                        break
                    self.expr5()
                    if self.type() == ',':
                        self.i += 1
                    elif self.type() == ';':
                        self.i += 1
                        break
                    else:
                        raise Exception("not ok-")
                self.push("PRINT")
            elif kind == 'break' or kind == 'continue':
                if not self.loops:
                    raise Exception(kind + " outside loop")
                # Target is filled in when the enclosing while is finished.
                self.loops[-1][kind].append(self.pos())
                self.push("JUMP")
                self.i += 1
                self.match(';')
            elif kind == "return":
                self.i += 1
                if self.type() == ";":
                    self.push("PUSH_CONST", None)
                    self.i += 1
                else:
                    self.expr5()
                    self.match(';')
                self.push("RETURN")
            elif kind == 'function':
                self.function_def()
            elif kind == 'if':
                self.i += 1
                self.match('(')
                self.expr5()
                self.match(')')
                jump_pos = self.pos()
                self.push('JUMP_IF_FALSE')
                self.block()
                if self.type() == 'else':
                    self.i += 1
                    jump_back_pos = self.pos()
                    self.push('JUMP')
                    self.reset(jump_pos, self.pos())
                    self.block()
                    self.reset(jump_back_pos, self.pos())
                else:
                    self.reset(jump_pos, self.pos())
            elif kind == 'while':
                self.i += 1
                self.match('(')
                loop_start = self.pos()
                self.expr5()
                self.match(')')
                exit_jump = self.pos()
                self.push('JUMP_IF_FALSE')
                self.loops.append({'break': [], 'continue': []})
                self.block()
                pending = self.loops.pop()
                self.push('JUMP', loop_start)
                loop_end = self.pos()
                self.reset(exit_jump, loop_end)
                for at in pending['break']:
                    self.reset(at, loop_end)
                for at in pending['continue']:
                    self.reset(at, loop_start)
            elif kind == "name":
                self.push("PUSH_VAR", self.value())
                name = self.value()
                self.i += 1
                if self.type() == ';':
                    # Bare name: the value is left on the stack.
                    self.i += 1
                elif self.type() == 'assign':
                    t = self.value()
                    if t != '=':
                        self.push("PUSH_VAR", name)
                    self.i += 1
                    if t == '=':
                        self.expr5()
                    elif t in self.ASSIGN_OPS:
                        self.expr5()
                        self.push(self.ASSIGN_OPS[t])
                    else:
                        raise Exception("bad assign")
                    self.push("ASSIGN")
                    self.match(';')
                elif self.type() == "inc" or self.type() == "dec":
                    # "x++;" / "x--;" compile exactly like "x += 1;" /
                    # "x -= 1;" because the VM has no INC/DEC opcodes.
                    opcode = "ADD" if self.type() == "inc" else "SUB"
                    self.i += 1
                    self.push("PUSH_VAR", name)
                    self.push("PUSH_CONST", 1)
                    self.push(opcode)
                    self.push("ASSIGN")
                    self.match(';')
                else:
                    self.name_tail()
                    self.match(';')
                    self.push("POP_ALL")
            else:
                # Expression statement.  expr() emits nothing for a token it
                # does not recognise, which is what makes a lone ';' valid.
                self.expr5()
                self.push("POP_ALL")
                self.match(';')

        def expr(self):
            """Parse a primary expression followed by its call/index tail."""
            kind = self.type()
            if kind == "int":
                self.push("PUSH_CONST", int(self.value()))
                self.i += 1
            elif kind == "False":
                self.push("PUSH_CONST", False)
                self.i += 1
            elif kind == "True":
                self.push("PUSH_CONST", True)
                self.i += 1
            elif kind == "not":
                self.i += 1
                self.expr()
                self.push("NOT")
            elif kind == "-":
                self.i += 1
                self.expr()
                self.push("NEG")
            elif kind == "str":
                self.push("PUSH_CONST", str(self.value()))
                self.i += 1
            elif kind == "name":
                self.push("PUSH_VAR", self.value())
                self.i += 1
            elif kind == '(':
                self.i += 1
                self.expr5()
                self.match(")")
            elif kind == '[':
                self.i += 1
                count = 0
                while self.type() != ']':
                    self.expr5()
                    count += 1
                    if self.type() == ']':
                        break
                    self.match(',')
                self.match(']')
                self.push("BUILD_LIST", count)
            elif kind == '{':
                self.i += 1
                count = 0
                while self.type() != '}':
                    self.expr5()
                    self.match(':')
                    self.expr5()
                    count += 1
                    if self.type() == '}':
                        break
                    self.match(',')
                self.match('}')
                self.push("BUILD_MAP", count)
            elif kind == 'function':
                self.function_def()
            self.name_tail()

        def name_tail(self):
            """Parse any number of call, index and attribute suffixes."""
            while True:
                if self.type() == "(":
                    self.i += 1
                    count = 0
                    while True:
                        if self.type() == ")":
                            self.i += 1
                            break
                        self.expr5()
                        count += 1
                        if self.type() == ",":
                            self.i += 1
                        elif self.type() == ")":
                            self.i += 1
                            break
                        else:
                            raise Exception("not ok")
                    self.push("CALL", count)
                elif self.type() == '[':
                    self.i += 1
                    self.expr5()
                    self.match(']')
                    self.push("GET_ITEM")
                elif self.type() == '.':
                    self.i += 1
                    if self.type() != 'name':
                        raise Exception("need a name")
                    self.push("PUSH_CONST", self.value())
                    self.i += 1
                    self.push("GET_METHOD")
                elif self.type() == "inc":
                    # NOTE(review): the VM has no INC handler, so a postfix
                    # '++' inside an expression fails at run time with
                    # 'canot step INC'.
                    self.i += 1
                    self.push("INC")
                else:
                    break

        def expr1(self):
            """'*' and '/' level."""
            self.expr()
            while self.type() == '*' or self.type() == '/':
                t = self.type()
                self.i += 1
                self.expr()
                self.push("MUL" if t == "*" else "DIV")

        def expr2(self):
            """'+' and '-' level."""
            self.expr1()
            while self.type() == '+' or self.type() == '-':
                t = self.type()
                self.i += 1
                self.expr1()
                self.push("ADD" if t == "+" else "SUB")

        def expr3(self):
            """Comparison level."""
            self.expr2()
            while self.type() == "cmp":
                t = self.value()
                self.i += 1
                self.expr2()
                self.push(self.CMP_OPS.get(t, "NE"))

        def expr4(self):
            """'or' / 'and' level (same precedence, left to right)."""
            self.expr3()
            while self.type() == 'or' or self.type() == 'and':
                t = self.type()
                self.i += 1
                self.expr3()
                self.push("OR" if t == "or" else "AND")

        def expr5(self):
            """Top expression level, reserved for a future precedence."""
            self.expr4()

        def block(self):
            """Parse '{' stmt* '}' or a single statement."""
            if self.type() == '{':
                self.i += 1
                while self.type() != '}':
                    self.stmt()
                self.i += 1
            else:
                self.stmt()

        def eval(self):
            while self.i < self.l:
                self.stmt()

    t = Trans(tokens)
    t.eval()
    return t.steps


class Auto(object):
    """A dynamically typed value of the language, also usable as a function.

    Attributes:
        value: the wrapped Python object.  For a user-defined function it is
            the source text of the function body.
        belongto: the Auto whose namespace owns this value, or None.
        argnames: parameter names bound when the value is called.
        buildin: optional Python callable taking the argument list; when set,
            call() delegates to it instead of interpreting ``value``.
        namespace: names visible inside this value; always contains 'self',
            plus '__name__' once the value has been given a name.
    """

    # Binary opcodes, each applied to the (left, right) Python values.
    _BINARY_OPS = {
        "GE": lambda left, right: left >= right,
        "GT": lambda left, right: left > right,
        "LE": lambda left, right: left <= right,
        "LT": lambda left, right: left < right,
        "EQ": lambda left, right: left == right,
        "NE": lambda left, right: left != right,
        "ADD": lambda left, right: left + right,
        "SUB": lambda left, right: left - right,
        "MUL": lambda left, right: left * right,
        # Integers keep floor division so results match on Python 2 and 3.
        "DIV": lambda left, right: (
            left // right
            if isinstance(left, int) and isinstance(right, int)
            else left / right),
        "AND": lambda left, right: left and right,
        "OR": lambda left, right: left or right,
    }

    def __init__(self, value=None, belongto=None, argnames=None, name=None,
                 buildin=None):
        self.value = value
        self.belongto = belongto
        self.argnames = argnames or []
        self.buildin = buildin
        if name:
            self.namespace = {'self': self, '__name__': Auto(name)}
        else:
            self.namespace = {'self': self}

    def _text(self):
        """Return str(value) with newlines shown as the two characters \\n."""
        return str(self.value).replace("\n", r"\n")

    def __str__(self):
        s = self._text()
        if output_short and len(s) > 15:
            return s[:10] + '...'
        return s

    def __repr__(self):
        s = self._text()
        if output_short and len(s) > 15:
            return "Auto(" + s[:10] + '...' + ")"
        return "Auto(" + s + ")"

    def call(self, args=None):
        """Run this value as a function and return the resulting Auto.

        A builtin is delegated to directly.  Otherwise ``value`` must be a
        string holding source text: it is tokenized, compiled to steps and
        executed.  Arguments are bound into ``namespace`` under ``argnames``
        (extra arguments or extra parameter names are ignored by zip).

        Returns the bottom stack entry left by the program, or Auto(None)
        when the stack is empty.

        Raises:
            Exception: "uncallable" when ``value`` is not a string.
        """
        if self.buildin is not None:
            return self.buildin(args)
        if not isinstance(self.value, str):
            raise Exception("uncallable")
        # The execution state lives on the instance, so a recursive call
        # would overwrite the caller's stack and program counter.  Save it
        # here and restore it on the way out.  The namespace is deliberately
        # NOT saved: variables persist between calls.
        saved = (getattr(self, 'stack', None), getattr(self, 'steps', None),
                 getattr(self, 'i', 0), getattr(self, 'l', 0))
        self.stack = []
        try:
            if args:
                for param, arg in zip(self.argnames, args):
                    arg.belongto = self
                    arg.namespace['__name__'] = Auto(param)
                    self.namespace[param] = arg
            funcname = self.namespace.get('__name__')
            if not funcname:
                funcname = ''
            else:
                funcname = str(funcname.value)
            if show_tokens:
                print("\n[TOKENIZE " + funcname + "]")
            tokens = tokenize(self.value)
            if show_steps:
                print("\n[STEPIZE " + funcname + "]")
            self.steps = stepize(tokens)
            if show_steps:
                for index, step in enumerate(self.steps):
                    print(" {0:3} {1}".format(index, step))
            # run steps
            if show_var:
                print("\n[CALL " + funcname + "]")
            self.l = len(self.steps)
            self.i = 0
            while self.i < self.l:
                self.step_once()
            if show_var:
                print("[END " + funcname + "]\n")
            result = self.stack[0] if self.stack else Auto(None)
        finally:
            self.stack, self.steps, self.i, self.l = saved
        return result

    def _pop_n(self, count):
        """Remove and return the top ``count`` stack entries, oldest first."""
        if count <= 0:
            # stack[-0:] would be the whole stack, so handle zero explicitly.
            return []
        taken = self.stack[-count:]
        del self.stack[-count:]
        return taken

    def step_once(self):
        """Execute the step at ``self.i`` and advance the program counter.

        Raises:
            Exception: for an opcode with no handler, or a GET_ITEM whose
                operand types are not supported.
        """
        step = self.steps[self.i]
        if show_var:
            print("{0} : {1}".format(self.i, step))
        self.i += 1
        op = step.type
        if op == "PUSH_VAR":
            # Look the name up here, then outwards through the owners.
            found = self.namespace.get(step.value)
            scope = self.belongto
            while found is None and scope is not None:
                found = scope.namespace.get(step.value)
                scope = scope.belongto
            if found is None:
                # Unknown name: push a named placeholder owned by this scope
                # so that a following ASSIGN knows where to store the value.
                found = Auto(None)
                found.namespace['__name__'] = Auto(step.value)
                found.belongto = self
            self.stack.append(found)
        elif op == "ASSIGN":
            value = self.stack.pop()
            target = self.stack.pop()
            name = target.namespace['__name__']
            owner = target.belongto if target.belongto is not None else self
            value.namespace['__name__'] = name
            value.belongto = owner
            owner.namespace[name.value] = value
        elif op == "PUSH_CONST":
            self.stack.append(Auto(step.value))
        elif op == "POP_ALL":
            self.stack = []
        elif op in self._BINARY_OPS:
            right = self.stack.pop()
            left = self.stack.pop()
            self.stack.append(
                Auto(self._BINARY_OPS[op](left.value, right.value)))
        elif op == "NOT":
            operand = self.stack.pop()
            self.stack.append(Auto(not operand.value))
        elif op == "NEG":
            operand = self.stack.pop()
            if isinstance(operand.value, str):
                # Negating a string reverses it.
                self.stack.append(Auto(operand.value[::-1]))
            else:
                self.stack.append(Auto(-operand.value))
        elif op == "JUMP_IF_FALSE":
            condition = self.stack.pop()
            if not condition.value:
                self.i = int(step.value)
        elif op == "JUMP":
            self.i = int(step.value)
        elif op == "PRINT":
            # Prints everything on the stack, space separated, on one line.
            print(" ".join(str(item) for item in self.stack))
            self.stack = []
        elif op == "GET_METHOD":
            attr = self.stack.pop()
            owner = self.stack.pop()
            member = owner.namespace.get(attr.value, Auto(None))
            member.belongto = owner
            self.stack.append(member)
        elif op == "CALL":
            args = self._pop_n(step.value)
            callee = self.stack.pop()
            result = callee.call(args)
            self.stack.append(result)
        elif op == "RETURN":
            result = self.stack.pop()
            self.stack = [result]
            self.i = self.l  # stop the run loop
        elif op == "MAKE_FUNCTION":
            names = self.stack.pop()
            body = self.stack.pop()
            if isinstance(body.value, str) and isinstance(names.value, list):
                self.stack.append(Auto(body.value, argnames=names.value))
            else:
                self.stack.append(Auto(None))
        elif op == 'BUILD_LIST':
            self.stack.append(Auto(self._pop_n(step.value)))
        elif op == 'BUILD_MAP':
            mapping = {}
            for _ in range(step.value):
                item = self.stack.pop()
                key = self.stack.pop()
                mapping[key.value] = item
            self.stack.append(Auto(mapping))
        elif op == 'GET_ITEM':
            index = self.stack.pop()
            container = self.stack.pop()
            key, target = index.value, container.value
            if isinstance(key, int) and isinstance(target, (list, str)):
                # Out-of-range indexes (either sign) yield Auto(None).
                if -len(target) <= key < len(target):
                    item = target[key]
                    if isinstance(target, str):
                        item = Auto(item)
                else:
                    item = Auto(None)
            elif isinstance(target, dict):
                item = target.get(key, Auto(None))
            else:
                raise Exception("error in getitem")
            item.belongto = container
            self.stack.append(item)
        else:
            raise Exception('canot step ' + op)
        if show_var:
            print(" " * 40 + " " + str(self.stack))
            print(" " * 40 + " " + str(self.namespace))

    def func_register(self, name, func):
        """Expose the Python callable ``func`` in this namespace as ``name``.

        ``func`` receives the list of argument Autos and must return an Auto.
        """
        self.namespace[name] = Auto("<buildin-function " + name + '>',
                                    buildin=func, name=name)


def function_str(args):
    """Builtin str(x): the display text of the first argument."""
    return Auto(str(args[0]))


def function_int(args):
    """Builtin int(x): convert the first argument's value to an integer."""
    return Auto(int(args[0].value))


def function_len(args):
    """Builtin len(x): length of the first argument's value."""
    return Auto(len(args[0].value))


if __name__ == '__main__':
    with open("hello.pym") as source:
        a = Auto(source.read(), name='__main__')
    a.func_register('str', function_str)
    a.func_register('int', function_int)
    a.func_register('len', function_len)
    a.call()