实现一个编译器

最终效果

动态类型, 内置int,str,list,dict(from python), function

function执行后可以保留内部的变量.

不想自己实现这么多类型,用python来实现

整个语言基于Auto变量,所以两句话就可以跑起编译器

a = Auto(open("hello.pym").read(), name='__main__')
a.call()

call会执行几个过程:

1) tokenize:词法分析

2) stepize:产生字节码

3) step:执行字节码

 

若输入代码

 1 fab = function() {
 2     value = [1,1];
 3     size = len(value);
 4     function get(pos) {
 5         pos -= 1;
 6         if (pos < 0) {
 7             return -1;
 8         } else if (pos < size) {
 9             return value[pos];
10         } else {
11             while (pos >= size) {
12                 value += [value[size-1]+value[size-2]];
13                 size += 1;
14             }
15             return value[pos];
16         }
17     }
18 };
19 fab();
20 print fab.get(10);
21 print fab.get(20);

会产生输出

55
6765

运行中产生的字节码:

[STEPIZE __main__]
   0  PUSH_VAR       (str)fab         
   1  PUSH_CONST     (str)\n    value = [1,1];\n    size = len(value);\n    function get(pos) {\n        pos -= 1;\n        if (pos < 0) {\n            return -1;\n        } else if (pos < size) {\n            return value[pos];\n        } else {\n            while (pos >= size) {\n                value += [value[size-1]+value[size-2]];\n                size += 1;\n            }\n            return value[pos];\n        }\n    }
   2  PUSH_CONST     (list)[]          
   3  MAKE_FUNCTION  (NoneType)None        
   4  ASSIGN         (NoneType)None        
   5  PUSH_VAR       (str)fab         
   6  CALL           (int)0           
   7  POP_ALL        (NoneType)None        
   8  PUSH_VAR       (str)fab         
   9  PUSH_CONST     (str)get         
  10  GET_METHOD     (NoneType)None        
  11  PUSH_CONST     (int)10          
  12  CALL           (int)1           
  13  PRINT          (NoneType)None        
  14  PUSH_VAR       (str)fab         
  15  PUSH_CONST     (str)get         
  16  GET_METHOD     (NoneType)None        
  17  PUSH_CONST     (int)20          
  18  CALL           (int)1           
  19  PRINT          (NoneType)None        

[STEPIZE fab]
   0  PUSH_VAR       (str)value       
   1  PUSH_CONST     (int)1           
   2  PUSH_CONST     (int)1           
   3  BUILD_LIST     (int)2           
   4  ASSIGN         (NoneType)None        
   5  PUSH_VAR       (str)size        
   6  PUSH_VAR       (str)len         
   7  PUSH_VAR       (str)value       
   8  CALL           (int)1           
   9  ASSIGN         (NoneType)None        
  10  PUSH_VAR       (str)get         
  11  PUSH_CONST     (str)\n        pos -= 1;\n        if (pos < 0) {\n            return -1;\n        } else if (pos < size) {\n            return value[pos];\n        } else {\n            while (pos >= size) {\n                value += [value[size-1]+value[size-2]];\n                size += 1;\n            }\n            return value[pos];\n        }
  12  PUSH_CONST     (list)['pos']     
  13  MAKE_FUNCTION  (NoneType)None        
  14  ASSIGN         (NoneType)None        

[STEPIZE get]
   0  PUSH_VAR       (str)pos         
   1  PUSH_VAR       (str)pos         
   2  PUSH_CONST     (int)1           
   3  SUB            (NoneType)None        
   4  ASSIGN         (NoneType)None        
   5  PUSH_VAR       (str)pos         
   6  PUSH_CONST     (int)0           
   7  LT             (NoneType)None        
   8  JUMP_IF_FALSE  (int)13          
   9  PUSH_CONST     (int)1           
  10  NEG            (NoneType)None        
  11  RETURN         (NoneType)None        
  12  JUMP           (int)52          
  13  PUSH_VAR       (str)pos         
  14  PUSH_VAR       (str)size        
  15  LT             (NoneType)None        
  16  JUMP_IF_FALSE  (int)22          
  17  PUSH_VAR       (str)value       
  18  PUSH_VAR       (str)pos         
  19  GET_ITEM       (NoneType)None        
  20  RETURN         (NoneType)None        
  21  JUMP           (int)52          
  22  PUSH_VAR       (str)pos         
  23  PUSH_VAR       (str)size        
  24  GE             (NoneType)None        
  25  JUMP_IF_FALSE  (int)48          
  26  PUSH_VAR       (str)value       
  27  PUSH_VAR       (str)value       
  28  PUSH_VAR       (str)value       
  29  PUSH_VAR       (str)size        
  30  PUSH_CONST     (int)1           
  31  SUB            (NoneType)None        
  32  GET_ITEM       (NoneType)None        
  33  PUSH_VAR       (str)value       
  34  PUSH_VAR       (str)size        
  35  PUSH_CONST     (int)2           
  36  SUB            (NoneType)None        
  37  GET_ITEM       (NoneType)None        
  38  ADD            (NoneType)None        
  39  BUILD_LIST     (int)1           
  40  ADD            (NoneType)None        
  41  ASSIGN         (NoneType)None        
  42  PUSH_VAR       (str)size        
  43  PUSH_VAR       (str)size        
  44  PUSH_CONST     (int)1           
  45  ADD            (NoneType)None        
  46  ASSIGN         (NoneType)None        
  47  JUMP           (int)22          
  48  PUSH_VAR       (str)value       
  49  PUSH_VAR       (str)pos         
  50  GET_ITEM       (NoneType)None        
  51  RETURN         (NoneType)None        
55

[STEPIZE get]
   0  PUSH_VAR       (str)pos         
   1  PUSH_VAR       (str)pos         
   2  PUSH_CONST     (int)1           
   3  SUB            (NoneType)None        
   4  ASSIGN         (NoneType)None        
   5  PUSH_VAR       (str)pos         
   6  PUSH_CONST     (int)0           
   7  LT             (NoneType)None        
   8  JUMP_IF_FALSE  (int)13          
   9  PUSH_CONST     (int)1           
  10  NEG            (NoneType)None        
  11  RETURN         (NoneType)None        
  12  JUMP           (int)52          
  13  PUSH_VAR       (str)pos         
  14  PUSH_VAR       (str)size        
  15  LT             (NoneType)None        
  16  JUMP_IF_FALSE  (int)22          
  17  PUSH_VAR       (str)value       
  18  PUSH_VAR       (str)pos         
  19  GET_ITEM       (NoneType)None        
  20  RETURN         (NoneType)None        
  21  JUMP           (int)52          
  22  PUSH_VAR       (str)pos         
  23  PUSH_VAR       (str)size        
  24  GE             (NoneType)None        
  25  JUMP_IF_FALSE  (int)48          
  26  PUSH_VAR       (str)value       
  27  PUSH_VAR       (str)value       
  28  PUSH_VAR       (str)value       
  29  PUSH_VAR       (str)size        
  30  PUSH_CONST     (int)1           
  31  SUB            (NoneType)None        
  32  GET_ITEM       (NoneType)None        
  33  PUSH_VAR       (str)value       
  34  PUSH_VAR       (str)size        
  35  PUSH_CONST     (int)2           
  36  SUB            (NoneType)None        
  37  GET_ITEM       (NoneType)None        
  38  ADD            (NoneType)None        
  39  BUILD_LIST     (int)1           
  40  ADD            (NoneType)None        
  41  ASSIGN         (NoneType)None        
  42  PUSH_VAR       (str)size        
  43  PUSH_VAR       (str)size        
  44  PUSH_CONST     (int)1           
  45  ADD            (NoneType)None        
  46  ASSIGN         (NoneType)None        
  47  JUMP           (int)22          
  48  PUSH_VAR       (str)value       
  49  PUSH_VAR       (str)pos         
  50  GET_ITEM       (NoneType)None        
  51  RETURN         (NoneType)None        
6765

Auto的实现

每个Auto都有一个value,用来存储python内置类型

每个Auto都有一个namespace,用来存储内部名称空间(名称+值)

每个Auto都有一个belongto,用来表示它属于哪个Auto的名称空间

若Auto被当做函数(call):

  先看是不是buildin

  若不是, 将value中的str作为函数体,argnames为参数名, 执行 stepize等等

  若是, 执行buildin

  可以任意使用/修改,上层或全局变量

  其中产生的变量会加入namespace

  固定返回一个值

 

class Auto(object):
    """Universal runtime object of the toy language.

    An Auto wraps one Python value and can also be called: when ``value`` is
    a source string, ``call`` tokenizes it, turns it into steps and executes
    those steps on a small stack machine.

    Attributes:
        value: the wrapped Python object; for a user function this is the
            source text of the function body.
        belongto: the Auto whose namespace this object lives in, or None.
        argnames: parameter names bound by ``call``.
        buildin: optional Python callable; when set, ``call`` delegates to it.
        namespace: dict mapping names to Auto objects. It always holds
            'self' and, when a name was given, '__name__'.
    """

    # Binary opcodes. Each entry receives (second-from-top, top) of the
    # stack, already unwrapped to plain Python values.
    _BINARY_OPS = {
        "GE": lambda lhs, rhs: lhs >= rhs,
        "GT": lambda lhs, rhs: lhs > rhs,
        "LE": lambda lhs, rhs: lhs <= rhs,
        "LT": lambda lhs, rhs: lhs < rhs,
        "EQ": lambda lhs, rhs: lhs == rhs,
        "NE": lambda lhs, rhs: lhs != rhs,
        "ADD": lambda lhs, rhs: lhs + rhs,
        "SUB": lambda lhs, rhs: lhs - rhs,
        # MUL is evaluated as top * second.
        "MUL": lambda lhs, rhs: rhs * lhs,
        "DIV": lambda lhs, rhs: lhs / rhs,
        "AND": lambda lhs, rhs: lhs and rhs,
        "OR": lambda lhs, rhs: lhs or rhs,
    }

    def __init__(self, value=None, belongto=None, argnames=None, name=None, buildin=None):
        self.value = value
        self.belongto = belongto
        self.argnames = argnames or []
        self.buildin = buildin
        self.namespace = {'self': self}
        if name:
            self.namespace['__name__'] = Auto(name)

    def _display(self):
        """Return the value as one line, shortened when output_short is set."""
        text = str(self.value).replace("\n", r"\n")
        if output_short and len(text) > 15:
            return text[:10] + '...'
        return text

    def __str__(self):
        return self._display()

    def __repr__(self):
        return "Auto(" + self._display() + ")"

    def _pop_n(self, count):
        """Remove and return the top ``count`` stack entries, oldest first.

        A plain ``self.stack[-count:]`` would return the whole stack for
        ``count == 0`` because ``-0`` is ``0``, so zero is handled apart.
        """
        if count <= 0:
            return []
        if count > len(self.stack):
            raise IndexError("pop from empty list")
        items = self.stack[-count:]
        del self.stack[-count:]
        return items

    def call(self, args=None):
        """Run this object as a function and return the resulting Auto.

        A builtin is invoked directly with ``args``. Otherwise ``value`` must
        be a source string: the arguments are bound to ``argnames`` inside
        this object's namespace, the source is tokenized and compiled, and
        the steps run until the end or a RETURN.

        Raises:
            Exception: "uncallable" when ``value`` is not a string.
        """
        if self.buildin is not None:
            return self.buildin(args)
        self.stack = []
        if not isinstance(self.value, str):
            raise Exception("uncallable")
        if args:
            for argname, arg in zip(self.argnames, args):
                arg.belongto = self
                arg.namespace['__name__'] = Auto(argname)
                self.namespace[argname] = arg
        name_auto = self.namespace.get('__name__')
        funcname = str(name_auto.value) if name_auto else ''

        if show_tokens:
            print("\n[TOKENIZE " + funcname + "]")
        tokens = tokenize(self.value)
        if show_steps:
            print("\n[STEPIZE " + funcname + "]")
        self.steps = stepize(tokens)
        if show_steps:
            for index, step in enumerate(self.steps):
                print(" {0:3} ".format(index) + str(step))

        # Execute the steps; step_once advances (or redirects) self.i.
        if show_var:
            print("\n[CALL " + funcname + "]")
        self.l = len(self.steps)
        self.i = 0
        while self.i < self.l:
            self.step_once()
        if show_var:
            print("[END " + funcname + "]\n")
        if self.stack:
            return self.stack[0]
        return Auto(None)

    def step_once(self):
        """Execute the step at ``self.i`` and move the instruction pointer.

        Raises:
            Exception: for a step type that has no handler.
        """
        t = self.steps[self.i]
        if show_var:
            print(str(self.i) + " : " + str(t))
        self.i += 1
        op = t.type

        if op in self._BINARY_OPS:
            rhs = self.stack.pop()
            lhs = self.stack.pop()
            self.stack.append(Auto(self._BINARY_OPS[op](lhs.value, rhs.value)))
        elif op == "PUSH_VAR":
            # Look the name up locally, then walk outwards through the owners.
            found = self.namespace.get(t.value)
            scope = self.belongto
            while found is None and scope is not None:
                found = scope.namespace.get(t.value, None)
                scope = scope.belongto
            if found is None:
                # Unknown name: push a fresh placeholder that remembers its
                # name so that a following ASSIGN can bind it.
                found = Auto(None)
                found.namespace['__name__'] = Auto(t.value)
                found.belongto = self
            self.stack.append(found)
        elif op == "ASSIGN":
            new_value = self.stack.pop()
            target = self.stack.pop()
            name = target.namespace['__name__']
            # Bind in the namespace the target already lives in, else here.
            owner = target.belongto if target.belongto is not None else self
            new_value.namespace['__name__'] = name
            new_value.belongto = owner
            owner.namespace[name.value] = new_value
        elif op == "PUSH_CONST":
            self.stack.append(Auto(t.value))
        elif op == "POP_ALL":
            self.stack = []
        elif op == "NOT":
            operand = self.stack.pop()
            self.stack.append(Auto(not operand.value))
        elif op == "NEG":
            operand = self.stack.pop()
            if isinstance(operand.value, str):
                # Negating a string reverses it.
                self.stack.append(Auto(operand.value[::-1]))
            else:
                self.stack.append(Auto(-operand.value))
        elif op == "JUMP_IF_FALSE":
            condition = self.stack.pop()
            if not condition.value:
                self.i = int(t.value)
        elif op == "JUMP":
            self.i = int(t.value)
        elif op == "PRINT":
            print(" ".join(str(item) for item in self.stack))
            self.stack = []
        elif op == "GET_METHOD":
            attr = self.stack.pop()
            owner = self.stack.pop()
            member = owner.namespace.get(attr.value, Auto(None))
            member.belongto = owner
            self.stack.append(member)
        elif op == "CALL":
            args = self._pop_n(t.value)
            callee = self.stack.pop()
            self.stack.append(callee.call(args))
        elif op == "RETURN":
            result = self.stack.pop()
            self.stack = [result]
            self.i = self.l  # ends the loop in call()
        elif op == "MAKE_FUNCTION":
            names = self.stack.pop()
            body = self.stack.pop()
            if isinstance(body.value, str) and isinstance(names.value, list):
                self.stack.append(Auto(body.value, argnames=names.value))
            else:
                self.stack.append(Auto(None))
        elif op == 'BUILD_LIST':
            self.stack.append(Auto(self._pop_n(t.value)))
        elif op == 'BUILD_MAP':
            mapping = {}
            for _ in range(t.value):
                item = self.stack.pop()
                key = self.stack.pop()
                mapping[key.value] = item
            self.stack.append(Auto(mapping))
        elif op == 'GET_ITEM':
            index = self.stack.pop()
            container = self.stack.pop()
            if isinstance(index.value, int) and isinstance(container.value, list):
                # Out-of-range indices (either sign) give Auto(None).
                if -len(container.value) <= index.value < len(container.value):
                    element = container.value[index.value]
                else:
                    element = Auto(None)
            elif isinstance(index.value, int) and isinstance(container.value, str):
                if -len(container.value) <= index.value < len(container.value):
                    element = Auto(container.value[index.value])
                else:
                    element = Auto(None)
            elif isinstance(index.value, str) and isinstance(container.value, dict):
                element = container.value.get(index.value, Auto(None))
            else:
                raise Exception("error in getitem")
            element.belongto = container
            self.stack.append(element)
        else:
            # Also reached for INC / DEC, which have no handler here.
            raise Exception('canot step ' + t.type)

        if show_var:
            print(" " * 40 + " " + str(self.stack))
            print(" " * 40 + " " + str(self.namespace))

    def func_register(self, name, func):
        """Expose the Python callable ``func`` under ``name`` in this namespace."""
        self.namespace[name] = Auto("<buildin-function " + name + '>', buildin=func, name=name)

TOKENIZE

  仍是暴力实现,不过有一个需要注意的问题:解析整数时不能把负号一起解析,否则会出错.

  例如: x-1; 若int的模式是 -?\d+ ,就会被切分成 name(x) 和 int(-1) 两个token,减号运算符丢失,

  所以int的模式应该是 \d+ ,负号作为单独的'-'运算符解析

STEPIZE

  由于是手写的,所以就简单暴力点

  <stmt> => 'print' ( <expr5> (',' <expr5>)* )?  ';'

      | if语句 | while语句| function语句 | name语句

      | 'continue' ';'| 'break' ';' | 'return' <expr5>?  ';' | <expr5> ';'

  name语句 =>  'name' ';'  | 'name' 'assign' <expr5> ';' | 'name' 'inc' ';' | 'name' 'dec' ';' | 'name' <name_tail> ';'

  <expr5> => <expr4>      //这是预留的一个优先级

  <expr4> => <expr3> ( ('or' | 'and')   <expr3> )*

  <expr3> => <expr2> ( ('cmp')   <expr2> )*

  <expr2> => <expr1> ( ('+' | '-') <expr1> )*

  <expr1> => <expr>  ( ('*' | '/') <expr> )*

  <expr>  => ('not' | '-')* ( 'name' | 'str' | 'int' | 'True' | 'False' | '(' <expr5> ')' | function表达式 | list表达式 | dict表达式 ) <name_tail>

  <name_tail> => '(' ( <expr5> (',' <expr5>)* )? ')' <name_tail>

        | '[' <expr5> ']' <name_tail>

        | '.' 'name' <name_tail>

        | 空

 

所有代码:

  1 import re
  2 from collections import namedtuple
  3 
  4 show_tokens = False
  5 show_steps = True
  6 show_var = False
  7 output_short = False
  8 
  9 patterns = [
 10     #('\n',        r'(\n)'),
 11 
 12     ('cmp',        r'(==)'),
 13     ('cmp',        r'(!=)'),
 14     ('cmp',        r'(>=)'),
 15     ('cmp',        r'(<=)'),
 16     ('cmp',        r'(>)'),
 17     ('cmp',        r'(<)'),
 18 
 19     ('int',        r'(\d+)'),
 20     ('str',        r"'([^\n']*)'"),
 21     ('str',        r'"([^\n"]*)"'),
 22     
 23     ('or',        r'(\|\|)'),
 24     ('or',        r'(\bor\b)'),
 25     ('and',        r'(&&)'),
 26     ('and',        r'(\band\b)'),
 27     
 28     ('inc',        r'(\+\+)'),
 29     ('dec',        r'(--)'),
 30 
 31     ('assign',    r'(\+=)'),
 32     ('assign',    r'(-=)'),
 33     ('assign',    r'(\/=)'),
 34     ('assign',    r'(\*=)'),
 35     ('assign',    r'(=)'),
 36 
 37     ('+',        r'(\+)'),
 38     ('-',        r'(-)'),
 39     ('*',        r'(\*)'),
 40     ('/',        r'(\/)'),
 41     ('not',        r'(!)'),
 42     ('not',        r'\b(not)\b'),
 43     ('print',    r'\b(print)\b'),
 44 
 45     (';',        r'(;)'),
 46     (':',        r'(:)'),
 47     (',',        r'(,)'),
 48     ('.',        r'(\.)'),
 49     ('(',        r'(\()'),
 50     (')',        r'(\))'),
 51     ('[',        r'(\[)'),
 52     (']',        r'(\])'),
 53     ('{',        r'(\{)'),
 54     ('}',        r'(\})'),
 55 
 56     ('if',        r'(\bif\b)'),
 57     ('else',    r'(\belse\b)'),
 58     ('for',        r'(\bfor\b)'),
 59     ('while',    r'(\bwhile\b)'),
 60     ('break',    r'(\bbreak\b)'),
 61     ('continue',r'(\bcontinue\b)'),
 62     ('return',    r'(\breturn\b)'),
 63     ('function',r'(\bfunction\b)'),
 64     ('True',    r'(\bTrue\b)'),
 65     ('False',    r'(\bFalse\b)'),
 66     
 67     ('name',    r'([A-Za-z_][\w_]*)'),
 68 ]
 69 
 70 Token = namedtuple("Token", ["type", "value", "str"])
 71 def _token_repr(self):
 72     return " {0:9} ==>  {1:8} ({2})".format(self.type, self.value.replace("\n", r"\n"), self.str.replace("\n", r"\n"))
 73 Token.__repr__ = _token_repr
 74 Step = namedtuple("Step", ["type", "value"])
 75 def _step_repr(self):
 76     return " {0:14} ({2}){1:<12}".format(self.type, str(self.value).replace("\n", r"\n"), 
 77                                          str(type(self.value)).replace("<type '","").replace("'>",""))
 78 Step.__repr__ = _step_repr
 79 
 80 def tokenize(buf):
 81     if type(buf) == "str":
 82         raise Exception("not a function")
 83     tokens = []
 84     i = 0
 85     l = len(buf)
 86     while i < l:
 87         prestr = ""
 88         while i < l and buf[i] in " \r\n\t":
 89             prestr += buf[i]
 90             i += 1
 91         if i >= l:
 92             break
 93         for t,p in patterns:
 94             m = re.match(p, buf[i:])
 95             if m:
 96                 prestr += m.group(0)
 97                 token = Token(t, m.group(1), prestr)
 98                 tokens.append(token)
 99                 i += len(m.group(0))
100                 if show_tokens:
101                     print(token)
102                 break
103         else:
104             raise Exception("not match any pattern-")
105     return tokens
106 
107 def stepize(tokens):
108     class Trans(object):
109         def __init__(self, tokens):
110             self.i = 0
111             self.tokens = tokens
112             self.l = len(tokens)
113             self.steps = []
114             self.continue_point = -1
115             self.break_point = -1
116         def pos(self):
117             return len(self.steps)
118         def type(self):
119             if self.i >= self.l:
120                 return None
121             return self.tokens[self.i].type
122         def prestr(self):
123             if self.i >= self.l:
124                 return ""
125             return self.tokens[self.i].str
126         def reset(self, pos, value):
127             self.steps[pos] = Step(self.steps[pos].type, value)
128         def value(self):
129             if self.i >= self.l:
130                 return None
131             return self.tokens[self.i].value
132         def push(self, t, v=None):
133             self.steps.append(Step(t,v))
134         def match(self, p):
135             if self.i >= self.l:
136                 raise Exception("unexceptable end")
137             if self.tokens[self.i].type != p:
138                 raise Exception("should be "+p)
139             self.i += 1
140         def stmt(self):
141             if self.type() == "print":
142                 self.i += 1
143                 while 1:
144                     if self.type() == ';':
145                         self.i += 1
146                         break
147                     self.expr5()
148                     if self.type() == ',':
149                         self.i += 1
150                     elif self.type() == ';':
151                         self.i += 1
152                         break
153                     else:
154                         raise Exception("not ok-")
155                 self.push("PRINT")
156             elif self.type() == 'break':
157                 self.break_point = self.pos()
158                 self.push("JUMP")
159                 self.i += 1
160                 self.match(';')
161             elif self.type() == 'continue':
162                 self.continue_point = self.pos()
163                 self.push("JUMP")
164                 self.i += 1
165                 self.match(';')
166             elif self.type() == "return":
167                 self.i += 1
168                 if self.type() == ";":
169                     self.push("PUSH_CONST", None)
170                     self.i += 1
171                 else:
172                     self.expr5()
173                     self.match(';')
174                 self.push("RETURN")
175             elif self.type() == 'function':
176                 self.i += 1
177                 name = ""
178                 if self.type() == 'name':
179                     self.push("PUSH_VAR",self.value())
180                     name = self.value()
181                     self.i += 1
182                 self.match('(')
183                 names = []
184                 while 1:
185                     if self.type() == 'name':
186                         names.append(self.value())
187                         self.i += 1
188                     else:
189                         self.match(')')
190                         break
191                     if self.type() == ',':
192                         self.i += 1
193                     elif self.type() == ')':
194                         self.i += 1
195                         break
196                     else:
197                         raise Exception("bad function")
198                 s = ''
199                 self.match('{')
200                 count = 1
201                 while 1:
202                     if self.type() == '{':
203                         count += 1
204                     elif self.type() == '}':
205                         count -= 1
206                         if count == 0:
207                             self.i += 1
208                             break
209                     s += self.prestr()
210                     self.i += 1
211                 self.push('PUSH_CONST', s)
212                 self.push('PUSH_CONST', names)
213                 self.push('MAKE_FUNCTION')
214                 if name:
215                     self.push("ASSIGN")
216             elif self.type() == 'if':
217                 self.i += 1
218                 self.match('(')
219                 self.expr5()
220                 self.match(')')
221                 jump_pos = self.pos()
222                 self.push('JUMP_IF_FALSE')
223                 self.block()
224                 if self.type() == 'else':
225                     self.i += 1
226                     jump_back_pos = self.pos()
227                     self.push('JUMP')
228                     self.reset(jump_pos, self.pos())
229                     self.block()
230                     self.reset(jump_back_pos, self.pos())
231                 else:
232                     self.reset(jump_pos, self.pos())
233             elif self.type() == 'while':
234                 self.i += 1
235                 self.match('(')
236                 jump_here = self.pos()
237                 self.expr5()
238                 self.match(')')
239                 jump_pos = self.pos()
240                 self.push('JUMP_IF_FALSE')
241                 self.block()
242                 self.push('JUMP', jump_here)
243                 self.reset(jump_pos, self.pos())
244                 if self.break_point != -1:
245                     self.reset(self.break_point, self.pos())
246                     self.break_point = -1
247                 if self.continue_point != -1:
248                     self.reset(self.continue_point, jump_here)
249                     self.continue_point = -1
250             elif self.type() == "name":
251                 self.push("PUSH_VAR", self.value())
252                 name = self.value()
253                 self.i += 1
254                 if self.type() == ';':
255                     self.i += 1
256                 elif self.type() == 'assign':
257                     t = self.value()
258                     if t != '=':
259                         self.push("PUSH_VAR", name)
260                     self.i += 1
261                     if t == '=':
262                         self.expr5()
263                         self.push("ASSIGN")
264                     elif t == '+=':
265                         self.expr5()
266                         self.push("ADD")
267                         self.push("ASSIGN")
268                     elif t == '*=':
269                         self.expr5()
270                         self.push("MUL")
271                         self.push("ASSIGN")
272                     elif t == '-=':
273                         self.expr5()
274                         self.push("SUB")
275                         self.push("ASSIGN")
276                     elif t == '/=':
277                         self.expr5()
278                         self.push("DIV")
279                         self.push("ASSIGN")
280                     else:
281                         raise Exception("bad assign")
282                     self.match(';')
283                 elif self.type() == "inc":
284                     self.i += 1
285                     self.push("INC")
286                     self.match(';')
287                 elif self.type() == "dec":
288                     self.i += 1
289                     self.push("DEC")
290                     self.match(';')
291                 else:
292                     self.name_tail()
293                     self.match(';')
294                     self.push("POP_ALL")
295             else:
296                 self.expr5()
297                 self.push("POP_ALL")
298                 self.match(';')
299         def expr(self):
300             if self.type() == "int":
301                 self.push("PUSH_CONST", int(self.value()))
302                 self.i += 1
303             elif self.type() == "False":
304                 self.push("PUSH_CONST", False)
305                 self.i += 1
306             elif self.type() == "True":
307                 self.push("PUSH_CONST", True)
308                 self.i += 1
309             elif self.type() == "not":
310                 self.i += 1
311                 self.expr()
312                 self.push("NOT")
313             elif self.type() == "-":
314                 self.i += 1
315                 self.expr()
316                 self.push("NEG")
317             elif self.type() == "str":
318                 self.push("PUSH_CONST", str(self.value()))
319                 self.i += 1
320             elif self.type() == "name":
321                 self.push("PUSH_VAR", self.value())
322                 self.i += 1
323             elif self.type() == '(':
324                 self.i += 1
325                 self.expr5()
326                 self.match(")")
327             elif self.type() == '[':
328                 self.i += 1
329                 count = 0
330                 while self.type() != ']':
331                     self.expr5()
332                     count += 1
333                     if self.type() == ']':
334                         break
335                     self.match(',')
336                 self.match(']')
337                 self.push("BUILD_LIST", count)
338             elif self.type() == '{':
339                 self.i += 1
340                 count = 0
341                 while self.type() != '}':
342                     self.expr5()
343                     self.match(':')
344                     self.expr5()
345                     count += 1
346                     if self.type() == '}':
347                         break
348                     self.match(',')
349                 self.match('}')
350                 self.push("BUILD_MAP", count)
351             elif self.type() == 'function':
352                 self.i += 1
353                 name = ""
354                 if self.type() == 'name':
355                     name = self.value()
356                     self.push("PUSH_VAR", name)
357                     self.i += 1
358                 self.match('(')
359                 names = []
360                 while 1:
361                     if self.type() == 'name':
362                         names.append(self.value())
363                         self.i += 1
364                     else:
365                         self.match(')')
366                         break
367                     if self.type() == ',':
368                         self.i += 1
369                     elif self.type() == ')':
370                         self.i += 1
371                         break
372                     else:
373                         raise Exception("bad function")
374                 s = ''
375                 self.match('{')
376                 count = 1
377                 while 1:
378                     if self.type() == '{':
379                         count += 1
380                     elif self.type() == '}':
381                         count -= 1
382                         if count == 0:
383                             self.i += 1
384                             break
385                     s += self.prestr()
386                     self.i += 1
387                 self.push('PUSH_CONST', s)
388                 self.push('PUSH_CONST', names)
389                 self.push('MAKE_FUNCTION')
390                 if name:
391                     self.push('ASSIGN')
392             self.name_tail()
393         def name_tail(self):
394             while True:
395                 if self.type() == "(":
396                     self.i += 1
397                     count = 0
398                     while 1:
399                         if self.type() == ")":
400                             self.i += 1
401                             break
402                         self.expr5()
403                         count += 1
404                         if self.type() == ",":
405                             self.i += 1
406                         elif self.type() == ")":
407                             self.i += 1
408                             break
409                         else:
410                             raise Exception("not ok")
411                     self.push("CALL", count)
412                 elif self.type() == '[':
413                     self.i += 1
414                     self.expr5()
415                     self.match(']')
416                     self.push("GET_ITEM")
417                 elif self.type() == '.':
418                     self.i += 1
419                     if self.type() != 'name':
420                         raise Exception("need a name")
421                     self.push("PUSH_CONST", self.value())
422                     self.i += 1
423                     self.push("GET_METHOD")
424                 elif self.type() == "inc":
425                     self.i += 1
426                     self.push("INC")
427                 else:
428                     break
429         def expr1(self):
430             self.expr()
431             while self.type() == '*' or self.type() == '/':
432                 t = self.type()
433                 self.i += 1
434                 self.expr()
435                 if t == "*":
436                     self.push("MUL")
437                 else:
438                     self.push("DIV")
439         def expr2(self):
440             self.expr1()
441             while self.type() == '+' or self.type() == '-':
442                 t = self.type()
443                 self.i += 1
444                 self.expr1()
445                 if t == "+":
446                     self.push("ADD")
447                 else:
448                     self.push("SUB")
449         def expr3(self):
450             self.expr2()
451             while self.type() == "cmp":
452                 t = self.value()
453                 self.i += 1
454                 self.expr2()
455                 if t == ">=":
456                     self.push("GE")
457                 elif t == "<=":
458                     self.push("LE")
459                 elif t == "<":
460                     self.push("LT")
461                 elif t == ">":
462                     self.push("GT")
463                 elif t == "==":
464                     self.push("EQ")
465                 else:
466                     self.push("NE")
467         def expr4(self):
468             self.expr3()
469             while self.type() == 'or' or self.type() == 'and':
470                 t = self.type()
471                 self.i += 1
472                 self.expr3()
473                 if t == "or":
474                     self.push("OR")
475                 else:
476                     self.push("AND")
477         def expr5(self):
478             self.expr4()
479         def block(self):
480             if self.type() == '{':
481                 self.i += 1
482                 while self.type() != '}':
483                     self.stmt()
484                 self.i += 1
485             else:
486                 self.stmt()
487         def eval(self):
488             while self.i < self.l:
489                 self.stmt()
490     t = Trans(tokens)
491     t.eval();
492     return t.steps
493 
class Auto(object):
    """Runtime value of the toy language, which also acts as its interpreter.

    Every value handled by the virtual machine is wrapped in an ``Auto``.
    Each wrapper owns a ``namespace`` dict, so a function keeps its variables
    after it has run and they stay reachable through ``GET_METHOD``.

    Attributes:
        value: the wrapped Python object; for a user function this is the
            source text of its body.
        belongto: the ``Auto`` this value is attached to (searched for names
            by ``PUSH_VAR``), or ``None``.
        argnames: parameter names bound by ``call``.
        buildin: optional Python callable receiving the argument list; when
            set, ``call`` delegates to it instead of interpreting ``value``.
        namespace: name -> ``Auto`` mapping; always holds ``'self'`` and, when
            a name was supplied, ``'__name__'``.

    The class reads the module-level flags ``output_short``, ``show_tokens``,
    ``show_steps`` and ``show_var`` and calls the module-level functions
    ``tokenize`` and ``stepize``.

    NOTE(review): the interpreter state (``stack``, ``steps``, ``i``, ``l``)
    is stored on the instance by ``call``, so a function that calls itself
    overwrites its own state.
    """

    # Binary opcodes mapped to the operation applied to the unwrapped
    # (left, right) operand values.
    _BINARY_OPS = {
        "GE": lambda left, right: left >= right,
        "GT": lambda left, right: left > right,
        "LE": lambda left, right: left <= right,
        "LT": lambda left, right: left < right,
        "EQ": lambda left, right: left == right,
        "NE": lambda left, right: left != right,
        "ADD": lambda left, right: left + right,
        "SUB": lambda left, right: left - right,
        # The previous code evaluated right * left; the result is the same for
        # the built-in numeric and sequence types.
        "MUL": lambda left, right: left * right,
        "DIV": lambda left, right: left / right,
        # Both operands are already evaluated, so no short-circuiting happens.
        "AND": lambda left, right: left and right,
        "OR": lambda left, right: left or right,
    }

    def __init__(self,value=None,belongto=None,argnames=None,name=None,buildin=None):
        self.value = value
        self.belongto = belongto
        self.argnames = argnames or []
        self.buildin = buildin
        if name:
            self.namespace = {'self':self, '__name__':Auto(name)}
        else:
            self.namespace = {'self':self}

    def _display(self):
        """Return the one-line display text shared by ``__str__`` and ``__repr__``."""
        text = str(self.value).replace("\n", r"\n")
        if output_short and len(text) > 15:
            return text[:10] + '...'
        return text

    def __str__(self):
        return self._display()

    def __repr__(self):
        return "Auto(" + self._display() + ")"

    def call(self, args=None):
        """Invoke this value as a function and return the resulting ``Auto``.

        A built-in is called directly with ``args``.  Otherwise ``value`` must
        be source text: the arguments are bound to ``argnames`` in this
        object's namespace, the source is tokenized and compiled, and the
        steps are executed until the instruction counter reaches the end.

        Args:
            args: list of ``Auto`` arguments, or ``None``.

        Returns:
            The bottom element of the stack when execution stops (the value
            given to ``RETURN``, if one ran), otherwise ``Auto(None)``.

        Raises:
            Exception: "uncallable" when ``value`` is not a string.
        """
        if self.buildin is not None:
            return self.buildin(args)
        self.stack = []
        if not isinstance(self.value, str):
            raise Exception("uncallable")
        if args:
            # NOTE(review): the argument objects themselves are renamed and
            # re-parented here, not copies of them.
            for param, arg in zip(self.argnames, args):
                arg.belongto = self
                arg.namespace['__name__'] = Auto(param)
                self.namespace[param] = arg
        name_slot = self.namespace.get('__name__')
        funcname = str(name_slot.value) if name_slot else ''
        if show_tokens:
            print("\n[TOKENIZE " + funcname + "]")
        tokens = tokenize(self.value)
        if show_steps:
            print("\n[STEPIZE " + funcname + "]")
        self.steps = stepize(tokens)
        if show_steps:
            for index, step in enumerate(self.steps):
                print(" {0:3} ".format(index) + str(step))
        # run steps
        if show_var:
            print("\n[CALL " + funcname + "]")
        self.l = len(self.steps)
        self.i = 0
        while self.i < self.l:
            self.step_once()
        if show_var:
            print("[END " + funcname + "]\n")
        if self.stack:
            return self.stack[0]
        return Auto(None)

    def _pop_n(self, count):
        """Remove the top ``count`` stack entries and return them in push order."""
        if count == 0:
            # stack[-0:] would be the whole stack, so handle zero explicitly.
            return []
        items = self.stack[-count:]
        del self.stack[-count:]
        return items

    @staticmethod
    def _get_item(container, key):
        """Look ``key`` up in a list, str or dict; a miss yields ``Auto(None)``.

        Raises:
            Exception: "error in getitem" for any other container/key pairing.
        """
        if isinstance(key, int) and isinstance(container, (list, str)):
            # Accept Python-style negative indices, but treat anything out of
            # range on either side as a miss.
            if -len(container) <= key < len(container):
                item = container[key]
                if isinstance(container, list):
                    return item
                return Auto(item)
            return Auto(None)
        if isinstance(container, dict):
            try:
                return container.get(key, Auto(None))
            except TypeError:
                # Unhashable key.
                raise Exception("error in getitem")
        raise Exception("error in getitem")

    def step_once(self):
        """Execute the step at ``self.i`` and advance the instruction counter.

        Raises:
            Exception: "canot step <type>" for an unknown step type, or
                "error in getitem" from ``GET_ITEM``.
        """
        t = self.steps[self.i]
        if show_var:
            print(str(self.i) + " : " + str(t))
        self.i += 1
        op = t.type
        if op in self._BINARY_OPS:
            right = self.stack.pop()
            left = self.stack.pop()
            self.stack.append(Auto(self._BINARY_OPS[op](left.value, right.value)))
        elif op == "PUSH_VAR":
            # Look in the local namespace first, then walk the belongto chain.
            found = self.namespace.get(t.value)
            scope = self.belongto
            while found is None and scope is not None:
                found = scope.namespace.get(t.value, None)
                scope = scope.belongto
            if found is None:
                # Unknown name: push a named placeholder so that a following
                # ASSIGN knows where to store the value.
                found = Auto(None)
                found.namespace['__name__'] = Auto(t.value)
                found.belongto = self
            self.stack.append(found)
        elif op == "ASSIGN":
            new_value = self.stack.pop()
            target = self.stack.pop()
            name = target.namespace['__name__']
            # Store into the target's owner, or locally when it has none.
            owner = target.belongto if target.belongto is not None else self
            new_value.namespace['__name__'] = name
            new_value.belongto = owner
            owner.namespace[name.value] = new_value
        elif op == "PUSH_CONST":
            self.stack.append(Auto(t.value))
        elif op == "POP_ALL":
            self.stack = []
        elif op == "NOT":
            operand = self.stack.pop()
            self.stack.append(Auto(not operand.value))
        elif op == "NEG":
            operand = self.stack.pop()
            if isinstance(operand.value, str):
                # Negating a string reverses it.
                self.stack.append(Auto(operand.value[::-1]))
            else:
                self.stack.append(Auto(-operand.value))
        elif op == "JUMP_IF_FALSE":
            condition = self.stack.pop()
            if not condition.value:
                self.i = int(t.value)
        elif op == "JUMP":
            self.i = int(t.value)
        elif op == "PRINT":
            # Prints the whole stack, space separated, then clears it.
            print(" ".join(str(item) for item in self.stack))
            self.stack = []
        elif op == "GET_METHOD":
            attr = self.stack.pop()
            owner = self.stack.pop()
            member = owner.namespace.get(attr.value, Auto(None))
            member.belongto = owner
            self.stack.append(member)
        elif op == "CALL":
            args = self._pop_n(t.value)
            callee = self.stack.pop()
            self.stack.append(callee.call(args))
        elif op == "RETURN":
            result = self.stack.pop()
            self.stack = [result]
            # Jump past the last step so the run loop in call() stops.
            self.i = self.l
        elif op == "MAKE_FUNCTION":
            names = self.stack.pop()
            body = self.stack.pop()
            if isinstance(body.value, str) and isinstance(names.value, list):
                self.stack.append(Auto(body.value, argnames=names.value))
            else:
                self.stack.append(Auto(None))
        elif op == 'BUILD_LIST':
            self.stack.append(Auto(self._pop_n(t.value)))
        elif op == 'BUILD_MAP':
            mapping = {}
            for _ in range(t.value):
                item = self.stack.pop()
                key = self.stack.pop()
                mapping[key.value] = item
            self.stack.append(Auto(mapping))
        elif op == 'GET_ITEM':
            key = self.stack.pop()
            container = self.stack.pop()
            item = self._get_item(container.value, key.value)
            item.belongto = container
            self.stack.append(item)
        else:
            raise Exception('canot step '+t.type)
        if show_var:
            print(" " * 40 + " " + str(self.stack))
            print(" " * 40 + " " + str(self.namespace))

    def func_register(self,name,func):
        """Expose the Python callable ``func`` under ``name`` in this namespace.

        ``func`` is called with the list of ``Auto`` arguments and its return
        value becomes the result of the call.
        """
        self.namespace[name] = Auto("<buildin-function "+name+'>',
                                    buildin=func, name=name)
714 
def function_str(args):
    """Built-in ``str``: return the first argument's value as a string.

    The wrapped value is converted, not the ``Auto`` wrapper, so the result
    is not affected by the wrapper's display formatting (newline escaping
    and truncation).
    """
    return Auto(str(args[0].value))
def function_int(args):
    """Built-in ``int``: convert the first argument's value to an integer.

    The wrapped value is converted; the ``Auto`` wrapper itself cannot be
    passed to ``int``.
    """
    return Auto(int(args[0].value))
def function_len(args):
    """Built-in ``len``: return the length of the first argument's value."""
    target = args[0]
    size = len(target.value)
    return Auto(size)
721 
if __name__ == '__main__':
    # Read the whole program first so the file is closed before it runs.
    with open("hello.pym") as source_file:
        source = source_file.read()
    a = Auto(source, name='__main__')
    # Built-in functions available to the script.
    a.func_register('str', function_str)
    a.func_register('int', function_int)
    a.func_register('len', function_len)
    a.call()
View Code

 

posted @ 2017-01-10 15:05  backinfile  阅读(425)  评论(0编辑  收藏  举报