python ast

import ast

import astor

# Initial code: the sample program that the tree walker below rewrites

source = """
index=0
def some_function(param):
    if param == 0:
       return case_0(param)
    elif param < 0:
       return negative_case(param)
    for i in range(5):
        print(i)   
    return all_other_cases(param)
"""


class UseAst(astor.TreeWalk):
    """Tree walker that injects logging statements and loop bookkeeping.

    The handlers use astor's ``TreeWalk`` naming scheme: ``pre_<NodeType>``
    is called for nodes of that AST class, ``pre_<field>_name`` for values
    stored under that field name (here: ``body`` statement lists).  All
    handlers mutate the tree in place.
    """

    def pre_body_name(self):
        """Insert ``logger.info('Calling <name>')`` before statements that call.

        Every statement of the current ``body`` list is walked manually; if
        ``pre_Call`` recorded a function name while doing so, a log statement
        is inserted directly in front of that statement.

        Returns:
            True, which asks the walker not to descend into this list again
            (the statements have already been walked here).
        """
        body = self.cur_node
        # Iterate over a snapshot: this handler and pre_For both insert into
        # the live list while the statements are being walked.
        for child in body[:]:
            self.__name = None
            # Continue the traversal inside the current statement.
            self.walk(child)
            if self.__name is None:
                continue
            # Build the statement by parsing source text; this avoids the
            # deprecated ast.Str node (and its invalid ``ctx`` argument).
            logger_statement = ast.parse(
                "logger.info({!r})".format("Calling {}".format(self.__name))
            ).body[0]
            # Look the statement up by identity: snapshot indices are stale
            # as soon as anything has been inserted into ``body``.
            position = next(
                (idx for idx, stmt in enumerate(body) if stmt is child),
                len(body),
            )
            body.insert(position, logger_statement)
        self.__name = None
        return True

    def pre_Call(self):
        """Remember the callee name when a plain function name is called.

        Calls through attributes (``obj.method()``) are ignored.

        Returns:
            True, so the walker does not descend into the call's children.
        """
        callee = self.cur_node.func
        if isinstance(callee, ast.Name):
            self.__name = callee.id
        return True

    def pre_For(self):
        """Add ``items = []`` before each ``for`` loop and collect ``i`` in it.

        NOTE: the appended name ``i`` is hard-coded, so the generated code is
        only meaningful for loops whose loop variable is called ``i``.
        """
        # Every For node passes through here.
        loop = self.cur_node
        parent = self.parent
        # ast.parse returns a Module; only its single statement is inserted.
        init_statement = ast.parse('items = []').body[0]
        append_statement = ast.parse('items.append(i)').body[0]
        if isinstance(parent, list):
            # Put the initialisation directly in front of the loop instead of
            # at a fixed index, so it always precedes the loop that uses it.
            position = next(
                (idx for idx, stmt in enumerate(parent) if stmt is loop),
                0,
            )
            parent.insert(position, init_statement)
        loop.body.insert(0, append_statement)


# Parse the sample source and let the walker rewrite the tree in place.
tree = ast.parse(source)

walker = UseAst()
walker.walk(tree)
body = tree.body
# Prepend the import of the `logger` name used by the injected statements
body.insert(0, ast.ImportFrom(module='loguru', names=[ast.alias(name="logger", asname=None)], level=0))
print(astor.to_source(tree))

"""
Final code (sample output, recorded from a Python 2 run: note the `print i` statement)

from loguru import logger
index = 0


def some_function(param):
    if param == 0:
        logger.info('Calling case_0')
        return case_0(param)
    elif param < 0:
        logger.info('Calling negative_case')
        return negative_case(param)
    items = []
    logger.info('Calling all_other_cases')
    for i in range(5):
        items.append(i)
        print i
    return all_other_cases(param)
"""

# Using ast to migrate Python 2 crawlers (spiders) to Python 3

import _ast
import ast
from _ast import ImportFrom, ClassDef, Call
from typing import Any

import astor

# Dictionary keys that ReplacePrototype checks against when it rewrites an
# `item = {...}` literal; keys not listed here are removed from the literal.
item_key_list = ["object_id", "title", "domain", "link_url", "source", "summary", "content", "publish_time",
                 "crawler_time", "item_type"]


class ReplaceFromBaseSpider(ast.NodeTransformer):
    """Switch spiders from the ``BaseSpider`` base class to ``CommonSpider``.

    * ``from ... import BaseSpider`` becomes
      ``from crawler.scrapy.base_spiders.common_spider import CommonSpider``.
    * ``from ... import datetime_str_to_timestamp`` is removed.
    * ``class X(BaseSpider)`` is renamed and re-based on ``CommonSpider``.
    """

    def visit_ImportFrom(self, node: ImportFrom) -> Any:
        """Rewrite or drop a ``from ... import ...`` statement.

        Only the first imported name decides what happens to the statement.

        Returns:
            The replacement import, ``None`` to delete the statement, or the
            unchanged node.
        """
        imported = node.names[0].name
        if imported == "BaseSpider":
            return ast.ImportFrom(module="crawler.scrapy.base_spiders.common_spider",
                                  names=[ast.alias(name="CommonSpider", asname=None)], level=0)
        if imported == "datetime_str_to_timestamp":
            return None
        return node

    def visit_ClassDef(self, node: ClassDef) -> Any:
        """Rename a ``BaseSpider`` subclass and make it extend ``CommonSpider``.

        The new class name is the old one lower-cased, with ``ejly``,
        ``spider`` and the misspelling ``spdier`` removed, then title-cased.
        Classes without bases, or whose first base is not the plain name
        ``BaseSpider`` (e.g. a dotted ``pkg.Base``), are returned unchanged.
        """
        first_base = node.bases[0] if node.bases else None
        if not (isinstance(first_base, ast.Name) and first_base.id == "BaseSpider"):
            return node

        new_cls_name = node.name.lower().replace("ejly", "").replace("spider", "").replace("spdier", "").title()
        # Update the existing node rather than building a partial ClassDef, so
        # keywords, decorators, body and source locations are all preserved.
        node.name = new_cls_name
        node.bases = [ast.Name(id="CommonSpider", ctx=ast.Load())]
        return node


class ReplaceSuper(ast.NodeTransformer):
    """Modernise ``super(Class, self).method(...)`` calls to zero-argument ``super()``."""

    def visit_Call(self, node: Call) -> Any:
        """Rewrite a method call made on the result of ``super(...)``.

        * ``super(...).__init__(...)`` is replaced by
          ``super().__init__(*args, **kwargs)``.
        * Any other ``super(...).method(...)`` keeps its method and arguments;
          only the arguments passed to ``super`` itself are dropped.

        All other calls are returned unchanged.
        """
        # Visit children first so calls nested inside arguments are handled.
        self.generic_visit(node)

        func = node.func
        is_super_method = (
            isinstance(func, ast.Attribute)
            and isinstance(func.value, ast.Call)
            and isinstance(func.value.func, ast.Name)
            and func.value.func.id == "super"
        )
        if not is_super_method:
            return node

        if func.attr == "__init__":
            # mode="eval" yields the call expression itself; a plain
            # ast.parse() would return a Module, which is not a valid
            # replacement for an expression node.
            return ast.parse("super().__init__(*args, **kwargs)", mode="eval").body

        # Other methods: keep the call, just switch to zero-argument super().
        func.value.args = []
        func.value.keywords = []
        return node


class ReplacePrototype(ast.NodeTransformer):
    """Rewrite selected assignments of a spider for the new code base.

    Only the first assignment target is inspected:

    * ``<obj>.model = ...`` (e.g. ``self.model = "parse_article"``) is removed.
    * ``<obj>.ip_retry_count = ...`` becomes ``self.max_retry_times = 10``.
    * ``item = {}`` becomes ``item = SparkPlanItem()``.
    * ``item = {...}`` becomes ``item = SparkPlanItem({...})``; string keys
      that are not in ``item_key_list`` are dropped together with their
      values, and values that call ``datetime_str_to_timestamp`` are switched
      to ``self.time.fmt_publish_time``.
    * ``name = "..."`` / ``redis_key = "..."``: ``ejly_`` is replaced by
      ``content_spark_`` and ``_spider`` / ``_spdier`` are stripped.
    """

    def visit_Assign(self, node: ast.Assign) -> Any:
        """Return the rewritten assignment, ``None`` to delete it, or *node*."""
        target = node.targets[0]
        if isinstance(target, ast.Attribute):
            if target.attr == "model":
                return None
            if target.attr == "ip_retry_count":
                # Parsing source text yields a well-formed attribute target
                # and avoids the deprecated ast.Num node.
                return ast.parse("self.max_retry_times = 10").body[0]
        elif isinstance(target, ast.Name):
            if target.id == "item" and isinstance(node.value, ast.Dict):
                return self._build_item_assign(node.value)
            if target.id in ("name", "redis_key"):
                self._rename_string_value(node)
        return node

    @staticmethod
    def _literal_str(node):
        """Return the text of a string-literal node, or ``None`` otherwise."""
        if isinstance(node, ast.Constant):
            value = node.value
        else:
            # Older interpreters represent string literals as ast.Str (``.s``).
            value = getattr(node, "s", None)
        return value if isinstance(value, str) else None

    def _build_item_assign(self, dict_node):
        """Build ``item = SparkPlanItem(...)`` from the original dict literal."""
        # .body[0] is the Assign statement; the Module wrapper is discarded.
        new_assign = ast.parse("item = SparkPlanItem()").body[0]
        if not dict_node.values:
            return new_assign

        # Filter keys and values as pairs: removing only the key would shift
        # every following key onto the wrong value.  Keys that are not string
        # literals (e.g. ``**mapping`` entries or variables) are kept.
        kept_pairs = []
        for key, value in zip(dict_node.keys, dict_node.values):
            key_text = self._literal_str(key)
            if key_text is None or key_text in item_key_list:
                kept_pairs.append((key, value))
        dict_node.keys = [key for key, _ in kept_pairs]
        dict_node.values = [value for _, value in kept_pairs]

        for value in dict_node.values:
            if (isinstance(value, ast.Call)
                    and isinstance(value.func, ast.Name)
                    and value.func.id == "datetime_str_to_timestamp"):
                value.func = ast.parse("self.time.fmt_publish_time", mode="eval").body

        # The filtered dict is passed as the single positional argument.
        new_assign.value.args.append(dict_node)
        return new_assign

    def _rename_string_value(self, node):
        """Apply the spider-name renaming to a string literal assigned by *node*.

        Values that are not string literals are left untouched.
        """
        text = self._literal_str(node.value)
        if text is None:
            return
        renamed = text.replace("ejly_", "content_spark_").replace("_spider", "").replace("_spdier", "")
        # Re-parsing the repr gives the right literal node type on every
        # interpreter version without touching deprecated attributes.
        node.value = ast.copy_location(ast.parse(repr(renamed), mode="eval").body, node.value)

class ReplaceDateParse(ast.NodeTransformer):
    """Rename ``datetime_str_to_timestamp(...)`` calls to ``self.time.fmt_publish_time(...)``."""

    def visit_Call(self, node: Call) -> Any:
        """Swap the callee of a direct ``datetime_str_to_timestamp`` call.

        Only calls through the bare name are rewritten; the arguments are
        kept as they are.
        """
        # Visit children first so calls nested inside arguments are rewritten.
        self.generic_visit(node)
        if isinstance(node.func, ast.Name) and node.func.id == "datetime_str_to_timestamp":
            # Build a real attribute chain instead of a Name with a dotted id.
            replacement = ast.parse("self.time.fmt_publish_time", mode="eval").body
            node.func = ast.copy_location(replacement, node.func)
        return node


def trans_node(target_path):
    """Migrate the spider module at *target_path* and return the new source.

    The file is parsed, an import of ``SparkPlanItem`` is added, the
    transformers ``ReplaceFromBaseSpider``, ``ReplaceSuper``,
    ``ReplacePrototype`` and ``ReplaceDateParse`` are applied in that order,
    and the tree is rendered back to source text.

    Args:
        target_path: Path of the Python file to migrate.

    Returns:
        The regenerated source code as a string.
    """
    tree = astor.parse_file(target_path)
    body = tree.body

    # Insert a real import node rather than a raw string, so the tree stays a
    # valid AST for every consumer, not only for astor's code generator.
    item_import = ast.ImportFrom(
        module="crawler.scrapy.items.spark_plan",
        names=[ast.alias(name="SparkPlanItem", asname=None)],
        level=0,
    )
    import_positions = [
        idx for idx, stmt in enumerate(body)
        if isinstance(stmt, (ast.Import, ast.ImportFrom))
    ]
    if import_positions:
        # Keep the new import next to the existing top-level imports.
        insert_at = import_positions[-1] + 1
    else:
        # No imports yet: stay behind a module docstring if there is one.
        insert_at = 1 if ast.get_docstring(tree) is not None else 0
    body.insert(insert_at, item_import)

    for transformer in (ReplaceFromBaseSpider(), ReplaceSuper(),
                        ReplacePrototype(), ReplaceDateParse()):
        transformer.visit(tree)
    ast.fix_missing_locations(tree)
    return astor.to_source(tree)
posted @ 2021-05-31 18:02  公众号python学习开发  阅读(225)  评论(0编辑  收藏  举报