python ast
import ast
import astor
# Initial source code that the walker below instruments.
# The nested statements must be indented, otherwise ast.parse() rejects the
# sample with an IndentationError.
source = """
index=0
def some_function(param):
    if param == 0:
        return case_0(param)
    elif param < 0:
        return negative_case(param)
    for i in range(5):
        print(i)
    return all_other_cases(param)
"""
class UseAst(astor.TreeWalk):
    """Tree walker that adds logging and loop bookkeeping to a parsed module.

    * In every statement list stored in a ``body`` field, a
      ``logger.info('Calling <name>')`` statement is inserted directly before
      each statement for which ``pre_Call`` recorded a plain function name.
    * Directly before every ``for`` loop an ``items = []`` statement is
      inserted, and ``items.append(<loop target>)`` becomes the first
      statement of the loop body.
    """

    @staticmethod
    def _index_of(statements, node):
        """Return the position of *node* in *statements* (by identity), or None."""
        for position, candidate in enumerate(statements):
            if candidate is node:
                return position
        return None

    def pre_body_name(self):
        """Instrument one ``body`` statement list with logging statements.

        Returns True so the walker does not process the list's children a
        second time; they are walked manually here.
        """
        body = self.cur_node
        # Iterate over a snapshot: the real list grows while it is instrumented.
        for child in list(body):
            self._called_name = None
            # Keep walking the current statement; pre_Call records the callee.
            self.walk(child)
            if self._called_name is None:
                continue
            # Build the statement from source text so every node carries a
            # proper context and no deprecated node class (ast.Str) is needed.
            log_stmt = ast.parse(
                "logger.info({!r})".format("Calling {}".format(self._called_name))
            ).body[0]
            # Locate the statement by identity: earlier insertions (made here
            # or by pre_For) shift positions, so the snapshot index is stale.
            position = self._index_of(body, child)
            if position is not None:
                body.insert(position, log_stmt)
        self._called_name = None
        return True

    def pre_Call(self):
        """Record the called function's name when the callee is a plain name.

        Returns True, so the arguments of the call are not walked.
        """
        callee = self.cur_node.func
        if isinstance(callee, ast.Name):
            self._called_name = callee.id
        return True

    def pre_For(self):
        """Add ``items`` bookkeeping around every ``for`` loop."""
        loop = self.cur_node
        siblings = self.parent
        # ast.parse() returns a Module; only its single statement belongs in
        # a statement list.
        if isinstance(siblings, list):
            position = self._index_of(siblings, loop)
            if position is not None:
                # Insert right before the loop instead of at a fixed index.
                siblings.insert(position, ast.parse("items = []").body[0])
        # Append whatever the loop actually binds; the extra parentheses keep
        # tuple targets a single argument.
        target_src = astor.to_source(loop.target).strip()
        loop.body.insert(
            0, ast.parse("items.append(({}))".format(target_src)).body[0]
        )
# Parse the sample, let the walker instrument it in place, then put the
# loguru import in front of everything and print the regenerated source.
tree = ast.parse(source)
walker = UseAst()
walker.walk(tree)
body = tree.body
# Prepend the import that provides `logger`.
logger_import = ast.ImportFrom(
    module='loguru',
    names=[ast.alias(name="logger", asname=None)],
    level=0,
)
body[:0] = [logger_import]
print(astor.to_source(tree))
"""
最终代码
from loguru import logger
index = 0
def some_function(param):
if param == 0:
logger.info('Calling case_0')
return case_0(param)
elif param < 0:
logger.info('Calling negative_case')
return negative_case(param)
items = []
logger.info('Calling all_other_cases')
for i in range(5):
items.append(i)
print i
return all_other_cases(param)
"""
py2爬虫迁移到py3使用
import _ast
import ast
from _ast import ImportFrom, ClassDef, Call
from typing import Any
import astor
# Dictionary keys that are kept when an `item = {...}` literal is rewritten.
item_key_list = [
    "object_id",
    "title",
    "domain",
    "link_url",
    "source",
    "summary",
    "content",
    "publish_time",
    "crawler_time",
    "item_type",
]
class ReplaceFromBaseSpider(ast.NodeTransformer):
    """Retarget spiders from ``BaseSpider`` to ``CommonSpider``.

    * ``from ... import BaseSpider`` becomes
      ``from crawler.scrapy.base_spiders.common_spider import CommonSpider``.
    * ``from ... import datetime_str_to_timestamp`` is removed.
    * A class whose first base is the plain name ``BaseSpider`` is renamed
      and re-based on ``CommonSpider``.
    """

    def visit_ImportFrom(self, node: ImportFrom) -> Any:
        """Replace or drop an import, decided by its first imported name."""
        imported = node.names[0].name if node.names else None
        if imported == "BaseSpider":
            replacement = ast.ImportFrom(
                module="crawler.scrapy.base_spiders.common_spider",
                names=[ast.alias(name="CommonSpider", asname=None)],
                level=0,
            )
            return ast.copy_location(replacement, node)
        if imported == "datetime_str_to_timestamp":
            # Returning None removes the statement from its parent.
            return None
        return node

    def visit_ClassDef(self, node: ClassDef) -> Any:
        """Rename a ``BaseSpider`` subclass and swap its base class.

        Classes without bases, or whose first base is not a plain name
        (e.g. ``scrapy.Spider``), are left untouched instead of raising.
        """
        # Descend first so imports inside the class body are handled too.
        self.generic_visit(node)
        first_base = node.bases[0] if node.bases else None
        if isinstance(first_base, ast.Name) and first_base.id == "BaseSpider":
            # Strip the project prefix and the (sometimes misspelled)
            # "spider" suffix, then title-case what is left.
            node.name = (
                node.name.lower()
                .replace("ejly", "")
                .replace("spider", "")
                .replace("spdier", "")
                .title()
            )
            # Mutate in place so class keywords and source locations survive.
            node.bases = [
                ast.copy_location(
                    ast.Name(id="CommonSpider", ctx=ast.Load()), first_base
                )
            ]
        return node
class ReplaceSuper(ast.NodeTransformer):
    """Convert Python 2 style ``super(Cls, self)`` calls to zero-argument form."""

    def visit_Call(self, node: Call) -> Any:
        """Rewrite ``super(...).method(...)`` calls.

        ``super(...).__init__(...)`` becomes
        ``super().__init__(*args, **kwargs)``. For any other method only the
        arguments of ``super(...)`` are dropped; the method name and its own
        arguments are kept.
        """
        # Visit children first so calls nested in arguments are handled too.
        self.generic_visit(node)
        method = node.func
        if not isinstance(method, ast.Attribute):
            return node
        receiver = method.value
        is_super_call = (
            isinstance(receiver, ast.Call)
            and isinstance(receiver.func, ast.Name)
            and receiver.func.id == "super"
        )
        if not is_super_call:
            return node
        if method.attr == "__init__":
            # Parse in "eval" mode to obtain an expression node; a Module
            # node is not valid in expression position.
            replacement = ast.parse(
                "super().__init__(*args, **kwargs)", mode="eval"
            ).body
            return ast.copy_location(replacement, node)
        receiver.args = []
        receiver.keywords = []
        return node
class ReplacePrototype(ast.NodeTransformer):
    """Rewrite spider attribute and ``item`` assignments.

    * ``<obj>.model = ...`` (e.g. ``self.model = "parse_article"``) is removed.
    * ``<obj>.ip_retry_count = ...`` becomes ``self.max_retry_times = 10``.
    * ``item = {}`` becomes ``item = SparkPlanItem()``; a non-empty dict
      literal is filtered to the allowed keys and wrapped as
      ``item = SparkPlanItem({...})``.
    * String values assigned to ``name`` / ``redis_key`` are renamed.

    Args:
        allowed_keys: optional iterable of dict keys to keep in ``item``
            literals. Defaults to the module-level ``item_key_list``.
    """

    def __init__(self, allowed_keys=None):
        super().__init__()
        self.allowed_keys = frozenset(
            item_key_list if allowed_keys is None else allowed_keys
        )

    @staticmethod
    def _string_value(node):
        """Return the text of a string-literal node, or None for anything else."""
        if isinstance(node, ast.Constant):
            return node.value if isinstance(node.value, str) else None
        # Interpreters that predate ast.Constant literals expose the text as `.s`.
        legacy = getattr(node, "s", None)
        return legacy if isinstance(legacy, str) else None

    @staticmethod
    def _set_string_value(node, text):
        """Store *text* in a string-literal node."""
        if isinstance(node, ast.Constant):
            node.value = text
        else:
            node.s = text

    @staticmethod
    def _migrate_identifier(text):
        """Swap the ``ejly_`` prefix and drop the ``_spider``/``_spdier`` suffix."""
        return (
            text.replace("ejly_", "content_spark_")
            .replace("_spider", "")
            .replace("_spdier", "")
        )

    def _wrap_item_dict(self, node):
        """Turn ``item = {...}`` into ``item = SparkPlanItem(...)`` in place."""
        literal = node.value
        call = ast.copy_location(
            ast.parse("SparkPlanItem()", mode="eval").body, literal
        )
        if literal.values:
            # Filter keys and values together so the pairs stay aligned.
            # Entries whose key is not an allowed string literal (including
            # ``**spread`` entries, whose key is None) are dropped.
            kept = [
                (key, value)
                for key, value in zip(literal.keys, literal.values)
                if self._string_value(key) in self.allowed_keys
            ]
            literal.keys = [key for key, _ in kept]
            literal.values = [value for _, value in kept]
            for value in literal.values:
                if (
                    isinstance(value, ast.Call)
                    and isinstance(value.func, ast.Name)
                    and value.func.id == "datetime_str_to_timestamp"
                ):
                    # Use real attribute access, not a Name with a dotted id.
                    value.func = ast.copy_location(
                        ast.parse("self.time.fmt_publish_time", mode="eval").body,
                        value.func,
                    )
            call.args.append(literal)
        node.value = call
        return node

    def visit_Assign(self, node: ast.Assign) -> Any:
        """Rewrite one assignment, decided by its first target."""
        target = node.targets[0]
        if isinstance(target, ast.Attribute):
            if target.attr == "model":
                # Returning None removes the statement from its parent.
                return None
            if target.attr == "ip_retry_count":
                # .body[0] is the Assign statement; the Module wrapper that
                # ast.parse() returns is not a valid statement.
                replacement = ast.parse("self.max_retry_times = 10").body[0]
                return ast.copy_location(replacement, node)
            return node
        if not isinstance(target, ast.Name):
            return node
        if target.id == "item":
            if isinstance(node.value, ast.Dict):
                return self._wrap_item_dict(node)
            return node
        if target.id in ("name", "redis_key"):
            text = self._string_value(node.value)
            # Non-string values (calls, names, ...) are left untouched.
            if text is not None:
                self._set_string_value(node.value, self._migrate_identifier(text))
        return node
class ReplaceDateParse(ast.NodeTransformer):
    """Rename ``datetime_str_to_timestamp(...)`` calls to ``self.time.fmt_publish_time(...)``."""

    def visit_Call(self, node: Call) -> Any:
        """Swap the callee of a matching call; the arguments are kept."""
        # Visit children first so calls nested inside arguments are renamed too.
        self.generic_visit(node)
        callee = node.func
        if isinstance(callee, ast.Name) and callee.id == "datetime_str_to_timestamp":
            # Build real attribute access instead of a Name with a dotted id.
            node.func = ast.copy_location(
                ast.parse("self.time.fmt_publish_time", mode="eval").body, callee
            )
        return node
def trans_node(target_path):
    """Migrate the spider module at *target_path* and return the new source.

    The file is parsed with ``astor.parse_file``, an import of
    ``SparkPlanItem`` is added, the four transformers are applied in order,
    and the tree is rendered with ``astor.to_source``.

    Args:
        target_path: path of the Python file to migrate.

    Returns:
        The rewritten module source as a string.
    """
    tree = astor.parse_file(target_path)
    body = tree.body
    # Insert a real ImportFrom node rather than a raw string, so the tree
    # stays a valid AST for every tool that walks it.
    item_import = ast.ImportFrom(
        module="crawler.scrapy.items.spark_plan",
        names=[ast.alias(name="SparkPlanItem", asname=None)],
        level=0,
    )
    import_positions = [
        position
        for position, stmt in enumerate(body)
        if isinstance(stmt, (ast.Import, ast.ImportFrom))
    ]
    if import_positions:
        # Directly after the last top-level import (keeps __future__ first).
        insert_at = import_positions[-1] + 1
    else:
        # No imports: keep a module docstring, if any, as the first statement.
        insert_at = 1 if ast.get_docstring(tree) is not None else 0
    body.insert(insert_at, item_import)
    for transformer in (
        ReplaceFromBaseSpider(),
        ReplaceSuper(),
        ReplacePrototype(),
        ReplaceDateParse(),
    ):
        transformer.visit(tree)
    # Nodes created by the transformers may lack line/column information.
    ast.fix_missing_locations(tree)
    return astor.to_source(tree)