SQL里提取依赖表-py
一个路径可能会产出多个表,1->n
输入:带sql的文件绝对路径,一行一个路径
输出:文件名\t依赖的表*
# coding=utf-8
import re
def getTable(shName,linesql):
search_index = 0
while len(linesql) > 12 and '_' in linesql and ('from ' in linesql or 'join ' in linesql):
# 从第一个from或join截取
if linesql.find('from ') >= 0 and linesql.find('join ') >= 0:
from_index = linesql.index('from ')
join_index = linesql.index('join ')
search_index = join_index if join_index < from_index else from_index
elif linesql.find('from ') >= 0:
search_index = linesql.index('from ')
elif linesql.find('join ') >= 0:
search_index = linesql.index('join ')
else:
return
# print(search_index)
linesql = linesql[search_index:]
# 判断是否空格后是表
pattern = re.compile(r'(?:from|join)\s+(\S+\_*)\s*')
linesqlli = pattern.findall(linesql)
for table in linesqlli:
if table.find('(') == -1 and table.find('_') >=0:
if table.find('.') == -1:
table = 'portal.'+table
# print(table.lower())
with open(r'D:\MyDesk\已迁移的文件依赖.txt', encoding='utf-8', mode='a') as rf:
rf.write(shName+'\t'+table.lower()+'\n')
else:continue
linesql = linesql[linesql.find(table)+len(table)+1:]
rf.close()
def catFile(filePath):
linestr = ''
# 读取每一行 合为一行
break_pattern = r'(\s|\n|\r|\t)+'
sh_name = 'portal.'+re.sub(r'D:\\GitProjects\\offlineexperiment\\dev\\hive.*\\sh_','',filePath).replace('.sh','').lower()
with open(filePath, encoding='utf-8', mode='r') as sf:
for line in sf:
line = re.sub(break_pattern,' ',line.replace('\n', ' '))
linestr += line
getTable(sh_name,linestr)
if __name__ == '__main__':
# 准备文件路径列表
sh_list = []
with open(r'D:\MyDesk\已迁移的文件.txt', encoding='utf-8', mode='r') as path_file:
for path in path_file:
sh_list.append(path.replace('\n', ''))
path_file.close()
for sh in sh_list:
catFile(sh)
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· winform 绘制太阳,地球,月球 运作规律
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 上周热点回顾(3.3-3.9)
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人