qq-22432832

SQL里提取依赖表-py

一个路径可能会产出多个表,1->n
输入:带sql的文件绝对路径,一行一个路径
输出:文件名\t依赖的表*

# coding=utf-8
import re

def getTable(shName,linesql):
    search_index = 0
    while len(linesql) > 12 and '_' in linesql and ('from ' in linesql or 'join ' in linesql):
        # 从第一个from或join截取
        if linesql.find('from ') >= 0 and linesql.find('join ') >= 0:
            from_index = linesql.index('from ')
            join_index = linesql.index('join ')
            search_index = join_index if join_index < from_index else from_index
        elif linesql.find('from ') >= 0:
            search_index = linesql.index('from ')
        elif linesql.find('join ') >= 0:
            search_index = linesql.index('join ')
        else:
            return
        # print(search_index)
        linesql = linesql[search_index:]
        # 判断是否空格后是表
        pattern = re.compile(r'(?:from|join)\s+(\S+\_*)\s*')
        linesqlli = pattern.findall(linesql)
        for table in linesqlli:
            if table.find('(') == -1 and table.find('_') >=0:
                if table.find('.') == -1:
                    table = 'portal.'+table
                # print(table.lower())
                with open(r'D:\MyDesk\已迁移的文件依赖.txt', encoding='utf-8', mode='a') as rf:
                    rf.write(shName+'\t'+table.lower()+'\n')
            else:continue
        linesql = linesql[linesql.find(table)+len(table)+1:]
        rf.close()

def catFile(filePath):
    linestr = ''
    # 读取每一行 合为一行
    break_pattern = r'(\s|\n|\r|\t)+'
    sh_name = 'portal.'+re.sub(r'D:\\GitProjects\\offlineexperiment\\dev\\hive.*\\sh_','',filePath).replace('.sh','').lower()
    with open(filePath, encoding='utf-8', mode='r') as sf:
        for line in sf:
            line = re.sub(break_pattern,' ',line.replace('\n', ' '))
            linestr += line
    getTable(sh_name,linestr)


if __name__ == '__main__':
    # 准备文件路径列表
    sh_list = []
    with open(r'D:\MyDesk\已迁移的文件.txt', encoding='utf-8', mode='r') as path_file:
        for path in path_file:
            sh_list.append(path.replace('\n', ''))
    path_file.close()
    for sh in sh_list:
        catFile(sh)

posted on   春马与夏  阅读(18)  评论(0编辑  收藏  举报  

相关博文:
阅读排行:
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· winform 绘制太阳,地球,月球 运作规律
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 上周热点回顾(3.3-3.9)
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人
< 2025年3月 >
23 24 25 26 27 28 1
2 3 4 5 6 7 8
9 10 11 12 13 14 15
16 17 18 19 20 21 22
23 24 25 26 27 28 29
30 31 1 2 3 4 5

导航

统计

点击右上角即可分享
微信分享提示