python读取Oracle相关表生成sqlldr脚本文件
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 | # -*- coding: utf-8 -*- import os import pandas as pd import cx_Oracle as cx # os.environ['path'] = r'D:\Program Files\plsql developer8.0\instantclient_19_12' # 设置oci路径 os.environ[ 'path' ] = './instantclient_19_12' class create_sqlldr_ctl: def __init__( self ,sql: str ,out_ctl_path: str ,ctl_data_path: str ,seq: str ): self .table_columns_sql = sql self .out_ctl_path = os.path.abspath(out_ctl_path) self .ctl_data_path = ctl_data_path self .seq = seq self .ft = False # Oracle数据库连接IP和用户密码 self .user = 'xxx' self .passwd = 'xxx' self .url = 'xx.xx.xx.xx.1521/xx' # sh脚本配置 self .sh_txt = 'sqlldr userid=' + self .user + '/' + self .passwd + '@//' + self .url self .control = ' control=' + self .ctl_data_path + 'cmdpath/ctl/{}.ctl' self .bad = ' bad=' + self .ctl_data_path + 'cmdpath/bad/LOAD_{}.bad' self .log = ' log=' + self .ctl_data_path + 'cmdpath/log/LOAD_{}.log errors=99999999' #结尾符号:str : 737472274127422743270A strX : 73747258274127422743270A # ctl脚本配置 direct=TRUE, self .ctl_txt = \ '''options(parallel=TRUE,bindsize=20480000,readsize=51200000,errors=0,rows=60000) LOAD DATA CHARACTERSET AL32UTF8 INFILE ''' + "'" + self .ctl_data_path + "workpath/" + \ '''{}_YYYYMMDD_000001.del' "str x'73747258274127422743270A'" DISCARDMAX 999999999999 TRUNCATE INTO TABLE {} FIELDS TERMINATED BY '{}' TRAILING NULLCOLS ( ''' def _del_all_file( self ,path): ls = os.listdir(path) for i in ls: c_path = os.path.join(path, i) if os.path.isdir(c_path): self ._del_all_file(c_path) else : os.remove(c_path) def __delete__( self ): dirPath = self .out_ctl_path # 判断文件是否存在 if (os.path.exists(dirPath)): self ._del_all_file(dirPath) else : # print("要删除的文件不存在!") os.mkdir(dirPath + '/ctl' ) os.mkdir(dirPath + '/log' ) os.mkdir(dirPath + '/bad' ) def _pd_connDB( self ): try : db = cx.connect( self .user, self .passwd, self .url) df: pd.DataFrame = pd.read_sql_query( self .table_columns_sql, db) db.close() self .ft = True return df except Exception as e: print (e) self .ft = False def create__ctl( self ): # 删除路径下所有 ctl、sh、log等文件 self .__delete__() user_table_columns:pd.DataFrame = self ._pd_connDB() # user_table_columns.to_csv('./data/tab_columns.csv',sep=',') if self .ft: w, d = user_table_columns.shape table_name = user_table_columns[ 'TABLE_NAME' ].drop_duplicates().tolist() for tb_name in table_name: ftp_name = tb_name.replace( 'GNS_' ,' ')+' _D_ADD' # _ALL # ftp_name = tb_name.replace('GNS_','')+'_D_ALL' #创建sh脚本 _control = self .control. format (ftp_name) _bad = self .bad. format (ftp_name) _log = self .log. format (ftp_name) _sh_txt = self .sh_txt + _control + _bad + _log with open ( self .out_ctl_path + '/' + ftp_name + '.sh' , 'w' , encoding = 'utf-8' ) as f: f.write(_sh_txt) with open ( self .out_ctl_path + '/bad/LOAD_' + ftp_name + '.bad' , 'w' , encoding = 'utf-8' ) as f: f.write('') with open ( self .out_ctl_path + '/log/LOAD_' + ftp_name + '.log' , 'w' , encoding = 'utf-8' ) as f: f.write('') # 创建ctl脚本 _ctl_txt: str = self .ctl_txt. format (ftp_name,tb_name, self .seq) # (表名,表名,分隔符) _sh_txt_s: str = '' for i in range (w): if tb_name = = user_table_columns.TABLE_NAME[i]: # TO_DATE(TRIM(:MER_BUSINESSOPENDATE),'yyyy-mm-dd hh24:mi:ss') if user_table_columns.DATA_TYPE[i] = = 'DATE' : _sh_txt_s + = user_table_columns.COLUMN_NAME[i] + ''' "TO_DATE(TRIM(:{}),'yyyy-mm-dd hh24:mi:ss')",''' . format ( user_table_columns.COLUMN_NAME[i]) + '\n' # if user_table_columns.DATA_TYPE[i]== 'DATE': # _sh_txt_s += user_table_columns.COLUMN_NAME[i] + ''' "TO_DATE(RTRIM(:{},'.000000'),'yyyy-mm-dd hh24:mi:ss')",'''.format( # user_table_columns.COLUMN_NAME[i]) + '\n' # SELECT TO_TIMESTAMP('2021-02-20 11:08:31','yyyy-mm-dd hh24:mi:ss.ff') FROM DUAL # _sh_txt_s += user_table_columns.COLUMN_NAME[ # i] + ''' "TO_TIMESTAMP(TRIM(:{}),'yyyy-mm-dd hh24:mi:ss.ff')",'''.format( # user_table_columns.COLUMN_NAME[i]) + '\n' else : _sh_txt_s + = user_table_columns.COLUMN_NAME[i] + ' "TRIM(:{})",' . format (user_table_columns.COLUMN_NAME[i]) + '\n' _ctl_txt + = _sh_txt_s + ')' _ctl_txt = _ctl_txt.replace( ',\n)' , '\n)' ) with open ( self .out_ctl_path + '/ctl/' + ftp_name + '.ctl' , 'w' , encoding = 'utf-8' ) as f: f.write(_ctl_txt) print ( '{}.sh/{}.ctl/LOAD_{}.bad/LOAD_{}.log 文件创建成功' . format (ftp_name,ftp_name,ftp_name,ftp_name)) else : print ( '连接失败' ) if __name__ = = '__main__' : sql1 = '''SELECT * FROM USER_TAB_COLUMNS T WHERE T.TABLE_NAME IN('table_name') ORDER BY TABLE_NAME,COLUMN_ID ASC ''' path = '/data/' ctl_model = create_sqlldr_ctl(sql = sql1, out_ctl_path = './cmdpath' , ctl_data_path = path, seq = '|@|' ) ctl_model.create__ctl() |
自动化学习。
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· Manus的开源复刻OpenManus初探
· 三行代码完成国际化适配,妙~啊~
· .NET Core 中如何实现缓存的预热?
· 如何调用 DeepSeek 的自然语言处理 API 接口并集成到在线客服系统