奇葩的报表(连更-对,昨天发生大事了。。。。)
- 背景:
- A、B2个IDC,A地全为windows服务器(游戏业务);B地为linux(数据统计、充值、站点业务)
- A地每个区服每天运行会产生很多日志,为玩家账号数据变动信息(装备更新、角色属性等)
- 需要将A地日志拉倒B地进行报表输出后给运营团队分析
- 奇葩点:
- 游戏业务团队只提供FTP(真TM安全),余事不管
- 日志为GBK,OS、DB、APP处理的都是utf8,so不能通过shell来处理,且不同名称日志导入不同表中...
- 时间紧急,早上的需求下午就要看报表(想什么来什么.....)
- 思路:
- 用python(写的是脚本,连对象都不要),先不管性能,先实现功能后后续优化
- 姿势上码:
-
1 #!/usr/bin/env python 2 # -*- coding:utf-8 -*- 3 # author: qiaogy 4 5 from ftplib import FTP 6 import os 7 import sys 8 import shutil 9 import subprocess 10 import zipfile 11 12 13 root = os.path.dirname(os.path.abspath(__file__)) 14 sys.path.append(root) 15 16 dest_dir = [] # 要处理的日志目录 17 table_dic = { 18 '01': 'gamereport.tb_game_detail_info_30012_1_01"', 19 '02': 'gamereport.tb_game_detail_info_30012_1_02"', 20 '03': 'gamereport.tb_game_detail_info_30012_1_03"', 21 '04': 'gamereport.tb_game_detail_info_30012_1_04"', 22 '05': 'gamereport.tb_game_detail_info_30012_1_05"', 23 '06': 'gamereport.tb_game_detail_info_30012_1_06"', 24 '07': 'gamereport.tb_game_detail_info_30012_1_07"', 25 '08': 'gamereport.tb_game_detail_info_30012_1_08"', 26 '09': 'gamereport.tb_game_detail_info_30012_1_09"', 27 '10': 'gamereport.tb_game_detail_info_30012_1_10"', 28 '11': 'gamereport.tb_game_detail_info_30012_1_11"', 29 '12': 'gamereport.tb_game_detail_info_30012_1_12"', 30 } 31 32 33 # 登陆 34 ftp = FTP('x.x.x.x') 35 ftp.login('qiaogy', 'x.x.x.x') 36 ftp.cwd('/chuanqi/lxtx1qu') 37 38 # 创建 zip 归档下载目录 39 zip_dir = os.path.join(root, 'zip_dir') 40 if not os.path.exists(zip_dir): 41 os.mkdir(zip_dir) 42 43 44 # 求差集,避免下载已经处理过得zip包 45 already_zip = os.listdir(zip_dir) 46 des_zip_dir = ftp.nlst() 47 for item in already_zip: 48 if item in des_zip_dir: 49 des_zip_dir.remove(item) 50 51 # 从ftp下载 zip文件,放入 zip_dir 52 for file in des_zip_dir: 53 if file.endswith('.zip'): 54 dest_file = os.path.join(zip_dir, file) 55 ftp.retrbinary('RETR {}'.format(file), open(dest_file, 'wb').write) 56 print('download cucess:{} '.format(file).center(50, '=')) 57 # 从zip_dir 解压至根目录下 58 z = zipfile.ZipFile(dest_file, 'r') 59 z.extractall(path=root) 60 61 62 63 64 65 # 找到指定目录['2016-08-30', '2016-08-31'], 只要是以指定格式开头的都算 66 for file in os.listdir(root): 67 if file.startswith('2016'): 68 dest_dir.append(file) # 以2016开头的所有文件 69 70 71 # 创建转码后的目录 utf8/utf8.2016-08-30 72 for dir_name in dest_dir: 73 utf8_root = os.path.join(root, 'utf8') 74 utf8_dir = os.path.join(utf8_root, 'utf8.' + dir_name) 75 if os.path.exists(utf8_root): 76 if not os.path.exists(utf8_dir): 77 os.mkdir(utf8_dir) 78 else: 79 print(utf8_dir, 'already exsit') 80 else: 81 os.mkdir(utf8_root) 82 os.mkdir(utf8_dir) 83 84 85 86 # 转码后保存至 utf8/utf8.2016-08-30/filename-utf8 87 for dir_name in dest_dir: 88 print('begin to update DB ,dir is {}'.format(dir_name).center(50, '=')) 89 dir_abs = os.path.join(root, dir_name) 90 for file in os.listdir(dir_abs): 91 logfile_abs = os.path.join(dir_abs, file) 92 utf8_file = os.path.join(root, utf8_dir, file+'-utf8') 93 if os.path.exists(utf8_file): 94 print('utf8 file already exists') 95 continue 96 with open(logfile_abs, 'r', encoding='GBK') as f1, open(utf8_file, 'w', encoding='utf-8') as f2: 97 for line in f1: 98 f2.write(line) 99 100 # 开始导入数据 101 sql_file = '\'' + utf8_file + '\'' 102 table = table_dic.get(dest_dir[0][5:7]) 103 sql_cmd = 'mysql -h127.0.0.1 --local-infile -uroot -p\'redhat123\' -e "load data local infile '\ 104 + sql_file + ' into table ' + table 105 obj = subprocess.Popen(sql_cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 106 stdout_data, stderro_data = obj.communicate() 107 108 if len(stdout_data) == 0: 109 print('sql load data ok, file:{}'.format(utf8_file)) 110 else: 111 print('sql load error, file:{}'.format(utf8_file)) 112 113 else: 114 print('sql update cuccess:{}'.format(dir_name).center(50, '=')) 115 116 # 清除解压后的目录 117 118 for name in dest_dir: 119 shutil.rmtree(name) 120 print('{} dir cleand'.format(name).center(50, '='))