奇葩的报表(连更-对,昨天发生大事了。。。。)

  • 背景:
    • A、B2个IDC,A地全为windows服务器(游戏业务);B地为linux(数据统计、充值、站点业务)
    • A地每个区服每天运行会产生很多日志,为玩家账号数据变动信息(装备更新、角色属性等)
    • 需要将A地日志拉倒B地进行报表输出后给运营团队分析

 

  • 奇葩点:
    • 游戏业务团队只提供FTP(真TM安全),余事不管
    • 日志为GBK,OS、DB、APP处理的都是utf8,so不能通过shell来处理,且不同名称日志导入不同表中...
    • 时间紧急,早上的需求下午就要看报表(想什么来什么.....)

 

  • 思路:
    • 用python(写的是脚本,连对象都不要),先不管性能,先实现功能后后续优化

 

  • 姿势上码:
  •   1 #!/usr/bin/env python
      2 # -*- coding:utf-8 -*-
      3 # author: qiaogy
      4 
      5 from ftplib import FTP
      6 import os
      7 import sys
      8 import shutil
      9 import subprocess
     10 import zipfile
     11 
     12 
     13 root = os.path.dirname(os.path.abspath(__file__))
     14 sys.path.append(root)
     15 
     16 dest_dir = []  # 要处理的日志目录
     17 table_dic = {
     18     '01': 'gamereport.tb_game_detail_info_30012_1_01"',
     19     '02': 'gamereport.tb_game_detail_info_30012_1_02"',
     20     '03': 'gamereport.tb_game_detail_info_30012_1_03"',
     21     '04': 'gamereport.tb_game_detail_info_30012_1_04"',
     22     '05': 'gamereport.tb_game_detail_info_30012_1_05"',
     23     '06': 'gamereport.tb_game_detail_info_30012_1_06"',
     24     '07': 'gamereport.tb_game_detail_info_30012_1_07"',
     25     '08': 'gamereport.tb_game_detail_info_30012_1_08"',
     26     '09': 'gamereport.tb_game_detail_info_30012_1_09"',
     27     '10': 'gamereport.tb_game_detail_info_30012_1_10"',
     28     '11': 'gamereport.tb_game_detail_info_30012_1_11"',
     29     '12': 'gamereport.tb_game_detail_info_30012_1_12"',
     30 }
     31 
     32 
     33 # 登陆
     34 ftp = FTP('x.x.x.x')
     35 ftp.login('qiaogy', 'x.x.x.x')
     36 ftp.cwd('/chuanqi/lxtx1qu')
     37 
     38 # 创建 zip 归档下载目录
     39 zip_dir = os.path.join(root, 'zip_dir')
     40 if not os.path.exists(zip_dir):
     41     os.mkdir(zip_dir)
     42 
     43 
     44 # 求差集,避免下载已经处理过得zip包
     45 already_zip = os.listdir(zip_dir)
     46 des_zip_dir = ftp.nlst()
     47 for item in already_zip:
     48     if item in des_zip_dir:
     49         des_zip_dir.remove(item)
     50 
     51 # 从ftp下载 zip文件,放入 zip_dir
     52 for file in des_zip_dir:
     53     if file.endswith('.zip'):
     54         dest_file = os.path.join(zip_dir, file)
     55         ftp.retrbinary('RETR {}'.format(file), open(dest_file, 'wb').write)
     56         print('download cucess:{} '.format(file).center(50, '='))
     57         # 从zip_dir 解压至根目录下
     58         z = zipfile.ZipFile(dest_file, 'r')
     59         z.extractall(path=root)
     60 
     61 
     62 
     63 
     64 
     65 # 找到指定目录['2016-08-30', '2016-08-31'], 只要是以指定格式开头的都算
     66 for file in os.listdir(root):
     67     if file.startswith('2016'):
     68         dest_dir.append(file)  # 以2016开头的所有文件
     69 
     70 
     71 # 创建转码后的目录    utf8/utf8.2016-08-30
     72 for dir_name in dest_dir:
     73     utf8_root = os.path.join(root, 'utf8')
     74     utf8_dir = os.path.join(utf8_root, 'utf8.' + dir_name)
     75     if os.path.exists(utf8_root):
     76         if not os.path.exists(utf8_dir):
     77             os.mkdir(utf8_dir)
     78         else:
     79             print(utf8_dir, 'already exsit')
     80     else:
     81         os.mkdir(utf8_root)
     82         os.mkdir(utf8_dir)
     83 
     84 
     85 
     86 # 转码后保存至 utf8/utf8.2016-08-30/filename-utf8
     87 for dir_name in dest_dir:
     88     print('begin to update DB ,dir is {}'.format(dir_name).center(50, '='))
     89     dir_abs = os.path.join(root, dir_name)
     90     for file in os.listdir(dir_abs):
     91         logfile_abs = os.path.join(dir_abs, file)
     92         utf8_file = os.path.join(root, utf8_dir, file+'-utf8')
     93         if os.path.exists(utf8_file):
     94             print('utf8 file already exists')
     95             continue
     96         with open(logfile_abs, 'r', encoding='GBK') as f1, open(utf8_file, 'w', encoding='utf-8') as f2:
     97             for line in f1:
     98                 f2.write(line)
     99 
    100         # 开始导入数据
    101         sql_file = '\'' + utf8_file + '\''
    102         table = table_dic.get(dest_dir[0][5:7])
    103         sql_cmd = 'mysql -h127.0.0.1 --local-infile -uroot -p\'redhat123\' -e "load data local infile '\
    104                   + sql_file + ' into table ' + table
    105         obj = subprocess.Popen(sql_cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    106         stdout_data, stderro_data = obj.communicate()
    107 
    108         if len(stdout_data) == 0:
    109             print('sql load data ok, file:{}'.format(utf8_file))
    110         else:
    111             print('sql load error, file:{}'.format(utf8_file))
    112 
    113     else:
    114         print('sql update cuccess:{}'.format(dir_name).center(50, '='))
    115 
    116 # 清除解压后的目录
    117 
    118 for name in dest_dir:
    119     shutil.rmtree(name)
    120     print('{} dir cleand'.format(name).center(50, '='))

     

posted on 2016-09-05 10:56  台灯不太亮  阅读(211)  评论(0编辑  收藏  举报

导航