历史等权算法

 

 编程 规范  ,提高效率 ;  程序的用户使用高效性、健壮性、维护友好性   命令行传参 命令行输入  引入控制台参数 任务的总量 执行的进度  预估执行时间

 

# coding:utf-8
from tools import *


def del_data_before_write(singleDayDate):
    '''
    每次执行,需要一个完整的数据源;重置目标表;
    :param singleDayDate:
    :return:
    '''
    sql_without_limit = 'DELETE FROM sta_addr WHERE  date="{}"'.format(singleDayDate)
    for i in range(10000):
        print(i)
        if i % 10 == 0:
            sql = 'SELECT id  FROM sta_addr WHERE  date="{}" LIMIT 1'.format(singleDayDate)
            r = mysql_fetch(sql)
            if len(r) == 0:
                break
        sql = sql_without_limit + ' LIMIT 30000;'
        try:
            print(sql)
            mysql_write(sql)
        except Exception as e:
            print(e)


def write_(singleDayTable, singleDayDate):
    del_data_before_write(singleDayDate)

    sql_loop = 'SELECT id,uid,address,modify_time,pv,ip FROM {} LIMIT 10000'.format(singleDayTable)

    sql_head = 'INSERT INTO  sta_addr (uid,address,date,modify_time,pv,ip) VALUES ';

    for _ in range(3000):
        try:
            r = mysql_fetch(sql_loop)
            if r == False:
                continue
            if len(r) == 0:
                break
            del_id_l, batch_insert_l = [], []
            for i in r:
                id_, uid, address, modify_time, pv, ip = i
                del_id_l.append(id_)
                ll = [uid,
                      address,
                      singleDayDate,
                      modify_time,
                      pv,
                      ip]
                batch_insert_l.append(' ( "{}" ) '.format('","'.join([str(i) for i in ll])))

            batch_insert_sql = '{}{}'.format(sql_head, ','.join(batch_insert_l))
            try:
                print(_)
                mysql_write(batch_insert_sql)
            except Exception as e:
                print(e)
                print(batch_insert_sql)

            batch_del_sql = 'DELETE FROM {} WHERE id IN ({});'.format(singleDayTable,
                                                                      ','.join([str(i) for i in del_id_l]))
            try:
                mysql_write(batch_del_sql)
            except Exception as e:
                print(e)
                print(batch_del_sql)
        except Exception as e:
            print(e)


if __name__ == '__main__':
    singleDayTable = input("请输入数据源表:\n")
    singleDayDate = input("请输入处理日期,例如:2018-10-17:\n")

    print('您的输入结果:', singleDayTable, singleDayDate, '默认判断输入合法,下面开始执行')

    write_(singleDayTable, singleDayDate)

   

 

# coding:utf-8
from tools import *
import math

dateFrom = input("请输入处理日期闭区间的左端点,例如:2018-10-17:\n")
dateEnd = input("请输入处理日期闭区间的右端点,例如:2018-10-18:\n")

mktimeFrom, mktimeEnd = date2mktime(dateFrom), date2mktime(dateEnd)


def get_target_date():
    timestamp_init = int(time.mktime(time.strptime('2018-09-18', '%Y-%m-%d')))  # 只有从18号开始的日志
    d = []
    ii = timestamp_init - 60 * 60 * 24
    today = time.strftime('%Y-%m-%d', time.localtime(int(time.time())))
    for _ in range(60):
        ii += 60 * 60 * 24
        day = time.strftime('%Y-%m-%d', time.localtime(ii))
        if ii == int(time.mktime(time.strptime(today, '%Y-%m-%d'))):
            break
        d.append(day)
    d = sorted(d, reverse=True)
    return d


def progress_bar(info, raw_data_num, db_operation_counter, start_time, this_time,
                 estimatedRemainderMinutesAlgorithm='Historical equal weight algorithm',
                 date_format='%Y-%m-%d %H:%M:%S'):
    '''

    :param info: 业务信息
    :param raw_data_num: 总局数据总条数
    :param db_operation_counter: 进度计数
    :param start_time: 开始时间
    :param this_time: 统计时间
    :param estimatedRemainderMinutesAlgorithm:  剩余时间估计采用历史等权算法
    :return:
    '''
    console_log = {}
    console_log['info'], console_log['raw_data_num'], console_log['db_operation_counter'], console_log['start_time'], \
    console_log[
        'this_time'] = info, raw_data_num, db_operation_counter, start_time, this_time
    if db_operation_counter == 0:
        estimatedRemainderMinutes = -1
        percent = 0
    elif raw_data_num == db_operation_counter:
        estimatedRemainderMinutes = 0
        percent = 1
    else:
        percent = db_operation_counter / (raw_data_num - db_operation_counter)
        if estimatedRemainderMinutesAlgorithm == 'Historical equal weight algorithm':
            estimatedRemainderMinutes = (1 / percent - 1) / db_operation_counter * (
                date2mktime(this_time, date_format) - date2mktime(start_time, date_format)) / 60

    console_log['percent'], console_log['estimatedRemainderMinutes'] = percent, estimatedRemainderMinutes

    print(console_log)


sql_head = 'INSERT INTO  sta_addr (uid,address,date,modify_time,pv,ip) VALUES ';
target_date_l = get_target_date()
for target_date in target_date_l:
    chk_mktime = date2mktime(target_date)
    if chk_mktime < mktimeFrom or chk_mktime > mktimeEnd:
        continue
    mR = {}
    for f in file_list:
        print(f)
        if target_date not in f:
            continue
        mR_ = mapReduceFile(f)
        mR = mergeDic(mR, mR_)
    address_l, insert_l = [], []

    info, raw_data_num, db_operation_counter, start_time, this_time = target_date, len(mR), 0, getNow(), 0,
    progress_bar(info, raw_data_num, db_operation_counter, start_time, this_time)

    db_operation_step = 1000
    db_raw_data_num = len(mR)
    db_operation_times = math.ceil(db_raw_data_num / db_operation_step)
    for k in mR:
        try:
            uid, address, dt = k.split(',')
            counter, request_time, ip = mR[k]['counter'], mR[k]['request_time'], mR[k]['ip']
            dt = time.strftime('%Y-%m-%d', time.localtime(request_time))
            ll = [uid,
                  address,
                  dt,
                  request_time,
                  counter,
                  ip]
            address_l.append('"{}"'.format(address))
            insert_l.append(' ( "{}" ) '.format('","'.join([str(i) for i in ll])))
            if len(insert_l) == db_operation_step:
                sql = 'DELETE FROM sta_addr WHERE  date="{}" AND address IN ({})'.format(target_date,
                                                                                         ','.join(address_l))
                try:
                    print('del')
                    mysql_write(sql)
                except Exception as e:
                    print(e)
                    print(sql)
                sql = '{}{}'.format(sql_head, ','.join(insert_l))
                try:
                    print('insert')
                    mysql_write(sql)
                except Exception as e:
                    print(e)
                    print(sql)
                address_l, insert_l = [], []
                db_operation_counter += db_operation_step
                info, raw_data_num, db_operation_counter, start_time, this_time = info, raw_data_num, db_operation_counter, start_time, getNow()
                progress_bar(info, raw_data_num, db_operation_counter, start_time, this_time)
        except Exception as e:
            print(k)
            print(e)
    try:
        if db_operation_times * db_operation_step > db_raw_data_num:
            sql = 'DELETE FROM sta_addr WHERE  date="{}" AND address IN ({})'.format(target_date,
                                                                                     ','.join(address_l))
            try:
                print('del')
                mysql_write(sql)
            except Exception as e:
                print(e)
                print(sql)
            sql = '{}{}'.format(sql_head, ','.join(insert_l))
            try:
                print('insert')
                mysql_write(sql)
            except Exception as e:
                print(e)
                print(sql)
            db_operation_counter += len(insert_l)
            info, raw_data_num, db_operation_counter, start_time, this_time = info, raw_data_num, db_operation_counter, start_time, getNow()
            progress_bar(info, raw_data_num, db_operation_counter, start_time, this_time)
            del address_l, insert_l
    except Exception as e:
        print(k)
        print(e)

  

{'this_time': '2018-10-23 10:28:57', 'raw_data_num': 623933, 'db_operation_counter': 94000, 'start_time': '2018-10-23 10:28:20', 'estimatedRemainderMinutes': 3.0423873924852874e-05, 'info': '2018-10-22', 'percent': 0.17738091419103924}
del
write-ok
insert
write-ok
{'this_time': '2018-10-23 10:28:58', 'raw_data_num': 623933, 'db_operation_counter': 95000, 'start_time': '2018-10-23 10:28:20', 'estimatedRemainderMinutes': 3.045143859649123e-05, 'info': '2018-10-22', 'percent': 0.17960686892290706}
del
write-ok
insert
write-ok
{'this_time': '2018-10-23 10:28:58', 'raw_data_num': 623933, 'db_operation_counter': 96000, 'start_time': '2018-10-23 10:28:20', 'estimatedRemainderMinutes': 2.9682895688657406e-05, 'info': '2018-10-22', 'percent': 0.18184125637154716}
del
write-ok
insert
write-ok
{'this_time': '2018-10-23 10:28:58', 'raw_data_num': 623933, 'db_operation_counter': 97000, 'start_time': '2018-10-23 10:28:20', 'estimatedRemainderMinutes': 2.89394090764162e-05, 'info': '2018-10-22', 'percent': 0.18408412454714357}
del
write-ok
insert
write-ok
{'this_time': '2018-10-23 10:28:59', 'raw_data_num': 623933, 'db_operation_counter': 98000, 'start_time': '2018-10-23 10:28:20', 'estimatedRemainderMinutes': 2.8962562473969184e-05, 'info': '2018-10-22', 'percent': 0.18633552182502333}
del
write-ok
insert
write-ok
{'this_time': '2018-10-23 10:28:59', 'raw_data_num': 623933, 'db_operation_counter': 99000, 'start_time': '2018-10-23 10:28:20', 'estimatedRemainderMinutes': 2.8247775737169676e-05, 'info': '2018-10-22', 'percent': 0.18859549694913447}
del
write-ok
insert
write-ok
{'this_time': '2018-10-23 10:29:00', 'raw_data_num': 623933, 'db_operation_counter': 100000, 'start_time': '2018-10-23 10:28:20', 'estimatedRemainderMinutes': 2.82622e-05, 'info': '2018-10-22', 'percent': 0.1908640990355637}
del
write-ok
insert
write-ok
{'this_time': '2018-10-23 10:29:00', 'raw_data_num': 623933, 'db_operation_counter': 101000, 'start_time': '2018-10-23 10:28:20', 'estimatedRemainderMinutes': 2.7574616867627353e-05, 'info': '2018-10-22', 'percent': 0.19314137757609484}
del
write-ok
insert
write-ok
{'this_time': '2018-10-23 10:29:00', 'raw_data_num': 623933, 'db_operation_counter': 102000, 'start_time': '2018-10-23 10:28:20', 'estimatedRemainderMinutes': 2.6908432654107393e-05, 'info': '2018-10-22', 'percent': 0.19542738244180766}
del
write-ok
insert
write-ok
{'this_time': '2018-10-23 10:29:01', 'raw_data_num': 623933, 'db_operation_counter': 103000, 'start_time': '2018-10-23 10:28:20', 'estimatedRemainderMinutes': 2.6919365632953152e-05, 'info': '2018-10-22', 'percent': 0.19772216388671865}
del
write-ok
insert
write-ok

  

 

posted @ 2018-10-18 14:08  papering  阅读(261)  评论(0编辑  收藏  举报