Python:使用Jupytrer Notebook操作数据库

一、前提

1.预装Python3环境

2.安装相关库

二、代码如下

import time
import pymysql
import pandas as pd
from datetime import datetime,timedelta

t1=datetime.strftime(datetime.now()-timedelta(days=1),"%Y-%m-%d") # T-1 
t2=datetime.strftime(datetime.now(),"%Y-%m-%d") # T like '2021-06-11'
a1=int(time.mktime(time.strptime("{0} 0:0:0".format(t1), '%Y-%m-%d %H:%M:%S')))*1000
a2=int(time.mktime(time.strptime("{0} 0:0:0".format(t2), '%Y-%m-%d %H:%M:%S')))*1000
a1,a2,t1,t2

cuor = pymysql.connect(host='', user='',
                       passwd="", db='')
curor = cuor.cursor()
ucol='record_id pay_channel pay_config_key biz_scenario biz_serial_no pay_amount pay_description response_time pay_status stage user_id our_trans_number third_trans_number receipt create_time update_time'.replace(' ',',')

def daily_user(t1,t2,stage='senior'): #注册
    sql1="select platform,count(platform) from app_user_base where account_source<>'DEVICE' and "
    sql2="user_register_stage='{0}' and create_time BETWEEN ".format(stage)
    sql3="'{0} 00:00:00' AND '{1} 00:00:00' group by platform".format(t1,t2)
    cur.execute(sql1+sql2+sql3)
    data = cur.fetchall()
    bss=[i for i in data]
    print(stage,sum([i[1] for i in bss]),bss)
print(t1,t2)
#高中
daily_user(t1,t2,stage='senior')
#初中
daily_user(t1,t2,stage='junior')

#收入表 收入流水

cuor = pymysql.connect(host='', user='',
                       passwd="", db='')
curor = cuor.cursor()
ucol='record_id pay_channel pay_config_key biz_scenario biz_serial_no pay_amount pay_description response_time pay_status stage user_id our_trans_number third_trans_number receipt create_time update_time'.replace(' ',',')
a1=int(time.mktime(time.strptime("{0} 0:0:0".format(t1), '%Y-%m-%d %H:%M:%S')))*1000
a2=int(time.mktime(time.strptime("{0} 0:0:0".format(t2), '%Y-%m-%d %H:%M:%S')))*1000
msql ="select {0} from mall_pay_record where response_time>={1} and response_time<{2}".format(ucol,a1,a2) #根据时间取得数据
curor.execute(msql)
data = curor.fetchall()
btd=pd.DataFrame([i for i in data],columns=ucol.split(','))
btd['brt']=btd['response_time'].apply(lambda x:pd.Timestamp.fromtimestamp(x/1000))
print(len(btd),'#len(btd)')

btd.to_excel('{0}-{1}-收入情况-原表-1.xlsx'.format(t1,t2))
btd['pay_amount']=btd['pay_amount'].apply(lambda x:float(x))
#btd=btd.loc[btd['pay_status']=='SUCCESS'] 
btd=btd.loc[btd['pay_amount']>1]

def cds(x):
    if 'tyzx' in x:
        return 2
    if '-' in x:
        if '充值' in x:
            return 1
        else:
            a1=x.split('-')
            if '订单编号' in a1[1]:
                a2=a1[1].replace('订单编号','')
                return int(a2)
            else:
                return int(a1[1])
    return 0
btd['order_id']=btd['pay_description'].apply(lambda x:cds(x))
oid=set(btd['order_id'])
oid=[i for i in oid if i>1]
print(len(oid),'#len(oid)')
#根据订单号得到具体的购买信息
def mall_sql(oid,curor,ucol,od='mall_order'):
    res=[]
    olen=len(oid)
    if olen<1:
        return res
    elif olen<=2000:
        msql="select {0} from {1} where cash_part>0 and order_id in {2}".format(ucol,od,str(tuple(oid)))
        if olen==1:
            msql="select {0} from {1} where cash_part>0 and order_id={2}".format(ucol,od,oid[0])
        curor.execute(msql)
        data = curor.fetchall()
        res=[i for i in data]
        return res
    else:
        a1=oid[:2000]
        a2=oid[2000:]
        return mall_sql(a1,curor,ucol)+mall_sql(a2,curor,ucol)
ucol='order_id goods_id goods_stage goods_type goods_resource goods_name create_by authorized_status user_id mall_price settle_price cash_part balance_cash_part need_express create_time update_time'.replace(' ',',')

data1=mall_sql(oid,curor,ucol,od='mall_order_detail')
c01='order_id goods_id goods_stage goods_type goods_resource goods_name create_by authorized_status user_id mall_price settle_price cash_part balance_cash_part need_express create_time update_time'
btdt=pd.DataFrame(data1,columns=c01.split())
print(len(btdt),'#len(btdt)')
bd=btd.merge(btdt,on='order_id',how='left')

ucol='order_id order_number user_nick order_source order_type employee_id employee_department_id order_channel actually_paid create_by'.replace(' ',',')
data2=mall_sql(oid,curor,ucol,od='mall_order')
c02='order_id order_number user_nick order_source order_type employee_id did order_channel actually_paid1 create_by1'
btdtw=pd.DataFrame(data2,columns=c02.split())
print(len(btdtw))
bd=bd.merge(btdtw,on='order_id',how='left')
bd.rename(columns={'user_id_x': 'uid'},inplace=True)
jlst=list(set(bd['uid']))


conn = pymysql.connect(host='', user='',
                       passwd="", db='')
cur = conn.cursor()
ucol='user_id,le_id,user_register_channel,user_register_time,mobile_location,user_nick'
def uid_sql(oid,cur,ucol,od='app_user_base'):
    res=[]
    olen=len(oid)
    print(olen)
    if olen<1:
        return res
    elif olen<=2000:
        msql="select {0} from {1} where user_id in {2}".format(ucol,od,str(tuple(oid)))
        if olen==1:
            msql="select {0} from {1} where user_id = {2}".format(ucol,od,oid[0])
        cur.execute(msql)
        data = cur.fetchall()
        res=[i for i in data]
        return res
    else:
        a1=oid[:2000]
        a2=oid[2000:]
        return uid_sql(a1,cur,ucol)+uid_sql(a2,cur,ucol)

data =uid_sql(jlst,cur,ucol,od='app_user_base')
btdk=pd.DataFrame(data,columns='uid 公司号 注册渠道 注册时间 手机归属地 昵称'.split())
btdk['注册时间']=btdk['注册时间'].apply(lambda x:pd.Timestamp.fromtimestamp(x/1000))
bd=bd.merge(btdk,on='uid',how='left')

#goods_id,gln,sln 
glst=list(set(bd['goods_id']))
glst=[i for i in glst if i>0]
print(len(glst),'#len(glst)')
gsql='''
select goods_id,
       max(case category_keyword when 'Grade'
           then label_name end) as gln,
       max(case category_keyword when 'Subject'
           then label_name end) as sln
from user_order.gds_goods_label gg
left join user_order.gds_label gb on gg.label_id=gb.label_id
where  gg.goods_id in {0}
group by goods_id'''.format(str(tuple(glst)))

curor.execute(gsql)
gdata = curor.fetchall()
gbtd=pd.DataFrame([i for i in gdata],columns='goods_id gln sln'.split())
print(len(gbtd),'#len(gbtd)')
bd=bd.merge(gbtd,on='goods_id',how='left')

kwsh=[i for i in set(btdtw['employee_id']) if i!=None]

conncrm = pymysql.connect(host='', user='',
                          passwd="", db='')
curcrm = conncrm.cursor()
ucks='employee_id,real_name,department_name,leader_name,department_id'
sql01="select {0} from crm_employee_user where employee_id in {1}".format(ucks,str(tuple(kwsh)))
if len(kwsh)==1: #只有一个eid
    sql01="select {0} from crm_employee_user where employee_id in ({1})".format(ucks,kwsh[0])
curcrm.execute(sql01)
data = curcrm.fetchall()
ceu=pd.DataFrame([i for i in data],columns=ucks.split(','))
ceu['employee_id']=ceu['employee_id'].apply(lambda x:str(x))
kdsh=[i for i in set(btdtw['did']) if i!=None]
cur = conn.cursor()
ucks='department_id,business_line_name'
sql02="select {0} from crm_department where department_id in {1}".format(ucks,str(tuple(kdsh)))
if len(kwsh)==1: #只有一个department_id
    sql02="select {0} from crm_department where department_id in ({1})".format(ucks,kdsh[0])
curcrm.execute(sql02)
data = curcrm.fetchall()
ceu1=pd.DataFrame([i for i in data],columns='did,business_line_name'.split(','))
ceu1['did']=ceu1['did'].apply(lambda x:str(x))
#ceu=ceu.merge(ceu1,on='department_id',how='left')
bd=bd.merge(ceu,on='employee_id',how='left')
bd=bd.merge(ceu1,on='did',how='left')

t3=t1.replace('-','')

#做分摊处理和改列顺序

def filterShourdfw(sdf): #对收入表进行过滤处理
    sdf['订单总金额'] = sdf['pay_amount']
    #sdf['dropidx']=sdf.apply(self.filterToDrop,axis=1)
    #sdf.drop(sdf.loc[sdf['dropidx']==1].index,inplace=True)
    uids = set(sdf['biz_serial_no'])  # 不用订单号因为充值的订单号为0
    for u in uids:
        one = sdf.loc[sdf['biz_serial_no'] == u]  # 获取所有uid等于u的行，之后只会保存一行
        # 在这里写if然后只保留一行，然后concat到ndf上，实现只保留一行
        olen = len(one)
        if olen > 1:
            avg = list(one['pay_amount'])[0] / olen
            print(avg, type(avg))
            for i, v in one.iterrows():
                sdf.loc[i, 'pay_amount'] = avg
    return sdf
bd=filterShourdfw(bd)
ptt={'junior':'初中','senior':'高中'}
bd['stage']=bd['stage'].apply(lambda x:ptt.get(x,x))
bd.rename(columns={'brt': '支付时间', 'create_time': '创建时间', 'pay_channel': '支付平台', 'leid': '公司号',
                   'stage':'平台','business_line_name':'业务部',
                   'goods_name': '名称','order_id':'订单号','pay_amount':'收入金额','goods_stage':'商品所在平台'},
                      inplace=True)

s_idx = '支付时间 uid 公司号 名称 收入金额 平台 gln real_name department_name 业务部 订单总金额 订单号 注册时间 支付平台'.split()
for c in bd.columns:
    if c in ['user_id','cuser_id','stage']:
        continue
    if c not in s_idx:
        s_idx.append(c)
bd=bd.loc[:, s_idx]  # 更改列的顺序
#bdg=bd.loc[bd['业务部']=='在线电销']
#bd.to_excel('{0}-{1}-收入情况-收入表-1.xlsx'.format(t1,t2),index=False)

def srs_classify(d):
    # 平台 业务部  gln  order_type 辅助的 注册渠道
    if d['order_type']=='exp_center':
        return '体验中心'
    if d['平台']=='初中':
        if d['业务部']=='辅导老师工作台':
            return '辅导-小初'
        elif '昆山' in str(d['department_name']):
            if '年级' in d['gln']: return '昆山-小学'
            elif '初' in d['gln']: return '昆山-初中'
            elif '高' in d['gln']: return '昆山-高中' #理论上不存在
            else: print(d['gln'],d['订单号'])
            return '昆山-初中'
        elif d['业务部']=='在线电销':
            if '年级' in d['gln']: return '电销-小学'
            elif '初' in d['gln']: return '电销-初中'
            elif '高' in d['gln']: return '电销-高中' #理论上不存在
            else: print(d['gln'],d['订单号'])
            return '电销-高中'
        elif '政哥阿米巴' in str(d['业务部']):
            if '年级' in d['gln']: return '政哥阿米巴-小学'
            elif '初' in d['gln']: return '政哥阿米巴-初中'
            else: print(d['gln'],d['订单号'])
            return '政哥阿米巴-高中'
        elif d['业务部']=='社群运营':
            if '年级' in d['gln']: return '社群-小学'
            elif '初' in d['gln']: return '社群-初中'
            elif '高' in d['gln']: return '社群-高中'
            else: print(d['gln'],d['订单号'])
            return '社群-小学'
        elif '阿米巴' in str(d['业务部']): #社群
            if '年级' in d['gln']: return '社群-小学'
            elif '初' in d['gln']: return '社群-初中'
            elif '高' in d['gln']: return '社群-高中'
            else: print(d['gln'],d['订单号'])
            return '社群-小学'
    elif d['平台']=='高中' or '高中' in str(d['平台']):
        if d['业务部']=='辅导老师工作台':
            return '辅导-高中'
        elif '昆山' in str(d['department_name']):
            return '昆山-高中'
        elif d['业务部']=='在线电销':
            #if '年级' in d['gln']: return '电销-小学'
            return '电销-高中'
        elif '政哥阿米巴' in str(d['业务部']) or d['业务部']=='政哥阿米巴':
            return '政哥阿米巴-高中'
        elif d['业务部']=='社群运营':
            return '社群-高中'
        elif '阿米巴' in str(d['业务部']): #社群
            return '社群-高中'
    else:
        print(d['平台'])
    return '空'

bd['tt']=bd.apply(lambda x:srs_classify(x),axis=1)

def sru_pretty_pr(bd):
    res={'gmv':dict(bd.groupby('tt')['收入金额'].sum())}
    bd2=bd.loc[bd['订单总金额']>100]
    res['ucount']=dict(bd2.groupby('tt')['uid'].nunique())
    res['all']=bd['收入金额'].sum()
    #for k,v in res.items():print(k,v)
    return res
sru_pretty_pr(bd)

#消费表
#t1='2020-12-31'
print(t1)
ucol='order_id goods_stage goods_type goods_resource goods_name authorized_status user_id mall_price settle_price cash_part balance_cash_part need_express create_time update_time'.replace(' ',',')
msql="select {0} from mall_order_detail where mall_price>0 and cash_part>0 and date_format(create_time,'%Y%m%d')='{1}'".format(ucol,t1.replace('-',''))
curor.execute(msql)
data = curor.fetchall()
btda=pd.DataFrame([i for i in data],columns='order_id goods_stage goods_type goods_resource goods_name authorized_status user_id mall_price settle_price cash_part balance_cash_part need_express create_time update_time'.split())
print(len(btda))
olst=list(set(btda['order_id']))
print(len(olst))
#btda.to_excel('{0}-1212111-消费1.xlsx'.format(t1))

u2='order_id stage order_type order_channel pay_type actually_paid cash_part order_status pay_status employee_id paid_time express_status is_second_kill is_offline_site is_shopping_cart remark create_time update_time'.replace(' ',',')
data1=mall_sql(olst,curor,u2,od='mall_order')
btdw=pd.DataFrame(data1,columns=u2.split(','))
print(len(btdw))
def emp(x):
    try:return int(x)
    except:return 0
btdw['employee_i']=btdw['employee_id'].apply(lambda x:emp(x))
kwsh=list(set(btdw['employee_i']))
ucks='employee_id,real_name,department_name,leader_name,department_id'
curcrm.execute("select {0} from crm_employee_user where employee_id in {1}".format(ucks,str(tuple(kwsh))))
data = curcrm.fetchall()
ceu=pd.DataFrame([i for i in data],columns=ucks.split(','))
ceu['employee_id']=ceu['employee_id'].apply(lambda x:str(x))
kdsh=list(set(ceu['department_id']))
cur = conn.cursor()
ucks='department_id,business_line_name'
sql02="select {0} from crm_department where department_id in {1}".format(ucks,str(tuple(kdsh)))
if len(kwsh)==1: #只有一个department_id
    sql02="select {0} from crm_department where department_id in ({1})".format(ucks,kdsh[0])
curcrm.execute(sql02)
data = curcrm.fetchall()
ceu1=pd.DataFrame([i for i in data],columns=ucks.split(','))
ceu=ceu.merge(ceu1,on='department_id',how='left')
btd=btdw.merge(ceu,on='employee_id',how='left')
btd.to_excel('{0}-1212111-消费2-2.xlsx'.format(t1))

#消费表的筛选统计
btd1=btd.loc[btd['stage']=='senior']
btd1=btd1.loc[btd1['pay_status']=='paid']
btd1=btd1.loc[btd1['order_type']!='exp_center'] #那这个条件其实不需要
btd1=btd1.loc[btd1['business_line_name']=='在线电销'] #可能会有eid为空的情况也算到电销？
btd1['actually_paid']=btd1['actually_paid'].apply(lambda x:float(x))
print(btd1['actually_paid'].sum())

posted @ 2021-08-13 18:30 明明就- 阅读(63) 评论(0) 编辑收藏举报

会员力量，点亮园子希望

刷新页面返回顶部

明明就-

Python:使用Jupytrer Notebook操作数据库

公告