Python:使用Jupytrer Notebook操作数据库
一、前提
1.预装Python3环境
2.安装相关库
二、代码如下
import time import pymysql import pandas as pd from datetime import datetime,timedelta
t1=datetime.strftime(datetime.now()-timedelta(days=1),"%Y-%m-%d") # T-1 t2=datetime.strftime(datetime.now(),"%Y-%m-%d") # T like '2021-06-11' a1=int(time.mktime(time.strptime("{0} 0:0:0".format(t1), '%Y-%m-%d %H:%M:%S')))*1000 a2=int(time.mktime(time.strptime("{0} 0:0:0".format(t2), '%Y-%m-%d %H:%M:%S')))*1000 a1,a2,t1,t2
cuor = pymysql.connect(host='', user='', passwd="", db='') curor = cuor.cursor() ucol='record_id pay_channel pay_config_key biz_scenario biz_serial_no pay_amount pay_description response_time pay_status stage user_id our_trans_number third_trans_number receipt create_time update_time'.replace(' ',',')
def daily_user(t1,t2,stage='senior'): #注册 sql1="select platform,count(platform) from app_user_base where account_source<>'DEVICE' and " sql2="user_register_stage='{0}' and create_time BETWEEN ".format(stage) sql3="'{0} 00:00:00' AND '{1} 00:00:00' group by platform".format(t1,t2) cur.execute(sql1+sql2+sql3) data = cur.fetchall() bss=[i for i in data] print(stage,sum([i[1] for i in bss]),bss) print(t1,t2) #高中 daily_user(t1,t2,stage='senior') #初中 daily_user(t1,t2,stage='junior')
#收入表 收入流水 cuor = pymysql.connect(host='', user='', passwd="", db='') curor = cuor.cursor() ucol='record_id pay_channel pay_config_key biz_scenario biz_serial_no pay_amount pay_description response_time pay_status stage user_id our_trans_number third_trans_number receipt create_time update_time'.replace(' ',',') a1=int(time.mktime(time.strptime("{0} 0:0:0".format(t1), '%Y-%m-%d %H:%M:%S')))*1000 a2=int(time.mktime(time.strptime("{0} 0:0:0".format(t2), '%Y-%m-%d %H:%M:%S')))*1000 msql ="select {0} from mall_pay_record where response_time>={1} and response_time<{2}".format(ucol,a1,a2) #根据时间取得数据 curor.execute(msql) data = curor.fetchall() btd=pd.DataFrame([i for i in data],columns=ucol.split(',')) btd['brt']=btd['response_time'].apply(lambda x:pd.Timestamp.fromtimestamp(x/1000)) print(len(btd),'#len(btd)') btd.to_excel('{0}-{1}-收入情况-原表-1.xlsx'.format(t1,t2)) btd['pay_amount']=btd['pay_amount'].apply(lambda x:float(x)) #btd=btd.loc[btd['pay_status']=='SUCCESS'] btd=btd.loc[btd['pay_amount']>1] def cds(x): if 'tyzx' in x: return 2 if '-' in x: if '充值' in x: return 1 else: a1=x.split('-') if '订单编号' in a1[1]: a2=a1[1].replace('订单编号','') return int(a2) else: return int(a1[1]) return 0 btd['order_id']=btd['pay_description'].apply(lambda x:cds(x)) oid=set(btd['order_id']) oid=[i for i in oid if i>1] print(len(oid),'#len(oid)') #根据订单号得到具体的购买信息 def mall_sql(oid,curor,ucol,od='mall_order'): res=[] olen=len(oid) if olen<1: return res elif olen<=2000: msql="select {0} from {1} where cash_part>0 and order_id in {2}".format(ucol,od,str(tuple(oid))) if olen==1: msql="select {0} from {1} where cash_part>0 and order_id={2}".format(ucol,od,oid[0]) curor.execute(msql) data = curor.fetchall() res=[i for i in data] return res else: a1=oid[:2000] a2=oid[2000:] return mall_sql(a1,curor,ucol)+mall_sql(a2,curor,ucol) ucol='order_id goods_id goods_stage goods_type goods_resource goods_name create_by authorized_status user_id mall_price settle_price cash_part balance_cash_part need_express create_time update_time'.replace(' ',',') data1=mall_sql(oid,curor,ucol,od='mall_order_detail') c01='order_id goods_id goods_stage goods_type goods_resource goods_name create_by authorized_status user_id mall_price settle_price cash_part balance_cash_part need_express create_time update_time' btdt=pd.DataFrame(data1,columns=c01.split()) print(len(btdt),'#len(btdt)') bd=btd.merge(btdt,on='order_id',how='left') ucol='order_id order_number user_nick order_source order_type employee_id employee_department_id order_channel actually_paid create_by'.replace(' ',',') data2=mall_sql(oid,curor,ucol,od='mall_order') c02='order_id order_number user_nick order_source order_type employee_id did order_channel actually_paid1 create_by1' btdtw=pd.DataFrame(data2,columns=c02.split()) print(len(btdtw)) bd=bd.merge(btdtw,on='order_id',how='left') bd.rename(columns={'user_id_x': 'uid'},inplace=True) jlst=list(set(bd['uid'])) conn = pymysql.connect(host='', user='', passwd="", db='') cur = conn.cursor() ucol='user_id,le_id,user_register_channel,user_register_time,mobile_location,user_nick' def uid_sql(oid,cur,ucol,od='app_user_base'): res=[] olen=len(oid) print(olen) if olen<1: return res elif olen<=2000: msql="select {0} from {1} where user_id in {2}".format(ucol,od,str(tuple(oid))) if olen==1: msql="select {0} from {1} where user_id = {2}".format(ucol,od,oid[0]) cur.execute(msql) data = cur.fetchall() res=[i for i in data] return res else: a1=oid[:2000] a2=oid[2000:] return uid_sql(a1,cur,ucol)+uid_sql(a2,cur,ucol) data =uid_sql(jlst,cur,ucol,od='app_user_base') btdk=pd.DataFrame(data,columns='uid 公司号 注册渠道 注册时间 手机归属地 昵称'.split()) btdk['注册时间']=btdk['注册时间'].apply(lambda x:pd.Timestamp.fromtimestamp(x/1000)) bd=bd.merge(btdk,on='uid',how='left') #goods_id,gln,sln glst=list(set(bd['goods_id'])) glst=[i for i in glst if i>0] print(len(glst),'#len(glst)') gsql=''' select goods_id, max(case category_keyword when 'Grade' then label_name end) as gln, max(case category_keyword when 'Subject' then label_name end) as sln from user_order.gds_goods_label gg left join user_order.gds_label gb on gg.label_id=gb.label_id where gg.goods_id in {0} group by goods_id'''.format(str(tuple(glst))) curor.execute(gsql) gdata = curor.fetchall() gbtd=pd.DataFrame([i for i in gdata],columns='goods_id gln sln'.split()) print(len(gbtd),'#len(gbtd)') bd=bd.merge(gbtd,on='goods_id',how='left') kwsh=[i for i in set(btdtw['employee_id']) if i!=None] conncrm = pymysql.connect(host='', user='', passwd="", db='') curcrm = conncrm.cursor() ucks='employee_id,real_name,department_name,leader_name,department_id' sql01="select {0} from crm_employee_user where employee_id in {1}".format(ucks,str(tuple(kwsh))) if len(kwsh)==1: #只有一个eid sql01="select {0} from crm_employee_user where employee_id in ({1})".format(ucks,kwsh[0]) curcrm.execute(sql01) data = curcrm.fetchall() ceu=pd.DataFrame([i for i in data],columns=ucks.split(',')) ceu['employee_id']=ceu['employee_id'].apply(lambda x:str(x)) kdsh=[i for i in set(btdtw['did']) if i!=None] cur = conn.cursor() ucks='department_id,business_line_name' sql02="select {0} from crm_department where department_id in {1}".format(ucks,str(tuple(kdsh))) if len(kwsh)==1: #只有一个department_id sql02="select {0} from crm_department where department_id in ({1})".format(ucks,kdsh[0]) curcrm.execute(sql02) data = curcrm.fetchall() ceu1=pd.DataFrame([i for i in data],columns='did,business_line_name'.split(',')) ceu1['did']=ceu1['did'].apply(lambda x:str(x)) #ceu=ceu.merge(ceu1,on='department_id',how='left') bd=bd.merge(ceu,on='employee_id',how='left') bd=bd.merge(ceu1,on='did',how='left') t3=t1.replace('-','') #做分摊处理和改列顺序 def filterShourdfw(sdf): #对收入表进行过滤处理 sdf['订单总金额'] = sdf['pay_amount'] #sdf['dropidx']=sdf.apply(self.filterToDrop,axis=1) #sdf.drop(sdf.loc[sdf['dropidx']==1].index,inplace=True) uids = set(sdf['biz_serial_no']) # 不用订单号因为充值的订单号为0 for u in uids: one = sdf.loc[sdf['biz_serial_no'] == u] # 获取所有uid等于u的行,之后只会保存一行 # 在这里写if然后只保留一行,然后concat到ndf上,实现只保留一行 olen = len(one) if olen > 1: avg = list(one['pay_amount'])[0] / olen print(avg, type(avg)) for i, v in one.iterrows(): sdf.loc[i, 'pay_amount'] = avg return sdf bd=filterShourdfw(bd) ptt={'junior':'初中','senior':'高中'} bd['stage']=bd['stage'].apply(lambda x:ptt.get(x,x)) bd.rename(columns={'brt': '支付时间', 'create_time': '创建时间', 'pay_channel': '支付平台', 'leid': '公司号', 'stage':'平台','business_line_name':'业务部', 'goods_name': '名称','order_id':'订单号','pay_amount':'收入金额','goods_stage':'商品所在平台'}, inplace=True) s_idx = '支付时间 uid 公司号 名称 收入金额 平台 gln real_name department_name 业务部 订单总金额 订单号 注册时间 支付平台'.split() for c in bd.columns: if c in ['user_id','cuser_id','stage']: continue if c not in s_idx: s_idx.append(c) bd=bd.loc[:, s_idx] # 更改列的顺序 #bdg=bd.loc[bd['业务部']=='在线电销'] #bd.to_excel('{0}-{1}-收入情况-收入表-1.xlsx'.format(t1,t2),index=False)
def srs_classify(d): # 平台 业务部 gln order_type 辅助的 注册渠道 if d['order_type']=='exp_center': return '体验中心' if d['平台']=='初中': if d['业务部']=='辅导老师工作台': return '辅导-小初' elif '昆山' in str(d['department_name']): if '年级' in d['gln']: return '昆山-小学' elif '初' in d['gln']: return '昆山-初中' elif '高' in d['gln']: return '昆山-高中' #理论上不存在 else: print(d['gln'],d['订单号']) return '昆山-初中' elif d['业务部']=='在线电销': if '年级' in d['gln']: return '电销-小学' elif '初' in d['gln']: return '电销-初中' elif '高' in d['gln']: return '电销-高中' #理论上不存在 else: print(d['gln'],d['订单号']) return '电销-高中' elif '政哥阿米巴' in str(d['业务部']): if '年级' in d['gln']: return '政哥阿米巴-小学' elif '初' in d['gln']: return '政哥阿米巴-初中' else: print(d['gln'],d['订单号']) return '政哥阿米巴-高中' elif d['业务部']=='社群运营': if '年级' in d['gln']: return '社群-小学' elif '初' in d['gln']: return '社群-初中' elif '高' in d['gln']: return '社群-高中' else: print(d['gln'],d['订单号']) return '社群-小学' elif '阿米巴' in str(d['业务部']): #社群 if '年级' in d['gln']: return '社群-小学' elif '初' in d['gln']: return '社群-初中' elif '高' in d['gln']: return '社群-高中' else: print(d['gln'],d['订单号']) return '社群-小学' elif d['平台']=='高中' or '高中' in str(d['平台']): if d['业务部']=='辅导老师工作台': return '辅导-高中' elif '昆山' in str(d['department_name']): return '昆山-高中' elif d['业务部']=='在线电销': #if '年级' in d['gln']: return '电销-小学' return '电销-高中' elif '政哥阿米巴' in str(d['业务部']) or d['业务部']=='政哥阿米巴': return '政哥阿米巴-高中' elif d['业务部']=='社群运营': return '社群-高中' elif '阿米巴' in str(d['业务部']): #社群 return '社群-高中' else: print(d['平台']) return '空'
bd['tt']=bd.apply(lambda x:srs_classify(x),axis=1)
def sru_pretty_pr(bd): res={'gmv':dict(bd.groupby('tt')['收入金额'].sum())} bd2=bd.loc[bd['订单总金额']>100] res['ucount']=dict(bd2.groupby('tt')['uid'].nunique()) res['all']=bd['收入金额'].sum() #for k,v in res.items():print(k,v) return res sru_pretty_pr(bd)
#消费表 #t1='2020-12-31' print(t1) ucol='order_id goods_stage goods_type goods_resource goods_name authorized_status user_id mall_price settle_price cash_part balance_cash_part need_express create_time update_time'.replace(' ',',') msql="select {0} from mall_order_detail where mall_price>0 and cash_part>0 and date_format(create_time,'%Y%m%d')='{1}'".format(ucol,t1.replace('-','')) curor.execute(msql) data = curor.fetchall() btda=pd.DataFrame([i for i in data],columns='order_id goods_stage goods_type goods_resource goods_name authorized_status user_id mall_price settle_price cash_part balance_cash_part need_express create_time update_time'.split()) print(len(btda)) olst=list(set(btda['order_id'])) print(len(olst)) #btda.to_excel('{0}-1212111-消费1.xlsx'.format(t1)) u2='order_id stage order_type order_channel pay_type actually_paid cash_part order_status pay_status employee_id paid_time express_status is_second_kill is_offline_site is_shopping_cart remark create_time update_time'.replace(' ',',') data1=mall_sql(olst,curor,u2,od='mall_order') btdw=pd.DataFrame(data1,columns=u2.split(',')) print(len(btdw)) def emp(x): try:return int(x) except:return 0 btdw['employee_i']=btdw['employee_id'].apply(lambda x:emp(x)) kwsh=list(set(btdw['employee_i'])) ucks='employee_id,real_name,department_name,leader_name,department_id' curcrm.execute("select {0} from crm_employee_user where employee_id in {1}".format(ucks,str(tuple(kwsh)))) data = curcrm.fetchall() ceu=pd.DataFrame([i for i in data],columns=ucks.split(',')) ceu['employee_id']=ceu['employee_id'].apply(lambda x:str(x)) kdsh=list(set(ceu['department_id'])) cur = conn.cursor() ucks='department_id,business_line_name' sql02="select {0} from crm_department where department_id in {1}".format(ucks,str(tuple(kdsh))) if len(kwsh)==1: #只有一个department_id sql02="select {0} from crm_department where department_id in ({1})".format(ucks,kdsh[0]) curcrm.execute(sql02) data = curcrm.fetchall() ceu1=pd.DataFrame([i for i in data],columns=ucks.split(',')) ceu=ceu.merge(ceu1,on='department_id',how='left') btd=btdw.merge(ceu,on='employee_id',how='left') btd.to_excel('{0}-1212111-消费2-2.xlsx'.format(t1))
#消费表的筛选统计 btd1=btd.loc[btd['stage']=='senior'] btd1=btd1.loc[btd1['pay_status']=='paid'] btd1=btd1.loc[btd1['order_type']!='exp_center'] #那这个条件其实不需要 btd1=btd1.loc[btd1['business_line_name']=='在线电销'] #可能会有eid为空的情况也算到电销? btd1['actually_paid']=btd1['actually_paid'].apply(lambda x:float(x)) print(btd1['actually_paid'].sum())
不要为了追逐,而忘记当初的样子。
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· go语言实现终端里的倒计时
· 如何编写易于单元测试的代码
· 10年+ .NET Coder 心语,封装的思维:从隐藏、稳定开始理解其本质意义
· .NET Core 中如何实现缓存的预热?
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· 分享一个免费、快速、无限量使用的满血 DeepSeek R1 模型,支持深度思考和联网搜索!
· 基于 Docker 搭建 FRP 内网穿透开源项目(很简单哒)
· ollama系列01:轻松3步本地部署deepseek,普通电脑可用
· 按钮权限的设计及实现
· 25岁的心里话