import os
from flask import Flask, request, render_template, send_file
from utils.json_format import CJsonEncoder
import json
import pandas as pd
import numpy as np
import time
from datetime import datetime,timedelta
import re
import typing as t
import json
def tpml():
result = {
"billReport": {
"activeTime": { # 活跃时间
"activeTimeSeries": [
# {
# "time": "00", #时间段 00 代表凌晨00:00:00 ~ 00:50:59
# "count": 10, #这个时段的总活跃次数
# "rate": 0.1 # 占比
# },
],
"activeTimeTag": "工作日/非工作日;夜间..." #活跃时间标签
},
"tradeType": { # 交易类型
"tradeTypeRate": [ #交易类型占比
{
"tradeTypeName": "转账", #类型名称
"rate": 0.1, #类型占比
"topList":[
{
"no": "1", # 序号
"tradeTime": "2024-04-30 01:34:53", # 交易时间
"tradeType": "", #交易类型
"amount": "0.01" #交易金额
}
]
}
]
},
"tradeMethod": { # 交易方式
"tradeMethodRate": [
{
"method": "零钱/银行卡/信用卡...", # 交易方式
"rate": 0.11 # 占比
}
],
"bankCardList": [
{
"bankName": "招商银行/中国银行", #银行名称
"cardNoEnd": "0248", # 尾号
"tradeCount": 3490, #交易次数
"useRate": 0.3 #使用率相对于所有银行卡交易来说
}
]
},
"capital": { #资金
"monthlyAnalysis": [ # 月度分析,基于客户发送的报告数据,最长周期可能12个月,最短不确定,根据月份升序排列
{
"month": "2023-03", #月份
"inCount": 100, #收入笔数
"inAmount": 100.00, #收入总金额
"outCount": 80, #收入总金额
"outAmount": 400.00 #收入总金额
}
],
"specialTrade": [ #特殊交易清单命中特殊交易金额、特殊交易对象的交易记录
{
"no": "1280", #交易序号
"tradeTime": "2024-04-30 01:34:53", #交易时间
"tradeType": "", #交易类型
"amount": "0.01", #交易金额
"hitReason": "命中租机关键字/命中先用后付平台/命中网贷平台..." #命中原因
}
],
"last7days": [ #进7天收支
],
"last14days": [ #进14天收支
]
},
"counterparty": { #交易对方
"topSeries": [ #交易对象统计top5,可筛选7,15天/1个月(30)/2个月(60)/3个月(90)/1年(365)
# {
# "days": 7, #进7天数据,
# "list": [
# "与”张三“重复交易10次,收入金额:1000.00元,支出金额:0.01元",
# "与”李四“重复交易10次,收入金额:1000.00元,支出金额:0.01元",
# "与”人人信“重复交易10次,收入金额:1000.00元,支出金额:0.01元",
# "与”人人行“重复交易10次,收入金额:1000.00元,支出金额:0.01元"
# ]
# },
# {
# "days": 15, #进7天数据,
# "list": [
# "与”张三“重复交易10次,收入金额:1000.00元,支出金额:0.01元",
# "与”李四“重复交易10次,收入金额:1000.00元,支出金额:0.01元",
# "与”人人信“重复交易10次,收入金额:1000.00元,支出金额:0.01元",
# "与”人人行“重复交易10次,收入金额:1000.00元,支出金额:0.01元"
# ]
# },
# {
# "days": 30, #进7天数据,
# "list": [
# "与”张三“重复交易10次,收入金额:1000.00元,支出金额:0.01元",
# "与”李四“重复交易10次,收入金额:1000.00元,支出金额:0.01元",
# "与”人人信“重复交易10次,收入金额:1000.00元,支出金额:0.01元",
# "与”人人行“重复交易10次,收入金额:1000.00元,支出金额:0.01元"
# ]
# }
],
"counterpartyTag": [ #交易对方关键之次数
{
"tagName": "租机", #标签名称
"num": 0
}
],
"wordCloud": {
"words": [
{
"word": "张三",
"count": 1
},
{
"word": "人人租",
"count": 10
},
{
"word": "张三",
"count": 1
}
],
"png": "base 64 字符串" # 词云图片base64字符串
}
}
},
"sceneReport": {
"orderTimeline": { # 订单生命线
},
"payLater": [ # 先用后付
# {
# "logo":"", #产品logo没有的时候给一个默认头像
# "platformName":"",# 平台名称
# "tag":"额度/黄金/E卡",# 等该产品特有的标签,多个标签以,分割
# "list":[ # 命中的交易记录列表,最多
# {
# "tradeNo":"",#交易记录序号
# "tradeTime":"2024-04-30 12:11:24",# 交易时间
# "tradeType":"",#交易类型
# "amount":"0.01",#交易金额
# "status":""#状态,目前能计算出来的事初购和复购
# }
# ]
# }
],
"rent":[ # 手机租赁记录
# {
# "logo":"", #产品logo没有的时候给一个默认头像
# "platformName":"",# 平台名称
# "tag":"短租/监管机/长租...",# 等该产品特有的标签,多个标签以,分割
# "list":[ # 命中的交易记录列表,最多
# {
# "tradeNo":"",#交易记录序号
# "tradeTime":"2024-04-30 12:11:24",# 交易时间
# "tradeType":"",#交易类型
# "amount":"0.01",#交易金额
# "status":""#状态,目前能计算出来的事初购和复购
# }
# ]
# }
],
"load":[ # 贷款(包括各类以现金发放贷款的贷款记录)
# {
# "logo":"", #产品logo没有的时候给一个默认头像
# "platformName":"",# 平台名称
# "tag":"备用金/ID贷/借条/房贷/....",# 等该产品特有的标签,多个标签以,分割
# "list":[ # 命中的交易记录列表,最多
# {
# "tradeNo":"",#交易记录序号
# "tradeTime":"2024-04-30 12:11:24",# 交易时间
# "tradeType":"",#交易类型
# "amount":"0.01",#交易金额
# "status":""#状态,目前能计算出来的事初购和复购
# }
# ]
# }
],
"recycling":[ # 回收中介
# {
# "logo":"", #产品logo没有的时候给一个默认头像
# "platformName":"",# 平台名称
# "tag":"个人/平台/撸贷团伙/奥特曼...",# 等该产品特有的标签,多个标签以,分割
# "list":[ # 命中的交易记录列表,最多
# {
# "tradeNo":"",#交易记录序号
# "tradeTime":"2024-04-30 12:11:24",# 交易时间
# "tradeType":"",#交易类型
# "amount":"0.01",#交易金额
# "status":""#状态,目前能计算出来的事初购和复购
# }
# ]
# }
]
},
"contractReport": {},
"newfield":{}
}
return result
def getBankCardInfo(x):
if "原花呗" in x or "信用购" in x or "亲属卡" in x:
return ""
i = ''.join(re.findall(r"[\u4E00-\u9FA5]+(\d{4})",x))
if not i:
i = ''.join(re.findall(r"[\u4E00-\u9FA5]+银行",x))
return i
def getBankName(r):
if not r:
return ""
p = r"[\u4E00-\u9FA5]+"
return re.match(p,r).group()
def getBankCardNo(r):
if not r:
return ""
# 使用正则表达式找到第一个匹配的四位数字
match = re.search(r"\d{4}", r)
if match:
return match.group(0)
return ""
def getTradeFuncNew(r):
if r['isCreditCard']:
return '信用卡'
elif r['bankName']:
return '银行卡'
else:
if '亲情卡' in r['tradeFunc'] or '亲属卡' in r['tradeFunc']:
return '亲情卡'
elif '他人代付' in r['tradeFunc']:
return '他人代付'
elif '余额宝' in r['tradeFunc']:
return '余额宝'
elif '花呗' in r['tradeFunc']:
return '花呗'
elif '红包' in r['tradeFunc'] or '现金抵价券' in r['tradeFunc']:
return '红包'
elif '小荷包' in r['tradeFunc']:
return '小荷包'
elif '分付' in r['tradeFunc']:
return '分付'
elif '晚点付' in r['tradeFunc']:
return '晚点付'
elif '账户余额' in r['tradeFunc']:
return '账户余额'
elif '信用购' in r['tradeFunc']:
return '信用购'
elif '经营账户' in r['tradeFunc']:
return '余额'
elif '储值卡' in r['tradeFunc']:
return '储值卡'
elif r['tradeFunc'] in ['/','']:
return '(无)'
return "(其他)"
def formatTradeType(r):
if r['tradeType'] in ['/','不计收支','其他']:
return '不计收支'
return r['tradeType']
def counterpartyKeyWord(r,rule):
a = re.search(rule,r)
if a:
return a.group()
return ''
交易记录中特殊交易金额
specialAmountList = [int(a*rate) for a in [3000,2000,1000,500] for rate in [0.97,0.96,0.95,0.94,0.93,0.92,0.91]]
def hitSpecialTrade(r,rules):
# 命中特殊交易记录
# 命中租机关键字
# 命中先用后付平台
# 命中回收平台
# 命中特殊金额
# 命中网贷平台s
# 命中疑似借条平台
# 命中疑似个人回收中介
# 命中疑似撸贷中介
for (ruleType,rule,hitReason,hitType,keywordType) in rules:
if not rule:
continue
if ruleType=='keyword':
rule= "("+rule+")"
# 交易描述有限搜索
r2 = re.search(rule,r['tradeDesc'])
if r2 and r2.group():
fk2 = r2.group()
return rule,hitReason,fk2,hitType
# 交易对象其次搜索
r1 = re.search(rule,r['counterparty'])
if r1 and r1.group():
fk1 = r1.group()
if (len(fk1)==2 and r['counterparty']==fk1) or len(fk1)!=2:
return rule,hitReason,fk1,hitType
if '回收' in r['counterparty'] and r['tradeType']=='收入' and r['tradeAmount']>100:
return '回收','命中疑似回收(个人)中介',r['counterparty'],"回收"
# if r['tradeAmount'] in specialAmountList and r['tradeType']=='收入':
# # 这里需要做比较复杂的判断,单纯通过金额判断误判率太高
# return '特殊金额','命中特殊金额',r['counterparty'],"回收"
return "","","",""
def specialTradeRule(platforms,brokers)->t.Tuple[t.List[t.Tuple],t.Dict]:
rules = []
rule_map = {p['name']:p for p in platforms}
rule_map.update({br['company']:br for br in platforms if br['company']})
rule_map.update({("*"+br['company'][1:]):br for br in platforms if br['company']})
rule_map.update({br['name']:br for br in brokers})
rule_map.update({br['company']:br for br in brokers if br['company']})
rules.append(('keyword','|'.join([p['name'] for p in filter(lambda p:p['type']=='租机',platforms)]),'命中疑似租机平台','租机','name'))
rules.append(('keyword','|'.join([p['company'] for p in filter(lambda p:p['type']=='租机',platforms) if p['company']]),'命中疑似租机平台','租机','company'))
rules.append(('keyword','|'.join([p['company'][1:] for p in filter(lambda p:p['type']=='租机',platforms) if p['company']]),'命中疑似租机平台','租机','company'))
rules.append(('keyword','|'.join([p['name'] for p in filter(lambda p:p['type']=='先用后付',platforms)]),'命中疑似先用后付平台','先用后付','name'))
rules.append(('keyword','|'.join([p['company'] for p in filter(lambda p:p['type']=='先用后付',platforms) if p['company']]),'命中疑似先用后付平台','先用后付','company'))
rules.append(('keyword','|'.join([("\*"+p['company'][1:]) for p in filter(lambda p:p['type']=='先用后付',platforms) if p['company']]),'命中疑似先用后付平台','先用后付','company'))
rules.append(('keyword','|'.join([p['name'] for p in filter(lambda p:p['type']=='回租',platforms)]),'命中疑似网贷(回租)平台','网贷','name'))
rules.append(('keyword','|'.join([p['name'] for p in filter(lambda p:p['type']=='借条',platforms)]),'命中疑似网贷(借条)平台','网贷','name'))
rules.append(('keyword','|'.join([p['name'] for p in filter(lambda p:p['type']=='网贷',platforms)]),'命中疑似网贷平台','网贷','name'))
rules.append(('keyword','|'.join([p['company'] for p in filter(lambda p:p['type']=='网贷',platforms) if p['company']]),'命中疑似网贷平台','网贷','company'))
rules.append(('keyword','|'.join([("\*"+p['company'][1:]) for p in filter(lambda p:p['type']=='网贷',platforms) if p['company']]),'命中疑似网贷平台','网贷','company'))
rules.append(('keyword','|'.join([p['name'] for p in filter(lambda p:p['type']=='回收',platforms)]),'命中疑似回收平台','回收','name'))
rules.append(('keyword','|'.join([p['company'] for p in filter(lambda p:p['type']=='回收',platforms) if p['company']]),'命中疑似回收平台','回收','company'))
rules.append(('keyword','|'.join([p['company'][1:] for p in filter(lambda p:p['type']=='回收',platforms) if p['company']]),'命中疑似回收平台','回收','company'))
rules.append(('keyword','|'.join([p['name'] for p in filter(lambda p:p['type']=='卡超',platforms)]),'命中疑似卡超平台','回收','name'))
rules.append(('keyword','|'.join([p['name'] for p in filter(lambda p:p['type']=='贷超',platforms)]),'命中疑似贷超平台','回收','name'))
rules.append(('keyword','|'.join([p['name'] for p in brokers]),'命中疑似个人中介','回收','name'))
return rules,rule_map
def last(df:pd.DataFrame):
s6_a = df[df['tradeType']=='收入'].groupby(['ymd'])['tradeAmount']
s6_b = df[df['tradeType']=='支出'].groupby(['ymd'])['tradeAmount']
s6_c = df[df['tradeType']=='不计收支'].groupby(['ymd'])['tradeAmount']
df6 = pd.DataFrame(s6_a.sum().reset_index(level=0)).rename(columns={'ymd':'day','tradeAmount':'inAmount'})
df6 = pd.merge(df6,pd.DataFrame(s6_a.count().reset_index(level=0)).rename(columns={'ymd':'day','tradeAmount':'inCount'}),on='day',how='left')
df6 = pd.merge(df6,pd.DataFrame(s6_b.sum().reset_index(level=0)).rename(columns={'ymd':'day','tradeAmount':'outAmount'}),on='day',how='left')
df6 = pd.merge(df6,pd.DataFrame(s6_b.count().reset_index(level=0)).rename(columns={'ymd':'day','tradeAmount':'outCount'}),on='day',how='left')
df6 = pd.merge(df6,pd.DataFrame(s6_c.sum().reset_index(level=0)).rename(columns={'ymd':'day','tradeAmount':'ignoreoAmount'}),on='day',how='left')
df6 = pd.merge(df6,pd.DataFrame(s6_c.count().reset_index(level=0)).rename(columns={'ymd':'day','tradeAmount':'ignoreCount'}),on='day',how='left')
df6['inOutdiff'] = df6['inAmount'] -df6['outAmount']
df6 = df6.round(2)
df6 = df6.fillna(0)
return df6.to_dict(orient='records')
def capital(df:pd.DataFrame,result):
applydate = applydate_endtime(result)
df7 = df[df['specialTradehHitReason']!='']
df7 = df7.rename(columns={"specialTradehHitReason":"hitReason"})
df7 = df7[['tradeType','tradeTime', 'tradeDesc', 'tradeFunc', 'tradeAmount','counterparty', 'tradeNo','hitReason']]
df7 = df7.sort_values(by=['tradeTime'],ascending=False)
# 确保applydate是YYYY-MM-DD格式
applydate = datetime.strptime(applydate, '%Y-%m-%d')
# 生成前12个月的日期列表,格式为YYYY-MM
months = pd.date_range(applydate - pd.offsets.MonthBegin(12), applydate, freq='MS').strftime('%Y-%m').tolist()
# 对yearMonth列进行格式化,确保它是YYYY-MM格式
df['yearMonth'] = pd.to_datetime(df['yearMonth'], format='%Y-%m').dt.strftime('%Y-%m')
# 计算收入、支出和不计收支的金额和数量
s6_a = df[df['tradeType'] == '收入'].groupby(['yearMonth'])['tradeAmount'].sum().reindex(months, fill_value=0)
s6_b = df[df['tradeType'] == '支出'].groupby(['yearMonth'])['tradeAmount'].sum().reindex(months, fill_value=0)
s6_c = df[df['tradeType'] == '不计收支'].groupby(['yearMonth'])['tradeAmount'].sum().reindex(months, fill_value=0)
s6_a_count = df[df['tradeType'] == '收入'].groupby(['yearMonth'])['tradeAmount'].count().reindex(months, fill_value=0)
s6_b_count = df[df['tradeType'] == '支出'].groupby(['yearMonth'])['tradeAmount'].count().reindex(months, fill_value=0)
s6_c_count = df[df['tradeType'] == '不计收支'].groupby(['yearMonth'])['tradeAmount'].count().reindex(months, fill_value=0)
# 合并数据
df6 = pd.DataFrame({
'yearMonth': months,
'inAmount': s6_a,
'inCount': s6_a_count,
'outAmount': s6_b,
'outCount': s6_b_count,
'ignoreoAmount': s6_c,
'ignoreCount': s6_c_count
})
# 计算收支差额
df6['inOutdiff'] = round(df6['inAmount'] - df6['outAmount'], 2)
# 四舍五入并填充NaN为0
df6 = df6.round(2).fillna(0)
last7day = (datetime.strptime(df['tradeTime'].max(),'%Y-%m-%d %H:%M:%S')-timedelta(days=7)).strftime('%Y-%m-%d')+' 00:00:00'
last14day = (datetime.strptime(df['tradeTime'].max(),'%Y-%m-%d %H:%M:%S')-timedelta(days=14)).strftime('%Y-%m-%d')+' 00:00:00'
df7day = df[df['tradeTime']>= last7day]
df14day = df[df['tradeTime']>= last14day]
return dict(monthlyAnalysis=df6.to_dict(orient='records'),specialTrade=df7.to_dict(orient='records'),last7days=last(df7day),last14days=last(df14day))
def counterparty(df:pd.DataFrame):
s0 = df[df['counterpartyKeyWord']!=''].groupby('counterpartyKeyWord')
s1 = s0.count()
s1 = s1.reset_index(level=[0])
s11 = s0['tradeNo'].apply(lambda x:list(set(x.str.cat(sep=',').split(',')))).reset_index()
s11 = s11.rename(columns={'tradeNo':'tradeNos'})
s1 = pd.merge(s1,s11,on='counterpartyKeyWord',how='left')
s1['tagName'] = s1['counterpartyKeyWord']
s1['num'] = s1['id']
s1 = s1[['tagName','num','tradeNos']]
topSeriesDays = [7,15,30,90,180,365]
topSeries = []
maxDay = datetime.strptime(df['tradeTime'].max(),'%Y-%m-%d %H:%M:%S')
for d in topSeriesDays:
lastday = (maxDay-timedelta(days=d)).strftime('%Y-%m-%d')+' 00:00:00'
lastdf = df[df['tradeTime']>=lastday]
lastdf = lastdf[lastdf['counterparty']!='/']
lastdf = lastdf[lastdf['counterparty']!='']
s2 = lastdf.groupby('counterparty')
s20 = s2['tradeNo'].apply(lambda x:list(set(x.str.cat(sep=',').split(',')))).reset_index()
s20 = s20.rename(columns={'tradeNo':'tradeNos'})
s21 = s2['tradeAmount'].count().reset_index(level=0)
s22 = s2['tradeAmount'].sum().reset_index(level=0)
s3 = pd.merge(pd.DataFrame(s21).rename(columns={'counterparty':'counterparty','tradeAmount':'num'}),
pd.DataFrame(s22).rename(columns={'counterparty':'counterparty','tradeAmount':'amount'}),
on='counterparty',how='left')
s3 = pd.merge(s3,s20,on='counterparty',how='left')
s3 = s3[s3['num']>3]
s3 = s3.sort_values(by='num',ascending=False).head(15)
s3 = s3[['counterparty','num','amount','tradeNos']]
s3 = s3.round(2)
topSeries.append(dict(day=d,list=s3.to_dict(orient='records')))
return dict(topSeries=topSeries,counterpartyTag=s1.to_dict(orient='records'),wordCloud=[])
def mailInfo(df:pd.DataFrame,mailInfo:dict):
aliapyDF = df[df['flowType']=='ALIPAY']
mailInfo.setdefault('alipay',{})
mailInfo.setdefault('wechat',{})
if mailInfo['alipay']:
s1 = aliapyDF.groupby('ymd')['ymd'].count()
s11 = pd.DataFrame({'ymd':s1.index,'count':s1.values})
s11 = s11[s11['count']>0]
mailInfo['alipay']['activateDay'] = len(s11)
mailInfo['alipay']['activateRate'] = round(min(len(s11)/mailInfo['alipay']['day'],1)*100,2)
if mailInfo['wechat']:
wechatDF = df[df['flowType']=='WECHAT']
s2 = wechatDF.groupby('ymd')['ymd'].count()
s21 = pd.DataFrame({'ymd':s2.index,'count':s2.values})
s21 = s21[s21['count']>0]
mailInfo['wechat']['activateDay'] = len(s21)
mailInfo['wechat']['activateRate'] = round(min(len(s21)/mailInfo['wechat']['day'],1)*100,2)
return mailInfo
def activeTime(df:pd.DataFrame,mailInfo:dict):
# 活跃时间分析
s3 = df.groupby('hour')['hour'].count()
df3 = pd.DataFrame({'hour':s3.index,'count':s3.values})
df3['rate'] =round(df3['count']/s3.sum()*100,2)
s4 = df[df['flowType']=='ALIPAY'].groupby('ymd')['ymd'].count()
df4 = pd.DataFrame({'ymd':s4.index,'count':s4.values})
s6 = df[df['flowType']=='WECHAT'].groupby('ymd')['ymd'].count()
df6 = pd.DataFrame({'ymd':s6.index,'count':s6.values})
# days = {d:y for d,y in zip(s4.index,s4.values)}
# endDay = max([d for d in [mailInfo.get("alipay",{}).get("endTime",""),mailInfo.get("wechat",{}).get("endTime","")] if d])
s5 = df.groupby('day')['day'].count()
df5 = pd.DataFrame({'day':s5.index,'count':s5.values})
return dict(activeTimeSeries=df3.to_dict(orient='records'),activeTimeTag=""
,alipayYmdActiveSeries=df4.to_dict(orient='records')
,wechatYmdActiveSeries=df6.to_dict(orient='records')
,dayActiveSeries=df5.to_dict(orient='records')
)
def tradeType(df:pd.DataFrame):
return dict(tradeTypeRate=[])
"""def getTradeMethod(df):
# 银行卡-使用率
s4 = df[df['isBank']!=''].groupby(['bankName','bankCardNo']).count()
s4 = s4.reset_index(level=[0,1])
df4 = pd.DataFrame({'bankName':s4['bankName'],'bankCardNo':s4['bankCardNo'],'tradeCount':s4['id']})
df4['useRate'] = round(df4['tradeCount']/s4['id'].sum()*100,2)
df4 = df4.sort_values(by='useRate',ascending=False)
# 交易方式-使用率
s5 = df.groupby(['tradeType','tradeFuncNew']).count()
s5 = s5.reset_index(level=[0,1])
s5 = s5[s5['tradeType'] == '支出']
df5 = pd.DataFrame({'method':s5['tradeFuncNew'],'tradeCount':s5['id']})
df5['useRate'] = round(df5['tradeCount']/s5['id'].sum()*100,2)
return dict(bankCardList=df4.to_dict(orient='records'),tradeMethodRate=df5.to_dict(orient='records'))"""
def getPlatform(df:pd.DataFrame,ptype:str):
s1 = df[df['platformType']==ptype]
keywords_to_exclude = '糯米饭|锅巴|充电宝|水产|水果|肉夹馍|凉皮|便利|面|超市|私房菜|简茶|饰品|挖机|网咖|网吧'
s1 = s1[~s1['counterparty'].str.contains(keywords_to_exclude) & ~s1['tradeDesc'].str.contains(keywords_to_exclude)]
platformNames = {p for p in set(s1['platformName'].values) if p}
platforms = []
for k in platformNames:
item = {}
s0= s1[s1['platformName']==k]
rec = s0.to_dict(orient='records')[0]
if rec['platform']:
platform = rec['platform']
item.setdefault('logo',platform.get('logo',''))
item.setdefault('platformName',platform.get('name',''))
item.setdefault('tag', platform.get('tags',''))
item.setdefault('category', "平台")
else:
item.setdefault('logo',"")
item.setdefault('platformName',rec["counterparty"])
item.setdefault('tag', "")
item.setdefault('category', "个人")
item.setdefault('list',[])
sl = s0[['tradeNo','tradeTime','tradeType','tradeAmount','flowType','tradeDesc','counterparty']]
sl = sl.sort_values(by='tradeTime',ascending=False)
tradeRecords = sl.to_dict(orient='records')
item['list'] = tradeRecords
item.setdefault('report',{})
item['report']['tradeOutAmount'] = round(s0[s0['tradeType']=='支出']['tradeAmount'].sum(),2)
item['report']['tradeInAmount'] = round(s0[s0['tradeType']=='收入']['tradeAmount'].sum(),2)
item['report']['tradeIgnoreoAmount'] = round(s0[s0['tradeType']=='不计收支']['tradeAmount'].sum(),2)
item['report']['tradeNum'] = len(tradeRecords)
item['report']['firstUseTime'] = tradeRecords[0]['tradeTime']
item['report']['lastUseTime'] = tradeRecords[-1]['tradeTime']
# 判断是否在租:不计收支金额+收入金额<支出金额
item['report']['renting'] = True if (item['report']['tradeInAmount']+item['report']['tradeIgnoreoAmount'])<item['report']['tradeOutAmount'] else False
if item['report']['renting'] and ptype=='租机':
item['tag'] =(item['tag'] +',在租') if item['tag'] else '在租'
platforms.append(item)
if ptype=='租机':
platforms = list(sorted(platforms,key=lambda p:(p['report']['renting'],p['list'][0]['tradeTime']),reverse=True))
else:
platforms = list(sorted(platforms,key=lambda p:p['list'][0]['tradeTime'],reverse=True))
return platforms
def recyclingPlatform(df:pd.DataFrame):
s1 = df[df['platformType']=='回收']
platformNames = {p for p in set(s1['platformName'].values) if p}
platforms = []
for k in platformNames:
rec = s1[s1['platformName']==k].to_dict(orient='records')[0]
item = {}
if rec['platform']:
platform = rec['platform']
item.setdefault('logo',platform.get('logo',''))
item.setdefault('platformName',platform.get('name',''))
item.setdefault('tag', platform.get('tags',''))
item.setdefault('category', "平台")
else:
item.setdefault('logo',"")
item.setdefault('platformName',rec["counterparty"])
item.setdefault('tag', "个人")
item.setdefault('category', "个人")
item.setdefault('list',[])
item['list'] = s1[s1['platformName']==k][['tradeNo','tradeTime','tradeType','tradeAmount','flowType','tradeDesc','counterparty']].to_dict(orient='records')
platforms.append(item)
platforms = list(sorted(platforms,key=lambda p:(p['category'],p['list'][0]['tradeTime']),reverse=True))
return platforms
def preProcessing(df:pd.DataFrame,keywords,platforms,brokers)->pd.DataFrame:
datas = df.to_dict(orient='records')
rules,rmap = specialTradeRule(platforms,brokers)
for r in datas:
# 计算标准字段
isBank =getBankCardInfo(r['tradeFunc'])
bankName = getBankName(isBank)
r.update({
"yearMonth":r['tradeTime'][:7],
"ymd":r['tradeTime'][:10],
"day":r['tradeTime'][8:10],
"hour":r['tradeTime'][11:13],
"isBank": isBank,
"bankName":bankName,
"bankCardNo":getBankCardNo(isBank),
"isCreditCard":'信用卡' in bankName,
})
rule,hitReason,hitK,hitType = hitSpecialTrade(r,rules)
if (
not ('皮卡' == hitK and len(r['counterparty'])>2)
and not ('二师兄' == hitK and len(r['counterparty'])>3)
and "超市" not in r['counterparty']
):
r['platform'] = rmap.get(hitK,'')
r['platformName'] = rmap.get(hitK,{}).get('name',hitK)
r['platformType'] = hitType
if re.findall(r'^回收单号:\d{19}$',r['counterparty']) or re.findall(r'^回收单:\d{7}支付款项$',r['counterparty']):
r['platform'] = dict(name='回收平台A',type='回收',tags='平台')
r['platformName'] = '回收平台A'
r['platformType'] = '回收'
hitReason = '命中疑似回收平台'
if re.findall(r'^手机回收款项,单号:\d{7}$',r['counterparty']):
r['platform'] = dict(name='手机回收',type='回收',tags='平台')
r['platformName'] = '手机回收'
r['platformType'] = '回收'
hitReason = '命中疑似回收平台'
if not hitReason and re.findall(r'\d{16}第\d+期账单交租\(商家ID:\d+\)',r['tradeDesc']):
r['platform'] = dict(name="租机(疑似)",type='租机',tags='商家:'+r['counterparty'],logo='https://img2.imgtp.com/2024/05/09/HL2V5i5m.png')
r['platformName'] = "租机(疑似)"
r['platformType'] = '租机'
hitReason = '命中疑似租机平台'
item = {
"tradeFuncNew":getTradeFuncNew(r),
"tradeType":formatTradeType(r),
"specialTradehHitReason": hitReason,
"counterpartyKeyWord":counterpartyKeyWord(r['counterparty'],"("+"|".join([k['keyword']for k in keywords])+"){1}"),
}
r.update(item)
df = pd.DataFrame(datas)
return df
def xxx(df:pd.DataFrame):
dx = df[df['specialTradehHitReason']'命中疑似网贷(借条)平台']
dx = dx.sort_values(by='tradeTime',ascending=False)
datas = dx.to_dict(orient='records')
for x in datas:
inMaxDay = (datetime.strptime(x['tradeTime'],'%Y-%m-%d %H:%M:%S')+timedelta(hours=1)).strftime('%Y-%m-%d %H:%M:%S')
outMaxDay = (datetime.strptime(x['tradeTime'],'%Y-%m-%d %H:%M:%S')+timedelta(days=30)).strftime('%Y-%m-%d %H:%M:%S')
xx = df[(df['tradeType']'收入') & (df['tradeTime'] < inMaxDay) & (df['tradeTime']>=x['tradeTime'])]
xx.sort_values(by='tradeTime',inplace=True)
x['借款人'] = ''
x['借款金额'] = 0
x['打款时间'] = ''
x['打款交易序列号'] = ''
x['还款收款人'] = ''
x['还款金额'] = 0
x['还款'] = ''
x['还款交易序列号'] = ''
if len(xx):
xx = xx.iloc[0]
x['借款人'] = xx['counterparty']
x['借款金额'] = xx['tradeAmount']
x['打款时间'] = xx['tradeTime']
x['打款交易序列号'] = xx['tradeNo']
# 还款记录
xy = df[(df['tradeType']=='支出') & (df['tradeTime'] < outMaxDay) & (df['tradeTime']>=xx['tradeTime']) & (df['tradeAmount']==xx['tradeAmount'])]
if len(xy):
xy = xy.iloc[0]
x['还款收款人'] = xy['counterparty']
x['还款金额'] = xy['tradeAmount']
x['还款'] = xy['tradeTime']
x['还款交易序列号'] = xy['tradeNo']
pass
return pd.DataFrame(datas)
def gd(df:pd.DataFrame,day:int,result):
df2 = df[df['specialTradehHitReason']'命中疑似网贷(借条)平台']
if len(df2)0:
return None
apply_date=applydate(result)
apply_date = datetime.strptime(apply_date, '%Y-%m-%d')
minDay = (apply_date-timedelta(days=day)).strftime('%Y-%m-%d')
df3 = df2[df2['ymd']>=minDay]
df4 = xxx(df)
df4 = df4[df4['ymd']>=minDay]
#12
item = {
'天数':day,
'签约次数': len(df3),
'签约平台数': len(df3.groupby('platformName').count()) if len(df3) >0 else 0,
'签约费总额': float(round(df3['tradeAmount'].sum(),2)),
'平均每次签约费': float(round(df3['tradeAmount'].sum()/len(df3),2)) if len(df3) > 0 else 0.00,
'收到借款金额':float(round(df4['借款金额'].sum(),2)),
'收到借款次数':len(df4[df4['借款人']!='']),
'平均每笔借款金额': float(round(df4[df4['借款人']!='']['借款金额'].mean(),2) if len(df4[df4['借款人']!='']) else 0.00),
'打卡次数': '',
'打卡人数': '',
'打卡总金额': '',
'偿还本金次数': '',
'偿还本金人数': '',
'偿还本金总额': ''
}
return item
def jtReport(df:pd.DataFrame,result):
days = [7,15,30,60,90,180,365]
recentReports = []
for d in days:
item = gd(df,d,result)
if not item:
continue
recentReports.append(item)
return dict(recentReports=recentReports,summary=None)
1、新增银行卡张数
import pandas as pd
def banknumber(SJ):
# 筛选出非空'isBank'的行
SJ1 = SJ[SJ['isBank']!=''][['bankName','bankCardNo','isBank']]
# 如果SJ1为空,则直接返回默认值
if SJ1.empty:
return [{'bankname': '储蓄', 'number': 0}, {'bankname': '信用', 'number': 0}]
# 移除重复的银行卡号
SJ1.drop_duplicates(subset='bankCardNo', inplace=True)
# 获取'isBank'值的计数并重置索引
SJ1 = SJ1['isBank'].value_counts().reset_index()
SJ1.columns = ['isbank', 'count']
# 标记储蓄和信用卡
SJ1['card_type'] = SJ1['isbank'].str.contains('储蓄|借记', regex=True)
SJ1.loc[SJ1['card_type'], 'category'] = '储蓄'
SJ1['card_type1'] = SJ1['isbank'].str.contains('信用', regex=True)
SJ1.loc[SJ1['card_type1'], 'category'] = '信用'
SJ1['category'] = SJ1['category'].fillna('储蓄')
# 计算每个类别的数量
banks = SJ1['category'].value_counts().reset_index()
banks.columns = ['bankname', 'number']
# 确保每个类别都有记录,如果没有则添加默认值
for category in ['储蓄', '信用']:
if category not in banks['bankname'].values:
new_row = pd.DataFrame({'bankname': [category], 'number': [0]})
banks = pd.concat([banks, new_row], axis=0, ignore_index=True)
# 根据类别名称排序
banks.sort_values(by='bankname', inplace=True, ascending=False)
# 转换为字典列表
datas = banks.to_dict(orient='records')
return datas
2、常用卡
def oftencard(df):
# 过滤掉非银行卡交易
bank_transactions = df[df['isBank']!='']
# 过滤掉包含"信用卡"的行
non_credit_cards = bank_transactions[~bank_transactions['bankName'].str.contains('信用')]
# 如果没有储蓄卡,返回特定消息
if non_credit_cards.empty:
return "没有储蓄常用卡"
# 银行卡-使用率
s4 = non_credit_cards.groupby(['bankName', 'bankCardNo']).count()
s4 = s4.reset_index(level=[0, 1])
df4 = pd.DataFrame({'bankName': s4['bankName'], 'bankCardNo': s4['bankCardNo'], 'tradeCount': s4['id']})
# 计算使用率
total_trades = non_credit_cards['id'].count()
df4['useRate'] = round(df4['tradeCount'] / total_trades * 100, 2)
# 为了之后选择使用率最高的bankName,我们需要在每个bankCardNo下对bankName进行排序
df4_sorted = df4.sort_values(by=['bankCardNo', 'useRate'], ascending=[True, False])
# 合并相同bankCardNo的银行卡
df4_grouped = df4_sorted.groupby('bankCardNo').agg({
'bankName': 'first', # 选择使用率最高的bankName
'tradeCount': 'sum', # 相加交易次数
'useRate': 'sum' # 相加使用率
}).reset_index()
# 对useRate进行排序
df4_grouped = df4_grouped.sort_values(by='useRate', ascending=False).head(1)
df4_grouped=df4_grouped.to_dict(orient='records')
return df4_grouped
3、活跃时间优化
def activeTime_month(df, result):
endtime = applydate_endtime(result)
s3 = df.groupby('yearMonth')['yearMonth'].count()
df3 = pd.DataFrame({'yearMonth': s3.index, 'count': s3.values})
df3['rate'] = round(df3['count'] / s3.sum() * 100, 2)
s4 = df[df['flowType'] == 'ALIPAY'].groupby('yearMonth')['yearMonth'].count()
df4 = pd.DataFrame({'yearMonth': s4.index, 'alipay_count': s4.values})
df4 = df4.fillna(0)
s6 = df[df['flowType'] == 'WECHAT'].groupby('yearMonth')['yearMonth'].count()
df6 = pd.DataFrame({'yearMonth': s6.index, 'wechat_count': s6.values})
df6 = df6.fillna(0)
df_active_times = pd.merge(df3, df4, on='yearMonth', how='left')
df_active_times = pd.merge(df_active_times, df6, on='yearMonth', how='left')
end_date = pd.to_datetime(endtime).date()
start_date = end_date - pd.DateOffset(months=12)
all_months = pd.date_range(start=start_date, end=end_date, freq='MS').strftime('%Y-%m').tolist()
df_active_times['yearMonth'] = pd.to_datetime(df_active_times['yearMonth'])
df_active_times = df_active_times.set_index('yearMonth').reindex(all_months).reset_index()
df_active_times = df_active_times.fillna({'alipay_count': 0, 'wechat_count': 0, 'count': 0})
total_count_sum = df_active_times['count'].sum()
df_active_times['rate'] = df_active_times['count'].apply(lambda x: round(x / total_count_sum * 100, 2) if total_count_sum > 0 else 0)
active_time_series = df_active_times.to_dict(orient='records')
return {'activeTimeSeries': active_time_series}
def activeTime_hour(df):
# 确保小时字段是以字符串形式,并且是两位数(例如 '02' 而不是 2)
df['hour'] = df['hour'].astype(str).str.zfill(2)
# 按小时统计总交易量
hour_counts = df.groupby('hour')['hour'].count()
df_counts = pd.DataFrame({'hour': hour_counts.index, 'count': hour_counts.values})
# 计算每个小时的交易比例
df_counts['rate'] = round(df_counts['count'] / hour_counts.sum() * 100, 2)
# 按小时统计支付宝交易量
alipay_counts = df[df['flowType'] == 'ALIPAY'].groupby('hour')['hour'].count()
df_alipay_counts = pd.DataFrame({'hour': alipay_counts.index, 'alipay_count': alipay_counts.values})
df_alipay_counts = df_alipay_counts.fillna(0)
# 按小时统计微信交易量
wechat_counts = df[df['flowType'] == 'WECHAT'].groupby('hour')['hour'].count()
df_wechat_counts = pd.DataFrame({'hour': wechat_counts.index, 'wechat_count': wechat_counts.values})
df_wechat_counts = df_wechat_counts.fillna(0)
# 合并所有统计信息到一个DataFrame中
df_active_times = pd.merge(df_counts, df_alipay_counts, on='hour', how='left')
df_active_times = pd.merge(df_active_times, df_wechat_counts, on='hour', how='left')
# 用0填充所有NaN值以确保数据一致性
df_active_times = df_active_times.fillna({'alipay_count': 0, 'wechat_count': 0})
# 确保所有24小时都被表示,缺失的小时用0填充
all_hours = [f"{i:02d}" for i in range(24)] # 创建小时列表,从 '00' 到 '23'
df_active_times = df_active_times.set_index('hour').reindex(all_hours).fillna(0).reset_index()
# 将DataFrame转换为字典列表
active_time_series = df_active_times.to_dict(orient='records')
# 返回包含字典列表的字典
return {'activeTimeSeries1': active_time_series}
4、借条计算时间以发送为准
def applydate(result):
current_date = datetime.now().date().strftime('%Y-%m-%d')
alipaytime= result.get('mailInfo', {}).get('alipay', {}).get('sendTime', current_date)
wechattime= result.get('mailInfo', {}).get('wechat', {}).get('sendTime', current_date)
if alipaytime>wechattime:
return alipaytime
else: return wechattime
def applydate_endtime(result):
alipaytime= result.get('mailInfo', {}).get('alipay', {}).get('endTime', "2020-01-01")
wechattime= result.get('mailInfo', {}).get('wechat', {}).get('endTime', "2020-01-01")
if alipaytime>=wechattime:
return alipaytime
else: return wechattime
5、优化用一张卡叫法不一样
def getTradeMethod(df):
# 银行卡-使用率
s4 = df[df['isBank']!=''].groupby(['bankName','bankCardNo']).count()
s4 = s4.reset_index(level=[0,1])
df4 = pd.DataFrame({'bankName':s4['bankName'],'bankCardNo':s4['bankCardNo'],'tradeCount':s4['id']})
total_trades = s4['id'].sum() # 总交易次数
df4['useRate'] = round(df4['tradeCount']/total_trades*100,2)
# 为了之后选择使用率最高的bankName,我们需要在每个bankCardNo下对bankName进行排序
df4_sorted = df4.sort_values(by=['bankCardNo', 'useRate'], ascending=[True, False])
# 合并相同bankCardNo的银行卡
df4_grouped = df4_sorted.groupby('bankCardNo').agg({
'bankName': 'first', # 选择使用率最高的bankName
'tradeCount': 'sum', # 相加交易次数
'useRate': 'sum' # 相加使用率
}).reset_index()
# 对useRate进行排序
flow_types = df[df['isBank']!=''].groupby('bankCardNo')['flowType'].unique()
flow_types = flow_types.apply(lambda x: ', '.join(sorted(set(x))))
df4_grouped = pd.merge(df4_grouped, flow_types.rename('flowType'), on='bankCardNo', how='left')
df4_grouped = df4_grouped.sort_values(by='useRate', ascending=False)
# 交易方式-使用率
s5 = df.groupby(['tradeType','tradeFuncNew']).count()
s5 = s5.reset_index(level=[0,1])
s5 = s5[s5['tradeType'] == '支出']
df5 = pd.DataFrame({'method':s5['tradeFuncNew'],'tradeCount':s5['id']})
df5['useRate'] = round(df5['tradeCount']/s5['id'].sum()*100,2)
return dict(bankCardList=df4_grouped.to_dict(orient='records'),tradeMethodRate=df5.to_dict(orient='records'))
优化最长未使用天数
import pandas as pd
def calculate_longest_inactive_days(df, result):
# 提取时间戳的辅助函数
def extract_time(result, key):
return pd.to_datetime(result.get('mailInfo', {}).get(key, {}).get('startTime', "")),
pd.to_datetime(result.get('mailInfo', {}).get(key, {}).get('endTime', ""))
# 提取支付宝和微信的时间范围
ALIPAY_starttime, ALIPAY_endtime = extract_time(result, 'alipay')
WECHAT_starttime, WECHAT_endtime = extract_time(result, 'wechat')
# 确保 'ymd' 列为日期时间类型并设置为索引
df.drop_duplicates(subset=['flowType','ymd'],inplace=True)
df['ymd'] = pd.to_datetime(df['ymd'])
df.set_index('ymd', inplace=True)
# 去重并保持索引
def longest_inactive_for_flow(flow_type, starttime, endtime):
# 按流程类型和时间范围筛选数据
df_flow = df[df['flowType'] == flow_type]
df_flow = df_flow[(df_flow.index >= starttime) & (df_flow.index <= endtime)]
# 如果没有数据,则返回0
if df_flow.empty:
return 0
# 添加开始时间和结束时间到交易日期列表中
transaction_dates = [starttime] + df_flow.index.tolist() + [endtime]
# 确保交易日期列表是有序的
transaction_dates.sort()
# 计算连续交易之间的天数差异
diff = pd.Series(transaction_dates).diff().dt.days
# 找出最大的间隔(最长未活跃天数)
longest_inactive = diff.max()
# 返回结果,确保不会出现负数或NaN
return longest_inactive - 1 if pd.notnull(longest_inactive) else 0
# 计算支付宝和微信的最长未活跃天数
longest_inactive_alipay = longest_inactive_for_flow('ALIPAY', ALIPAY_starttime, ALIPAY_endtime)
longest_inactive_wechat = longest_inactive_for_flow('WECHAT', WECHAT_starttime, WECHAT_endtime)
def empty(x):
if pd.isnull(x):
return "0"
elif float(x) < 0:
return "0"
elif str(x) == "nan":
return "0"
else:
return x
longest_inactive_wechat = empty(longest_inactive_wechat)
longest_inactive_alipay = empty(longest_inactive_alipay)
# 返回结果
return {
'ALIPAY': longest_inactive_alipay,
'WECHAT': longest_inactive_wechat
}
场景报告
def scene_dzsm(SJ):
phone = {
"苹果": "iphone",
"Apple": "iphone",
"iphone": "iphone",
"vivo": "vivo",
"OPPO": "OPPO",
"荣耀": "荣耀",
"Honor": "荣耀",
"小米": "小米",
"Xiaomi": "小米",
"华为": "华为",
"Huawei": "华为",
"三星": "三星",
"Samsung": "三星",
"红米": "红米",
"Redmi": "红米",
"MEIZU": "魅族",
"魅族": "魅族",
"一加": "一加",
"realme": "realme",
"IQOO": "IQOO"}
SJ['type'] = np.where(SJ['tradeDesc'].str.contains('安卓|王者荣耀|第一|王者送荣耀|荣耀积分|水果'), None, SJ['tradeDesc'].str.extract('(' + '|'.join(phone.keys()) + ')', expand=False).map(phone))
SJ1=SJ[SJ['type'].notna()]
jg = {
"type":"电子数码",
"num": len(SJ1),
"explain":"目前主要是手机,后续将新增其他电子产品,包含主流手机品牌,根据相关产品推算。",
"tradeNo": SJ1['tradeNo'].tolist()}
return jg
def scene_home(SJ):
home = ["物业","房东", "房租", "租房", "家政","水费","电费","燃气","天然气"]
SJ['type'] = np.where(SJ['tradeDesc'].str.contains('锅'), None, SJ['tradeDesc'].str.extract('(' + '|'.join(home) + ')', expand=False))
SJ1=SJ[SJ['type'].notna()]
jg = {
"type":"住房情况",
"num": len(SJ1),
"explain":"通过用户住房相关信息进行推算。可大致判断用户住房情况。如租房还是自有以及房间大小。",
"tradeNo": SJ1['tradeNo'].tolist()}
return jg
def scene_cxjt(SJ):
exclude_keywords = ["交通银行", "店", "学", "路"]
for keyword in exclude_keywords:
SJ = SJ[~SJ['counterparty'].str.contains(keyword, na=False)]
SJ = SJ[~SJ['tradeDesc'].str.contains(keyword, na=False)]
important_jt = ["停车","加油站","加油卡","洗车","高速","ETC","交警","4S店","石油","石化","行车记录仪","车贷","汽车金融"]
other_jt = ["轻轨","共享单车","打车","哈啰单车","哈啰出行","摩拜单车","滴滴平台","滴滴出行","滴滴代驾","滴滴顺风车","滴滴网约车","高德地图打车",
"高德代驾","顺风车","机票","租车","轮船","携程","同程","飞猪旅行","铁路12306","途牛旅行","中铁","交通"]
important_SJ = SJ[
SJ['counterparty'].str.contains('|'.join(important_jt)) |
SJ['tradeDesc'].str.contains('|'.join(important_jt))
]
important_tradeNos = set(important_SJ['tradeNo'].tolist())
other_SJ = SJ[
SJ['counterparty'].str.contains('|'.join(other_jt)) |
SJ['tradeDesc'].str.contains('|'.join(other_jt))
]
other_tradeNos = set(other_SJ['tradeNo'].tolist())
# 合并两个集合,并保持重要交易编号的优先级
sorted_tradeNos = list(important_tradeNos)+list(important_tradeNos.union(other_tradeNos) - important_tradeNos)
jg = {
"type": "交通出行",
"num": len(sorted_tradeNos),
"explain": "考虑用户出行的交通工具,也可判断用户是否有车",
"tradeNo": sorted_tradeNos
}
return jg
def scene_dspt(SJ):
ds = ["淘宝","tb","抖音小店","抖音生活","拼多多","京东商城","京东到家","唯品会","苏宁易购"]
SJ['type'] = np.where(SJ['counterparty'].str.contains('现金|提现|店|运费|补偿金'), None, SJ['counterparty'].str.extract('(' + '|'.join(ds) + ')', expand=False))
SJ['tradeDesc'].fillna('', inplace=True)
SJ = SJ[~SJ['tradeDesc'].str.contains('现金|提现|店|运费|补偿金')]
SJ1=SJ[SJ['type'].notna()]
jg = {
"type":"电商网购",
"num": len(SJ1),
"explain":"展示用户从主流购物平台购买记录,通过商品正常价格,对比用户购买价格可以推算其消费水平。",
"tradeNo": SJ1['tradeNo'].tolist()}
return jg
def scene_ylbx(SJ):
bx = ["社保",
"医疗",
"医保",
"保险",
"事故理赔"]
def find_first_keyword(text, keywords):
if text is None:
return None
for keyword in keywords:
if keyword in text:
return keyword
return None
SJ['type'] = SJ['counterparty'].apply(lambda x: find_first_keyword(x, bx))
SJ1=SJ[SJ['type'].notna()]
jg = {
"type":"保险理赔",
"num": len(SJ1),
"explain":"主要分析用户在医疗以及保险类情况,出现大量医疗交易或理赔交易请注意。",
"tradeNo": SJ1['tradeNo'].tolist()}
return jg
def scene_qssj(SJ):
SJ2=SJ[SJ['tradeDesc'].str.contains("红包|转账")]
keywords=["母后","母亲大人","母上大人","母亲","妈","妈妈","老妈","妈咪","发给妈妈","发给妈","A妈","发给老妈","A妈妈","老妈子","我妈","美女妈","俺妈","孩子妈","父皇","父亲",
"爸","爸爸","老爸","A爸爸","爸比","发给爸","我爸爸","发给爸爸","孩子爸","发给老爸","老爸。","哥","哥哥","大哥","二哥","三哥","弟","弟弟","老弟","小老弟","小弟","三弟","姐","姐姐",
"大姐","二姐","老姐","妹妹","老妹","妹","爷爷","奶奶",'爹','老爹','老娘','娘','爹地']
SJ2=SJ2[SJ2['counterparty'].isin(keywords)]
jg = {
"type":"亲属社交",
"num": len(SJ2),
"explain":"目前主要包含直系亲属的红包转账记录。",
"tradeNo":SJ2['tradeNo'].tolist()}
return jg
def scene_jysj(SJ):
sj = ["陌陌","觅伊","Tinder","探探","Soul","世纪佳缘","百合网","珍爱网","有缘网","花椒直播","Blued","Grindr"]
def find_first_keyword(text, keywords):
if text is None:
return None
for keyword in keywords:
if keyword in text:
return keyword
return None
SJ['type'] = SJ['counterparty'].apply(lambda x: find_first_keyword(x, sj))
SJ1=SJ[SJ['type'].notna()]
jg = {
"type":"交友社交",
"num": len(SJ1),
"explain":"包含非主流交友平台以及直播平台。若出现大量交易。请注意。",
"tradeNo": SJ1['tradeNo'].tolist()}
return jg
def scene_bc(SJ):
bc = ["赌博","彩票"]
def find_first_keyword(text, keywords):
if text is None:
return None
for keyword in keywords:
if keyword in text:
return keyword
return None
SJ['type'] = SJ['counterparty'].apply(lambda x: find_first_keyword(x, bc))
SJ1=SJ[SJ['type'].notna()]
jg = {
"type":"博彩类",
"num": len(SJ1),
"explain":"包含目前主流彩票、赌博类网站及软件。若出现大额大量交易记录。请注意。",
"tradeNo": SJ1['tradeNo'].tolist()}
return jg
def scene_wy(SJ):
wy = ["腾讯计算机","腾讯天游","网易藏宝阁","网易游戏","网易雷火","上海米哈游","盛天网络","巨人网络","江西贪玩","竞技世界","益世界","莉莉丝游戏","边锋网络","Steam"]
def find_first_keyword(text, keywords):
if text is None:
return None
for keyword in keywords:
if keyword in text:
return keyword
return None
SJ['type'] = SJ['counterparty'].apply(lambda x: find_first_keyword(x, wy))
SJ=SJ[SJ['type'].notna()]
jg = {
"type":"网游类",
"num": len(SJ),
"explain":"包含市面上主流游戏厂商,若出现大量以及大额交易请注意。",
"tradeNo": SJ['tradeNo'].tolist()}
return jg
def scene_zb(SJ):
import pandas as pd
conditions = [
(SJ['counterparty'].str.contains('抖音')) & (SJ['tradeDesc'].str.contains('抖币')),
(SJ['counterparty'].str.contains('微播')) & (SJ['tradeDesc'].str.contains('抖币')),
(SJ['counterparty'].str.contains('快手')) & (SJ['tradeDesc'].str.contains('快币')),
(SJ['counterparty'].str.contains('YY直播')) & (SJ['tradeDesc'].str.contains('充值')),
(SJ['counterparty'] == '九秀直播'),
(SJ['counterparty'] == '花椒直播'),
(SJ['counterparty'] == '广州虎牙信息科技有限公司'),
(SJ['counterparty'] == '斗鱼直播'),
(SJ['counterparty'] == '奇秀直播'),
(SJ['counterparty'] == '觅秀直播'),
(SJ['counterparty'] == '映客直播')]
通过列表推导式筛选数据
filtered_data = [SJ[condition] for condition in conditions]
合并筛选后的数据
combined_data = pd.concat(filtered_data, ignore_index=True)
jg = {
"type":"直播打赏",
"num": len(combined_data),
"explain":"主要分析用户直播充值行为",
"tradeNo": combined_data['tradeNo'].tolist()}
return jg
def scene_repay(SJ):
bx = ["还款"]
def find_first_keyword(text, keywords):
if text is None:
return None
for keyword in keywords:
if keyword in text:
return keyword
return None
SJ['type'] = SJ['tradeDesc'].apply(lambda x: find_first_keyword(x, bx))
SJ1=SJ[SJ['type'].notna()]
jg = {
"type":"履约还款",
"num": len(SJ1),
"explain":"评估用户还款意愿和还款能力。若出现大量还款记录,评估用户还款压力。",
"tradeNo": SJ1['tradeNo'].tolist()}
return jg
def error(df):
df1=df.copy()
df1 = df1[df1['platformType']"租机"]
df1 = df1[df1['tradeType'] == '不计收支']
df1 = df1[df1['tradeAmount'] > 100]
df1 = df1[~df1['tradeDesc'].str.contains('|'.join(['冻结', '退款', '解冻', '押金', '撤销', '取消', '完成', '驳回', '预授权', '预授权冻结']), case=False, na=False)]
df1=df1[["tradeTime","counterparty","tradeDesc","tradeAmount","flowType","tradeFunc","tradeType"]]
df2=df.copy()
df2 = df2[df2['platformType']"租机"]
df2 = df2[df2['tradeAmount'] > 0]
df2 = df2[df2['tradeDesc'].str.contains('|'.join(['滞纳金','罚款','逾期','违约','罚息']), case=False, na=False)]
df2=df2[["tradeTime","counterparty","tradeDesc","tradeAmount","flowType","tradeFunc","tradeType"]]
return dict(errortrade=df1.to_dict(orient='records'),overday=df2.to_dict(orient='records'))
def preprocess_columns(df):
# 替换'counterparty'列中的特定值
replacements = {
"商家精选": "人人租(商家精选)",
"南鑫数码": "得心租物(南鑫数码)",
"永圣科技": "永圣租机(永圣科技)",
"苏州品趣": "迅瓜租物(苏州品趣)"
}
df['counterparty'] = df['counterparty'].replace(replacements)
# 移除'counterparty'和'tradeDesc'列中的双引号和反斜杠
df['counterparty'] = df['counterparty'].str.replace('"', '')
df['tradeDesc'] = df['tradeDesc'].str.replace('"', '')
df['counterparty'] = df['counterparty'].replace('\', '')
df['tradeDesc'] = df['tradeDesc'].replace('\', '')
return df
租机报告
import re
提取当前期数
def currentterm(text):
# Case 1: 第 + 数字
match = re.search(r'第(\d+)', text)
if match:
return int(match.group(1))
# Case 2: 数字/数字
match = re.search(r'(\d+)/\d+', text)
if match:
return int(match.group(1))
# Case 3: 关键词'期'后紧跟冒号':'的数字
match = re.search(r'租期:(\d+)', text)
if match:
return int(match.group(1))
# Case 4: 冒号加中括号'[]'内的数字
match = re.search(r'期次:\s[(\d+(?:,\s\d+)*)]', text)
if match:
numbers_str = match.group(1)
numbers = re.findall(r'\d+', numbers_str)
if numbers:
return int(numbers[0])
# Case 5: 关键词'计划'后面的数字
match = re.search(r'计划\s+(\d+)', text)
if match:
return int(match.group(1))
# Case 6: 如果以上情况都不符合,返回默认值1
return 1
提取总期数
import re
def totalterm(text):
# 情况1: 匹配 "共+数字+期" 格式并提取数字
match1 = re.search(r'共(\d+)期', text)
if match1:
return int(match1.group(1))
# 情况2: 匹配 "数字+/+数字" 格式并提取第二个数字
match2 = re.search(r'(\d+)/(\d+)', text)
if match2:
return int(match2.group(2))
# 如果没有匹配到任何情况,返回None
return "先暂定"
根据当前期数填充总期数
import pandas as pd
def fill_total_periods(df):
# 定义总期数的规则
periods_rules = [(12, 12), (24, 24), (36, 36), (48, 48), (96, 96), (120, 120), (240, 240), (360, 360), (720, 720)]
# 遍历每一行数据,根据规则填充总期数
for index, row in df.iterrows():
current_period = row['term']
if row['toadterm'] == '先暂定':
for period in periods_rules:
if current_period <= period[0]:
df.at[index, 'toadterm'] = period[1]
break
return df
找订单号,一般都是最前面
import re
def extract_data(s):
long_number_match = re.search(r'\d{9,}', s)
if long_number_match:
return long_number_match.group()
# 如果没有找到任何匹配项,返回空列表
return "暂定"
订单号自增id,除了暂定往后增加
def replace_zanding(df, column_name):
counter = 1
for index, row in df.iterrows():
if "暂定" in row[column_name]:
df.at[index, column_name] = counter
counter += 1
return df
import pandas as pd
定义单个平台里面有几个手机,也就是看哪些手机能够合并。
from datetime import datetime
示例数据
def label_phones(group):
start_keywords = {'冻结', '预授权', '押金'}
end_keywords = {'解冻', '退款', '取消', '撤销', '驳回', '关闭'}
labels = {}
label = 1
# 按时间排序
group = group.sort_values(by='tradeTime')
marked = pd.Series(False, index=group.index)
# 处理优先级最高的条件(关键词)
i = 0
while i < len(group):
if marked.iloc[i]:
i += 1
continue
current_row = group.iloc[i]
if any(keyword in current_row['tradeDesc'] for keyword in start_keywords):
j = i + 1
marked.iloc[i] = True
labels[group.index[i]] = f'第{label}台手机'
while j < len(group):
next_row = group.iloc[j]
if any(keyword in next_row['tradeDesc'] for keyword in end_keywords):
labels[group.index[j]] = f'第{label}台手机'
marked.iloc[j] = True
break
else:
labels[group.index[j]] = f'第{label}台手机'
marked.iloc[j] = True
j += 1
label += 1
i += 1
# 处理剩下的记录
remaining_group = group[~group.index.isin(labels.keys())]
# 第一步:按金额标记
tradeAmount_based_phones = remaining_group.drop_duplicates(subset='tradeAmount')
tradeAmount_labels = {}
for index, row in tradeAmount_based_phones.iterrows():
tradeAmount_labels[row['tradeAmount']] = f'第{label}台手机'
label += 1
for index, row in remaining_group.iterrows():
if index not in labels:
labels[index] = tradeAmount_labels.get(row['tradeAmount'], f'第{label}台手机')
label += 1
# 第二步:按订单号标记
remaining_group = remaining_group[~remaining_group.index.isin(labels.keys())]
order_counts = remaining_group['单号'].value_counts()
shared_orders = order_counts[order_counts > 1].index
for order in shared_orders:
for index in remaining_group[remaining_group['单号'] == order].index:
if index not in labels:
labels[index] = f'第{label}台手机'
label += 1
return labels
def apply_labels_to_platforms(data):
platforms = data['platformName'].unique()
results = []
for platform in platforms:
platform_data = data[data['platformName'] == platform].copy()
labels = label_phones(platform_data)
platform_data['phone_label'] = platform_data.index.map(labels).fillna('未标记')
results.append(platform_data)
final_result = pd.concat(results)
return final_result
import pandas as pd
计算单个平台取消手机数
def count_canceled_classes(df):
# 定义取消关键词
cancel_keywords = {'取消', '退款', '撤销', '驳回','解冻','关闭'}
# 检查描述中是否包含取消关键词
def is_canceled(description):
return any(keyword in description for keyword in cancel_keywords)
# 获取平台和手机编号的唯一组合
unique_class_student_combinations = df[['platformName', 'phone_label']].drop_duplicates().values.tolist()
# 记录被取消的手机编号组合
canceled_students = set()
for class_name, student_name in unique_class_student_combinations:
# 筛选出该手机编号的所有描述
descriptions = df[(df['platformName'] == class_name) & (df['phone_label'] == student_name)]['tradeDesc']
# 检查该手机编号是否存在的所有描述是否都包含取消关键词
if any(is_canceled(desc) for desc in descriptions):
canceled_students.add((class_name, student_name))
return len(canceled_students)
定义中间数据,主要是计算在租和剩余本金。所以去掉一些押金和取消的。
def middle(df):
# 定义取消关键词
cancel_keywords = {'取消', '退款', '撤销', '驳回','解冻','关闭'}
# 检查描述中是否包含取消关键词
def is_canceled(description):
return any(keyword in description for keyword in cancel_keywords)
# 创建一个新的DataFrame来保存不包含取消的数据
filtered_df = pd.DataFrame(columns=df.columns)
# 获取平台和手机编号的唯一组合
unique_class_student_combinations = df[['platformName', 'phone_label']].drop_duplicates().values.tolist()
for class_name, student_name in unique_class_student_combinations:
# 筛选出该手机编号的所有描述
descriptions = df[(df['platformName'] == class_name) & (df['phone_label'] == student_name)]['tradeDesc']
# 检查该手机编号是否所有描述都不包含取消关键词
if not any(is_canceled(desc) for desc in descriptions):
# 如果没有取消,则将该学生的所有相关行添加到新的DataFrame中
filtered_df = pd.concat([filtered_df, df[(df['platformName'] == class_name) & (df['phone_label'] == student_name)]])
return filtered_df[['tradeType','tradeDesc','ymd','tradeAmount','platformName','platformType','term','toadterm','单号','tradeTime']]
定义在租
def rent(df_with_phone_counts):
current_time = datetime.now()
df_with_phone_counts['时间'] = pd.to_datetime(df_with_phone_counts['ymd'])
df_with_phone_counts['天数差'] = (current_time - df_with_phone_counts['时间']).dt.days
# 将天数差转换为月份差,30天或以上算作一个月
df_with_phone_counts['月份差'] = df_with_phone_counts['天数差'] // 30
# 将月份差加到期数列中
df_with_phone_counts['新的期数'] = df_with_phone_counts['term'] + df_with_phone_counts['月份差']
condition1 = df_with_phone_counts['新的期数'] >= df_with_phone_counts['toadterm']
# 检查描述中是否包含"完成"或"提前缴费"
condition2 = df_with_phone_counts['tradeDesc'].str.contains('完成|买断|结清', na=False)
# 计算满足任一条件的学生数量,确保每个学生只被计算一次
completed_payment_students = df_with_phone_counts[condition1 | condition2].drop_duplicates(subset=['platformName', 'phone_label'])
return completed_payment_students.shape[0]
押金
def sum_amount_with_keywords(df):
# 检查描述列是否包含关键词列表中的任意一个
keywords = ['冻结', '解冻', '押金', '预授权']
mask = df['tradeDesc'].str.contains('|'.join(keywords), na=False)
# 使用mask过滤出符合条件的金额,并进行求和
total_amount = df.loc[mask, 'tradeAmount'].sum()
return total_amount
已经支付金额
def sum_amount_with_keywords_used(df):
# 检查描述列是否包含关键词列表中的任意一个
keywords = ['取消','退款','撤销','驳回']
mask = ~df['tradeDesc'].str.contains('|'.join(keywords), na=False)
# 使用mask过滤出符合条件的金额,并进行求和
total_amount = df.loc[mask, 'tradeAmount'].sum()
return total_amount
调用异常金额
def errors(df):
df1=df.copy()
df1 = df1[df1['platformType']"租机"]
df1 = df1[df1['tradeType'] == '不计收支']
df1 = df1[df1['tradeAmount'] > 100]
df1 = df1[~df1['tradeDesc'].str.contains('|'.join(['冻结', '退款', '解冻', '押金', '撤销', '取消', '完成', '驳回', '预授权', '预授权冻结']), case=False, na=False)]
df1=df1[["ymd","platformName","tradeDesc","tradeAmount","tradeType"]]
df2=df.copy()
df2 = df2[df2['platformType']"租机"]
df2 = df2[df2['tradeDesc'].str.contains('|'.join(['滞纳金','罚款','逾期','违约','罚息']), case=False, na=False)]
df2=df2[["ymd","platformName","tradeDesc","tradeAmount","tradeType"]]
dd=pd.concat([df1,df2],axis=0)
amount=dd['tradeAmount'].sum()
return amount
计算剩余总金额
def process_dataframe(df):
# 定义需要去除的关键词
keywords_to_remove = ['取消', '退款', '撤销', '驳回']
# 创建一个函数,用于检查描述是否包含关键词
def contains_keywords(description):
return any(keyword in description for keyword in keywords_to_remove)
# 过滤掉描述中包含关键词的行
df_filtered = df[~df['tradeDesc'].apply(contains_keywords)]
# 计算每个学生的缴费总金额和缴费次数
current_time = datetime.now()
df['时间'] = pd.to_datetime(df['ymd'])
df['天数差'] = (current_time - df['时间']).dt.days
# 将天数差转换为月份差,30天或以上算作一个月
df['月份差'] = df['天数差'] // 30
# 将月份差加到期数列中
df['新的期数'] = df['term'] + df['月份差']
df['maxterm']=df.groupby(['platformName','phone_label'])['新的期数'].transform(max)
df['maxtoadterm']=df.groupby(['platformName','phone_label'])['toadterm'].transform(max)
condition1 = df['maxterm'] >= df['maxtoadterm']
# 检查描述中是否包含"完成"或"提前缴费"
condition2 = df['tradeDesc'].str.contains('完成|结清', na=False)
# 计算满足任一条件的学生数量,确保每个学生只被计算一次
completed_payment_students = df[condition1 | condition2].drop_duplicates(subset=['platformName', 'phone_label'])
df1_unique = completed_payment_students.drop_duplicates(subset=['platformName', 'phone_label'])[['platformName','phone_label']]
# 执行合并操作,并添加一个指示器列,找出df2中存在于df1_unique中的行
merged_df = df_filtered.merge(df1_unique, on=['platformName', 'phone_label'], how='left', indicator=True)
# 筛选出只在df2中存在的行
df_filtered = merged_df[merged_df['_merge'] == 'left_only'].drop(columns=['_merge'])
df_filtered['缴费总金额'] = df_filtered.groupby(['platformName', 'phone_label'])['tradeAmount'].transform('sum')
df_filtered['缴费次数'] = df_filtered.groupby(['platformName', 'phone_label'])['tradeAmount'].transform('count')
# 计算平均金额
df_filtered['平均金额'] = df_filtered['缴费总金额'] / df_filtered['缴费次数']
# 删除中间列
df_filtered = df_filtered.drop(columns=['缴费总金额', '缴费次数'])
df_filtered['时间'] = pd.to_datetime(df_filtered['ymd'])
df_filtered['天数差'] = (current_time - df_filtered['时间']).dt.days
# 将天数差转换为月份差,30天或以上算作一个月
df_filtered['月份差'] = df_filtered['天数差'] // 30
# 将月份差加到期数列中
df_filtered['新的期数'] = df_filtered['term'] + df_filtered['月份差']
df_filtered['maxterm']=df_filtered.groupby(['platformName','phone_label'])['新的期数'].transform(max)
df_filtered['maxtoadterm']=df_filtered.groupby(['platformName','phone_label'])['toadterm'].transform(max)
df_filtered['maxterm'] = df_filtered.apply(lambda row: row['maxtoadterm'] if row['maxterm'] > row['maxtoadterm'] else row['maxterm'],axis=1)
df_filtered['unuseeterm']=df_filtered['maxtoadterm']-df_filtered['maxterm']
df_filtered['unuseamount']=df_filtered['unuseeterm']*df_filtered['平均金额']
return df_filtered
def round_to_range(number):
# 四舍五入到最接近的百位数
if number < 100:
number += 100
rounded_number = round(number / 100) * 100
# 计算范围
lower_bound = rounded_number - 100
upper_bound = rounded_number + 100
return f"{lower_bound}~{upper_bound}"
def update_total_issues(df):
# 通过平台和手机号分组
grouped = df.groupby(['platformName', 'phone_label'])
# 初始化一个空的DataFrame,用于存储更新后的数据
updated_df = pd.DataFrame(columns=df.columns)
for (platformName, phone_label), group in grouped:
# 检查期数是否严格递增
if not group['term'].is_monotonic_increasing:
# 如果不是,则取此平台此手机最大期数作为总期数
max_issue = group['term'].max()
# 更新总期数
group['toadterm'] = max_issue
# 将更新后的组数据添加到结果DataFrame中
updated_df = pd.concat([updated_df, group])
return updated_df
def update_periods(df):
new_periods = []
# 遍历每个分组
for (platformName, phone_label), group in df.groupby(['platformName', 'phone_label']):
group = group.reset_index(drop=True)
updated_group = group.copy()
# 遍历每组中的记录
for i in range(len(group)):
if i > 0 and group.iloc[i-1]['term'] > group.iloc[i]['term']:
updated_group.at[i-1, 'term'] = 1
new_periods.extend(updated_group['term'])
df['term'] = new_periods
return df
def is_df_empty_or_null(df):
"""
检查 DataFrame 是否为空或者只包含空值。
参数:
df (pd.DataFrame): 要检查的 DataFrame。
返回:
bool: 如果 DataFrame 为空或者只包含空值,则返回 True,否则返回 False。
"""
if df is None:
return True
if df.empty:
return True
if df.isnull().all().all():
return True
return False
def zj(SJ1,day:int,result):
global pd
SJ=SJ1[SJ1['platformType']'租机']
keywords_to_exclude = '糯米饭|锅巴|充电宝|水产|水果|肉夹馍|凉皮|便利|面|超市|私房菜|简茶|饰品|挖机|网咖|网吧'
SJ = SJ[~SJ['counterparty'].str.contains(keywords_to_exclude) & ~SJ['tradeDesc'].str.contains(keywords_to_exclude)]
if is_df_empty_or_null(SJ):
return None
SJ['tradeAmount'] = pd.to_numeric(SJ['tradeAmount'], errors='coerce')
SJ['tradeAmount'].fillna(0, inplace=True)
#计算当前期数,没有则为1
SJ['term']=SJ['tradeDesc'].apply(currentterm)
#针对异常当前期数进行修正
SJ['term'].fillna('1',inplace=True)
SJ['term']=SJ['term'].apply(lambda x:1 if float(x) <=0 else 1 if len(str(x))>3 else x)
#提取总期数
SJ['toadterm']=SJ['tradeDesc'].apply(totalterm)
#提取单号
SJ['单号'] = SJ['tradeDesc'].apply(extract_data)
#针对总期数进行优化
SJ = fill_total_periods(SJ)
#针对单号进行优化,增加自增id。
SJ = replace_zanding(SJ,'单号')
#SJ2计算平台、申请次数、取消次数、通过次数四个指标。
SJ2 = SJ[~SJ['tradeDesc'].str.contains('收藏|福利')]
if is_df_empty_or_null(SJ2):
return None
SJ2.sort_values(by=['platformName','tradeTime'],inplace=True)
df_with_phone_counts_sq = apply_labels_to_platforms(SJ2)
# 填充空值
df_with_phone_counts_sq['phone_label'].fillna('未标记', inplace=True)
df_with_phone_counts_sq['tradeTime'] = pd.to_datetime(df_with_phone_counts_sq['tradeTime'])
df_with_phone_counts_sq['第一次申请时间'] = df_with_phone_counts_sq.groupby(['platformName', 'phone_label'])['tradeTime'].transform('min').dt.date
df_with_phone_counts_sq['第一次申请时间']=pd.to_datetime(df_with_phone_counts_sq['第一次申请时间'],format="yyyy-mm-dd")
# 设置应用日期
apply_date = applydate(result)
df_with_phone_counts_sq['发送时间'] = pd.to_datetime(apply_date)
# 计算与第一次申请时间的日期差异
df_with_phone_counts_sq['diff'] = (df_with_phone_counts_sq['发送时间'] - df_with_phone_counts_sq['第一次申请时间']).dt.days
df_with_phone_counts_sq=df_with_phone_counts_sq[df_with_phone_counts_sq['diff']<=day]
if is_df_empty_or_null(df_with_phone_counts_sq):
return None
applyptai=df_with_phone_counts_sq['platformName'].nunique()
applynum=df_with_phone_counts_sq.drop_duplicates(subset=['platformName','phone_label'])['phone_label'].count()
applyqxiao=count_canceled_classes(df_with_phone_counts_sq)
applytguo=applynum-applyqxiao
middleSJ=middle(df_with_phone_counts_sq)
if is_df_empty_or_null(middleSJ):
item = {
'day':day,
'apply_ptai': applyptai,
'apply_num':applynum,
'remove_num': applyqxiao,
'pass_num': applytguo,
'renting': 0,
'pre_amount':0,
'errors_amount':0,
'used_amound':0,
'remain_amount': 0,
'next_amount':0}
return item
middleSJ.sort_values(by=['platformName','tradeTime'],inplace=True)
middle_with_phone_counts = apply_labels_to_platforms(middleSJ)
middle_with_phone_counts = update_total_issues(middle_with_phone_counts)
middle_with_phone_counts = update_periods(middle_with_phone_counts)
rents = rent(middle_with_phone_counts)
total_sum = sum_amount_with_keywords(middle_with_phone_counts)
erroramount=errors(middle_with_phone_counts)
usedamount=sum_amount_with_keywords_used(middle_with_phone_counts)
df_processed = process_dataframe(middle_with_phone_counts)
totalamount=df_processed.drop_duplicates(subset=['platformName','phone_label'])['unuseamount'].sum()
nextamount=round_to_range(df_processed.drop_duplicates(subset=['platformName','phone_label'])['平均金额'].sum())
item = {
'day':day,
'apply_ptai': applyptai,
'apply_num':applynum,
'remove_num': applyqxiao,
'pass_num': applytguo,
'renting': applytguo-rents,
'pre_amount':round(total_sum,2),
'errors_amount':round(erroramount,2),
'used_amound':round(usedamount,2),
'remain_amount': round(totalamount,2),
'next_amount':nextamount}
return item
def zjReport(SJ1:pd.DataFrame,result):
import datetime as dt
import pandas as pd
global pd
days = [7,15,30,90,180,365]
zjReports = []
for d in days:
item = zj(SJ1,d,result)
if not item:
continue
zjReports.append(item)
df = pd.json_normalize(zjReports)
if len(df)0:
return []
all_days = pd.Series([7, 15, 30, 90, 180, 365])
# 重新索引数据框,确保所有必需的day值都存在
df = df.set_index('day').reindex(all_days).reset_index()
# 用反斜杠填充缺失的指标列
df.fillna("/",inplace=True)
df.rename({"index":"day"},axis=1,inplace=True)
return df.to_dict(orient='records')
def reportV20240501(records,keywords,platforms,brokers,result):
result.update(tpml())
df = pd.DataFrame(records).fillna('')
df=preprocess_columns(df)
df2=df[['counterparty','tradeDesc','tradeFunc','tradeNo']].copy()
scene_functions = [scene_qssj, scene_dzsm, scene_home, scene_cxjt, scene_dspt, scene_ylbx, scene_jysj, scene_bc,scene_wy, scene_zb, scene_repay]
merged_json = []
for func in scene_functions:
df3 = df2.copy()
scene_result = func(df3)
merged_json.append(scene_result)
result['newfield']['scences'] = merged_json
df = preProcessing(df,keywords,platforms,brokers)
df1=df.copy()
df4=df.copy()
result['newfield']['banknumbers']=banknumber(df)
result['newfield']['oftencard']=oftencard(df)
result['newfield']['longdays']=calculate_longest_inactive_days(df1,result)
result['newfield']['activeTimeCategory']=activeTime_month(df,result)
result['newfield']['activeTimeCategory1']=activeTime_hour(df)
result['newfield']['error']=error(df)
result['newfield']['zjreport']=zjReport(df4,result)
#result['newfield']['zjreport']=[]
result['sceneReport']['jtReport'] = jtReport(df,result)
result['mailInfo'] = mailInfo(df,result.get('mailInfo',{}))
result['billReport']['activeTime'] = activeTime(df,result.get('mailInfo',{}))
result['billReport']['tradeMethod'] =getTradeMethod(df)
result['billReport']['capital'] = capital(df,result)
result['billReport']['tradeType'] = tradeType(df)
result['billReport']['counterparty'] = counterparty(df)
result['sceneReport']['payLater'] = getPlatform(df,'先用后付')
result['sceneReport']['rent'] = getPlatform(df,'租机')
result['sceneReport']['load'] = getPlatform(df,"网贷")
result['sceneReport']['recycling'] = recyclingPlatform(df)
userTag = []
if len(result['sceneReport']['payLater'])>0:
userTag.append('先用后付')
if len(result['sceneReport']['rent'])>0:
userTag.append('租机')
if len(result['sceneReport']['load'])>0:
userTag.append('网贷')
result.setdefault('userInfo',{'userTag':'','performanceTag':''})
result['userInfo']['userTag'] = ','.join(userTag)
result = json.dumps(result,ensure_ascii=False)
return result
app = Flask(name, template_folder="")
app.json_encoder = CJsonEncoder
@app.post('/api/report/v1')
def report20240501():
data = request.json
# with open ('data.json','w',encoding='UTF-8') as fp:
# fp.write(json.dumps(data,ensure_ascii=False,indent=4))
reocrds = data['records']
keywords = data.get('keywords',[])
platforms = data.get('platform',[])
brokers = data.get('broker',[])
result = data.get('result',{})
return reportV20240501(records=reocrds,keywords=keywords,platforms=platforms,brokers=brokers,result=result)