航空公司客户价值分析

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
# Load the raw airline membership records from the CSV one directory up.
data = pd.read_csv('../air_data.csv')
data  # notebook cell echo: shows the loaded table
# Descriptive statistics over every column (include='all'), with no extra
# percentiles requested; the result is transposed so that each original
# column becomes one row of the summary.
explore = data.describe(percentiles=[], include='all').transpose()
explore  # notebook cell echo: shows the summary table
count unique top freq mean std min 50% max
MEMBER_NO 62988.0 NaN NaN NaN 31494.5 18183.213715 1.0 31494.5 62988.0
FFP_DATE 62988 3068 2011/1/13 184 NaN NaN NaN NaN NaN
FIRST_FLIGHT_DATE 62988 3406 2013/2/16 96 NaN NaN NaN NaN NaN
GENDER 62985 2 男 48134 NaN NaN NaN NaN NaN
FFP_TIER 62988.0 NaN NaN NaN 4.102162 0.373856 4.0 4.0 6.0
WORK_CITY 60719 3309 广州 9385 NaN NaN NaN NaN NaN
WORK_PROVINCE 59740 1183 广东 17507 NaN NaN NaN NaN NaN
WORK_COUNTRY 62962 118 CN 57748 NaN NaN NaN NaN NaN
AGE 62568.0 NaN NaN NaN 42.476346 9.885915 6.0 41.0 110.0
LOAD_TIME 62988 1 2014/3/31 62988 NaN NaN NaN NaN NaN
FLIGHT_COUNT 62988.0 NaN NaN NaN 11.839414 14.049471 2.0 7.0 213.0
BP_SUM 62988.0 NaN NaN NaN 10925.081254 16339.486151 0.0 5700.0 505308.0
EP_SUM_YR_1 62988.0 NaN NaN NaN 0.0 0.0 0.0 0.0 0.0
EP_SUM_YR_2 62988.0 NaN NaN NaN 265.689623 1645.702854 0.0 0.0 74460.0
SUM_YR_1 62437.0 NaN NaN NaN 5355.376064 8109.450147 0.0 2800.0 239560.0
SUM_YR_2 62850.0 NaN NaN NaN 5604.026014 8703.364247 0.0 2773.0 234188.0
SEG_KM_SUM 62988.0 NaN NaN NaN 17123.878691 20960.844623 368.0 9994.0 580717.0
WEIGHTED_SEG_KM 62988.0 NaN NaN NaN 12777.152439 17578.586695 0.0 6978.255 558440.14
LAST_FLIGHT_DATE 62988 731 2014/3/31 959 NaN NaN NaN NaN NaN
AVG_FLIGHT_COUNT 62988.0 NaN NaN NaN 1.542154 1.786996 0.25 0.875 26.625
AVG_BP_SUM 62988.0 NaN NaN NaN 1421.440249 2083.121324 0.0 752.375 63163.5
BEGIN_TO_FIRST 62988.0 NaN NaN NaN 120.145488 159.572867 0.0 50.0 729.0
LAST_TO_END 62988.0 NaN NaN NaN 176.120102 183.822223 1.0 108.0 731.0
AVG_INTERVAL 62988.0 NaN NaN NaN 67.749788 77.517866 0.0 44.666667 728.0
MAX_INTERVAL 62988.0 NaN NaN NaN 166.033895 123.39718 0.0 143.0 728.0
ADD_POINTS_SUM_YR_1 62988.0 NaN NaN NaN 540.316965 3956.083455 0.0 0.0 600000.0
ADD_POINTS_SUM_YR_2 62988.0 NaN NaN NaN 814.689258 5121.796929 0.0 0.0 728282.0
EXCHANGE_COUNT 62988.0 NaN NaN NaN 0.319775 1.136004 0.0 0.0 46.0
avg_discount 62988.0 NaN NaN NaN 0.721558 0.185427 0.0 0.711856 1.5
P1Y_Flight_Count 62988.0 NaN NaN NaN 5.766257 7.210922 0.0 3.0 118.0
L1Y_Flight_Count 62988.0 NaN NaN NaN 6.073157 8.175127 0.0 3.0 111.0
P1Y_BP_SUM 62988.0 NaN NaN NaN 5366.72055 8537.773021 0.0 2692.0 246197.0
L1Y_BP_SUM 62988.0 NaN NaN NaN 5558.360704 9351.956952 0.0 2547.0 259111.0
EP_SUM 62988.0 NaN NaN NaN 265.689623 1645.702854 0.0 0.0 74460.0
ADD_Point_SUM 62988.0 NaN NaN NaN 1355.006223 7868.477 0.0 0.0 984938.0
Eli_Add_Point_Sum 62988.0 NaN NaN NaN 1620.695847 8294.398955 0.0 0.0 984938.0
L1Y_ELi_Add_Points 62988.0 NaN NaN NaN 1080.378882 5639.857254 0.0 0.0 728282.0
Points_Sum 62988.0 NaN NaN NaN 12545.7771 20507.8167 0.0 6328.5 985572.0
L1Y_Points_Sum 62988.0 NaN NaN NaN 6638.739585 12601.819863 0.0 2860.5 728282.0
Ration_L1Y_Flight_Count 62988.0 NaN NaN NaN 0.486419 0.319105 0.0 0.5 1.0
Ration_P1Y_Flight_Count 62988.0 NaN NaN NaN 0.513581 0.319105 0.0 0.5 1.0
Ration_P1Y_BPS 62988.0 NaN NaN NaN 0.522293 0.339632 0.0 0.514252 0.999989
Ration_L1Y_BPS 62988.0 NaN NaN NaN 0.468422 0.338956 0.0 0.476747 0.999993
Point_NotFlight 62988.0 NaN NaN NaN 2.728155 7.364164 0.0 0.0 140.0
from datetime import datetime  # not needed by this cell any more; kept in case other cells use the name

# Parse the FFP_DATE strings in a single vectorised call (instead of a per-row
# strptime lambda) and take the calendar year of each parsed date.
ffp = pd.to_datetime(data['FFP_DATE'], format='%Y/%m/%d')
ffp_year = ffp.dt.year

# Histogram of the number of members enrolled in each year.
fig = plt.figure(figsize=(8, 5))
plt.rcParams['font.sans-serif'] = 'SimHei'  # font used so the Chinese labels can be displayed
plt.rcParams['axes.unicode_minus'] = False
plt.hist(ffp_year, bins='auto', color='#111111')
plt.xlabel('年份')
plt.ylabel('入会人数')
plt.title('各年份会员入会人数 2019320143322李之琛')
plt.show()
# The original wrote `plt.close` without parentheses, which only references
# the function and never closes the figure; call it.
plt.close()

<function matplotlib.pyplot.close(fig=None)>
# Count each gender once and reuse the result. The original called the
# top-level pd.value_counts (deprecated in recent pandas) twice on the same
# column.
gender_counts = data['GENDER'].value_counts()
male = gender_counts['男']
female = gender_counts['女']

# Pie chart of the male / female share of members.
fig = plt.figure(figsize=(7, 4))  # canvas size
plt.pie([male, female], labels=['男', '女'],
        colors=['lightskyblue', 'lightcoral'], autopct='%1.1f%%')
plt.title('会员性别比例2019320143322李之琛')
plt.show()
# The original wrote `plt.close` without parentheses, which only references
# the function and never closes the figure; call it.
plt.close()

<function matplotlib.pyplot.close(fig=None)>
 #提取属性并合并为新数据集
# Select the attributes to correlate. The explicit .copy() makes data_corr an
# independent frame, so adding columns below no longer raises
# SettingWithCopyWarning.
data_corr = data[['FFP_TIER', 'FLIGHT_COUNT', 'LAST_TO_END',
                  'SEG_KM_SUM', 'EXCHANGE_COUNT', 'Points_Sum']].copy()
# NOTE(review): missing ages are replaced by 0 before the integer cast, which
# feeds artificial zeros into the AGE correlations — confirm this is intended.
age1 = data['AGE'].fillna(0)
data_corr['AGE'] = age1.astype('int64')
data_corr['ffp_year'] = ffp_year

# Pearson correlation matrix of the selected attributes.
dt_corr = data_corr.corr(method='pearson')
print('相关性矩阵为:\n', dt_corr)

# Heat map of the correlation matrix.
import seaborn as sns
plt.subplots(figsize=(10, 10))  # figure size
sns.heatmap(dt_corr, annot=True, vmax=1, square=True, cmap='Blues')
plt.title('热力3322李之琛')
plt.show()
plt.close()
C:\Users\reion\AppData\Local\Temp\ipykernel_21832\391820817.py:5: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_corr['AGE'] = age1.astype('int64')
C:\Users\reion\AppData\Local\Temp\ipykernel_21832\391820817.py:6: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_corr['ffp_year'] = ffp_year


相关性矩阵为:
                 FFP_TIER  FLIGHT_COUNT  LAST_TO_END  SEG_KM_SUM  \
FFP_TIER        1.000000      0.582447    -0.206313    0.522350   
FLIGHT_COUNT    0.582447      1.000000    -0.404999    0.850411   
LAST_TO_END    -0.206313     -0.404999     1.000000   -0.369509   
SEG_KM_SUM      0.522350      0.850411    -0.369509    1.000000   
EXCHANGE_COUNT  0.342355      0.502501    -0.169717    0.507819   
Points_Sum      0.559249      0.747092    -0.292027    0.853014   
AGE             0.076245      0.075309    -0.027654    0.087285   
ffp_year       -0.116510     -0.188181     0.117913   -0.171508   

                EXCHANGE_COUNT  Points_Sum       AGE  ffp_year  
FFP_TIER              0.342355    0.559249  0.076245 -0.116510  
FLIGHT_COUNT          0.502501    0.747092  0.075309 -0.188181  
LAST_TO_END          -0.169717   -0.292027 -0.027654  0.117913  
SEG_KM_SUM            0.507819    0.853014  0.087285 -0.171508  
EXCHANGE_COUNT        1.000000    0.578581  0.032760 -0.216610  
Points_Sum            0.578581    1.000000  0.074887 -0.163431  
AGE                   0.032760    0.074887  1.000000 -0.242579  
ffp_year             -0.216610   -0.163431 -0.242579  1.000000  

posted @ 2023-03-12 21:53  里列昂遗失的记事本  阅读(33)  评论(0编辑  收藏  举报