淘宝双12购物数据分析

一、选题的背景

  随着社会的发展和数据化时代的到来,线上购物的占比大幅增长,6.18、双十一、双十二等各种线上购物节层出不穷。因此,本文分析双十二活动期间各时段各类用户行为数量的走势与对比,以及活动前后每日流量的变化。

二、大数据分析设计方案

1.pv、uv的趋势对比:对比双12活动前后的每日流量,并分析流量的影响因素。
2.活动前后的购买率与用户行为偏好(时间维度):对比日常与活动日各时段各类行为数量的走势。
3.活动前后用户转化率对比。

三、数据分析步骤

  数据源:https://pan.baidu.com/s/10NsxiSeQekFzELgWFP7-xg?pwd=lfqm

  导入必要的模块

1
2
3
4
5
6
7
8
import pandas as pd
import numpy as np
import datetime
from pyecharts.charts import *
from pyecharts.globals import ThemeType
from pyecharts.charts import Bar
from pyecharts.charts import Funnel
from pyecharts import options as opts

 

  导入和查看数据

1
2
# Load the raw user-behaviour log from a CSV file (relative path).
df = pd.read_csv(r'tianchi_fresh_comp_train_user.csv')
# Print a summary of the columns and their dtypes.
df.info()

  

1
# Preview the first rows of the raw data.
df.head()

  

 

  

1
# Summary statistics of the loaded columns.
df.describe()

  

 

  数据清洗

  删除重复值

1
2
# Drop rows that are exact duplicates across every column (first occurrence is kept).
# NOTE(review): if `time` only has hour resolution, identical rows may be genuine
# repeated actions rather than logging duplicates - confirm before trusting PV counts.
df.drop_duplicates(keep='first', inplace=True)
df.head(n=5)

  

 

 

   删除null值 因本项目不涉及地区方面的分析,所以可以不删user_geohash 的null值

1
# Count missing values per column.
df.isnull().sum()

  

 

   将df.time转成日期格式

1
2
# Parse the `time` column into pandas datetimes, then re-check the dtypes.
parsed_time = pd.to_datetime(df['time'])
df['time'] = parsed_time
df.info()

  

 

   增加辅助列:日期date、时段hour (datetime模块)

1
2
3
# Helper columns derived from the timestamp: calendar date and hour of day.
timestamps = df['time'].dt
df['date'] = timestamps.date
df['hour'] = timestamps.hour
df.head()

 

 

   将某些字段转换成字符串格式

1
2
3
4
# Store the identifiers and the behaviour code as strings rather than numbers,
# so later filters compare against '1'..'4'.
for column in ('user_id', 'item_id', 'behavior_type'):
    df[column] = df[column].values.astype('str')
df.info()

  

 

 

1
# The category id is a label, not a quantity: store it as a string too.
category_ids = df['item_category'].values
df['item_category'] = category_ids.astype('str')

  

  数据可视化

  每日pv、uv

  双y轴双曲线图

1
2
3
4
5
6
7
8
9
# Extract the data
# Both metrics are derived from the behavior_type '1' (view) events only.
page_views = df[df['behavior_type'] == '1']
# PV: number of view events per day.
pv_day = page_views.groupby('date')['behavior_type'].count()
# UV: number of distinct users with at least one view event per day.
unique_visits = page_views.drop_duplicates(['user_id', 'date'])
uv_day = unique_visits.groupby('date')['user_id'].count()
# Convert to the shapes the chart needs
# 1. dates (the index of the daily series)
date = pv_day.index
# 2. pv / uv values, rescaled to units of 10,000 and rounded to 2 decimals
pv = np.around(pv_day.values / 10000, decimals=2)
uv = np.around(uv_day.values / 10000, decimals=2)

  

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Build the chart: average hourly PV, ordinary days vs. the two Double-12 days.
# The hourly PV series are derived from `df` right here: `col_hour`, `view_hour`
# and `view_active_ahour` are only assigned further down, so reading them at this
# point raises NameError when the code runs top to bottom.
pv_events = df[df.behavior_type == '1']
# Parse both sides explicitly so the comparison works whether `date` currently
# holds datetime.date objects or datetime64 values.
on_double12 = pd.to_datetime(pv_events['date']).isin(
    pd.to_datetime(['2021-12-11', '2021-12-12'])
)
hours_of_day = list(range(24))
# Reindex onto all 24 hours so an hour without events plots as 0 instead of
# shifting every later bar one slot to the left.
daily_pv_by_hour = (
    pv_events[~on_double12].groupby('hour')['behavior_type'].count()
    .reindex(hours_of_day, fill_value=0)
)
double12_pv_by_hour = (
    pv_events[on_double12].groupby('hour')['behavior_type'].count()
    .reindex(hours_of_day, fill_value=0)
)

x=hours_of_day
# Per-day averages, using the day counts hard-coded throughout this analysis
# (29 ordinary days, 2 event days).
y1=np.around(daily_pv_by_hour.values/29,decimals=0).tolist()
y8=np.around(double12_pv_by_hour.values/2,decimals=0).tolist()

bar=(
    Bar(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    .add_xaxis(xaxis_data=x)
    .add_yaxis(
    "日常PV",
        y1,
        stack='stack1',  # both series share one stack, so the bars are drawn on top of each other
        label_opts=opts.LabelOpts(is_show=False)
    )
    .add_yaxis(
    "双12PV",
        y8,
        stack='stack1',
        label_opts=opts.LabelOpts(is_show=False)
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="日常和双12每日时段PV走势对比"),
        legend_opts=opts.LegendOpts(pos_top='5%'),
        yaxis_opts=opts.AxisOpts(name='人次',
                                axislabel_opts=opts.LabelOpts(formatter="{value}"))
    )
)

bar.render_notebook()

  

 

 

 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Build the chart: daily PV (left axis) and UV (right axis) as two lines.
x = list(date)
y1 = pv
y2 = uv

# Secondary y axis used by the uv series: fixed 0-1.6 range in steps of 0.4.
uv_axis = opts.AxisOpts(
    name='uv',
    min_=0,
    max_=1.6,
    interval=0.4,
    axislabel_opts=opts.LabelOpts(formatter="{value} 万人"),
)
# Default (left) y axis, used by the pv series.
pv_axis = opts.AxisOpts(
    name='pv',
    axislabel_opts=opts.LabelOpts(formatter="{value} 万次"),
)
# Category x axis with a shadow pointer that follows the mouse.
day_axis = opts.AxisOpts(
    type_='category',
    axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow"),
)

pvuv_day_line = Line(init_opts=opts.InitOpts(theme=ThemeType.DARK))
pvuv_day_line.add_xaxis(x)
# Data labels are hidden on both series.
pvuv_day_line.add_yaxis('pv', y1, label_opts=opts.LabelOpts(is_show=False))
# yaxis_index=1 binds the uv series to the secondary axis added below.
pvuv_day_line.add_yaxis(
    'uv',
    yaxis_index=1,
    y_axis=y2,
    label_opts=opts.LabelOpts(is_show=False),
)
pvuv_day_line.extend_axis(yaxis=uv_axis)
pvuv_day_line.set_global_opts(
    # Axis-triggered tooltip with a cross-hair pointer.
    tooltip_opts=opts.TooltipOpts(is_show=True, trigger="axis", axis_pointer_type='cross'),
    xaxis_opts=day_axis,
    yaxis_opts=pv_axis,
    title_opts=opts.TitleOpts(title="每日pv和uv"),
)
pvuv_day_line.render_notebook()  # display the chart

  

  分析:

  1、双12活动的流量主要集中于12月11日和12日,10日之前并没有太大的上涨,特别是uv的涨幅甚小。建议优化商品预热的活动方案,提前吸引人流进店挑选商品;

  2、活动期间pv比uv波动较大,活动引流效应明显;

  3、13日之后pv、uv均回落至比活动前稍高的水平;

 

  每日pv、uv增量

  双y轴双曲线图

1
2
3
4
5
6
# Extract the data
# Join the daily pv and uv series on their shared `date` index.
pv_uv = pd.merge(left=pv_day, right=uv_day, how='outer', on='date')
# Day-over-day change: each day minus the previous day (first row is NaN).
new_pv_uv = pv_uv.diff(periods=1)
new_pv_uv.columns = ['new_pv', 'new_uv']

  

 

  分析:

  1、活动前有两波收藏、加购小高峰(11.23-25,11.30-12.3),邻近活动,加购数明显上涨,12当天购买人数超活动前的日均加购数。

 

  日常和双12日均各时段用户行为趋势对比

  曲线图

 

# Normalise `date` to datetime64 so it can be compared with the event-day timestamps.
df['date'] = pd.to_datetime(df['date'])

# The campaign opens at midnight on Dec 12 and activity concentrates on the 11th
# and 12th, so these two days are treated as the Double-12 window.
# The days are parsed explicitly: handing date strings to isin() on a datetime64
# column relies on implicit string casting, which pandas deprecated in 2.2.
# NOTE(review): verify that the year here matches the `time` values in the CSV.
double12_days = pd.to_datetime(['2021-12-11', '2021-12-12'])
is_double12 = df['date'].isin(double12_days)


def _count_by_hour(frame, behavior):
    """Return the number of `behavior` events in `frame` for each hour of day."""
    events = frame[frame.behavior_type == behavior]
    return events.groupby('hour')['behavior_type'].count()


# Extract the data
# Ordinary days: hourly totals per behaviour ('1' view, '2' favourite, '3' cart, '4' buy)
daily_df = df[~is_double12]
view_hour = _count_by_hour(daily_df, '1')
col_hour = _count_by_hour(daily_df, '2')
add_hour = _count_by_hour(daily_df, '3')
buy_hour = _count_by_hour(daily_df, '4')
# Double-12 days: hourly totals per behaviour
active_df = df[is_double12]
view_active_ahour = _count_by_hour(active_df, '1')
col_active_ahour = _count_by_hour(active_df, '2')
add_active_ahour = _count_by_hour(active_df, '3')
buy_active_ahour = _count_by_hour(active_df, '4')

  

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Build the charts: per-day average favourites / cart-adds / purchases by hour,
# Double-12 (top panel) vs. ordinary days (bottom panel).
hours_of_day = list(range(24))


def _per_day_by_hour(counts, n_days):
    """Return the per-day average of `counts` for hours 0-23, rounded to whole numbers.

    `counts` is a Series indexed by hour. It is reindexed onto all 24 hours first
    (missing hours become 0) so every series lines up with the shared x axis even
    when a behaviour never occurred in some hour; plotting the raw `.values`
    would silently shift the remaining points.
    """
    aligned = counts.reindex(hours_of_day, fill_value=0)
    return np.around(aligned.values / n_days, decimals=0).tolist()


def _behaviour_line(favourites, cart_adds, purchases, title, title_top, legend_top):
    """Return a Line chart with the three behaviour series over the 24 hours."""
    return (
        Line()
        .add_xaxis(hours_of_day)
        .add_yaxis('收藏', y_axis=favourites, label_opts=opts.LabelOpts(is_show=False))
        .add_yaxis('加购', y_axis=cart_adds, label_opts=opts.LabelOpts(is_show=False))
        .add_yaxis('购买', y_axis=purchases, label_opts=opts.LabelOpts(is_show=False))
        .set_global_opts(
            tooltip_opts=opts.TooltipOpts(is_show=True, trigger="axis", axis_pointer_type='cross'),
            legend_opts=opts.LegendOpts(pos_top=legend_top),
            xaxis_opts=opts.AxisOpts(type_='category',
                                     axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow")),
            yaxis_opts=opts.AxisOpts(name='人次',
                                     axislabel_opts=opts.LabelOpts(formatter="{value}")),
            title_opts=opts.TitleOpts(title=title, pos_top=title_top),
        )
    )


x = hours_of_day
# Ordinary days (day count of 29 is hard-coded, as elsewhere in this analysis)
y2 = _per_day_by_hour(col_hour, 29)
y3 = _per_day_by_hour(add_hour, 29)
y4 = _per_day_by_hour(buy_hour, 29)
# Double-12 (2 days)
y5 = _per_day_by_hour(col_active_ahour, 2)
y6 = _per_day_by_hour(add_active_ahour, 2)
y7 = _per_day_by_hour(buy_active_ahour, 2)

# Title / legend offsets place each chart's caption inside its own half of the grid.
daily_line = _behaviour_line(y2, y3, y4, "日常日均各时段用户行为", '45%', '50%')
active_line = _behaviour_line(y5, y6, y7, "双12日均各时段用户行为", '0%', '5%')

ggrid = (  # combine both charts into one figure
    Grid(init_opts=opts.InitOpts(theme=ThemeType.DARK))  # one theme for the whole figure
    .add(active_line, grid_opts=opts.GridOpts(pos_bottom="60%"))  # top panel
    .add(daily_line, grid_opts=opts.GridOpts(pos_top="60%"))  # bottom panel
)
ggrid.render_notebook()

  

  分析:

  1、流量高峰主要在19点开始,23点后下降。建议尽量在19时之前更换好优化后的商品主图、详情页等页面,吸引更多的人流。

 

  不同时段购买率

  双轴折线图

1
2
3
4
5
6
7
8
9
10
11
# Prepare the data: hourly purchase rate = buying users / viewing users.
# The event days are parsed explicitly: handing date strings to isin() on a
# datetime64 column relies on implicit string casting, deprecated in pandas 2.2.
double12_days = pd.to_datetime(['2021-12-11', '2021-12-12'])
is_double12 = pd.to_datetime(df['date']).isin(double12_days)


def _users_by_hour(frame, behavior):
    """Count distinct (user, date, hour) combinations with a `behavior` event, per hour."""
    events = frame[frame.behavior_type == behavior]
    return events.drop_duplicates(['user_id', 'date', 'hour']).groupby('hour')['behavior_type'].count()


# Ordinary days: purchase rate per hour
daily_df = df[~is_double12]
view_user_num = _users_by_hour(daily_df, '1')
buy_user_num = _users_by_hour(daily_df, '4')
# Division aligns on the hour index; an hour missing from either side yields NaN.
daily_buy_rate = buy_user_num/view_user_num
# Double-12 days: purchase rate per hour
active_df = df[is_double12]
view_active_user_num = _users_by_hour(active_df, '1')
buy_active_user_num = _users_by_hour(active_df, '4')
# (the variable name keeps its original spelling because the chart code reads it)
acitve_buy_rate = buy_active_user_num/view_active_user_num

  

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# Build the chart: hourly purchase rate, ordinary days vs. Double-12.
x = view_hour.index.tolist()
# Rates rounded to two decimals.
y1 = daily_buy_rate.round(2).tolist()
y2 = acitve_buy_rate.round(2).tolist()

# Category x axis with a shadow pointer that follows the mouse.
hour_axis = opts.AxisOpts(
    type_='category',
    axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow"),
)
rate_axis = opts.AxisOpts(
    name=' ',
    axislabel_opts=opts.LabelOpts(formatter="{value}"),
)

buy_rate_line = Line(init_opts=opts.InitOpts(theme=ThemeType.DARK))
buy_rate_line.add_xaxis(x)
# Data labels are shown on both series.
buy_rate_line.add_yaxis('日常购买率', y_axis=y1, label_opts=opts.LabelOpts(is_show=True))
buy_rate_line.add_yaxis('双12购买率', y_axis=y2, label_opts=opts.LabelOpts(is_show=True))
buy_rate_line.set_global_opts(
    tooltip_opts=opts.TooltipOpts(is_show=True, trigger="axis", axis_pointer_type='cross'),
    xaxis_opts=hour_axis,
    yaxis_opts=rate_axis,
    title_opts=opts.TitleOpts(title="不同时段购买率"),
    legend_opts=opts.LegendOpts(pos_top='5%'),
)
buy_rate_line.render_notebook()

  

 

  分析:

  1、日常和双12活动的转化率大致相似,加购、收藏和购买转化率均很低,引流效果不佳,流失率高。建议:1.优化广告渠道,提高投放精准度,提高渠道引流来的用户质量;2.借鉴优秀同行产品,优化商品主图,提高产品视觉吸引力,增加转换率。

 

  双12行业热度top10排名和分析

  条形图*2

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# Extract the data
# Split the log into Double-12 days and ordinary days once. The event days are
# parsed explicitly: handing date strings to isin() on a datetime64 column relies
# on implicit string casting, which pandas deprecated in 2.2.
double12_days = pd.to_datetime(['2021-12-11', '2021-12-12'])
is_double12 = pd.to_datetime(df['date']).isin(double12_days)
active_df = df[is_double12]
daily_df = df[~is_double12]

# 1. Traffic: top 10 categories by PV during Double-12 (sorted descending)
view_active_num = (
    active_df[active_df.behavior_type == '1']
    .groupby('item_category')['behavior_type'].count()
    .sort_values(ascending=False)
)
x1 = view_active_num.index.tolist()[0:10]
y1 = view_active_num.values.tolist()[0:10]

# 2. Purchase growth: top 10 categories by Double-12 orders, together with the
#    growth of their per-day order count relative to ordinary days
# Orders per category during Double-12
buy_active_num = active_df[active_df.behavior_type == '4'].groupby('item_category')['behavior_type'].count().reset_index()
# Orders per category on ordinary days
buy_num = daily_df[daily_df.behavior_type == '4'].groupby('item_category')['behavior_type'].count().reset_index()
# Inner join: only categories with orders in both periods are kept.
view_buy_avg = buy_active_num.merge(buy_num, how='inner', left_on='item_category', right_on='item_category')
view_buy_avg.columns = ['item_category', 'buy_active_num', 'buy_num']
# Per-day averages (2 event days, 29 ordinary days - hard-coded) and relative growth
view_buy_avg['buy_active_avg'] = view_buy_avg['buy_active_num']/2
view_buy_avg['buy_avg'] = view_buy_avg['buy_num']/29
view_buy_avg['growth_rate'] = (view_buy_avg['buy_active_avg']-view_buy_avg['buy_avg'])/view_buy_avg['buy_avg']
# Sort by Double-12 orders, descending
view_buy_avg.sort_values(by=['buy_active_num'], axis=0, ascending=False, inplace=True)
# Keep the first ten; growth is expressed in percent, rounded to whole numbers
x2 = view_buy_avg.item_category.tolist()[0:10]
y2 = view_buy_avg.buy_active_num.tolist()[0:10]
y3 = np.around(view_buy_avg.growth_rate*100, 0).tolist()[0:10]
 
# Build the charts
# Top panel: Double-12 PV of the ten most viewed categories.
view_active_bar=(
    Bar(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    .add_xaxis(xaxis_data=x1)
    .add_yaxis(
    "pv",
        y1,
        # rgba() because the colour carries an alpha value; channels are limited to 0-255
        # (the previous 'rgb(300, 0, 100, 0.2)' was not a well-formed rgb() colour).
        color='rgba(255, 0, 100, 0.2)',
        label_opts=opts.LabelOpts(is_show=False)
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="双12商品类目pv TOP10"),
        legend_opts=opts.LegendOpts(pos_top='5%'),
        xaxis_opts=opts.AxisOpts(name=' ',name_location = "middle"),
        yaxis_opts=opts.AxisOpts(name='人次',
                                axislabel_opts=opts.LabelOpts(formatter="{value}"))
    )
)

# Bottom panel: Double-12 orders of the ten best-selling categories (left axis)
# and the growth of their per-day orders versus ordinary days (right axis, %).
buy_active_bar=(
    Bar(init_opts=opts.InitOpts(theme=ThemeType.PURPLE_PASSION))
    .add_xaxis(xaxis_data=x2)
    .add_yaxis(
    "下单量",
        y2,
        label_opts=opts.LabelOpts(is_show=False)
                )
     .add_yaxis('日均下单量增长率(与非活动日比较)',
                  yaxis_index=1,  # bound to the secondary axis added by extend_axis below
                  y_axis=y3,
                  label_opts=opts.LabelOpts(is_show=False)
                 )
      .extend_axis(
            # Secondary axis: fixed 100%-600% range in steps of 100.
            yaxis=opts.AxisOpts(
                name=' ',
                min_=100,
                max_=600,
                interval=100,
                axislabel_opts=opts.LabelOpts(formatter="{value}%")

                                )
                    )
    .set_global_opts(
        # Title and legend are pushed down so they sit above the bottom panel of the grid.
        title_opts=opts.TitleOpts(title="双12商品类目下单量TOP10及日均下单量增长率",pos_top='48%'),
        legend_opts=opts.LegendOpts(pos_top='53%'),
        xaxis_opts=opts.AxisOpts(name='商品类目',name_location = "middle",name_gap=30),
        # Primary axis: fixed 0-800 range in steps of 200.
        yaxis_opts=opts.AxisOpts(name=' ',is_show = True,
                                 min_=0,
                                 max_=800,
                                 interval=200,
                                 axislabel_opts=opts.LabelOpts(formatter="{value}"))

                    )
     .set_series_opts(
                    markpoint_opts=opts.MarkPointOpts(data=[ # mark the minimum and maximum values
                        opts.MarkPointItem(type_="min",value_index=1,name="最小值"),
                        opts.MarkPointItem(type_="max",value_index=1,name="最大值")])
                     )
                )

# Stack the two bar charts vertically in one figure.
ggrid = (
    Grid(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    .add(view_active_bar, grid_opts=opts.GridOpts(pos_bottom="60%"))
    .add(buy_active_bar, grid_opts=opts.GridOpts(pos_top="60%"))
        )

ggrid.render_notebook()

  

分析:

1、编号为11279、2825、10894的商品类目,虽然PV排前十,但下单量并没有排在前十,说明引流效果不佳,有待优化广告营销策略。

2、编号为6344、5027、13230、9516、1838的商品类目,虽然PV未排进前十,但均属于下单量的top10,说明该商品类目销售无需太依赖pv流量,在广告方面的支出花费可能相对较少。

3、下单量排名第5到10名的商品类目,虽然销量不是最高,但相较于第3、4名,它们的双12下单量相较于日常的增长率均较高,说明这些类目的销售增长明显、热度较高,受到消费者的近期关注;建议该类目商家抓住机会加大广告营销、商品的研发,提高商家的知名度、口碑,提高服务质量。

  项目源码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
import pandas as pd
import numpy as np
import datetime
from pyecharts.charts import *
from pyecharts.globals import ThemeType
from pyecharts.charts import Bar
from pyecharts.charts import Funnel
from pyecharts import options as opts
 
# Load the raw user-behaviour log and inspect it.
df = pd.read_csv(r'tianchi_fresh_comp_train_user.csv')
df.info()
df.head()
df.describe()
# Drop rows that are exact duplicates across every column (first occurrence is kept).
# NOTE(review): if `time` only has hour resolution, identical rows may be genuine
# repeated actions rather than logging duplicates - confirm before trusting PV counts.
df.drop_duplicates(keep='first', inplace=True)
df.head()
df.isnull().sum()
# Parse the timestamp, then derive the calendar date and hour of day from it.
df['time'] = pd.to_datetime(df['time'])
df.info()
timestamps = df['time'].dt
df['date'] = timestamps.date
df['hour'] = timestamps.hour
df.head()
# Store the identifiers and the behaviour code as strings rather than numbers,
# so later filters compare against '1'..'4'.
for column in ('user_id', 'item_id', 'behavior_type'):
    df[column] = df[column].values.astype('str')
df.info()
df['item_category'] = df['item_category'].values.astype('str')
# Extract the data
# Both metrics are derived from the behavior_type '1' (view) events only.
page_views = df[df['behavior_type'] == '1']
# PV: number of view events per day.
pv_day = page_views.groupby('date')['behavior_type'].count()
# UV: number of distinct users with at least one view event per day.
unique_visits = page_views.drop_duplicates(['user_id', 'date'])
uv_day = unique_visits.groupby('date')['user_id'].count()
# Convert to the shapes the chart needs
# 1. dates (the index of the daily series)
date = pv_day.index
# 2. pv / uv values, rescaled to units of 10,000 and rounded to 2 decimals
pv = np.around(pv_day.values / 10000, decimals=2)
uv = np.around(uv_day.values / 10000, decimals=2)
# Build the chart: average hourly PV, ordinary days vs. the two Double-12 days.
# The hourly PV series are derived from `df` right here: `col_hour`, `view_hour`
# and `view_active_ahour` are only assigned further down the script, so reading
# them at this point raises NameError.
pv_events = df[df.behavior_type == '1']
# Parse both sides explicitly so the comparison works whether `date` currently
# holds datetime.date objects or datetime64 values.
on_double12 = pd.to_datetime(pv_events['date']).isin(
    pd.to_datetime(['2021-12-11', '2021-12-12'])
)
hours_of_day = list(range(24))
# Reindex onto all 24 hours so an hour without events plots as 0 instead of
# shifting every later bar one slot to the left.
daily_pv_by_hour = (
    pv_events[~on_double12].groupby('hour')['behavior_type'].count()
    .reindex(hours_of_day, fill_value=0)
)
double12_pv_by_hour = (
    pv_events[on_double12].groupby('hour')['behavior_type'].count()
    .reindex(hours_of_day, fill_value=0)
)

x=hours_of_day
# Per-day averages, using the day counts hard-coded throughout this analysis
# (29 ordinary days, 2 event days).
y1=np.around(daily_pv_by_hour.values/29,decimals=0).tolist()
y8=np.around(double12_pv_by_hour.values/2,decimals=0).tolist()

bar=(
    Bar(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    .add_xaxis(xaxis_data=x)
    .add_yaxis(
    "日常PV",
        y1,
        stack='stack1',  # both series share one stack, so the bars are drawn on top of each other
        label_opts=opts.LabelOpts(is_show=False)
    )
    .add_yaxis(
    "双12PV",
        y8,
        stack='stack1',
        label_opts=opts.LabelOpts(is_show=False)
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="日常和双12每日时段PV走势对比"),
        legend_opts=opts.LegendOpts(pos_top='5%'),
        yaxis_opts=opts.AxisOpts(name='人次',
                                axislabel_opts=opts.LabelOpts(formatter="{value}"))
    )
)

bar.render_notebook()
 
# Build the chart: daily PV (left axis) and UV (right axis) as two lines.
x = list(date)
y1 = pv
y2 = uv

# Secondary y axis used by the uv series: fixed 0-1.6 range in steps of 0.4.
uv_axis = opts.AxisOpts(
    name='uv',
    min_=0,
    max_=1.6,
    interval=0.4,
    axislabel_opts=opts.LabelOpts(formatter="{value} 万人"),
)
# Default (left) y axis, used by the pv series.
pv_axis = opts.AxisOpts(
    name='pv',
    axislabel_opts=opts.LabelOpts(formatter="{value} 万次"),
)
# Category x axis with a shadow pointer that follows the mouse.
day_axis = opts.AxisOpts(
    type_='category',
    axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow"),
)

pvuv_day_line = Line(init_opts=opts.InitOpts(theme=ThemeType.DARK))
pvuv_day_line.add_xaxis(x)
# Data labels are hidden on both series.
pvuv_day_line.add_yaxis('pv', y1, label_opts=opts.LabelOpts(is_show=False))
# yaxis_index=1 binds the uv series to the secondary axis added below.
pvuv_day_line.add_yaxis(
    'uv',
    yaxis_index=1,
    y_axis=y2,
    label_opts=opts.LabelOpts(is_show=False),
)
pvuv_day_line.extend_axis(yaxis=uv_axis)
pvuv_day_line.set_global_opts(
    # Axis-triggered tooltip with a cross-hair pointer.
    tooltip_opts=opts.TooltipOpts(is_show=True, trigger="axis", axis_pointer_type='cross'),
    xaxis_opts=day_axis,
    yaxis_opts=pv_axis,
    title_opts=opts.TitleOpts(title="每日pv和uv"),
)
pvuv_day_line.render_notebook()  # display the chart
 
 
# Extract the data
# Join the daily pv and uv series on their shared `date` index.
pv_uv = pd.merge(left=pv_day, right=uv_day, how='outer', on='date')
# Day-over-day change: each day minus the previous day (first row is NaN).
new_pv_uv = pv_uv.diff(periods=1)
new_pv_uv.columns = ['new_pv', 'new_uv']
 
# Normalise `date` to datetime64 so it can be compared with the event-day timestamps.
df['date'] = pd.to_datetime(df['date'])

# The campaign opens at midnight on Dec 12 and activity concentrates on the 11th
# and 12th, so these two days are treated as the Double-12 window.
# The days are parsed explicitly: handing date strings to isin() on a datetime64
# column relies on implicit string casting, which pandas deprecated in 2.2.
# NOTE(review): verify that the year here matches the `time` values in the CSV.
double12_days = pd.to_datetime(['2021-12-11', '2021-12-12'])
is_double12 = df['date'].isin(double12_days)


def _count_by_hour(frame, behavior):
    """Return the number of `behavior` events in `frame` for each hour of day."""
    events = frame[frame.behavior_type == behavior]
    return events.groupby('hour')['behavior_type'].count()


# Extract the data
# Ordinary days: hourly totals per behaviour ('1' view, '2' favourite, '3' cart, '4' buy)
daily_df = df[~is_double12]
view_hour = _count_by_hour(daily_df, '1')
col_hour = _count_by_hour(daily_df, '2')
add_hour = _count_by_hour(daily_df, '3')
buy_hour = _count_by_hour(daily_df, '4')
# Double-12 days: hourly totals per behaviour
active_df = df[is_double12]
view_active_ahour = _count_by_hour(active_df, '1')
col_active_ahour = _count_by_hour(active_df, '2')
add_active_ahour = _count_by_hour(active_df, '3')
buy_active_ahour = _count_by_hour(active_df, '4')
 
 
# Build the charts: per-day average favourites / cart-adds / purchases by hour,
# Double-12 (top panel) vs. ordinary days (bottom panel).
hours_of_day = list(range(24))


def _per_day_by_hour(counts, n_days):
    """Return the per-day average of `counts` for hours 0-23, rounded to whole numbers.

    `counts` is a Series indexed by hour. It is reindexed onto all 24 hours first
    (missing hours become 0) so every series lines up with the shared x axis even
    when a behaviour never occurred in some hour; plotting the raw `.values`
    would silently shift the remaining points.
    """
    aligned = counts.reindex(hours_of_day, fill_value=0)
    return np.around(aligned.values / n_days, decimals=0).tolist()


def _behaviour_line(favourites, cart_adds, purchases, title, title_top, legend_top):
    """Return a Line chart with the three behaviour series over the 24 hours."""
    return (
        Line()
        .add_xaxis(hours_of_day)
        .add_yaxis('收藏', y_axis=favourites, label_opts=opts.LabelOpts(is_show=False))
        .add_yaxis('加购', y_axis=cart_adds, label_opts=opts.LabelOpts(is_show=False))
        .add_yaxis('购买', y_axis=purchases, label_opts=opts.LabelOpts(is_show=False))
        .set_global_opts(
            tooltip_opts=opts.TooltipOpts(is_show=True, trigger="axis", axis_pointer_type='cross'),
            legend_opts=opts.LegendOpts(pos_top=legend_top),
            xaxis_opts=opts.AxisOpts(type_='category',
                                     axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow")),
            yaxis_opts=opts.AxisOpts(name='人次',
                                     axislabel_opts=opts.LabelOpts(formatter="{value}")),
            title_opts=opts.TitleOpts(title=title, pos_top=title_top),
        )
    )


x = hours_of_day
# Ordinary days (day count of 29 is hard-coded, as elsewhere in this analysis)
y2 = _per_day_by_hour(col_hour, 29)
y3 = _per_day_by_hour(add_hour, 29)
y4 = _per_day_by_hour(buy_hour, 29)
# Double-12 (2 days)
y5 = _per_day_by_hour(col_active_ahour, 2)
y6 = _per_day_by_hour(add_active_ahour, 2)
y7 = _per_day_by_hour(buy_active_ahour, 2)

# Title / legend offsets place each chart's caption inside its own half of the grid.
daily_line = _behaviour_line(y2, y3, y4, "日常日均各时段用户行为", '45%', '50%')
active_line = _behaviour_line(y5, y6, y7, "双12日均各时段用户行为", '0%', '5%')

ggrid = (  # combine both charts into one figure
    Grid(init_opts=opts.InitOpts(theme=ThemeType.DARK))  # one theme for the whole figure
    .add(active_line, grid_opts=opts.GridOpts(pos_bottom="60%"))  # top panel
    .add(daily_line, grid_opts=opts.GridOpts(pos_top="60%"))  # bottom panel
)
ggrid.render_notebook()
 
 
# Prepare the data: hourly purchase rate = buying users / viewing users.
# The event days are parsed explicitly: handing date strings to isin() on a
# datetime64 column relies on implicit string casting, deprecated in pandas 2.2.
double12_days = pd.to_datetime(['2021-12-11', '2021-12-12'])
is_double12 = pd.to_datetime(df['date']).isin(double12_days)


def _users_by_hour(frame, behavior):
    """Count distinct (user, date, hour) combinations with a `behavior` event, per hour."""
    events = frame[frame.behavior_type == behavior]
    return events.drop_duplicates(['user_id', 'date', 'hour']).groupby('hour')['behavior_type'].count()


# Ordinary days: purchase rate per hour
daily_df = df[~is_double12]
view_user_num = _users_by_hour(daily_df, '1')
buy_user_num = _users_by_hour(daily_df, '4')
# Division aligns on the hour index; an hour missing from either side yields NaN.
daily_buy_rate = buy_user_num/view_user_num
# Double-12 days: purchase rate per hour
active_df = df[is_double12]
view_active_user_num = _users_by_hour(active_df, '1')
buy_active_user_num = _users_by_hour(active_df, '4')
# (the variable name keeps its original spelling because the chart code reads it)
acitve_buy_rate = buy_active_user_num/view_active_user_num
 
# Build the chart: hourly purchase rate, ordinary days vs. Double-12.
x = view_hour.index.tolist()
# Rates rounded to two decimals.
y1 = daily_buy_rate.round(2).tolist()
y2 = acitve_buy_rate.round(2).tolist()

# Category x axis with a shadow pointer that follows the mouse.
hour_axis = opts.AxisOpts(
    type_='category',
    axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow"),
)
rate_axis = opts.AxisOpts(
    name=' ',
    axislabel_opts=opts.LabelOpts(formatter="{value}"),
)

buy_rate_line = Line(init_opts=opts.InitOpts(theme=ThemeType.DARK))
buy_rate_line.add_xaxis(x)
# Data labels are shown on both series.
buy_rate_line.add_yaxis('日常购买率', y_axis=y1, label_opts=opts.LabelOpts(is_show=True))
buy_rate_line.add_yaxis('双12购买率', y_axis=y2, label_opts=opts.LabelOpts(is_show=True))
buy_rate_line.set_global_opts(
    tooltip_opts=opts.TooltipOpts(is_show=True, trigger="axis", axis_pointer_type='cross'),
    xaxis_opts=hour_axis,
    yaxis_opts=rate_axis,
    title_opts=opts.TitleOpts(title="不同时段购买率"),
    legend_opts=opts.LegendOpts(pos_top='5%'),
)
buy_rate_line.render_notebook()
 
# Extract the data
# Split the log into Double-12 days and ordinary days once. The event days are
# parsed explicitly: handing date strings to isin() on a datetime64 column relies
# on implicit string casting, which pandas deprecated in 2.2.
double12_days = pd.to_datetime(['2021-12-11', '2021-12-12'])
is_double12 = pd.to_datetime(df['date']).isin(double12_days)
active_df = df[is_double12]
daily_df = df[~is_double12]

# 1. Traffic: top 10 categories by PV during Double-12 (sorted descending)
view_active_num = (
    active_df[active_df.behavior_type == '1']
    .groupby('item_category')['behavior_type'].count()
    .sort_values(ascending=False)
)
x1 = view_active_num.index.tolist()[0:10]
y1 = view_active_num.values.tolist()[0:10]

# 2. Purchase growth: top 10 categories by Double-12 orders, together with the
#    growth of their per-day order count relative to ordinary days
# Orders per category during Double-12
buy_active_num = active_df[active_df.behavior_type == '4'].groupby('item_category')['behavior_type'].count().reset_index()
# Orders per category on ordinary days
buy_num = daily_df[daily_df.behavior_type == '4'].groupby('item_category')['behavior_type'].count().reset_index()
# Inner join: only categories with orders in both periods are kept.
view_buy_avg = buy_active_num.merge(buy_num, how='inner', left_on='item_category', right_on='item_category')
view_buy_avg.columns = ['item_category', 'buy_active_num', 'buy_num']
# Per-day averages (2 event days, 29 ordinary days - hard-coded) and relative growth
view_buy_avg['buy_active_avg'] = view_buy_avg['buy_active_num']/2
view_buy_avg['buy_avg'] = view_buy_avg['buy_num']/29
view_buy_avg['growth_rate'] = (view_buy_avg['buy_active_avg']-view_buy_avg['buy_avg'])/view_buy_avg['buy_avg']
# Sort by Double-12 orders, descending
view_buy_avg.sort_values(by=['buy_active_num'], axis=0, ascending=False, inplace=True)
# Keep the first ten; growth is expressed in percent, rounded to whole numbers
x2 = view_buy_avg.item_category.tolist()[0:10]
y2 = view_buy_avg.buy_active_num.tolist()[0:10]
y3 = np.around(view_buy_avg.growth_rate*100, 0).tolist()[0:10]
 
# Build the charts
# Top panel: Double-12 PV of the ten most viewed categories.
view_active_bar=(
    Bar(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    .add_xaxis(xaxis_data=x1)
    .add_yaxis(
    "pv",
        y1,
        # rgba() because the colour carries an alpha value; channels are limited to 0-255
        # (the previous 'rgb(300, 0, 100, 0.2)' was not a well-formed rgb() colour).
        color='rgba(255, 0, 100, 0.2)',
        label_opts=opts.LabelOpts(is_show=False)
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="双12商品类目pv TOP10"),
        legend_opts=opts.LegendOpts(pos_top='5%'),
        xaxis_opts=opts.AxisOpts(name=' ',name_location = "middle"),
        yaxis_opts=opts.AxisOpts(name='人次',
                                axislabel_opts=opts.LabelOpts(formatter="{value}"))
    )
)

# Bottom panel: Double-12 orders of the ten best-selling categories (left axis)
# and the growth of their per-day orders versus ordinary days (right axis, %).
buy_active_bar=(
    Bar(init_opts=opts.InitOpts(theme=ThemeType.PURPLE_PASSION))
    .add_xaxis(xaxis_data=x2)
    .add_yaxis(
    "下单量",
        y2,
        label_opts=opts.LabelOpts(is_show=False)
                )
     .add_yaxis('日均下单量增长率(与非活动日比较)',
                  yaxis_index=1,  # bound to the secondary axis added by extend_axis below
                  y_axis=y3,
                  label_opts=opts.LabelOpts(is_show=False)
                 )
      .extend_axis(
            # Secondary axis: fixed 100%-600% range in steps of 100.
            yaxis=opts.AxisOpts(
                name=' ',
                min_=100,
                max_=600,
                interval=100,
                axislabel_opts=opts.LabelOpts(formatter="{value}%")

                                )
                    )
    .set_global_opts(
        # Title and legend are pushed down so they sit above the bottom panel of the grid.
        title_opts=opts.TitleOpts(title="双12商品类目下单量TOP10及日均下单量增长率",pos_top='48%'),
        legend_opts=opts.LegendOpts(pos_top='53%'),
        xaxis_opts=opts.AxisOpts(name='商品类目',name_location = "middle",name_gap=30),
        # Primary axis: fixed 0-800 range in steps of 200.
        yaxis_opts=opts.AxisOpts(name=' ',is_show = True,
                                 min_=0,
                                 max_=800,
                                 interval=200,
                                 axislabel_opts=opts.LabelOpts(formatter="{value}"))

                    )
     .set_series_opts(
                    markpoint_opts=opts.MarkPointOpts(data=[ # mark the minimum and maximum values
                        opts.MarkPointItem(type_="min",value_index=1,name="最小值"),
                        opts.MarkPointItem(type_="max",value_index=1,name="最大值")])
                     )
                )

# Stack the two bar charts vertically in one figure.
ggrid = (
    Grid(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    .add(view_active_bar, grid_opts=opts.GridOpts(pos_bottom="60%"))
    .add(buy_active_bar, grid_opts=opts.GridOpts(pos_top="60%"))
        )

ggrid.render_notebook()

  

四、总结

1.通过对数据的整理,可以使冗杂的数据更加直观明了:双十二当天购买率暴增,且晚间购物占比大。基本达到预期目标。
2.收获:对数据清理,以及运用柱形图、折线图等进行数据分析更加熟练。运用python分析更加省时省力,分析得更加直观全面。不足:对代码运用还不够熟练,容易出bug,应多加练习。

 

 

 

posted @   zzh'  阅读(265)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· Manus爆火,是硬核还是营销?
· 终于写完轮子一部分:tcp代理 了,记录一下
· 别再用vector<bool>了!Google高级工程师:这可能是STL最大的设计失误
· 单元测试从入门到精通
点击右上角即可分享
微信分享提示