|NO.Z.00041|——————————|BigDataEnd|——|Hadoop&PB级数仓.V08|——|PB数仓.v08|高仿日志数据测试|

一、高仿日志数据测试
### --- 数据采集

~~~     1000W左右日活用户
~~~     按 30条日志 / 人天,合计3亿条事件日志
~~~     每条日志 650字节 左右
~~~     总数据量大概在180G
~~~     采集数据时间约2.5小时
二、事件日志采集
### --- 清理环境
~~~     # 清理本地环境
~~~     删除本地元数据文件、日志、hdfs等文件

[root@hadoop02 ~]# rm -rf /data/yanqidw/conf/startlog_position.json
[root@hadoop02 ~]# rm -rf /data/yanqidw/logs/start/*
[root@hadoop02 ~]# rm -rf /data/yanqidw/logs/event/*
~~~     # 清理hdfs数据

[root@hadoop02 ~]# hdfs dfs -rm -r -f /user/data/logs/event/dt=2020-07-21
[root@hadoop02 ~]# hdfs dfs -rm -r -f /user/data/logs/event/dt=2020-07-22
[root@hadoop02 ~]# hdfs dfs -rm -r -f /user/data/logs/event/dt=2020-07-23
### --- 启动Flume:
~~~     日志文件很大,可以将hdfs文件滚动设置为10G甚至更大

[root@hadoop02 ~]# nohup flume-ng agent --conf /opt/yanqi/servers/flume-1.9.0/conf \
--conf-file /data/yanqidw/conf/flume-log2hdfs4.conf \
-name a1 -Dflume.root.logger=INFO,console &
### --- 写日志

[root@hadoop02 ~]#  cd /data/yanqidw/jars
[root@hadoop02 jars]#  nohup java -cp \
data-generator-1.1-SNAPSHOT-jar-withdependencies.jar \
com.yanqi.ecommerce.AppEvent 300000000 2020-07-22 \
> /data/yanqidw/logs/event/eventlog0722.log &
三、计算高仿日志数据
### --- 计算2020-07-22事件日志

[root@hadoop02 ~]# cd /data/yanqidw/script/advertisement/
sh ods_load_event_log.sh 2020-07-22
sh dwd_load_event_log.sh 2020-07-22
sh dwd_load_ad_log.sh 2020-07-22
sh ads_load_ad_show.sh 2020-07-22
sh ads_load_ad_show_rate.sh 2020-07-22
sh ads_load_ad_show_page.sh 2020-07-22
sh ads_load_ad_show_page_window.sh 2020-07-22
~~~     查看2020-07-22事件日志

hive (default)> show partitions ads.ads_ad_show_place_window;
partition
dt=2020-07-21
dt=2020-07-22
hive (default)> select * from ads.ads_ad_show_place_window where dt='2020-07-22' limit 5;
OK
ads_ad_show_place_window.hour   ads_ad_show_place_window.place  ads_ad_show_place_window.product_id ads_ad_show_place_window.cnt    ads_ad_show_place_window.rank   ads_ad_show_place_window.dt
00  placecampaign1_index    0   4   1   2020-07-22
00  placecampaign1_index    1   3   1   2020-07-22
00  placecampaign1_index    2   5   1   2020-07-22
00  placecampaign1_index    3   2   1   2020-07-22
00  placecampaign1_index    4   2   1   2020-07-22
hive (default)> select count(*) from ads.ads_ad_show_place_window where dt='2020-07-22' limit 5;
604
### --- 计算2020-07-23事件日志

[root@hadoop02 ~]# cd /data/yanqidw/script/advertisement/
sh ods_load_event_log.sh 2020-07-23
sh dwd_load_event_log.sh 2020-07-23
sh dwd_load_ad_log.sh 2020-07-23
sh ads_load_ad_show.sh 2020-07-23
sh ads_load_ad_show_rate.sh 2020-07-23
sh ads_load_ad_show_page.sh 2020-07-23
sh ads_load_ad_show_page_window.sh 2020-07-23
~~~     查看2020-07-23事件日志

hive (default)> show partitions ads.ads_ad_show_place_window;
partition
dt=2020-07-21
dt=2020-07-22
dt=2020-07-23
hive (default)> select * from ads.ads_ad_show_place_window where dt='2020-07-23' limit 5;
ads_ad_show_place_window.hour   ads_ad_show_place_window.place  ads_ad_show_place_window.product_id ads_ad_show_place_window.cnt    ads_ad_show_place_window.rank   ads_ad_show_place_window.dt
00  placecampaign1_index    1   1   1   2020-07-23
00  placecampaign1_index    2   4   1   2020-07-23
00  placecampaign1_index    3   5   1   2020-07-23
00  placecampaign1_index    4   2   1   2020-07-23
00  placecampaign1_index    5   2   1   2020-07-23
hive (default)> select count(*) from ads.ads_ad_show_place_window where dt='2020-07-23' limit 5;
607

 
 
 
 
 
 
 
 
 

Walter Savage Landor: I strove with none, for none was worth my strife. Nature I loved and, next to Nature, Art: I warm'd both hands before the fire of life. It sinks, and I am ready to depart.
                                                                                                                                                   ——W.S.Landor

 

 

posted on   yanqi_vip  阅读(16)  评论(0)  编辑  收藏  举报

相关博文:
阅读排行:
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· Manus爆火,是硬核还是营销?
· 终于写完轮子一部分:tcp代理 了,记录一下
· 别再用vector<bool>了!Google高级工程师:这可能是STL最大的设计失误
· 单元测试从入门到精通
< 2025年3月 >
23 24 25 26 27 28 1
2 3 4 5 6 7 8
9 10 11 12 13 14 15
16 17 18 19 20 21 22
23 24 25 26 27 28 29
30 31 1 2 3 4 5

导航

统计

点击右上角即可分享
微信分享提示