大数据分析案例--日志分析
具体操作和上次课堂测试类似,可视化代码更是比上次简单
这里只展示datagrip里的sql语句
这里不包括日期的清洗,日期的清洗有些困难,请教同学后知道怎样清洗,具体写在下一节
create table `past`( `ip` string, `Date` string, `day` string, `traffic` string, `type` string, `id` string ) row format delimited fields terminated by "," ; --字段之间分隔符 select * from past LIMIT 3;--查询前三条数据 --建立一个新的表是清洗过后的数据 create table `now1`( `ip` string, `Date` string, `day` string, `traffic` string, `type` string, `id` string )row format delimited fields terminated by "," ; select * from now1 LIMIT 3;--查询前三条数据 --修改时间信息 --提取文章为11325 create table `article`( `ip` string, `Date` string, `day` string, `traffic` string, `type` string, `id` string )row format delimited fields terminated by "," ; insert overwrite table article select ip as ip ,`Date` as `Date`,day as day ,traffic as traffic,type as type,id as id from now1 where now1.type like 'article%' group by id,ip,`Date`,day ,traffic,type; select * from article LIMIT 3;--查询前三条数据 --提取文章为video create table `video`( `ip` string, `Date` string, `day` string, `traffic` string, `type` string, `id` string )row format delimited fields terminated by "," ; select * from video LIMIT 3;--查询前三条数据 insert overwrite table video select ip as ip ,`Date` as `Date`,day as day ,traffic as traffic,type as type,id as id from now1 where now1.type like 'video%' group by id,ip,`Date`,day ,traffic,type; --清洗时间是2016-11-10 00:01:03 create table `time`( `ip` string, `Date` string, `day` string, `traffic` string, `type` string, `id` string )row format delimited fields terminated by "," ; select * from `time` LIMIT 10;--查询前三条数据 insert overwrite table `time` select ip as ip ,`Date` as `Date`,day as day ,traffic as traffic,type as type,id as id from now1 where now1.`Date` like '2016-11-10 00:01:03%' group by id,ip,`Date`,day ,traffic,type; --清洗流量--暂时还不知道咋整 create table `liuliang`( `ip` string, `Date` string, `day` string, `traffic` string, `type` string, `id` string )row format delimited fields terminated by "," ; insert overwrite table `liuliang` select ip as ip ,`Date` as `Date`,day as day ,sum(traffic) as traffic,type as type,id as id from now1 where now1.`Date` like '2016-11-10 00:01:03%' group by id,ip,`Date`,day ,type; --统计最受欢迎的视频文章top10访问次数 create table `table1`( `traffic` string, `id` string, `type` string, `times` string )row format delimited fields terminated by "," ; insert overwrite table table1 select sum( traffic ) AS traffic,id as id ,count(id) as times,type as type from now1 group by id ,type order by traffic desc ; select * from `table1` LIMIT 20;--查询前三条数据 --按照流量统计最受欢迎的课程10 create table `table2`( `ip` string, `id` string, `type` string, `traffic` string )row format delimited fields terminated by "," ; select * from `table2` LIMIT 100;--查询前三条数据 insert overwrite table table2 select ip as ip ,id as id, type as type ,sum(traffic) as traffic from now1 group by ip,id ,type order by sum(traffic),id desc ; --按照地市统计最受欢迎的课程10 create table `table3`( `ip` string, `id` string, `type` string, `ips` string )row format delimited fields terminated by "," ; select * from `table3` LIMIT 50;--查询前三条数据 insert overwrite table table3 select ip as ip ,id as id, type as type ,count(ip) as ips from now1 group by ip,id ,type order by count(ip) desc ;
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 单元测试从入门到精通
· 上周热点回顾(3.3-3.9)
· winform 绘制太阳,地球,月球 运作规律