大数据第49天—Mysql练习题12道之四-观看电影的次数-杨大伟
有一个5000万的用户文件(user_id,name,age),一个2亿记录的用户看电影的记录文件(user_id,url),根据年龄段观看电影的次数进行排序?
1 --建表 2 --用户表 3 drop table if exists test_four_log; 4 create table test_four_user( 5 user_id string COMMENT '用户ID', 6 name string COMMENT '用户姓名', 7 age int COMMENT '用户年龄' 8 ) 9 row format delimited fields terminated by '\t'; 10 --日志表 11 drop table if exists test_four_log; 12 create table test_four_log( 13 user_id string COMMENT '用户ID', 14 url string COMMENT '链接' 15 ) 16 row format delimited fields terminated by '\t';
1 --插入数据 2 insert into table test_four_user values ('1','1',8); 3 insert into table test_four_user values ('2','2',45); 4 insert into table test_four_user values ('3','3',14); 5 insert into table test_four_user values ('4','4',18); 6 insert into table test_four_user values ('5','5',17); 7 insert into table test_four_user values ('6','6',19); 8 insert into table test_four_user values ('7','7',26); 9 insert into table test_four_user values ('8','8',22); 10 insert into table test_four_log values('1','111'); 11 insert into table test_four_log values('2','111'); 12 insert into table test_four_log values('3','111'); 13 insert into table test_four_log values('4','111'); 14 insert into table test_four_log values('5','111'); 15 insert into table test_four_log values('6','111'); 16 insert into table test_four_log values('7','111'); 17 insert into table test_four_log values('8','111'); 18 insert into table test_four_log values('1','111'); 19 insert into table test_four_log values('2','111'); 20 insert into table test_four_log values('3','111'); 21 insert into table test_four_log values('4','111'); 22 insert into table test_four_log values('5','111'); 23 insert into table test_four_log values('6','111'); 24 insert into table test_four_log values('7','111'); 25 insert into table test_four_log values('8','111'); 26 insert into table test_four_log values('1','111'); 27 insert into table test_four_log values('2','111'); 28 insert into table test_four_log values('3','111'); 29 insert into table test_four_log values('4','111'); 30 insert into table test_four_log values('5','111'); 31 insert into table test_four_log values('6','111'); 32 insert into table test_four_log values('7','111'); 33 insert into table test_four_log values('8','111');
1 -- 根据年龄段观看电影的次数进行排序? 2 select 3 age_size `年龄段`, 4 count(*) `观影次数` 5 from 6 ( 7 select 8 u.*, 9 l.url, 10 case 11 when u.age >=0 and u.age <= 10 then '1-10' 12 when u.age >=11 and u.age <= 20 then '11-20' 13 when u.age >=21 and u.age <= 30 then '21-30' 14 when u.age >=31 and u.age <= 40 then '31-40' 15 when u.age >=41 and u.age <= 50 then '41-50' 16 else '51-100' 17 end age_size 18 from 19 test_four_user u join test_four_log l on u.user_id = l.user_id 20 ) t1 21 group by age_size 22 order by `观影次数` desc;