一、求出场率与出厂次数
1、有如下数据:(建表语句+sql查询)
id names 1 aa,bb,cc,dd,ee 2 aa,bb,ff,ww,qq 3 aa,cc,rr,yy 4 aa,bb,dd,oo,pp
2、求英雄的出场排名top3的出场次数及出场率
create table if not exists t_names( id int, names array ) row format delimited fields terminated by ‘\t’ collection items terminated by ‘,’ ; select * from ( select name,cc,cc / (sum(cc) over()) as ccl, rank() over(sort by cc desc) as rk from ( select name, count(1) as cc from t_names lateral view explode(names) tt as name group by name ) a ) aa where aa.rk <= 3 ;
二、求通话时长
1、有如下通话记录:
Zhangsan Wangwu 01:01:01 Zhangsan Zhaoliu 00:11:21 Zhangsan Yuqi 00:19:01 Zhangsan Jingba 00:21:01 Zhangsan Wuxi 01:31:17 Wangwu Zhaoliu 00:51:01 Wangwu Zhaoliu 01:11:19 Wangwu Yuqi 00:00:21 Wangwu Yuqi 00:23:01 Yuqi Zhaoliu 01:18:01 Yuqi Wuxi 00:18:00 Jingba Wangwu 00:01:01 Jingba Wangwu 00:00:06 Jingba Wangwu 00:02:04 Jingba Wangwu 00:02:54 Wangwu Yuqi 01:00:13 Wangwu Yuqi 00:01:01 Wangwu Zhangsan 00:01:01
2、统计两个人的通话总时长(用户之间互相通话的时长)
create table relations( fromstr string, tostr string, time string ) row format delimited fields terminated by ’ ’ ; select fromstr, tostr, sum(duration) as durations from ( Select Case when fromstr >= tostr then fromstr else tostr end fromstr, Case when fromstr >= tostr then tostr else fromstr end tostr, Split(time,’:’)[0] * 60 * 60 + Split(time,’:’)[1] * 60 + Split(time,’:’)[2] duration from relations ) a group by fromstr,tostr ;
三、求出每个店铺的当月销售额和累计到当月的总销售额
1、有如下销售数据:(建表语句+sql查询)
店铺 月份 金额
a,01,150 a,01,200 b,01,1000 b,01,800 c,01,250 c,01,220 b,01,6000 a,02,2000 a,02,3000 b,02,1000 b,02,1500 c,02,350 c,02,280 a,03,350 a,03,250
2、编写Hive的HQL语句求出每个店铺的当月销售额和累计到当月的总销售额
create table t_store( name string, months int, money int ) row format delimited fields terminated by “,”; select name,months,amoney,sum(amoney) over(distribute by name sort by months asc rows between unbounded preceding and current row) as totalmomey from ( Select name,months,sum(money) as amoney From t_store Group by name,months ) a ;
四、统计amt连续3个月,环比增长>50%的user
user_id month amt 1,20170101,100 3,20170101,20 4,20170101,30 1,20170102,200 2,20170102,240 3,20170102,30 4,20170102,2 1,20170101,180 2,20170101,250 3,20170101,30 4,20170101,260 … … select user_id from( select user_id,month,mon_amt,pre_mon_amt, sum(case when ((mon_amt - pre_mon_amt) / pre_mon_amt * 100) > 50 and datediff(to_date(month,‘yyyymm’),to_date(pre2_month,‘yyyymm’),‘mm’) = 2 then 1 else 0 end) over(partition by user_id order by month asc rows between current row and 2 following) as flag from ( select user_id, substr(month,0,6) as month, sum(amt) as mon_amt, lag(sum(amt),1,0.00001) over(partition by user_id order by substr(month,0,6) asc ) as pre_mon_amt, substr(lag(substr(month,0,6),2,‘199001’) over(partition by user_id order by substr(month,0,6) asc),0,6) as pre_2_mon from amt group by user_id,substr(month,0,6) ) t1 ) t2 where t2.flag >=3;
本文来自博客园,作者:大码王,转载请注明原文链接:https://www.cnblogs.com/huanghanyu/