HiveSql题
题目一
已知条件如下:用户表(t_user)uid int 用户 id
订单表(t_order)oid int订单IDuid int用户ID
otime date订单时间oamount int订单金额
其中用户表和订单表是一对多的关系
结果集要求:(标准SQL/HIVE 或者其他脚本)
计算在2017年1月下过订单2月份没有下过订单的用户在3月份的订单金额分布,具体字段如下(注:没有匹配到3月份订单的用0填充)
uid,3月份订单金额超过10的订单数,3月份首次下单的金额,3月份最后一次下单的金额
要求:对订单表查询次数不大于2次
t_user
uid
t_order
oid 订单
uid 用户
otime
oamount
1. 201701 下过订单的 并且 201702 没有订单的 用户
2. 这部分用户在3月份的订单金额分布
--数据准备
CREATE TABLE `t_order`(
`oid` int,
`uid` int,
`otime` date,
`oamount` int)
PARTITIONED BY (
`dt` string);
use test;
insert into table t_order partition(dt ='2018-02-01') values(1001,1,'2018-02-01',110);
insert into table t_order partition(dt ='2018-02-01') values(1002,3,'2018-02-01',110);
insert into table t_order partition(dt ='2018-02-03') values(1003,3,'2018-02-03',100);
insert into table t_order partition(dt ='2018-02-03') values(1004,3,'2018-02-03',20);
insert into table t_order partition(dt ='2018-02-04') values(1005,3,'2018-02-04',30);
insert into table t_order partition(dt ='2018-02-04') values(1006,6,'2018-02-04',100);
insert into table t_order partition(dt ='2018-02-04') values(1007,6,'2018-02-04',130);
insert into table t_order partition(dt ='2018-03-01') values(1001,1,'2018-03-01',120);
insert into table t_order partition(dt ='2018-03-03') values(1002,2,'2018-03-03',5);
insert into table t_order partition(dt ='2018-03-03') values(1003,2,'2018-03-03',11);
insert into table t_order partition(dt ='2018-03-03') values(1004,3,'2018-03-03',1);
insert into table t_order partition(dt ='2018-03-04') values(1005,3,'2018-03-04',20);
insert into table t_order partition(dt ='2018-03-04') values(1006,4,'2018-03-04',30);
insert into table t_order partition(dt ='2018-03-04') values(1007,2,'2018-03-04',50);
原始数据
+--------------+--------------+----------------+------------------+-------------+
| t_order.oid | t_order.uid | t_order.otime | t_order.oamount | t_order.dt |
+--------------+--------------+----------------+------------------+-------------+
| 1003 | 2 | 2018-01-01 | 100 | 2018-01-01 |
| 1004 | 2 | 2018-01-02 | 20 | 2018-01-02 |
| 1005 | 2 | 2018-01-02 | 100 | 2018-01-02 |
| 1006 | 4 | 2018-01-02 | 30 | 2018-01-02 |
| 1007 | 1 | 2018-01-03 | 130 | 2018-01-03 |
| 1008 | 2 | 2018-01-03 | 5 | 2018-01-03 |
| 1007 | 1 | 2018-01-03 | 130 | 2018-01-03 |
| 1008 | 2 | 2018-01-03 | 5 | 2018-01-03 |
| 1009 | 2 | 2018-01-03 | 5 | 2018-01-03 |
| 1001 | 1 | 2018-02-01 | 110 | 2018-02-01 |
| 1002 | 3 | 2018-02-01 | 110 | 2018-02-01 |
| 1003 | 3 | 2018-02-03 | 100 | 2018-02-03 |
| 1004 | 3 | 2018-02-03 | 20 | 2018-02-03 |
| 1005 | 3 | 2018-02-04 | 30 | 2018-02-04 |
| 1006 | 6 | 2018-02-04 | 100 | 2018-02-04 |
| 1007 | 6 | 2018-02-04 | 130 | 2018-02-04 |
| 1001 | 1 | 2018-03-01 | 120 | 2018-03-01 |
| 1002 | 2 | 2018-03-03 | 5 | 2018-03-03 |
| 1003 | 2 | 2018-03-03 | 11 | 2018-03-03 |
| 1004 | 3 | 2018-03-03 | 1 | 2018-03-03 |
| 1005 | 3 | 2018-03-04 | 20 | 2018-03-04 |
| 1006 | 4 | 2018-03-04 | 30 | 2018-03-04 |
| 1007 | 2 | 2018-03-04 | 50 | 2018-03-04 |
+--------------+--------------+----------------+------------------+-------------+
解题:
--1
select
uid,
--sum(if(date_format(otime,'YYYY-MM')='2017-1',1,0)) month1count,
sum( case when date_format(otime,'YYYY-MM')='2018-01' then 1 else 0 end) month1count,
--sum(if(date_format(otime,'YYYY-MM')='2018-2',1,0)) month2count,
sum( case when date_format(otime,'YYYY-MM')='2018-02' then 1 else 0 end) month2count,
--sum(if(date_format(otime,'YYYY-MM')='2018-3' and oamount>10,1,0)) oamount_10,
sum( case when date_format(otime,'YYYY-MM')='2018-03' and oamount>10 then 1 else 0 end) oamount_10,
--sum(if(date_format(otime,'YYYY-MM')='2018-1' and t.rn=1,oamount,0)) first_oamount,
sum( case when date_format(otime,'YYYY-MM')='2018-03' and t.rn=1 then oamount else 0 end) first_oamount,
--sum(if(date_format(otime,'YYYY-MM')='2018-1' and t.rn = cn,oamount,0 )) last_oamount
sum( case when date_format(otime,'YYYY-MM')='2018-03' and t.rn = cn then oamount else 0 end) last_oamount
from
(
select * ,
row_number() over (partition by date_format(otime,"YYYY-MM"),uid order by otime) rn,
count(*) over (partition by date_format(otime,"YYYY-MM"),uid) cn
from
t_order
where date_format(otime,"YYYY-MM") in ("2018-01","2018-02","2018-03")
) t
group by uid
having month1count >0 and month2count = 0;
--2
select
uid,
--sum(case when oamount >10 then 1 else 0 end ) oamount_10,
count(case when oamount >10 then 1 else null end ) oamount_10,
max(case when rn = 1 then oamount else 0 end ) first_oamount,
max(case when rn = dk then oamount else 0 end ) last_oamount
from
(
select
uid,
oamount,
row_number() over (partition by uid,date_format(otime,"YYYY-MM") order by otime) rn,
count(*) over (partition by uid,date_format(otime,"YYYY-MM")) dk
from t_order
where
uid in
(
select distinct (a.uid) from (
select uid,oamount from t_order where date_format(otime,"YYYY-MM") ="2018-01" and oamount>0 ) a
left join
(select uid, oamount from t_order where date_format(otime,"YYYY-MM") ="2018-02" ) b
on a.uid = b.uid
where b.uid is null
) and date_format(otime,"YYYY-MM") = "2018-03"
) r
group by uid;
绝不摆烂