窗口函数-hive

with t1 as (
    select 'huawei' as channel ,'2021-08-01' as credit_date,5 as score
    union all
    select 'huawei' as channel ,'2021-08-03' as credit_date,12 as score
    union all
    select 'huawei' as channel ,'2021-08-04' as credit_date,25 as score
    union all
    select 'huawei' as channel ,'2021-08-02' as credit_date,1 as score
    union all
    select 'huawei' as channel ,'2021-08-07' as credit_date,7 as score
    union all
    select 'huawei' as channel ,'2021-08-08' as credit_date,10 as score
    union all
    select 'huawei' as channel ,'2021-08-06' as credit_date,33 as score

    union all

    select 'vivo' as channel ,'2021-09-01' as credit_date,1 as score
    union all
    select 'vivo' as channel ,'2021-09-02' as credit_date,2 as score
    union all
    select 'vivo' as channel ,'2021-09-04' as credit_date,5 as score
    union all
    select 'vivo' as channel ,'2021-09-09' as credit_date,9 as score
    union all
    select 'vivo' as channel ,'2021-09-07' as credit_date,77 as score
    union all
    select 'vivo' as channel ,'2021-09-08' as credit_date,10 as score
    union all
    select 'vivo' as channel ,'2021-09-11' as credit_date,3 as score
)
select *
,sum(score) over() as win

from t1


t1.*
,rank() over(partition by t1.c_name order by t1.score) as rank
,dense_rank() over(partition by t1.c_name order by t1.score) as dense_rank
,row_number() over(partition by t1.c_name order by t1.score) as row_number
,sum(t1.score) over(partition by t1.c_name order by t1.score) as sum
,avg(t1.score) over(partition by t1.c_name order by t1.score) as avg
,count(t1.score) over(partition by t1.c_name order by t1.score) as count
,max(t1.score) over(partition by t1.c_name order by t1.score) as max
,min(t1.score) over(partition by t1.c_name order by t1.score) as min
,rank() over( order by t1.score) as no_partition


-- 面试
https://mp.weixin.qq.com/s?__biz=Mzg3NjI4OTg1Mw==&mid=2247483973&idx=1&sn=1249c5b03c8a43cc75f32f334a7ca559&chksm=cf35c817f8424101a6d4dc2797610eccd558311a4c789365294f8c37b7a50e40f49a2d7c5bcb&scene=21#wechat_redirect




https://blog.csdn.net/a934079371/article/details/108301730?utm_medium=distribute.pc_relevant.none-task-blog-2%7Edefault%7ECTRLIST%7Edefault-3.control&depth_1-utm_source=distribute.pc_relevant.none-task-blog-2%7Edefault%7ECTRLIST%7Edefault-3.control
https://blog.csdn.net/dingchangxiu11/article/details/83145151
-- 聚合函数(案例)
with t1 as (
    select '20191020' as logday,'11111' as userid,85 as score
    union all
    select '20191020','22222',83
    union all
    select '20191020','33333',86
    union all
    select '20191021','11111',87
    union all
    select '20191021','22222',65
    union all
    select '20191021','33333',98
    union all
    select '20191022','11111',67
    union all
    select '20191022','22222',34
    union all
    select '20191022','33333',88
    union all
    select '20191023','11111',99
    union all
    select '20191023','22222',33
)
-- 1、使用 over() 函数进行数据统计, 统计每个用户及表中数据的总数
-- select *,count(1) over() as total from t1
-- 2、求用户明细并统计每天的用户总数
-- select *,count(userid) over(partition by logday) as total from t1
-- 3、计算从第一天到现在的所有 score 大于80分的用户总数
-- select *,count()over(order by logday  rows between unbounded preceding and current row) as total from t1 where score > 80
-- 4、计算每个用户到当前日期分数大于80的天数
-- select *,count() over(partition by userid order by logday rows between unbounded preceding and current row) as total from t1 where score > 80




-- 其他类函数(案例)
with t1 as (
    select 'jack' as name,'2017-01-01' as orderdate,10 as cost
    union all
    select 'tony','2017-01-02',15
    union all
    select 'jack','2017-02-03',23
    union all
    select 'tony','2017-01-04',29
    union all
    select 'jack','2017-01-05',46
    union all
    select 'jack','2017-04-06',42
    union all
    select 'tony','2017-01-07',50
    union all
    select 'jack','2017-01-08',55
    union all
    select 'mart','2017-04-08',62
    union all
    select 'mart','2017-04-09',68
    union all
    select 'neil','2017-05-10',12
    union all
    select 'mart','2017-04-11',75
    union all
    select 'neil','2017-06-12',80
    union all
    select 'mart','2017-04-13',94
)
-- 1、查询在2017年4月份购买过的顾客及总人数
-- select *,count(name) over() as as total from t1 where substr(orderdate,1,7) = '2017-04'
-- 2、查询顾客的购买明细及月购买总额
-- select *,sum(cost) over(partition by name,substr(orderdate,1,7) )  as total from t1
-- 3、查询顾客的购买明细及到目前为止每个顾客购买总金额
-- select *,sum(cost) over(partition by name order by orderdate  ) as total
-- ,sum(cost) over(partition by name order  by  orderdate
-- ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) total_amount
-- from t1
-- 4、查询顾客上次的购买时间----lag()over()偏移量分析函数的运用
    -- 方案1:笛卡尔积 + 求日期差
    -- select all_tab.name,all_tab.orderdate,all_tab.cost
    -- ,max(if(all_tab.rank = 2,all_tab.orderdate1,null)) as pre_orderdate
    -- from (
    -- select t1.*,t2.orderdate as orderdate1
    -- ,date_diff('day',cast (t2.orderdate as date),cast(t1.orderdate as date)) as day_diff
    -- ,row_number() over(partition by t1.name,t1.orderdate order by
    -- date_diff('day',cast (t2.orderdate as date),cast(t1.orderdate as date))
    -- ) as rank
    -- from t1
    -- left outer join t1 as t2
    -- on t1.name = t2.name
    -- where date_diff('day',cast (t2.orderdate as date),cast(t1.orderdate as date)) >= 0
    -- ) as  all_tab
    -- where all_tab.rank in (1,2)
    -- group by all_tab.name,all_tab.orderdate,all_tab.cost
    -- order by all_tab.name,all_tab.orderdate
-- select *, lag(orderdate,1) over(partition by name order by orderdate) last_date from t1
-- 5、查询前20%时间的订单信息
select
all_tab.*
from (
select *,ntile(5)over(order  by  cost) as sortgroup_num from t1
) as all_tab
where sortgroup_num = 1



-- 排序函数(案例)
with t1 as (
    select '孙悟空' as name,'语文' as subject,87 as score
    union all
    select '孙悟空','数学',95
    union all
    select '孙悟空','英语',68
    union all
    select '大海','语文',94
    union all
    select '大海','数学',56
    union all
    select '大海','英语',84
    union all
    select '宋宋','语文',64
    union all
    select '宋宋','数学',86
    union all
    select '宋宋','英语',84
    union all
    select '婷婷','语文',65
    union all
    select '婷婷','数学',85
    union all
    select '婷婷','英语',78
)
-- 1、每门学科学生成绩排名(是否并列排名、空位排名三种实现)
-- select *,rank() over(partition by subject order by score) as rank -- 跳跃排序 并列排序，有空排
-- ,dense_rank() over(partition by subject order by score) as dense_rank -- 密集排序 并列排序，无空排
-- ,row_number() over(partition by subject order by score) as row_number -- 连续排序 不并列排序,无空排
-- from t1
-- 2、每门学科成绩排名top n的学生
-- select * from (
-- select *,
-- dense_rank() over(partition by subject order by score desc) as desen_rank
-- from t1
-- ) as all_tab
-- where all_tab.desen_rank = 1