|NO.Z.00036|——————————|BigDataEnd|——|Hadoop&Hue.V05|——|Hue.v05|案例实现.v02|

一、实现:步骤一:创建表
### --- 步骤一:创建表

~~~     将数据存放在ORC文件中
~~~     # createtable.hql
hive (tuning)>  drop database sale cascade;

hive (tuning)>  create database if not exists sale;
create table sale.dimdate_ori(
dt date,
yearmonth int,
year smallint,
month tinyint,
day tinyint,
week tinyint,
weeks tinyint,
quat tinyint,
tendays tinyint,
halfmonth tinyint
)
row format delimited
fields terminated by ",";
hive (tuning)> create table sale.sale_ori(
orderid string,
locationid string,
dt date
)
row format delimited
fields terminated by ",";

hive (tuning)> create table sale.saledetail_ori(
orderid string,
rownum int,
goods string,
num int,
price double,
amount double
)
row format delimited
fields terminated by ",";
hive (tuning)> create table sale.dimdate(
dt date,
    yearmonth int,
year smallint,
month tinyint,
day tinyint,
week tinyint,
weeks tinyint,
quat tinyint,
tendays tinyint,
halfmonth tinyint
) stored as orc;

hive (tuning)> create table sale.sale(
orderid string,
locationid string,
dt date
) stored as orc;
hive (tuning)> create table sale.saledetail(
orderid string,
rownum int,
goods string,
num int,
price double,
amount double
)stored as orc;

[root@linux122 ~]# hive -f createtable.hql
二、步骤二:导入数据
### --- 步骤二:导入数据

~~~     # 加载数据
hive (tuning)> use sale;
hive (sale)> load data local inpath "/home/hadoop/data/tbDate.dat" overwrite into table dimdate_ori;
hive (sale)> load data local inpath "/home/hadoop/data/tbSale.dat" overwrite into table sale_ori;
hive (sale)> load data local inpath "/home/hadoop/data/tbSaleDetail.dat" overwrite into table saledetail_ori;
~~~     # 导入数据
hive (sale)> insert into table dimdate select * from dimdate_ori;
hive (sale)> insert into table sale select * from sale_ori;
hive (sale)> insert into table saledetail select * from saledetail_ori;

[root@linux122 ~]# hive -f loaddata.hql
### --- 按年统计销售额

hive (sale)> SELECT year(B.dt) year, round(sum(A.amount)/10000, 2) amount
FROM saledetail A join sale B on A.orderid=B.orderid
group by year(B.dt);
~~输出参数
year    amount
2004    326.81
2005    1325.76
2006    1368.1
2007    1671.94
2008    1467.43
2009    632.37
2010    21.09
3274    0.17
### --- 销售金额在 10W 以上的订单

hive (sale)> SELECT orderid, round(sum(amount), 2) amount
FROM saledetail
group by orderid
having sum(amount) > 100000;
~~输出参数
orderid amount
HMJSL00009024   119084.8
HMJSL00009958   159126.0
### --- 每年销售额的差值

hive (sale)> SELECT year, round(amount, 2) amount, round(lag(amount) over
(ORDER BY year), 2) prioramount
,round(amount - lag(amount) over (ORDER BY year), 2) diff
from (SELECT year(B.dt) year, sum(A.amount) amount
from saledetail A join sale B on A.orderid=B.orderid
group by year(B.dt)
) tmp;
~~输出参数
year    amount  prioramount diff
2004    3268115.5   NULL    NULL
2005    1.325756415E7   3268115.5   9989448.65
2006    1.36809829E7    1.325756415E7   423418.75
2007    1.671935456E7   1.36809829E7    3038371.66
2008    1.46742953E7    1.671935456E7   -2045059.26
2009    6323697.19  1.46742953E7    -8350598.11
2010    210949.66   6323697.19  -6112747.53
3274    1703.0  210949.66   -209246.66
### --- 年度订单金额前10位(年度、订单号、订单金额、排名)

~~~     # 方法一
hive (sale)> SELECT dt, orderid, amount, rank
from (SELECT dt, orderid, amount,
dense_rank() over(PARTITION BY dt ORDER BY
amount desc) rank
from (SELECT year(B.dt) dt, A.orderid, sum(A.amount)
amount
from saledetail A join sale B on
A.orderid=B.orderid
GROUP BY year(B.dt), A.orderid
) tmp1
) tmp2
where rank <= 10;
~~输出参数
dt  orderid amount  rank
2004    HMJSL00001557   23656.79999999997   1
2004    HMJSL00001556   22010.599999999984  2
2004    HMJSL00001349   17147.2 3
2004    HMJSL00001531   16605.6 4
2004    HMJSL00001567   15651.0 5
2004    HMJSL00001562   15429.999999999989  6
2004    HMJSL00000706   15266.0 7
2004    HMJSL00000656   15021.319999999998  8
2004    HMJSL00001568   14444.0 9
2004    HMJSL00000963   14268.800000000001  10
~~~     # 方法二
hive (sale)> with tmp as (
SELECT year(B.dt) dt, A.orderid, sum(A.amount) amount
from saledetail A join sale B on A.orderid=B.orderid
GROUP BY year(B.dt), A.orderid
)
SELECT dt, orderid, amount, rank
from (SELECT dt, orderid, amount,
dense_rank() over(PARTITION BY dt ORDER BY
amount desc) rank
from tmp
) tmp2
where rank <= 10;
~~输出参数
dt  orderid amount  rank
2004    HMJSL00001557   23656.79999999997   1
2004    HMJSL00001556   22010.599999999984  2
2004    HMJSL00001349   17147.2 3
2004    HMJSL00001531   16605.6 4
2004    HMJSL00001567   15651.0 5
2004    HMJSL00001562   15429.999999999989  6
2004    HMJSL00000706   15266.0 7
2004    HMJSL00000656   15021.319999999998  8
2004    HMJSL00001568   14444.0 9
### --- 季度订单金额前10位(年度、季度、订单id、订单金额、排名)

~~~     # 方法一
hive (sale)> with tmp as (
select C.year, C.quat, A.orderid, round(sum(B.amount), 2)
amount
from sale A join saledetail B on A.orderid=B.orderid
join dimdate C on A.dt=C.dt
group by C.year, C.quat, A.orderid
)
select year, quat, orderid, amount, rank
from (
select year, quat, orderid, amount,
dense_rank() over (partition by year, quat order
by amount desc) rank
from tmp
) tmp1
where rank <= 10;
~~输出参数
year    quat    orderid amount  rank
2004    1   HMJSL00002525   3113.84 1
2004    3   HMJSL00000609   10730.0 1
2004    3   HMJSL00000600   9031.2  2
2004    3   HMJSL00000606   8880.8  3
2004    3   HMJSL00000571   8570.16 4
2004    3   HMJSL00000610   8094.0  5
2004    3   HMJSL00000574   7540.1  6
2004    3   HMJSL00000561   6981.6  7
2004    3   HMJSL00000615   6946.0  8
2004    3   HMJSL00000616   6509.0  9
2004    3   HMJSL00000627   6164.8  10
~~~     # 方法二
hive (sale)> with tmp as(
select year(A.dt) year,
case when month(A.dt) <= 3 then 1
when month(A.dt) <= 6 then 2
when month(A.dt) <= 9 then 3
A.orderid,
round(sum(B.amount), 2) amount
from sale A join saledetail B on A.orderid = B.orderid
group by year(A.dt),
case when month(A.dt) <= 3 then 1
when month(A.dt) <= 6 then 2
when month(A.dt) <= 9 then 3
else 4 end,
A.orderid
)
select year, quat, orderid, amount, rank
from (
select year, quat, orderid, amount,
dense_rank() over (partition by year, quat order
by amount desc) rank
from tmp
) tmp1
where rank <= 10;
~~输出参数
~~~     # 方法三:求季度
hive (sale)> select floor(month(dt/3.1)) + 1;

hive (sale)> with tmp as (
select year(A.dt) year, floor(month(A.dt)/3.1) + 1 quat,
A.orderid,
round(sum(B.amount), 2) amount
from sale A join saledetail B on A.orderid=B.orderid
group by year(A.dt), floor(month(A.dt)/3.1) + 1, A.orderid
)
select year, quat, orderid, amount, rank
from (
select year, quat, orderid, amount,
dense_rank() over (partition by year, quat order
by amount desc) rank
from tmp
) tmp1
where rank <= 10;
~~输出参数
year    quat    orderid amount  rank
2004    1   HMJSL00002525   3113.84 1
2004    3   HMJSL00000609   10730.0 1
2004    3   HMJSL00000600   9031.2  2
2004    3   HMJSL00000606   8880.8  3
2004    3   HMJSL00000571   8570.16 4
2004    3   HMJSL00000610   8094.0  5
2004    3   HMJSL00000574   7540.1  6
2004    3   HMJSL00000561   6981.6  7
2004    3   HMJSL00000615   6946.0  8
2004    3   HMJSL00000616   6509.0  9
2004    3   HMJSL00000627   6164.8  10
### --- 求所有交易日中订单金额最高的前10位

~~~     # topN问题:
~~~     基础数据
~~~     上排名函数
~~~     解决N的问题
hive (sale)> with tmp as (
select A.dt, A.orderid, round(sum(B.amount), 2) amount
from sale A join saledetail B on A.orderid=B.orderid
group by A.dt, A.orderid
)
select dt, orderid, amount, rank
from (
select dt, orderid, amount, dense_rank() over(order by
amount desc) rank
from tmp
) tmp1
where rank <= 10;
~~输出参数
dt  orderid amount  rank
2007-07-31  HMJSL00009958   159126.0    1
2007-04-24  HMJSL00009024   119084.8    2
2008-01-15  HMJSL00010598   55828.0 3
2007-07-31  HMJSL00009957   52422.0 4
2007-10-09  HMJSL00010216   49800.0 5
2007-07-31  HMJSL00009956   43018.0 6
2007-11-04  HMJSL00010339   42157.6 7
2007-02-07  HMJSL00008593   41902.4 8
2007-09-27  HMJSL00010137   40340.0 9
2005-05-31  HMJSL00003263   38186.4 10
### --- 每年度销售额最大的交易日

hive (sale)> with tmp as (
select A.dt, round(sum(B.amount), 2) amount
from sale A join saledetail B on A.orderid=B.orderid
group by A.dt
)
select year(dt) year, max(amount) dayamount
from tmp
group by year(dt);
~~输出参数
year    dayamount
2004    160121.08
2005    189984.81
2006    232302.62
2007    309427.1
2008    151134.0
2009    71127.0
2010    28161.8
3274    1703.0
~~~     # 备注:以上求解忽略了交易日,以下SQL更符合题意
hive (sale)> with tmp as (
select dt, amount, dense_rank() over (partition by year(dt)
order by amount desc) as rank
from (select A.dt, round(sum(B.amount), 2) amount
from sale A join saledetail B on A.orderid=B.orderid
group by A.dt) tab1
)
select year(dt) as year, dt, amount
from tmp
where rank=1;
~~输出参数
year    dt  amount
2004    2004-11-01  160121.08
2005    2005-10-03  189984.81
2006    2006-04-29  232302.62
2007    2007-07-31  309427.1
2008    2008-01-20  151134.0
2009    2009-01-24  71127.0
2010    2010-01-01  28161.8
3274    3274-12-12  1703.0
### --- 年度最畅销的商品(即每年销售金额最大的商品)

hive (sale)> with tmp as (
select year(B.dt) year, goods, round(sum(amount),2) amount
from saledetail A join sale B on A.orderid=B.orderid
group by year(B.dt), goods
)
select year, goods, amount
from (select year, goods, amount, dense_rank() over
(partition by year order by amount desc) rank
from tmp) tmp1
where rank = 1;
~~输出参数
year    goods   amount
2004    JY424420810101  53401.76
2005    24124118880102  56627.33
2006    JY425468460101  113720.6
2007    JY425468460101  70225.1
2008    E2628204040101  98003.6
2009    YL327439080102  30029.2
2010    SQ429425090101  4494.0
3274    YA217390232301  698.0

 
 
 
 
 
 
 
 
 

Walter Savage Landor:strove with none,for none was worth my strife.Nature I loved and, next to Nature, Art:I warm'd both hands before the fire of life.It sinks, and I am ready to depart
                                                                                                                                                   ——W.S.Landor

 

posted on   yanqi_vip  阅读(60)  评论(0编辑  收藏  举报

相关博文:
阅读排行:
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· Manus爆火,是硬核还是营销?
· 终于写完轮子一部分:tcp代理 了,记录一下
· 别再用vector<bool>了!Google高级工程师:这可能是STL最大的设计失误
· 单元测试从入门到精通
< 2025年3月 >
23 24 25 26 27 28 1
2 3 4 5 6 7 8
9 10 11 12 13 14 15
16 17 18 19 20 21 22
23 24 25 26 27 28 29
30 31 1 2 3 4 5

导航

统计

点击右上角即可分享
微信分享提示