qwb0614

  博客园  :: 首页  :: 新随笔  :: 联系 :: 订阅 订阅  :: 管理
  187 随笔 :: 0 文章 :: 0 评论 :: 2048 阅读

这次的hive测试的流程主要分为以下几个步骤

1、数据导入到hive数据库里

2、对数据清洗

3、数据分析处理

4、将处理后的数据导出到MySQL

5、 通过网页进行数据可视化

 

 

 

一.数据导入到hive数据库里

1)建立初始表

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
create table sales_sample_20214044 (
 
`day_id` string comment '时间',
 
`sale_nbr` string comment '卖出方',
 
`buy_nbr` string  comment '买入方',
 
`cnt` int comment '数量',
 
`round` int comment '金额')
 
row format delimited
 
fields terminated by ','
 
lines terminated by '\n';

  

 

   2)将数据导入到表中

1
load data local inpath '/root/hivedata/sales_sample_20170310.csv' into table sales_sample_20214044;

  

 
 

二、 对数据清洗

将id转换成真是日期
1
2
3
4
5
6
7
8
9
insert overwrite table sales_sample_20214044
select concat('2022-09-',day_id),sale_nbr,buy_nbr,cnt,round from sales_sample_20170310  ;
create table sales_sample111 as
    select  to_date(from_unixtime(UNIX_TIMESTAMP(day_id,'yyyy-MM-dd'))) as day_id,
           sale_nbr,
           buy_nbr,
           cnt,
           round
    from sales_sample_20214044;

  

三、数据分析处理

       1)统计每天各个机场的销售数量和销售金额

           
    create table sale_hangkong as

select

    day_id,sale_nbr,sum(cnt) as cnt,sum(round) as round from sales_sample111 where sale_nbr like 'C%' group by day_id, sale_nbr;

 

       2)统计每天各个代理商的销售数量和销售金额。

复制代码
create table day_sale as

select

    day_id,

    sale_nbr,

    sum(cnt) as cnt_max,

    sum(round) as round_max

from sales_sample111

where sale_nbr like 'O%'

group by sale_nbr,day_id;
复制代码

 

 

3)统计每天各个代理商的销售活跃度。

复制代码
create table huoyuedumaichu as

select

    day_id,

    sale_nbr,

    count(*) as sale_number

from sales_sample111

where sale_nbr like 'O%'

group by sale_nbr,day_id;







create table huoyuedumairu as

select

    day_id,

    buy_nbr,

    count(*) as sale_number

from sales_sample111

where buy_nbr like 'O%'

group by buy_nbr,day_id;



create table huoyuedu as

select

    a.day_id as day_id,

    a.sale_nbr as sale_nbr,

    a.sale_number+h.sale_number as sale_number

from huoyuedumaichu a join huoyuedumairu h on a.day_id = h.day_id and a.sale_nbr=h.buy_nbr;
复制代码

 

 

4)汇总统计10月1日到10月15日之间各个代理商的销售利润。

复制代码
--计算代理商买入数量金额

drop table mairu;

create table mairu as

select

    day_id,

    buy_nbr,

    sum(cnt) as cnt,

    sum(round) as round

from sales_sample111

where buy_nbr like 'O%'

group by day_id, buy_nbr;



create table  maichu as

select

    day_id,

    sale_nbr,

    sum(cnt) as cnt,

    sum(round) as round

from sales_sample111

where sale_nbr like 'O%'

group by day_id, sale_nbr;





create table lirun as

select a.day_id as day_id,

       b.sale_nbr as nbr,

       a.cnt as cnt_buy,

       a.round as rount_buy,

       b.cnt as cnt_sale,

       b.round as round_sale,

       b.round-a.round as liren

from mairu a join maichu b on a.buy_nbr = b.sale_nbr and a.day_id = b.day_id

where a.day_id between '2022-10-01' and '2022-10-15';
复制代码

 

 

 

四、将处理后的数据导出到MySQL

       1)统计每天各个机场的销售数量和销售金额

复制代码
建表
create table sale_hangkong(
    day_id varchar(50) not null ,
    sale_nbr varchar(20),
    cnt int ,
    round int
);
导出
bin/sqoop export \
--connect jdbc:mysql://node1:3306/myhive\
--username root \          
--password 123456 \
--table sale_hangkong \   
--columns day_id,sale_nbr,cnt,round \  
--export-dir /user/hive/warehouse/myhive.db /sales_hangkong \
--input-fields-terminated-by "\001"
复制代码

 

 

2)统计每天各个代理商的销售数量和销售金额。

复制代码
create table day_sale(
    day_id varchar(20) not null ,
    sale_nbr varchar(20),
    cnt_sum int ,
    round_sum int
);
bin/sqoop export \
--connect jdbc:mysql://node1:3306/myhive \
--username root \
--password 123456 \
--table day_sale \
--columns day_id,sale_nbr,cnt_sum,round_sum \
--export-dir /user/hive/warehouse/myhive.db/day_sale \
--fields-terminated-by "\001" \
--input-null-non-string '\\N'
复制代码

 

 

3)统计每天各个代理商的销售活跃度。

复制代码
create table huoyuedu(
    day_id varchar(50) ,
    sale_nbr varchar(20),
    sale_number int
);
bin/sqoop export \
--connect jdbc:mysql://node1:3306/myhive \
--username root \
--password 123456 \
--table huoyuedu \
--columns day_id,sale_nbr,sale_number \
--export-dir /user/hive/warehouse/myhive.db/huoyuedu \
--fields-terminated-by "\001" \
--input-null-non-string '\\N'
复制代码

 

 

4)汇总统计10月1日到10月15日之间各个代理商的销售利润。

复制代码
--mysql
create table lirun(
    day_id varchar(50) ,
    nbr varchar(20),
     cnt_buy int,
     rount_buy int,
     cnt_sale int,
     round_sale int,
     lirun int
);
bin/sqoop export \
--connect jdbc:mysql://node1:3306/myhive \
--username root \
--password 123456 \
--table lirun \
--columns day_id,nbr,cnt_buy,rount_buy,cnt_sale,round_sale,lirun \
--export-dir /user/hive/warehouse/myhive.db/lirun \
--fields-terminated-by "\001" \
--input-null-non-string '\\N'
复制代码

 

 

             

 

 

五、通过网页进行数据可视化

通过srpingboot+vue,整合echarts实现的表格可视化展示

 

posted on   嘎嘎鸭1  阅读(39)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· 单线程的Redis速度为什么快?
· SQL Server 2025 AI相关能力初探
· AI编程工具终极对决:字节Trae VS Cursor,谁才是开发者新宠?
· 展开说说关于C#中ORM框架的用法!
点击右上角即可分享
微信分享提示