每日总结04

Hive测试的流程报告

一:启动虚拟机中的hadoopzookeeperhiveyarn

 A start 脚本用于启动hadoopzookeeperhbase

 

用命令启动hive的两个服务

 

启动yarn

 

二:将所需要的数据上传至node19870并完成清洗

 

三:hive数据库连接datagrip,执行相应的数据库操作语句,同时mysql数据库中创建一样的表。完成数据的清洗,和方便后面的操作,数据的传输。

 

 

-- 创建初始表

create table testSales

(

    day_id   string,

    sale_nbr string,

    buy_nbr  string,

    cnt      string,

    round    string

)

    row format delimited fields terminated by ',';

-- 将本地数据导入初始表中(先将数据文件上传到虚拟机里)

load data inpath '/user/hive/warehouse/sales_sample_20170310.csv' into table testSales;

--创建表格存放清洗后的数据

create table test_sales_wash

(

    day_id   string,

    sale_nbr string,

    buy_nbr  string,

    cnt      string,

    round    string

)

    row format delimited fields terminated by ',';

-- 插入清洗后的数据

insert overwrite table test_sales_wash

select date_add('2021-09-00', cast(day_id as int)) as day_id,

       sale_nbr                                    as sale_nbr,

       buy_nbr                                     as buy_nbr,

       cnt                                         as cnt,

       round                                       as round

from testSales;

 

-- 1.统计每天各个机场的销售数量和销售金额。

-- 创建机场表存放数据

create table test_jichang

(

    day_id   string,

    sale_nbr string,

    cnt      string,

    round    string

) row format delimited fields terminated by ',';

--向机场表插入数据

insert into table test_jichang

select day_id, sale_nbr, sum(cnt) as cnt, sum(round) as round

from test_sales_wash

group by sale_nbr, day_id

having sale_nbr like 'C%';

 

--2.统计每天各个代理商的销售数量和销售金额。

--创建代理商表存放数据

create table test_dailishang

(

    day_id   string,

    sale_nbr string,

    cnt      string,

    round    string

) row format delimited fields terminated by ',';

--向代理商表插入数据

insert into table test_dailishang

select day_id, sale_nbr, sum(cnt) as cnt, sum(round) as round

from test_sales_wash

group by sale_nbr, day_id

having sale_nbr like 'O%';

 

--3.统计每天各个代理商的销售活跃度

-- 创建表存放每天代理商卖出的活跃度

create table test_dailishang_mc

(

    day_id   string,

    sale_nbr string,

    count    int

) row format delimited fields terminated by ',';

--插入数据到代理商卖出活跃表

insert into table test_dailishang_mc

select day_id, sale_nbr, count(*) as count

from test_sales_wash

where sale_nbr like 'O%'

group by sale_nbr, day_id;

--创建表存放每天代理商买入的活跃度

create table test_dailishang_mr

(

    day_id   string,

    sale_nbr string,

    count    int

) row format delimited fields terminated by ',';

--插入数据到代理商买入活跃表

insert into table test_dailishang_mr

select day_id, buy_nbr as sale_nbr, count(*) as count

from test_sales_wash

where buy_nbr like 'O%'

group by buy_nbr, day_id;

--创建表统计每天代理商的活跃度

create table test_dailishang_h

(

    day_id      string,

    sale_nbr    string,

    sale_number int

) row format delimited fields terminated by ',';

--插入数据

insert into table test_dailishang_h

select test_dailishang_mc.day_id                           as day_id,

       test_dailishang_mc.sale_nbr                         as sale_nbr,

       test_dailishang_mc.count + test_dailishang_mr.count as sale_number

from test_dailishang_mc

         join test_dailishang_mr on (test_dailishang_mc.sale_nbr = test_dailishang_mr.sale_nbr);

--查看结果

select *

from test_dailishang_h

where day_id = '2021-09-01'

limit 40;

 

--4.汇总统计 9 1 日到 9 15 日之间各个代理商的销售利润。

-- 创建表存放每天代理商卖出的数量和金额

create table test_daiout

(

    day_id   string,

    sale_nbr string,

    cnt      int,

    round    int

) row format delimited fields terminated by ',';

--插入数据

insert into table test_daiout

select day_id, sale_nbr, sum(cnt) as cnt, sum(round) as round

from test_sales_wash

where sale_nbr like 'O%'

group by day_id, sale_nbr;

-- 创建表存放每天代理商买入的数量和金额

create table test_daiin

(

    day_id  string,

    buy_nbr string,

    cnt     int,

    round   int

) row format delimited fields terminated by ',';

--插入数据

insert into table test_daiin

select day_id, buy_nbr, sum(cnt) as cnt, sum(round) as round

from test_sales_wash

where buy_nbr like 'O%'

group by day_id, buy_nbr;

--创建表存放每天代理商的销售利润

create table test_daili

(

    day_id   string,

    sale_nbr string,

    incnt    int,

    inround  int,

    outcnt   int,

    outround int,

    lirun    int

) row format delimited fields terminated by ',';

--插入数据

insert into table test_daili

select test_daiout.day_id              as day_id,

       test_daiout.sale_nbr            as sale_nbr,

       test_daiin.cnt                  as incnt,

       test_daiin.round                as inround,

       test_daiout.cnt                 as outcnt,

       test_daiout.round               as outround,

       test_daiout.round - test_daiin.round as lirun

from test_daiout

         join test_daiin on (test_daiin.buy_nbr = test_daiout.sale_nbr);

Mysql中的表

 

 

四:虚拟机中终端执行命令,完成数据的传输

sqoop export \
--connect "jdbc:mysql://node1:3306/test?useUnicode=true&characterEncoding=utf-8" \
--username root \
--password 123456 \
--table test_jichang \
--num-mappers 1 \
--export-dir /user/hive/warehouse/test_jichang \
--input-fields-terminated-by ","

每一张表都需要完成上述代码,修改表的名称即可,完成数据从hivemysql的传输

五.完成数据的可视化

数据的可视化由springbootechats完成后台数据的获取和前端数据的展示。

 

 

具体效果图如下

 

 

 

 

posted @   布吉岛???  阅读(5)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 【自荐】一款简洁、开源的在线白板工具 Drawnix
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· C#/.NET/.NET Core优秀项目和框架2025年2月简报
· DeepSeek在M芯片Mac上本地化部署
点击右上角即可分享
微信分享提示