每日总结04
Hive测试的流程报告
一:启动虚拟机中的hadoop、zookeeper、hive和yarn
A start 脚本用于启动hadoop、zookeeper、hbase。
用命令启动hive的两个服务
启动yarn
二:将所需要的数据上传至node1:9870并完成清洗
三:hive数据库连接datagrip,执行相应的数据库操作语句,同时mysql数据库中创建一样的表。完成数据的清洗,和方便后面的操作,数据的传输。
-- 创建初始表
create table testSales
(
day_id string,
sale_nbr string,
buy_nbr string,
cnt string,
round string
)
row format delimited fields terminated by ',';
-- 将本地数据导入初始表中(先将数据文件上传到虚拟机里)
load data inpath '/user/hive/warehouse/sales_sample_20170310.csv' into table testSales;
--创建表格存放清洗后的数据
create table test_sales_wash
(
day_id string,
sale_nbr string,
buy_nbr string,
cnt string,
round string
)
row format delimited fields terminated by ',';
-- 插入清洗后的数据
insert overwrite table test_sales_wash
select date_add('2021-09-00', cast(day_id as int)) as day_id,
sale_nbr as sale_nbr,
buy_nbr as buy_nbr,
cnt as cnt,
round as round
from testSales;
-- 1.统计每天各个机场的销售数量和销售金额。
-- 创建机场表存放数据
create table test_jichang
(
day_id string,
sale_nbr string,
cnt string,
round string
) row format delimited fields terminated by ',';
--向机场表插入数据
insert into table test_jichang
select day_id, sale_nbr, sum(cnt) as cnt, sum(round) as round
from test_sales_wash
group by sale_nbr, day_id
having sale_nbr like 'C%';
--2.统计每天各个代理商的销售数量和销售金额。
--创建代理商表存放数据
create table test_dailishang
(
day_id string,
sale_nbr string,
cnt string,
round string
) row format delimited fields terminated by ',';
--向代理商表插入数据
insert into table test_dailishang
select day_id, sale_nbr, sum(cnt) as cnt, sum(round) as round
from test_sales_wash
group by sale_nbr, day_id
having sale_nbr like 'O%';
--3.统计每天各个代理商的销售活跃度
-- 创建表存放每天代理商卖出的活跃度
create table test_dailishang_mc
(
day_id string,
sale_nbr string,
count int
) row format delimited fields terminated by ',';
--插入数据到代理商卖出活跃表
insert into table test_dailishang_mc
select day_id, sale_nbr, count(*) as count
from test_sales_wash
where sale_nbr like 'O%'
group by sale_nbr, day_id;
--创建表存放每天代理商买入的活跃度
create table test_dailishang_mr
(
day_id string,
sale_nbr string,
count int
) row format delimited fields terminated by ',';
--插入数据到代理商买入活跃表
insert into table test_dailishang_mr
select day_id, buy_nbr as sale_nbr, count(*) as count
from test_sales_wash
where buy_nbr like 'O%'
group by buy_nbr, day_id;
--创建表统计每天代理商的活跃度
create table test_dailishang_h
(
day_id string,
sale_nbr string,
sale_number int
) row format delimited fields terminated by ',';
--插入数据
insert into table test_dailishang_h
select test_dailishang_mc.day_id as day_id,
test_dailishang_mc.sale_nbr as sale_nbr,
test_dailishang_mc.count + test_dailishang_mr.count as sale_number
from test_dailishang_mc
join test_dailishang_mr on (test_dailishang_mc.sale_nbr = test_dailishang_mr.sale_nbr);
--查看结果
select *
from test_dailishang_h
where day_id = '2021-09-01'
limit 40;
--4.汇总统计 9 月 1 日到 9 月 15 日之间各个代理商的销售利润。
-- 创建表存放每天代理商卖出的数量和金额
create table test_daiout
(
day_id string,
sale_nbr string,
cnt int,
round int
) row format delimited fields terminated by ',';
--插入数据
insert into table test_daiout
select day_id, sale_nbr, sum(cnt) as cnt, sum(round) as round
from test_sales_wash
where sale_nbr like 'O%'
group by day_id, sale_nbr;
-- 创建表存放每天代理商买入的数量和金额
create table test_daiin
(
day_id string,
buy_nbr string,
cnt int,
round int
) row format delimited fields terminated by ',';
--插入数据
insert into table test_daiin
select day_id, buy_nbr, sum(cnt) as cnt, sum(round) as round
from test_sales_wash
where buy_nbr like 'O%'
group by day_id, buy_nbr;
--创建表存放每天代理商的销售利润
create table test_daili
(
day_id string,
sale_nbr string,
incnt int,
inround int,
outcnt int,
outround int,
lirun int
) row format delimited fields terminated by ',';
--插入数据
insert into table test_daili
select test_daiout.day_id as day_id,
test_daiout.sale_nbr as sale_nbr,
test_daiin.cnt as incnt,
test_daiin.round as inround,
test_daiout.cnt as outcnt,
test_daiout.round as outround,
test_daiout.round - test_daiin.round as lirun
from test_daiout
join test_daiin on (test_daiin.buy_nbr = test_daiout.sale_nbr);
Mysql中的表
四:虚拟机中终端执行命令,完成数据的传输
sqoop export \
--connect "jdbc:mysql://node1:3306/test?useUnicode=true&characterEncoding=utf-8" \
--username root \
--password 123456 \
--table test_jichang \
--num-mappers 1 \
--export-dir /user/hive/warehouse/test_jichang \
--input-fields-terminated-by ","
每一张表都需要完成上述代码,修改表的名称即可,完成数据从hive到mysql的传输
五.完成数据的可视化
数据的可视化由springboot和echats完成后台数据的获取和前端数据的展示。
具体效果图如下
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 【自荐】一款简洁、开源的在线白板工具 Drawnix
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· C#/.NET/.NET Core优秀项目和框架2025年2月简报
· DeepSeek在M芯片Mac上本地化部署