每日随笔——hive数据分析1

根据上篇博客发布的问题,进行题目解答

1、数据导入

复制代码
Create table test2(day_id varchar(30), sale_nbr varchar(30), buy_nbr varchar(30), cnt varchar(30), round varchar(30)) row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde' WITH SERDEPROPERTIES

    (

        "separatorChar"=","

    )

STORED AS TEXTFILE;

load data local inpath '/export/server/sales.csv' into table test2;

 
复制代码

2、数据清洗

复制代码
Create table test3(day_id varchar(30), sale_nbr varchar(30), buy_nbr varchar(30), cnt varchar(30), round varchar(30)) row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde' WITH SERDEPROPERTIES

    (

        "separatorChar"=","

    )

STORED AS TEXTFILE;#导入清洗后的数据insert overwrite table test3 select date_add('2022-10-00',cast(day_id AS INT)) as day_id,sale_nbr,buy_nbr,cnt,round from test2;
复制代码

3、数据分析

3.1统计每天各个机场的销售数量和销售金额

复制代码
Create table text3_1(day_id varchar(30), sale_nbr varchar(30), cnt varchar(30), round varchar(30)) row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde' WITH SERDEPROPERTIES

    (

        "separatorChar"=","

    )

STORED AS TEXTFILE;#数据导入
insert overwrite table text3_1

SELECT

  day_id,

  sale_nbr,

  SUM(cnt),

  SUM(round)

FROM

  test3

WHERE sale_nbr LIKE 'C%'

GROUP BY day_id,sale_nbr;
复制代码

3.2统计每天各个代理商的销售数量和销售金额

复制代码
#建表
Create table text3_2(day_id varchar(30), sale_nbr varchar(30), cnt varchar(30), round varchar(30)) row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde' WITH SERDEPROPERTIES

    (

        "separatorChar"=","

    )

STORED AS TEXTFILE;#数据导入insert overwrite table text3_2SELECT  day_id,  sale_nbr,   SUM(cnt),  SUM(round)FROM  test3WHERE sale_nbr LIKE "O%"GROUP BY day_id,sale_nbr;
复制代码

3.3统计每天各个代理商的销售活跃度

复制代码
#建表
Create table text3_3(day_id varchar(30), sale_nbr varchar(30), sale_number varchar(30)) row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde' WITH SERDEPROPERTIES

    (

        "separatorChar"=","

    )

STORED AS TEXTFILE;#导入数据insert overwrite table text3_3SELECT  day_id,  sale_nbr,  COUNT(sale_nbr)FROM  test3WHERE sale_nbr LIKE "O%"GROUP BY day_id,sale_nbr;
复制代码

3.4汇总统计10月1日到10月15日之间各个代理商的销售利润(先将数据导入,然后重新建表,取出部分数据)

create table daiout as select day_id as day_id,sale_nbr as sale_nbr,sum(cnt)as cnt,sum(round)as round from test3 where sale_nbr like 'O%' group by day_id,sale_nbr;create table daiin as select day_id as day_id,buy_nbr as buy_nbr,sum(cnt)as cnt,sum(round)as round from test3 where buy_nbr like 'O%' group by day_id,buy_nbr;create table daili as select daiin.day_id as day_id,daiout.sale_nbr as sale_nbr,daiin.cnt as incnt,daiin.round as inround,daiout.cnt as outcnt,daiout.round as outround,daiout.round-daiin.round as li from daiin join daiout on (daiin.buy_nbr=daiout.sale_nbr);create table profit(day_id varchar(200),sale_nbr VARCHAR(200),incnt double,inround double,outcnt double,outround double,li double);insert overwrite table profit select * from daili where day_id between '2022-10-01' and '2022-10-15';

 

posted @   伽澄  阅读(19)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 【自荐】一款简洁、开源的在线白板工具 Drawnix
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· C#/.NET/.NET Core优秀项目和框架2025年2月简报
· DeepSeek在M芯片Mac上本地化部署
历史上的今天:
2022-09-23 如何生成随机数
点击右上角即可分享
微信分享提示