大数据测试

1、数据导入:

要求将样表文件中的sales_sample_20170310数据导入HIVE数据仓库中。

2、数据清洗:

要求将day_id一列中的数值清洗为真实的日期格式,可用字符串表示。

数据1对应日期2021-09-01,依次类推15对应日期2021-09-15

3数据分析处理:

1)统计每天各个机场的销售数量和销售金额。

要求的输出字段

day_id,sale_nbr,,cnt,round

日期编号,卖出方代码,数量,金额

2)统计每天各个代理商的销售数量和销售金额

要求的输出字段

day_id,sale_nbr,,cnt,round

日期编号,卖出方代码,数量,金额

3)统计每天各个代理商的销售活跃度。

要求的输出字段

day_id,sale_nbr, sale_number

日期编号,卖出方代码,交易次数买入或者卖出均算交易次数)

4)汇总统计9月1日9月15日之间各个代理商的销售利润。

编号,卖出方代码,买入数量,买入金额卖出数量,卖出金额,销售利润(卖出金额-买入金额)

5)设计分析代理商的市场地位根据市场交易次数、交易对象个数销售机票数量、销售利润等。(选做题

4处理结果入库

上述统计分析的结果数据保存到mySQL数据库中。

5、数据可视化展示:

   利用Echarts上述统计结果以图形化展示的方式展现出来:饼图、柱状图、地图、折线图等

 

 

  1 CREATE TABLE IF NOT EXISTS sales_sample (
  2   day_id STRING,
  3   sale_nbr STRING,
  4   buy_nbr STRING,
  5   cnt INT,
  6   round INT
  7 )
  8 ROW FORMAT DELIMITED
  9 FIELDS TERMINATED BY ','
 10 STORED AS TEXTFILE;
 11 
 12 
 13 CREATE  TABLE IF NOT EXISTS target_sales_sample (
 14   day_id STRING,
 15   sale_nbr STRING,
 16   buy_nbr STRING,
 17   cnt INT,
 18   round INT
 19 )
 20 ROW FORMAT DELIMITED
 21 FIELDS TERMINATED BY ','
 22 STORED AS TEXTFILE;
 23 
 24 -- -- 将数据上传到HDFS
 25 -- -- hadoop fs -put employees.csv /
 26 
 27 -- -- 导入数据到Hive表
 28 LOAD DATA INPATH '/xiyou/sales_sample_20170310.csv' INTO TABLE target_sales_sample;
 29 --
 30 -- -- 验证数据导入
 31 SELECT * FROM  target_sales_sample ;
 32 SELECT * FROM  sales_sample ;
 33 -- -- 使用Hive的UDF将day_id映射为日期格式
 34 
 35 insert overwrite table sales_sample
 36 select
 37     date_add('2023-09-00',cast(day_id as int)) as day_id,
 38     sale_nbr as sale_nbr,
 39     buy_nbr as buy_nbr,
 40     cnt as cnt,
 41     round as round
 42 from target_sales_sample;
 43 
 44 -- drop table sales_sample;
 45 -- drop table jichang;
 46 
 47 -- (1)统计每天各个机场的销售数量和销售金额。
 48 -- 要求的输出字段
 49 -- day_id,sale_nbr,,cnt,round
 50 -- 日期编号,卖出方代码,数量,金额
 51 
 52 CREATE TABLE IF NOT EXISTS jichang (
 53   day_id STRING,
 54   sale_nbr STRING,
 55   cnt INT,
 56   round INT
 57 )
 58 ROW FORMAT DELIMITED
 59 FIELDS TERMINATED BY ','
 60 STORED AS TEXTFILE;
 61 
 62 insert into table jichang
 63 select
 64     day_id,
 65     sale_nbr,
 66     sum(cnt) as cnt,
 67     sum(round) as round
 68 from sales_sample
 69 group by sale_nbr,day_id having sale_nbr like 'C%';
 70 
 71 SELECT * FROM  jichang ;
 72 
 73 -- (2)统计每天各个代理商的销售数量和销售金额。
 74 -- 要求的输出字段
 75 -- day_id,sale_nbr,,cnt,round
 76 -- 日期编号,卖出方代码,数量,金额
 77 CREATE TABLE IF NOT EXISTS dailishang (
 78   day_id STRING,
 79   sale_nbr STRING,
 80   cnt INT,
 81   round INT
 82 )
 83 ROW FORMAT DELIMITED
 84 FIELDS TERMINATED BY ','
 85 STORED AS TEXTFILE;
 86 
 87 insert into table dailishang
 88 select
 89     day_id,
 90     sale_nbr,
 91     sum(cnt) as cnt,
 92     sum(round) as round
 93 from sales_sample
 94 group by sale_nbr,day_id having sale_nbr like 'O%';
 95 
 96 SELECT * FROM  dailishang ;
 97 
 98 -- (1)创建表存放每天代理商卖出的活跃度
 99 CREATE TABLE IF NOT EXISTS dailishang_active (
100   day_id STRING,
101   sale_nbr STRING,
102   sale_number INT
103 )
104 ROW FORMAT DELIMITED
105 FIELDS TERMINATED BY ','
106 STORED AS TEXTFILE;
107 
108 insert into table dailishang_active
109 select
110     day_id,
111     sale_nbr,
112     count(*) as sale_number
113 from sales_sample
114 group by sale_nbr,day_id having sale_nbr like 'O%';
115 
116 SELECT * FROM  dailishang_active ;
117 
118 -- 2、创建表存放每天代理商买入的活跃度
119 CREATE TABLE IF NOT EXISTS dailishang_active2 (
120   day_id STRING,
121   sale_nbr STRING,
122   sale_number INT
123 )
124 ROW FORMAT DELIMITED
125 FIELDS TERMINATED BY ','
126 STORED AS TEXTFILE;
127 
128 insert into table dailishang_active2
129 select
130     day_id,
131     buy_nbr as sale_nbr,
132     count(*) as sale_number
133 from sales_sample
134 group by buy_nbr,day_id having buy_nbr like 'O%';
135 
136 SELECT * FROM  dailishang_active2 ;
137 
138 -- 3、创建表统计每天代理商活跃度
139 CREATE TABLE IF NOT EXISTS dailishang_huoyue (
140   day_id STRING,
141   sale_nbr STRING,
142   sale_number INT
143 )
144 ROW FORMAT DELIMITED
145 FIELDS TERMINATED BY ','
146 STORED AS TEXTFILE;
147 
148 insert into table dailishang_huoyue
149 select dailishang_active.day_id as day_id,
150        dailishang_active.sale_nbr as sale_nbr,
151        dailishang_active.sale_number+dailishang_active2.sale_number as sale_number
152 from dailishang_active join dailishang_active2 on (dailishang_active.sale_nbr=dailishang_active2.sale_nbr) ;
153 
154 SELECT * FROM  dailishang_huoyue;
155 
156 -- (1)创建表存放每天代理商卖出的数量和金额
157 CREATE TABLE IF NOT EXISTS mai_chu (
158   day_id STRING,
159   sale_nbr STRING,
160   cnt int,
161   round int
162 )
163 ROW FORMAT DELIMITED
164 FIELDS TERMINATED BY ','
165 STORED AS TEXTFILE;
166 
167 insert into table mai_chu
168 select
169     day_id,
170     sale_nbr,
171     sum(cnt) as cnt,
172     sum(round) as round
173 from sales_sample
174 where sale_nbr like 'O%'
175 group by sale_nbr,day_id ;
176 
177 SELECT * FROM  mai_chu ;
178 
179 -- (2)创建表存放每天代理商买入的数量和金额
180 CREATE TABLE IF NOT EXISTS mai_ru (
181   day_id STRING,
182   buy_nbr  STRING,
183   cnt int,
184   round int
185 )
186 ROW FORMAT DELIMITED
187 FIELDS TERMINATED BY ','
188 STORED AS TEXTFILE;
189 
190 insert into table mai_ru
191 select
192     day_id,
193     buy_nbr,
194     sum(cnt) as cnt,
195     sum(round) as round
196 from sales_sample
197 where buy_nbr like 'O%'
198 group by buy_nbr,day_id ;
199 
200 SELECT * FROM  mai_ru ;
201 
202 -- (3)创建表存放每天代理商的销售利润
203 CREATE TABLE IF NOT EXISTS dai_li (
204   day_id STRING,
205   sale_nbr STRING,
206   incnt int,
207   inround int,
208   outcnt int,
209   outround int,
210   lirun int
211 )
212 ROW FORMAT DELIMITED
213 FIELDS TERMINATED BY ','
214 STORED AS TEXTFILE;
215 
216 insert into table dai_li
217 select mai_chu.day_id as day_id,
218        mai_chu.sale_nbr as sale_nbr,
219        mai_ru.cnt as incnt,
220        mai_ru.round as inround,
221        mai_chu.cnt as outcnt,
222        mai_chu.round as outround,
223        mai_chu.round-mai_ru.round as lirun
224 from mai_chu join mai_ru on (mai_ru.buy_nbr=mai_chu.sale_nbr);
225 
226 SELECT * FROM  sales_sample;
227 SELECT * FROM  jichang;
228 SELECT * FROM  dailishang;
229 SELECT * FROM  dailishang_huoyue;
230 SELECT * FROM  dai_li;

 

posted @ 2023-09-23 23:10  旺旺大菠萝  阅读(12)  评论(0编辑  收藏  举报