大数据-地域维度清洗

将地域代码和地域名称清洗到一列中

 

启动hadoop和hive

创建表

create table tab03(

ID int,

QA04 string,

QA05 string,

QA07 string,

QA15 int,

QA19 int,

HYWD string,

QB03 int,

QB03ONE string,

QB03TWO string,

QB03_1 int,

QB06 int,

QB16 int,

QB16V string,

GXWD string,

QB16_1 int,

QB16_1V string,

QC02 double,

QC05_0 double,

QC24 double,

QC40 double,

QD01 int,

QD28 int,

QJ09 int,

QJ20 int,

QJ55 int,

QJ74 int,

DYWD string,

SYEAR int

)

row format delimited fields terminated by ','

lines terminated by '\n';

 

 

导入文件

load data local inpath ‘/home/lt/file/1.csv' overwrite into table tab03;

create table tab04(

dm int,

dmms string

)

row format delimited fields terminated by ','

lines terminated by '\n';

 

 

load data local inpath '/home/lt/file/2.csv' overwrite into table tab04;

insert overwrite table tab05 select 

ID,

QA04,

QA05,

QA07,

QA15,

QA19,

HYWD,

QB03,

QB03ONE,

QB03TWO ,

QB03_1,

QB06,

QB16,

QB16V,

GXWD,

QB16_1,

QB16_1V,

QC02,

QC05_0,

QC24,

QC40,

QD01,

QD28,

QJ09,

QJ20,

QJ55,

QJ74,

concat(QA19,t4.dmms),

SYEAR

from tab04 t4 join tab03 t3 on (t4.dm=t3.QA19);

 

 

清洗后的数据

 

导入mysql

 

posted @ 2021-11-11 12:48  学习中_1  阅读(67)  评论(0编辑  收藏  举报