大数据-地域维度清洗
将地域代码和地域名称清洗到一列中
启动hadoop和hive
创建表
create table tab03(
ID int,
QA04 string,
QA05 string,
QA07 string,
QA15 int,
QA19 int,
HYWD string,
QB03 int,
QB03ONE string,
QB03TWO string,
QB03_1 int,
QB06 int,
QB16 int,
QB16V string,
GXWD string,
QB16_1 int,
QB16_1V string,
QC02 double,
QC05_0 double,
QC24 double,
QC40 double,
QD01 int,
QD28 int,
QJ09 int,
QJ20 int,
QJ55 int,
QJ74 int,
DYWD string,
SYEAR int
)
row format delimited fields terminated by ','
lines terminated by '\n';
导入文件
load data local inpath ‘/home/lt/file/1.csv' overwrite into table tab03;
create table tab04(
dm int,
dmms string
)
row format delimited fields terminated by ','
lines terminated by '\n';
load data local inpath '/home/lt/file/2.csv' overwrite into table tab04;
insert overwrite table tab05 select
ID,
QA04,
QA05,
QA07,
QA15,
QA19,
HYWD,
QB03,
QB03ONE,
QB03TWO ,
QB03_1,
QB06,
QB16,
QB16V,
GXWD,
QB16_1,
QB16_1V,
QC02,
QC05_0,
QC24,
QC40,
QD01,
QD28,
QJ09,
QJ20,
QJ55,
QJ74,
concat(QA19,t4.dmms),
SYEAR
from tab04 t4 join tab03 t3 on (t4.dm=t3.QA19);
清洗后的数据
导入mysql