每日日报 2021.10.5
完成内容:
1.编写 2019 级课堂测试试卷-数据清洗 地域维度清洗
- 创建一个原数据表和一个行政区代码表
-- Lookup table loaded from xinzhen.csv (described in these notes as the
-- administrative-division code table).
-- NOTE(review): dm / dmms look like "code" / "code description" -- confirm against the CSV.
-- IF NOT EXISTS lets this script be re-run without failing on an existing table.
-- STORED AS TEXTFILE pins the storage format so the comma-delimited file can be
-- loaded regardless of the cluster's hive.default.fileformat setting.
CREATE TABLE IF NOT EXISTS xinzhen (
    dm   INT,
    dmms STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
-- Source-data table loaded from qiye.csv (29 comma-separated fields per row).
-- Column names and types are unchanged from the original one-line definition.
-- NOTE(review): hangyeweidu / gaoxinjishuweidu / diyuweidu appear to be pinyin for
-- industry / high-tech / region dimensions, and the Q* columns look like
-- questionnaire item codes -- confirm against the data dictionary.
-- IF NOT EXISTS lets this script be re-run without failing on an existing table.
-- STORED AS TEXTFILE pins the storage format so the comma-delimited file can be
-- loaded regardless of the cluster's hive.default.fileformat setting.
CREATE TABLE IF NOT EXISTS qiye (
    id               INT,
    QA04             STRING,
    QA05             STRING,
    QA07             STRING,
    QA15             STRING,
    QA19             INT,
    hangyeweidu      STRING,
    QB03             INT,
    QB03ONE          STRING,
    QB03TWO          STRING,
    QB03_1           INT,
    QB06             INT,
    QB16             INT,
    QB16V            STRING,
    gaoxinjishuweidu STRING,
    QB16_1           INT,
    QB16_1V          STRING,
    QC02             DOUBLE,
    QC05_0           DOUBLE,
    QC24             DOUBLE,
    QC40             DOUBLE,
    QD01             INT,
    QD28             INT,
    QJ09             INT,
    QJ20             DOUBLE,
    QJ55             INT,
    QJ74             INT,
    diyuweidu        STRING,
    SYEAR            STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
- 将 xls 文件转换为 .csv 文件并上传到服务器,然后修改表的字符集以解决中文乱码
-- Load the converted CSV files into their tables.
-- LOCAL makes Hive read the path from the local filesystem rather than HDFS.
-- INTO TABLE without OVERWRITE appends, so running these statements again
-- adds the same rows a second time.
LOAD DATA LOCAL INPATH '/opt/module/hive/xinzhen.csv'
INTO TABLE xinzhen;

LOAD DATA LOCAL INPATH '/opt/module/hive/qiye.csv'
INTO TABLE qiye;
-- Have the table's SerDe decode the underlying file bytes as GBK instead of
-- the default UTF-8, which fixes garbled Chinese text when querying xinzhen.
-- NOTE(review): only xinzhen is changed here; confirm whether qiye needs the same setting.
ALTER TABLE xinzhen
SET SERDEPROPERTIES (
    'serialization.encoding' = 'GBK'
);