-- 北京市政百姓信件分析 —— 数据清洗

-- Create the table that receives the raw letter records.
-- Every column is a plain string and rows are parsed as comma-delimited text.
--   kind                 : letter category (the per-category report groups on it)
--   `time`               : date text; the per-year report swaps '/' for '-' before year()
--   processingDepartment : handling department; the per-department report strips '"' from it
--   content              : presumably the letter body; not referenced by the visible queries (TODO confirm)
create table xingfang (
    kind                 string,
    `time`               string,
    processingDepartment string,
    content              string
)
row format delimited
    fields terminated by ',';

 


-- Load the CSV file from the local filesystem into the letters table.
-- The target is `xingfang`, the table created earlier in this script and read
-- by every report query (the previous spelling `xingfan` named no such table).
-- NOTE: without the `overwrite` keyword Hive appends, so running this statement
-- again loads the same rows a second time.
load data local inpath '/home/onesec/xinfang_data.csv'
into table xingfang;

 


-- Letters per year: count rows grouped by the year taken from `time`, then export.
-- '/' is replaced with '-' so year() receives a dash-separated date string.
-- Hive treats the target path as a directory (despite the .csv suffix) and writes
-- the comma-delimited result files inside it, replacing any previous contents.
-- The closing ';' is required so this statement does not run into the next one.
insert overwrite local directory '/home/onesec/result1.csv'
row format delimited fields terminated by ','
stored as textfile
select
    year(replace(`time`, '/', '-')) as `year`,
    count(*) as cnt
from xingfang
group by year(replace(`time`, '/', '-'));

 

 

 

 

 


-- Letters per category: one output row per distinct `kind` with its row count.
-- Results are written as comma-delimited text files under the local target directory.
insert overwrite local directory '/home/onesec/result2.csv'
    row format delimited
        fields terminated by ','
    stored as textfile
select
    letters.kind,
    count(*) as cnt
from xingfang letters
group by
    letters.kind;

 

 

 

-- Letters per handling department: strip double-quote characters from
-- processingDepartment, then count rows for each cleaned department value.
-- Results are written as comma-delimited text files under the local target directory.
insert overwrite local directory '/home/onesec/result3.csv'
    row format delimited
        fields terminated by ','
    stored as textfile
select
    cleaned.department,
    count(*) as cnt
from (
    select replace(processingDepartment, '"', "") as department
    from xingfang
) cleaned
group by
    cleaned.department;

 

 

 

 

-- posted @ 2023-06-12 10:10  清梦韶华  阅读(27)  评论(0)  编辑  收藏  举报