第一周总结——大数据清洗范例

create database iptest;

use iptest;
//首先创建和文件相对应的表结构
create table data(
`ip` string,
`Date` string,
`day` string,
`traffic` string,
`type` string,
`id` string
)
ROW format delimited fields terminated by ',' STORED AS TEXTFILE;
//接着将文件内容导入该表(预先将文件放入虚拟机的/export/data目录下)
load data local inpath '/export/data/result.csv' into table data;



create table video_article as
(select
type,
id,
count(*) as times
from data
group by type, id
order by times DESC
limit 10);

select * from video_article;

create table city_test as
(select
CONCAT(split(ip,'\\.')[0],'.',split(ip,'\\.')[1]) as city,
ip,
traffic,
type,
id
from data);

select * from city_test limit 10;

create table city as
select
city,
type,
id,
count(*) as cishu
from city_test
group by city,type,id
order by cishu DESC
limit 10;

select * from city;

create table traffic as
select
type,
id,
sum(traffic) as liulian
from city_test
group by type,id
order by liulian DESC
limit 10;

select * from traffic;
posted @   Joranger  阅读(2)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 使用C#创建一个MCP客户端
· 分享一个免费、快速、无限量使用的满血 DeepSeek R1 模型,支持深度思考和联网搜索!
· ollama系列1:轻松3步本地部署deepseek,普通电脑可用
· 基于 Docker 搭建 FRP 内网穿透开源项目(很简单哒)
· 按钮权限的设计及实现
点击右上角即可分享
微信分享提示