4.明细宽表_项目一

明细宽表的构建及数据加载

-- 明细宽表 相当于在贴源数据表的基础之上,增加了9个字段,时间字段增加6个,请求字段增加了3个
create external table if not exists web_detail(
	ipaddr string comment "ip address",
	visit_time string comment "日志的产生时间",
	visit_date string comment "日志的产生日期 年 月 日",
	visit_time_str string comment "日志产生的时间 时 分 秒",
	visit_year string comment "日志产生的年份",
	visit_month string comment "日志产生的月份",
	visit_day string comment "日志产生的天",
	visit_hour string comment "日志产生的时",
	request_url string comment "请求的网址",
	request_host string comment "请求的域名",
	request_path string comment "请求的资源路径",
	request_query string comment "请求的携带参数",
	status int comment "网站的响应状态码",
	body_bytes int comment "响应字节数",
	referer_url string comment "请求网址的来源网站",
	user_agent string comment "用户的浏览信息",
	province string comment "用户访问时所处的省份",
	latitude string comment "纬度",
	longitude string comment "经度",
	age int comment "年龄"
)
partitioned by(logdate string)
row format delimited
fields terminated by ',';

desc formatted web_detail;

insert into table web_detail partition(logdate="${hiveconf:yesterday}") 
select 
	ipaddr,
	visit_time,
	date_format(visit_time, "yyyy-MM-dd") as visit_date,
	date_format(visit_time, "HH:mm:ss") as visit_time_str,
	date_format(visit_time, "yyyy") as visit_year,
	date_format(visit_time, "MM") as visit_month,
	date_format(visit_time, "dd") as visit_day,
	date_format(visit_time, "HH") as visit_hour,
	request_url,
	tmp.request_host as request_host,
	tmp.request_path as request_path,
	tmp.request_query as request_query,
	status,
	body_bytes,
	referer_url,
	user_agent,
	province,
	latitude,
	longitude,
	age
from web_origin
lateral view parse_url_tuple(request_url, "HOST", "PATH", "QUERY") tmp as request_host, request_path, request_query
where logdate="${hiveconf:yesterday}";

-- select * from web_detail limit 11;

明细宽表数据添加及数据统计

/*
 * 统计分析
 */
-- 1. 统计网站每个月用户的访问量指标
-- 创建指标结果的数据存储表 指标表一般都是内部表,而且不需要分区
create table if not exists month_pvs(
	visit_year string,
	visit_month string,
	pvs int
) row format delimited 
fields terminated by ',';

-- 将查询结果增加到指标表中
insert overwrite table month_pvs 
select visit_month, visit_year, count(*) as pvs
from web_detail
group by visit_month, visit_year;

-- 查询结果
select * from month_pvs limit 11;
posted @ 2022-08-09 23:39  jsqup  阅读(76)  评论(0编辑  收藏  举报