Hive第二次测试总结
首先看一下题目:
Result文件数据说明:
Ip:106.39.41.166,(城市)
Date:10/Nov/2016:00:01:02 +0800,(日期)
Day:10,(天数)
Traffic: 54 ,(流量)
Type: video,(类型:视频video或文章article)
Id: 8701(视频或者文章的id)
测试要求:
1、 数据清洗:****按照进行数据清洗,并将清洗后的数据导入hive数据库中。
两阶段数据清洗:
(1)第一阶段:把需要的信息从原始日志中提取出来
ip: 199.30.25.88
time: 10/Nov/2016:00:01:03 +0800
traffic: 62
文章: article/11325
视频: video/3235
(2)第二阶段:根据提取出来的信息做精细化操作
ip--->城市 city(IP)
date--> time:2016-11-10 00:01:03
day: 10
traffic:62
type:article/video
id:11325
(3)hive数据库表结构:
create table data( ip string, time string , day string, traffic bigint,
type string, id string )
2、****数据分析:在HIVE统计下列数据。
(1)统计最受欢迎的视频/文章的Top10访问次数 (video/article)
(2)按照地市统计最受欢迎的Top10课程 (ip)
(3)按照流量统计最受欢迎的Top10课程 (traffic)
3、数据可视化:
将统计结果倒入MySql数据库中,通过图形化展示的方式展现出来。
清洗数据我就不作过多赘述,首先记一下sqoop上传mysql的表到hive的操作语句:
sqoop import \
--connect jdbc:mysql://node1:3306/hive2 \
--username root \
--password 20204112 \
--table table \
--num-mappers 1 \
--hive-import \
--fields-terminated-by "\t" \
--hive-overwrite \
--hive-table hive2.result (hive2是数据库名,result是表名)
上传好后就可以在Hive里对数据进行处理,首先是建立四个表,我分别命名为table,table1,table2,table3:
create table table (
ip string,
data string,
day string,
traffic string,
type string,
id string
)
ROW format delimited fields terminated by ',' STORED AS TEXTFILE;
create table table1 (
traffic string,
id string,
type string,
times string
)
ROW format delimited fields terminated by ',' STORED AS TEXTFILE;
create table table2 (
ip string,
id string,
type string,
traffic string
)
ROW format delimited fields terminated by ',' STORED AS TEXTFILE;
create table table3 (
ip string,
id string,
type string,
ips string
)
ROW format delimited fields terminated by ',' STORED AS TEXTFILE;
然后就可以开始处理数据了,就是按照要求向表里插入处理好的数据:
insert overwrite table table1 SELECT
SUM( traffic ) AS traffic,
id,
type,
count(id) as times
FROM
`table`
GROUP BY
id,type
ORDER BY
traffic DESC ;
insert overwrite table table2 SELECT ip,id,type,sum(traffic)as traff FROM `table` GROUP BY ip,id,type ORDER BY id,sum(traffic) DESC ;
insder overwrite table table3 SELECT ip,id,type,count(ip) as ips FROM `table` GROUP BY ip,id,type ORDER BY count(ip) DESC;
处理好数据后还需要将Hive里的表导入到mysql里,导出语句如下:
sqoop export \
--connect jdbc:mysql://node1:3306/hive2 \
--username root \
--password 20204112 \
--table table3 \
--num-mappers 1 \
--export-dir /user/hive/warehouse/hive2.db/table3 \
--input-fields-terminated-by ","
三张表都要执行以上操作。
接下来就是Java以及Html代码:
Dao:
package org.example.Dao;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;
import org.example.DBUtils.DBUtil;
import org.example.Bean.HiveBean;
public class HiveDao {
public List<HiveBean> table1() {
String sql1 = "SELECT traffic,id,times FROM `table1` WHERE type = 'video' LIMIT 10";
List<HiveBean> list = new ArrayList<>();
Connection conn = DBUtil.getConn();
Statement state = null;
ResultSet rs = null;
try {
state = conn.createStatement();
rs = state.executeQuery(sql1);
HiveBean bean = null;
while (rs.next()) {
String traffic = rs.getString("traffic");
String id = rs.getString("id");
String times = rs.getString("times");
System.out.println(traffic);
System.out.println(id);
System.out.println(times);
bean = new HiveBean(traffic,id,times);
list.add(bean);
}
} catch (SQLException e) {
e.printStackTrace();
} finally {
DBUtil.close(conn, state, rs);
}
return list;
}
public List<HiveBean> table1_1() {
String sql = "SELECT traffic,id,times FROM `table1` WHERE type = 'article' LIMIT 10";
List<HiveBean> list = new ArrayList<>();
Connection conn = DBUtil.getConn();
Statement state = null;
ResultSet rs = null;
try {
state = conn.createStatement();
rs = state.executeQuery(sql);
HiveBean bean = null;
while (rs.next()) {
String traffic = rs.getString("traffic");
String id = rs.getString("id");
String times = rs.getString("times");
System.out.println(traffic);
System.out.println(id);
System.out.println(times);
bean = new HiveBean(traffic,id,times);
list.add(bean);
}
} catch (SQLException e) {
e.printStackTrace();
} finally {
DBUtil.close(conn, state, rs);
}
return list;
}
public List<HiveBean> table2() {
String sql = "SELECT ip,ips FROM `table3` LIMIT 10";
List<HiveBean> list = new ArrayList<>();
Connection conn = DBUtil.getConn();
Statement state = null;
ResultSet rs = null;
try {
state = conn.createStatement();
rs = state.executeQuery(sql);
HiveBean bean = null;
while (rs.next()) {
String ip = rs.getString("ip");
String ips = rs.getString("ips");
System.out.println(ip);
System.out.println(ips);
bean = new HiveBean(ip,ips);
list.add(bean);
}
} catch (SQLException e) {
e.printStackTrace();
} finally {
DBUtil.close(conn, state, rs);
}
return list;
}
public List<HiveBean> table3() {
String sql = "SELECT traffic,id,type FROM `table1` LIMIT 10";
List<HiveBean> list = new ArrayList<>();
Connection conn = DBUtil.getConn();
Statement state = null;
ResultSet rs = null;
try {
state = conn.createStatement();
rs = state.executeQuery(sql);
HiveBean bean = null;
while (rs.next()) {
String traffic = rs.getString("traffic");
String id = rs.getString("id");
String type = rs.getString("type");
System.out.println(traffic);
System.out.println(id);
System.out.println(type);
bean = new HiveBean(traffic,id,type);
list.add(bean);
}
} catch (SQLException e) {
e.printStackTrace();
} finally {
DBUtil.close(conn, state, rs);
}
return list;
}
}
Bean:
package org.example.Bean;
public class HiveBean {
private String ip;
private String id;
private String traffic;
private String type;
private String ips;
private String times;
public HiveBean() {
super();
}
public HiveBean(String id) {
super();
this.id = id;
}
public HiveBean(String ip, String ips) {
super();
this.id = ip;
this.ips = ips;
}
public HiveBean(String traffic, String id, String times) {
super();
this.traffic = traffic;
this.id = id;
this.times = times;
}
public HiveBean(String ip, String id, String type, String ips) {
super();
this.ip = ip;
this.id = id;
this.type = type;
this.ips = ips;
}
public String getIp() {
return ip;
}
public void setIp(String ip) {
this.ip = ip;
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getTraffic() {
return traffic;
}
public void setTraffic(String traffic) {
this.traffic = traffic;
}
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
public String getIps() {
return ips;
}
public void setIps(String ips) {
this.ips = ips;
}
public String getTimes() {
return times;
}
public void setTimes(String times) {
this.times = times;
}
@Override
public String toString() {
return "HiveBean{" +
"ip='" + ip + '\'' +
", id='" + id + '\'' +
", traffic='" + traffic + '\'' +
", type='" + type + '\'' +
", ips='" + ips + '\'' +
", times='" + times + '\'' +
'}';
}
}
DBUtil:
package org.example.DBUtils;
import java.io.*;
import java.util.Properties;
import java.sql.*;
public class DBUtil {
private static String driver;
private static String url;
private static String user;
private static String pass;
static {
//读取db.properties文件的内容
InputStream is = DBUtil.class.getClassLoader().getResourceAsStream("db.properties");
//创建properties类型的对象
Properties p = new Properties();
try {
p.load(is);
driver = p.getProperty("driver");
url = p.getProperty("url");
user = p.getProperty("username");
pass = p.getProperty("password");
//加载数据库的驱动
Class.forName(driver);
System.out.println("驱动加载成功");
} catch (Exception e) {
throw new RuntimeException(e);
}
}
public static Connection getConn() {
Connection conn = null;
try {
//连接数据库
conn = DriverManager.getConnection(url, user, pass);
System.out.println("数据库连接成功");
} catch (Exception e) {
e.printStackTrace();
System.out.println("数据库连接失败");
}
return conn;
}
public static void close(PreparedStatement ps, Connection conn) {
if (ps != null) {
try {
ps.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
if (conn != null) {
try {
conn.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
}
public static void close(ResultSet result, PreparedStatement ps, Connection conn) {
if (result != null) {
try {
result.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
if (ps != null) {
try {
ps.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
if (conn != null) {
try {
conn.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
}
public static void close(Connection conn, Statement state, ResultSet result) {
try {
if (result != null) {
result.close();
result = null;
}
if (state != null) {
state.close();
state = null;
}
if (conn != null) {
conn.close();
conn = null;
}
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
public static void main(String[] args) throws SQLException {
Connection conn = getConn();
Statement state = conn.createStatement();
ResultSet result = state.executeQuery("select id,type from table1 limit 10");
while (result.next()) {
System.out.print(result.getString("type") + " ");
System.out.print(result.getString("id"));
}
}
}
Servlet:
package org.example.Servlet;
import java.io.IOException;
import java.util.List;
import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.example.Dao.HiveDao;
import org.example.Bean.HiveBean;
@WebServlet("/HiveServlet")
public class HiveServlet extends HttpServlet {
/**
* 特有id号
*/
private static final long serialVersionUID = 1L;
HiveDao dao = new HiveDao();
/**
* 方法选择
* @return
* @throws IOException
* @throws ServletException
*/
protected void service(HttpServletRequest req,HttpServletResponse resp) throws ServletException, IOException
{
req.setCharacterEncoding("utf-8");
String method = req.getParameter("method");
if("table1".equals(method)) {
table1(req,resp);
}else if("table1_1".equals(method)) {
table1_1(req,resp);
}else if("table2".equals(method)) {
table2(req,resp);
}else if("table3".equals(method)) {
table3(req,resp);
}
}
private void table1(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
// TODO Auto-generated method stub
req.setCharacterEncoding("utf-8");
List<HiveBean> table1 = dao.table1();
req.setAttribute("table1", table1);
req.getRequestDispatcher("table1.jsp").forward(req, resp);
}
private void table1_1(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
req.setCharacterEncoding("utf-8");
List<HiveBean> table1_1 = dao.table1_1();
req.setAttribute("table1_1",table1_1);
req.getRequestDispatcher("table1.jsp").forward(req, resp);
}
private void table2(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
req.setCharacterEncoding("utf-8");
List<HiveBean> table2 = dao.table2();
req.setAttribute("table2", table2);
req.getRequestDispatcher("table2.jsp").forward(req, resp);
}
private void table3(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
req.setCharacterEncoding("utf-8");
List<HiveBean> table3 = dao.table3();
req.setAttribute("table3", table3);
req.getRequestDispatcher("table3.jsp").forward(req, resp);
}
}
index.jsp:
<%@ page language="java" contentType="text/html; charset=UTF-8"
pageEncoding="UTF-8"%>
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>查询</title>
<script src="layui/jquery.min.js"></script>
<style>
.a{
margin-bottom: 20px;
}
.b{
margin-top:45px;
margin-bottom: 20px;
}
.c{
width:300px;
height:240px;
background:rgb(195,195,195);
position:absolute;
top:30%;
left:50%;
/*transform:translaet(-50%,-50%);*/
}
.d{
font-size:5px;
color:black;
font-family:"宋体";
}
.e{
width:240px;
height:30px;
margin-left:30px;
background-color:white;
}
.f{
margin-left:30px;
margin-top:45px;
margin-bottom: 20px;
}
.g{
margin-left:30px;
margin-bottom: 20px;
}
</style>
</head>
<body>
<div class="c">
<form action="HiveServlet?method=table1" method="post" onsubmit="return check()">
<div>
<input type="submit" class="e" value="table1">
</div>
</form>
<form action="HiveServlet?method=table2" method="post" onsubmit="return check()">
<div>
<input type="submit" class="e" value="table2">
</div>
</form>
<form action="HiveServlet?method=table3" method="post" onsubmit="return check()">
<div>
<input type="submit" class="e" value="table3">
</div>
</form>
</div>
</body>
</html>
table1.jsp:
<%--
Created by IntelliJ IDEA.
User: 16475
Date: 2022/10/14
Time: 21:08
To change this template use File | Settings | File Templates.
--%>
<%@ page contentType="text/html;charset=UTF-8" language="java" %>
<%@taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
<html>
<head>
<title>最受欢迎的文章TOP10</title>
<script src="layui/jquery.min.js"></script>
<script src="echarts/dist/echarts.js"></script>
<style>
*{margin:0;padding:0}
html,body{
width:auto;
height:auto;
}
#main1{
width:1500px;
height:700px;
margin:auto;
border:1px solid #ddd;
}
#main2{
width:1500px;
height:700px;
margin:800px;
border:1px solid #ddd;
}
/*默认长宽比0.75*/
</style>
</head>
<body>
<%
Object message = request.getAttribute("message");
if(message!=null && !"".equals(message)){
%>
<script type="text/javascript">
alert("<%=request.getAttribute("message")%>");
</script>
<%} %>
<div id="main1">
</div>
<div id="main2">
</div>
<script type="text/javascript">
var myChart = echarts.init(document.getElementById('main1'));
var arr1 = new Array();
var arr2 = new Array();
var index1 = 0;
var index2 = 0;
<c:forEach items="${table1}" var="table1">
arr1[index1++] = ${table1.times};
</c:forEach>
<c:forEach items="${table1_1}" var="table1_1">
arr2[index2++] = ${table1_1.times};
</c:forEach>
var option1 = {
title: {
text: '最受欢迎的文章TOP10'
},
tooltip: {
},
legend: {
data:[{name:"video",icon:"circle"},{name:"article",icon:"circle"}]
},
xAxis : [
{
type : 'category',
data :[
<c:forEach items="${table1}" var="table1">
["${table1.id}"],
</c:forEach>
],
<%--data : [--%>
<%-- name="article",--%>
<%-- <c:forEach items="${table1_1}" var="table1_1">--%>
<%-- ["${table1_1.id}"],--%>
<%-- </c:forEach>--%>
<%--]--%>
}
],
yAxis : [
{
type : 'value'
}
],
series : [
{
name:'video',
type:'bar',
data: arr1
}
],
// series : [
// {
// name:'article',
// type:'bar',
// data: arr2
// }
// ]
};
<%--var myChart = echarts.init(document.getElementById('main2'));--%>
<%--var option2 = {--%>
<%-- title: {--%>
<%-- text: '最受欢迎的文章TOP10'--%>
<%-- },--%>
<%-- tooltip: {--%>
<%-- },--%>
<%-- legend: {--%>
<%-- data:[{name:"video",icon:"circle"},{name:"article",icon:"circle"}]--%>
<%-- },--%>
<%-- xAxis : [--%>
<%-- {--%>
<%-- type : 'category',--%>
<%-- data : [--%>
<%-- name="article",--%>
<%-- <c:forEach items="${table1_1}" var="table1_1">--%>
<%-- ["${table1_1.id}"],--%>
<%-- </c:forEach>--%>
<%-- ]--%>
<%-- }--%>
<%-- ],--%>
<%-- yAxis : [--%>
<%-- {--%>
<%-- type : 'value'--%>
<%-- }--%>
<%-- ],--%>
<%-- series : [--%>
<%-- {--%>
<%-- name:'article',--%>
<%-- type:'bar',--%>
<%-- data: arr2--%>
<%-- }--%>
<%-- ]--%>
<%--};--%>
myChart.setOption(option1);
myChart.setOption(option2);
// myChart.on('click', function (params) {
// var url = "HiveServlet?method=searchTime&day_id=" + params.name;
// window.location.href = url;
// });
</script>
</body>
</html>
table2.jsp:
<%--
Created by IntelliJ IDEA.
User: 16475
Date: 2022/10/14
Time: 21:08
To change this template use File | Settings | File Templates.
--%>
<%@ page contentType="text/html;charset=UTF-8" language="java" %>
<%@taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
<html>
<head>
<title>年度汇总</title>
<script src="layui/jquery.min.js"></script>
<script src="echarts/dist/echarts.js"></script>
<style>
*{margin:0;padding:0}
html,body{
width:auto;
height:auto;
}
#main{
width:1500px;
height:700px;
margin:auto;
border:1px solid #ddd;
}
/*默认长宽比0.75*/
</style>
</head>
<body>
<%
Object message = request.getAttribute("message");
if(message!=null && !"".equals(message)){
%>
<script type="text/javascript">
alert("<%=request.getAttribute("message")%>");
</script>
<%} %>
<div id="main">
</div>
<script type="text/javascript">
var myChart = echarts.init(document.getElementById('main'));
var arr = new Array();
var index = 0;
<c:forEach items="${table2}" var="table2">
arr[index++] = ${table2.ips};
</c:forEach>
var option = {
title: {
text: '城市排名'
},
tooltip: {
},
legend: {
data:['城市排名']
},
xAxis : [
{
type : 'category',
data : [
<c:forEach items="${table2}" var="table2">
["${table2.ip}"],
</c:forEach>
]
}
],
yAxis : [
{
type : 'value'
}
],
series : [
{
name:'访问次数',
type:'bar',
data: arr
}
]
};
myChart.setOption(option);
// myChart.on('click', function (params) {
// var url = "HiveServlet?method=searchTime&day_id=" + params.name;
// window.location.href = url;
// });
</script>
</body>
</html>
table3.jsp:
<%--
Created by IntelliJ IDEA.
User: 16475
Date: 2022/10/14
Time: 21:08
To change this template use File | Settings | File Templates.
--%>
<%@ page contentType="text/html;charset=UTF-8" language="java" %>
<%@taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
<html>
<head>
<title>年度汇总</title>
<script src="layui/jquery.min.js"></script>
<script src="echarts/dist/echarts.js"></script>
<style>
*{margin:0;padding:0}
html,body{
width:auto;
height:auto;
}
#main{
width:1500px;
height:700px;
margin:auto;
border:1px solid #ddd;
}
/*默认长宽比0.75*/
</style>
</head>
<body>
<%
Object message = request.getAttribute("message");
if(message!=null && !"".equals(message)){
%>
<script type="text/javascript">
alert("<%=request.getAttribute("message")%>");
</script>
<%} %>
<div id="main">
</div>
<script type="text/javascript">
var myChart = echarts.init(document.getElementById('main'));
var arr = new Array();
var index = 0;
<c:forEach items="${table3}" var="table3">
arr[index++] = ${table3.traffic};
</c:forEach>
var option = {
title: {
text: '排名'
},
tooltip: {
},
legend: {
data:['排名']
},
xAxis : [
{
type : 'category',
data : [
<c:forEach items="${table3}" var="table3">
["${table3.id}"],
</c:forEach>
]
}
],
yAxis : [
{
type : 'value'
}
],
series : [
{
name:'id',
type:'bar',
data: arr
}
]
};
myChart.setOption(option);
// myChart.on('click', function (params) {
// var url = "HiveServlet?method=searchTime&day_id=" + params.name;
// window.location.href = url;
// });
</script>
</body>
</html>
运行结果如下所示: