2024-11-21《关于hive总结测试》

Hive第二次测试总结

 

  首先看一下题目:

Result文件数据说明:

Ip:106.39.41.166,(城市)

Date:10/Nov/2016:00:01:02 +0800,(日期)

Day:10,(天数)

Traffic: 54 ,(流量)

Type: video,(类型:视频video或文章article)

Id: 8701(视频或者文章的id)

测试要求:

1、 数据清洗:****按照进行数据清洗,并将清洗后的数据导入hive数据库中。

两阶段数据清洗:

(1)第一阶段:把需要的信息从原始日志中提取出来

ip: 199.30.25.88

time: 10/Nov/2016:00:01:03 +0800

traffic: 62

文章: article/11325

视频: video/3235

(2)第二阶段:根据提取出来的信息做精细化操作

ip--->城市 city(IP)

date--> time:2016-11-10 00:01:03

day: 10

traffic:62

type:article/video

id:11325

(3)hive数据库表结构:

create table data( ip string, time string , day string, traffic bigint,

type string, id string )

2、****数据分析:在HIVE统计下列数据。

(1)统计最受欢迎的视频/文章的Top10访问次数 (video/article)

(2)按照地市统计最受欢迎的Top10课程 (ip)

(3)按照流量统计最受欢迎的Top10课程 (traffic)

3、数据可视化:

将统计结果倒入MySql数据库中,通过图形化展示的方式展现出来。


  清洗数据我就不作过多赘述,首先记一下sqoop上传mysql的表到hive的操作语句:

  sqoop import \
  --connect jdbc:mysql://node1:3306/hive2 \
  --username root \
  --password 20204112 \
  --table table \
  --num-mappers 1 \
  --hive-import \
  --fields-terminated-by "\t" \
  --hive-overwrite \
  --hive-table hive2.result (hive2是数据库名,result是表名)

  上传好后就可以在Hive里对数据进行处理,首先是建立四个表,我分别命名为table,table1,table2,table3:

  create table table (
  ip string,
  data string,
  day string,
  traffic string,
  type string,
  id string
  )
  ROW format delimited fields terminated by ',' STORED AS TEXTFILE;
  create table table1 (
  traffic string,
  id string,
  type string,
  times string
  )
  ROW format delimited fields terminated by ',' STORED AS TEXTFILE;
  create table table2 (
  ip string,
  id string,
  type string,
  traffic string
  )
  ROW format delimited fields terminated by ',' STORED AS TEXTFILE;
  create table table3 (
  ip string,
  id string,
  type string,
  ips string
  )
  ROW format delimited fields terminated by ',' STORED AS TEXTFILE;

  然后就可以开始处理数据了,就是按照要求向表里插入处理好的数据:

  insert overwrite table table1 SELECT
  SUM( traffic ) AS traffic,
  id,
  type,
  count(id) as times
  FROM
  `table`
  GROUP BY
  id,type
  ORDER BY
  traffic DESC ;
  insert overwrite table table2 SELECT ip,id,type,sum(traffic)as traff FROM `table` GROUP BY ip,id,type ORDER BY id,sum(traffic) DESC ;
  insder overwrite table table3 SELECT ip,id,type,count(ip) as ips FROM `table` GROUP BY ip,id,type ORDER BY count(ip) DESC;

  处理好数据后还需要将Hive里的表导入到mysql里,导出语句如下:

  sqoop export \
  --connect jdbc:mysql://node1:3306/hive2 \
  --username root \
  --password 20204112 \
  --table table3 \
  --num-mappers 1 \
  --export-dir /user/hive/warehouse/hive2.db/table3 \
  --input-fields-terminated-by ","

  三张表都要执行以上操作。

  接下来就是Java以及Html代码:

  Dao:

  package org.example.Dao;
   
  import java.sql.Connection;
  import java.sql.PreparedStatement;
  import java.sql.ResultSet;
  import java.sql.SQLException;
  import java.sql.Statement;
  import java.util.ArrayList;
  import java.util.List;
   
  import org.example.DBUtils.DBUtil;
  import org.example.Bean.HiveBean;
   
  public class HiveDao {
   
  public List<HiveBean> table1() {
  String sql1 = "SELECT traffic,id,times FROM `table1` WHERE type = 'video' LIMIT 10";
  List<HiveBean> list = new ArrayList<>();
  Connection conn = DBUtil.getConn();
  Statement state = null;
  ResultSet rs = null;
   
  try {
  state = conn.createStatement();
  rs = state.executeQuery(sql1);
  HiveBean bean = null;
  while (rs.next()) {
   
  String traffic = rs.getString("traffic");
  String id = rs.getString("id");
  String times = rs.getString("times");
  System.out.println(traffic);
  System.out.println(id);
  System.out.println(times);
  bean = new HiveBean(traffic,id,times);
  list.add(bean);
   
  }
  } catch (SQLException e) {
  e.printStackTrace();
  } finally {
  DBUtil.close(conn, state, rs);
  }
   
   
  return list;
  }
   
  public List<HiveBean> table1_1() {
  String sql = "SELECT traffic,id,times FROM `table1` WHERE type = 'article' LIMIT 10";
  List<HiveBean> list = new ArrayList<>();
  Connection conn = DBUtil.getConn();
  Statement state = null;
  ResultSet rs = null;
   
  try {
  state = conn.createStatement();
  rs = state.executeQuery(sql);
  HiveBean bean = null;
  while (rs.next()) {
  String traffic = rs.getString("traffic");
  String id = rs.getString("id");
  String times = rs.getString("times");
  System.out.println(traffic);
  System.out.println(id);
  System.out.println(times);
  bean = new HiveBean(traffic,id,times);
  list.add(bean);
   
  }
  } catch (SQLException e) {
  e.printStackTrace();
  } finally {
  DBUtil.close(conn, state, rs);
  }
   
  return list;
  }
   
  public List<HiveBean> table2() {
  String sql = "SELECT ip,ips FROM `table3` LIMIT 10";
  List<HiveBean> list = new ArrayList<>();
  Connection conn = DBUtil.getConn();
  Statement state = null;
  ResultSet rs = null;
   
  try {
  state = conn.createStatement();
  rs = state.executeQuery(sql);
  HiveBean bean = null;
  while (rs.next()) {
   
  String ip = rs.getString("ip");
  String ips = rs.getString("ips");
  System.out.println(ip);
  System.out.println(ips);
  bean = new HiveBean(ip,ips);
  list.add(bean);
   
  }
  } catch (SQLException e) {
  e.printStackTrace();
  } finally {
  DBUtil.close(conn, state, rs);
  }
   
  return list;
  }
   
   
   
  public List<HiveBean> table3() {
  String sql = "SELECT traffic,id,type FROM `table1` LIMIT 10";
  List<HiveBean> list = new ArrayList<>();
  Connection conn = DBUtil.getConn();
  Statement state = null;
  ResultSet rs = null;
   
  try {
  state = conn.createStatement();
  rs = state.executeQuery(sql);
  HiveBean bean = null;
  while (rs.next()) {
   
  String traffic = rs.getString("traffic");
  String id = rs.getString("id");
  String type = rs.getString("type");
  System.out.println(traffic);
  System.out.println(id);
  System.out.println(type);
  bean = new HiveBean(traffic,id,type);
  list.add(bean);
   
  }
  } catch (SQLException e) {
  e.printStackTrace();
  } finally {
  DBUtil.close(conn, state, rs);
  }
   
  return list;
  }
  }
   

  Bean:

  package org.example.Bean;
   
  public class HiveBean {
   
  private String ip;
   
  private String id;
   
  private String traffic;
   
  private String type;
   
  private String ips;
   
  private String times;
   
   
  public HiveBean() {
  super();
  }
   
  public HiveBean(String id) {
  super();
  this.id = id;
  }
   
  public HiveBean(String ip, String ips) {
  super();
  this.id = ip;
  this.ips = ips;
  }
   
  public HiveBean(String traffic, String id, String times) {
  super();
  this.traffic = traffic;
  this.id = id;
  this.times = times;
  }
   
  public HiveBean(String ip, String id, String type, String ips) {
  super();
  this.ip = ip;
  this.id = id;
  this.type = type;
  this.ips = ips;
  }
   
  public String getIp() {
  return ip;
  }
   
  public void setIp(String ip) {
  this.ip = ip;
  }
   
  public String getId() {
  return id;
  }
   
  public void setId(String id) {
  this.id = id;
  }
   
  public String getTraffic() {
  return traffic;
  }
   
  public void setTraffic(String traffic) {
  this.traffic = traffic;
  }
   
  public String getType() {
  return type;
  }
   
  public void setType(String type) {
  this.type = type;
  }
   
  public String getIps() {
  return ips;
  }
   
  public void setIps(String ips) {
  this.ips = ips;
  }
   
  public String getTimes() {
  return times;
  }
   
  public void setTimes(String times) {
  this.times = times;
  }
   
  @Override
  public String toString() {
  return "HiveBean{" +
  "ip='" + ip + '\'' +
  ", id='" + id + '\'' +
  ", traffic='" + traffic + '\'' +
  ", type='" + type + '\'' +
  ", ips='" + ips + '\'' +
  ", times='" + times + '\'' +
  '}';
  }
  }

  DBUtil:

  package org.example.DBUtils;
   
  import java.io.*;
  import java.util.Properties;
  import java.sql.*;
   
  public class DBUtil {
  private static String driver;
  private static String url;
  private static String user;
  private static String pass;
   
  static {
  //读取db.properties文件的内容
  InputStream is = DBUtil.class.getClassLoader().getResourceAsStream("db.properties");
  //创建properties类型的对象
  Properties p = new Properties();
  try {
  p.load(is);
  driver = p.getProperty("driver");
  url = p.getProperty("url");
  user = p.getProperty("username");
  pass = p.getProperty("password");
  //加载数据库的驱动
  Class.forName(driver);
  System.out.println("驱动加载成功");
  } catch (Exception e) {
  throw new RuntimeException(e);
  }
  }
   
   
  public static Connection getConn() {
   
  Connection conn = null;
  try {
  //连接数据库
  conn = DriverManager.getConnection(url, user, pass);
  System.out.println("数据库连接成功");
  } catch (Exception e) {
  e.printStackTrace();
  System.out.println("数据库连接失败");
  }
  return conn;
  }
   
  public static void close(PreparedStatement ps, Connection conn) {
  if (ps != null) {
  try {
  ps.close();
  } catch (SQLException e) {
  e.printStackTrace();
  }
  }
   
  if (conn != null) {
  try {
  conn.close();
  } catch (SQLException e) {
  e.printStackTrace();
  }
  }
  }
   
  public static void close(ResultSet result, PreparedStatement ps, Connection conn) {
  if (result != null) {
  try {
  result.close();
  } catch (SQLException e) {
  e.printStackTrace();
  }
  }
   
  if (ps != null) {
  try {
  ps.close();
  } catch (SQLException e) {
  e.printStackTrace();
  }
  }
   
  if (conn != null) {
  try {
  conn.close();
  } catch (SQLException e) {
  e.printStackTrace();
  }
  }
  }
   
   
  public static void close(Connection conn, Statement state, ResultSet result) {
  try {
  if (result != null) {
  result.close();
  result = null;
  }
  if (state != null) {
  state.close();
  state = null;
  }
   
  if (conn != null) {
  conn.close();
  conn = null;
  }
   
  } catch (SQLException e) {
  // TODO Auto-generated catch block
  e.printStackTrace();
  }
  }
   
  public static void main(String[] args) throws SQLException {
  Connection conn = getConn();
  Statement state = conn.createStatement();
  ResultSet result = state.executeQuery("select id,type from table1 limit 10");
  while (result.next()) {
  System.out.print(result.getString("type") + " ");
  System.out.print(result.getString("id"));
   
  }
   
  }
   
  }
   
   

  Servlet:

  package org.example.Servlet;
   
  import java.io.IOException;
  import java.util.List;
   
  import javax.servlet.ServletException;
  import javax.servlet.annotation.WebServlet;
  import javax.servlet.http.HttpServlet;
  import javax.servlet.http.HttpServletRequest;
  import javax.servlet.http.HttpServletResponse;
   
  import org.example.Dao.HiveDao;
  import org.example.Bean.HiveBean;
   
  @WebServlet("/HiveServlet")
  public class HiveServlet extends HttpServlet {
   
  /**
  * 特有id号
  */
  private static final long serialVersionUID = 1L;
  HiveDao dao = new HiveDao();
  /**
  * 方法选择
  * @return
  * @throws IOException
  * @throws ServletException
  */
  protected void service(HttpServletRequest req,HttpServletResponse resp) throws ServletException, IOException
  {
  req.setCharacterEncoding("utf-8");
  String method = req.getParameter("method");
  if("table1".equals(method)) {
  table1(req,resp);
  }else if("table1_1".equals(method)) {
  table1_1(req,resp);
  }else if("table2".equals(method)) {
  table2(req,resp);
  }else if("table3".equals(method)) {
  table3(req,resp);
  }
  }
  private void table1(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
  // TODO Auto-generated method stub
  req.setCharacterEncoding("utf-8");
  List<HiveBean> table1 = dao.table1();
  req.setAttribute("table1", table1);
  req.getRequestDispatcher("table1.jsp").forward(req, resp);
   
  }
   
  private void table1_1(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
  req.setCharacterEncoding("utf-8");
  List<HiveBean> table1_1 = dao.table1_1();
  req.setAttribute("table1_1",table1_1);
  req.getRequestDispatcher("table1.jsp").forward(req, resp);
  }
   
  private void table2(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
  req.setCharacterEncoding("utf-8");
  List<HiveBean> table2 = dao.table2();
  req.setAttribute("table2", table2);
  req.getRequestDispatcher("table2.jsp").forward(req, resp);
  }
   
  private void table3(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
  req.setCharacterEncoding("utf-8");
  List<HiveBean> table3 = dao.table3();
  req.setAttribute("table3", table3);
  req.getRequestDispatcher("table3.jsp").forward(req, resp);
  }
  }

  index.jsp:

  <%@ page language="java" contentType="text/html; charset=UTF-8"
  pageEncoding="UTF-8"%>
  <!DOCTYPE html>
  <html>
  <head>
  <meta charset="UTF-8">
  <title>查询</title>
  <script src="layui/jquery.min.js"></script>
   
  <style>
  .a{
  margin-bottom: 20px;
  }
  .b{
  margin-top:45px;
  margin-bottom: 20px;
  }
  .c{
  width:300px;
  height:240px;
  background:rgb(195,195,195);
  position:absolute;
  top:30%;
  left:50%;
  /*transform:translaet(-50%,-50%);*/
  }
  .d{
  font-size:5px;
  color:black;
  font-family:"宋体";
  }
  .e{
  width:240px;
  height:30px;
  margin-left:30px;
  background-color:white;
  }
  .f{
  margin-left:30px;
  margin-top:45px;
  margin-bottom: 20px;
  }
  .g{
  margin-left:30px;
  margin-bottom: 20px;
  }
  </style>
  </head>
  <body>
  <div class="c">
  <form action="HiveServlet?method=table1" method="post" onsubmit="return check()">
   
  <div>
  <input type="submit" class="e" value="table1">
  </div>
  </form>
   
  <form action="HiveServlet?method=table2" method="post" onsubmit="return check()">
   
  <div>
  <input type="submit" class="e" value="table2">
  </div>
  </form>
   
  <form action="HiveServlet?method=table3" method="post" onsubmit="return check()">
   
  <div>
  <input type="submit" class="e" value="table3">
  </div>
  </form>
  </div>
   
   
  </body>
  </html>

  table1.jsp:

  <%--
  Created by IntelliJ IDEA.
  User: 16475
  Date: 2022/10/14
  Time: 21:08
  To change this template use File | Settings | File Templates.
  --%>
  <%@ page contentType="text/html;charset=UTF-8" language="java" %>
  <%@taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
  <html>
  <head>
  <title>最受欢迎的文章TOP10</title>
  <script src="layui/jquery.min.js"></script>
  <script src="echarts/dist/echarts.js"></script>
  <style>
  *{margin:0;padding:0}
  html,body{
  width:auto;
  height:auto;
  }
  #main1{
  width:1500px;
  height:700px;
  margin:auto;
  border:1px solid #ddd;
  }
  #main2{
  width:1500px;
  height:700px;
  margin:800px;
  border:1px solid #ddd;
  }
  /*默认长宽比0.75*/
  </style>
  </head>
  <body>
  <%
  Object message = request.getAttribute("message");
  if(message!=null && !"".equals(message)){
   
  %>
  <script type="text/javascript">
  alert("<%=request.getAttribute("message")%>");
  </script>
  <%} %>
  <div id="main1">
   
  </div>
   
  <div id="main2">
   
  </div>
  <script type="text/javascript">
   
  var myChart = echarts.init(document.getElementById('main1'));
   
  var arr1 = new Array();
  var arr2 = new Array();
   
  var index1 = 0;
  var index2 = 0;
   
  <c:forEach items="${table1}" var="table1">
  arr1[index1++] = ${table1.times};
  </c:forEach>
   
  <c:forEach items="${table1_1}" var="table1_1">
  arr2[index2++] = ${table1_1.times};
  </c:forEach>
  var option1 = {
  title: {
  text: '最受欢迎的文章TOP10'
  },
  tooltip: {
  },
  legend: {
  data:[{name:"video",icon:"circle"},{name:"article",icon:"circle"}]
  },
  xAxis : [
  {
  type : 'category',
  data :[
  <c:forEach items="${table1}" var="table1">
  ["${table1.id}"],
  </c:forEach>
  ],
  <%--data : [--%>
  <%-- name="article",--%>
  <%-- <c:forEach items="${table1_1}" var="table1_1">--%>
  <%-- ["${table1_1.id}"],--%>
  <%-- </c:forEach>--%>
  <%--]--%>
  }
  ],
  yAxis : [
  {
  type : 'value'
  }
  ],
  series : [
  {
  name:'video',
  type:'bar',
  data: arr1
  }
  ],
  // series : [
  // {
  // name:'article',
  // type:'bar',
  // data: arr2
  // }
  // ]
  };
  <%--var myChart = echarts.init(document.getElementById('main2'));--%>
  <%--var option2 = {--%>
  <%-- title: {--%>
  <%-- text: '最受欢迎的文章TOP10'--%>
  <%-- },--%>
  <%-- tooltip: {--%>
  <%-- },--%>
  <%-- legend: {--%>
  <%-- data:[{name:"video",icon:"circle"},{name:"article",icon:"circle"}]--%>
  <%-- },--%>
  <%-- xAxis : [--%>
  <%-- {--%>
  <%-- type : 'category',--%>
  <%-- data : [--%>
  <%-- name="article",--%>
  <%-- <c:forEach items="${table1_1}" var="table1_1">--%>
  <%-- ["${table1_1.id}"],--%>
  <%-- </c:forEach>--%>
  <%-- ]--%>
  <%-- }--%>
  <%-- ],--%>
  <%-- yAxis : [--%>
  <%-- {--%>
  <%-- type : 'value'--%>
  <%-- }--%>
  <%-- ],--%>
  <%-- series : [--%>
  <%-- {--%>
  <%-- name:'article',--%>
  <%-- type:'bar',--%>
  <%-- data: arr2--%>
  <%-- }--%>
  <%-- ]--%>
  <%--};--%>
  myChart.setOption(option1);
  myChart.setOption(option2);
  // myChart.on('click', function (params) {
  // var url = "HiveServlet?method=searchTime&day_id=" + params.name;
  // window.location.href = url;
  // });
   
  </script>
  </body>
  </html>

  table2.jsp:

  <%--
  Created by IntelliJ IDEA.
  User: 16475
  Date: 2022/10/14
  Time: 21:08
  To change this template use File | Settings | File Templates.
  --%>
  <%@ page contentType="text/html;charset=UTF-8" language="java" %>
  <%@taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
  <html>
  <head>
  <title>年度汇总</title>
  <script src="layui/jquery.min.js"></script>
  <script src="echarts/dist/echarts.js"></script>
  <style>
  *{margin:0;padding:0}
  html,body{
  width:auto;
  height:auto;
  }
  #main{
  width:1500px;
  height:700px;
  margin:auto;
  border:1px solid #ddd;
  }
  /*默认长宽比0.75*/
  </style>
  </head>
  <body>
  <%
  Object message = request.getAttribute("message");
  if(message!=null && !"".equals(message)){
   
  %>
  <script type="text/javascript">
  alert("<%=request.getAttribute("message")%>");
  </script>
  <%} %>
  <div id="main">
   
  </div>
  <script type="text/javascript">
   
  var myChart = echarts.init(document.getElementById('main'));
  var arr = new Array();
  var index = 0;
  <c:forEach items="${table2}" var="table2">
  arr[index++] = ${table2.ips};
  </c:forEach>
   
  var option = {
  title: {
  text: '城市排名'
  },
  tooltip: {
  },
  legend: {
  data:['城市排名']
  },
  xAxis : [
  {
  type : 'category',
  data : [
  <c:forEach items="${table2}" var="table2">
  ["${table2.ip}"],
  </c:forEach>
  ]
  }
  ],
  yAxis : [
  {
  type : 'value'
  }
  ],
  series : [
  {
  name:'访问次数',
  type:'bar',
  data: arr
  }
  ]
  };
  myChart.setOption(option);
  // myChart.on('click', function (params) {
  // var url = "HiveServlet?method=searchTime&day_id=" + params.name;
  // window.location.href = url;
  // });
   
  </script>
  </body>
  </html>

  table3.jsp:

  <%--
  Created by IntelliJ IDEA.
  User: 16475
  Date: 2022/10/14
  Time: 21:08
  To change this template use File | Settings | File Templates.
  --%>
  <%@ page contentType="text/html;charset=UTF-8" language="java" %>
  <%@taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
  <html>
  <head>
  <title>年度汇总</title>
  <script src="layui/jquery.min.js"></script>
  <script src="echarts/dist/echarts.js"></script>
  <style>
  *{margin:0;padding:0}
  html,body{
  width:auto;
  height:auto;
  }
  #main{
  width:1500px;
  height:700px;
  margin:auto;
  border:1px solid #ddd;
  }
  /*默认长宽比0.75*/
  </style>
  </head>
  <body>
  <%
  Object message = request.getAttribute("message");
  if(message!=null && !"".equals(message)){
   
  %>
  <script type="text/javascript">
  alert("<%=request.getAttribute("message")%>");
  </script>
  <%} %>
  <div id="main">
   
  </div>
  <script type="text/javascript">
   
  var myChart = echarts.init(document.getElementById('main'));
  var arr = new Array();
  var index = 0;
  <c:forEach items="${table3}" var="table3">
  arr[index++] = ${table3.traffic};
  </c:forEach>
   
  var option = {
  title: {
  text: '排名'
  },
  tooltip: {
  },
  legend: {
  data:['排名']
  },
  xAxis : [
  {
  type : 'category',
  data : [
  <c:forEach items="${table3}" var="table3">
  ["${table3.id}"],
  </c:forEach>
  ]
  }
  ],
  yAxis : [
  {
  type : 'value'
  }
  ],
  series : [
  {
  name:'id',
  type:'bar',
  data: arr
  }
  ]
  };
  myChart.setOption(option);
  // myChart.on('click', function (params) {
  // var url = "HiveServlet?method=searchTime&day_id=" + params.name;
  // window.location.href = url;
  // });
   
  </script>
  </body>
  </html>

  运行结果如下所示:

image-20221020130628014

image-20221020130642742

image-20221020130652345

image-20221020130700952

posted @   new菜鸟  阅读(10)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 全程不用写代码,我用AI程序员写了一个飞机大战
· DeepSeek 开源周回顾「GitHub 热点速览」
· 记一次.NET内存居高不下排查解决与启示
· MongoDB 8.0这个新功能碉堡了,比商业数据库还牛
· .NET10 - 预览版1新功能体验(一)
点击右上角即可分享
微信分享提示