Hive第二次测试总结

  首先看一下题目:

Result文件数据说明:

Ip:106.39.41.166,(城市)

Date:10/Nov/2016:00:01:02 +0800,(日期)

Day:10,(天数)

Traffic: 54 ,(流量)

Type: video,(类型:视频video或文章article)

Id: 8701(视频或者文章的id)

测试要求:

1、 数据清洗:****按照进行数据清洗,并将清洗后的数据导入hive数据库中。

两阶段数据清洗:

(1)第一阶段:把需要的信息从原始日志中提取出来

ip: 199.30.25.88

time: 10/Nov/2016:00:01:03 +0800

traffic: 62

文章: article/11325

视频: video/3235

(2)第二阶段:根据提取出来的信息做精细化操作

ip--->城市 city(IP)

date--> time:2016-11-10 00:01:03

day: 10

traffic:62

type:article/video

id:11325

(3)hive数据库表结构:

create table data( ip string, time string , day string, traffic bigint,

type string, id string )

2、****数据分析:在HIVE统计下列数据。

(1)统计最受欢迎的视频/文章的Top10访问次数 (video/article)

(2)按照地市统计最受欢迎的Top10课程 (ip)

(3)按照流量统计最受欢迎的Top10课程 (traffic)

3、数据可视化:

将统计结果倒入MySql数据库中,通过图形化展示的方式展现出来。


  清洗数据我就不作过多赘述,首先记一下sqoop上传mysql的表到hive的操作语句:

sqoop import \
--connect jdbc:mysql://node1:3306/hive2 \
--username root \
--password 20204112 \
--table table \
--num-mappers 1 \
--hive-import \
--fields-terminated-by "\t" \
--hive-overwrite \
--hive-table hive2.result (hive2是数据库名,result是表名)

  上传好后就可以在Hive里对数据进行处理,首先是建立四个表,我分别命名为table,table1,table2,table3:

create table table (
ip string,
data string,
day string,
traffic string,
type string,
id string
)
ROW format delimited fields terminated by ',' STORED AS TEXTFILE;
create table table1 (
traffic string,
id string,
type string,
times string
)
ROW format delimited fields terminated by ',' STORED AS TEXTFILE;
create table table2 (
ip string,
id string,
type string,
traffic string
)
ROW format delimited fields terminated by ',' STORED AS TEXTFILE;
create table table3 (
ip string,
id string,
type string,
ips string
)
ROW format delimited fields terminated by ',' STORED AS TEXTFILE;

  然后就可以开始处理数据了,就是按照要求向表里插入处理好的数据:

insert overwrite table table1 SELECT
 SUM( traffic ) AS traffic,
 id,
 type,
 count(id) as times
FROM
 `table` 
GROUP BY
 id,type
ORDER BY
 traffic DESC ;
insert overwrite table table2 SELECT ip,id,type,sum(traffic)as traff FROM `table` GROUP BY ip,id,type ORDER BY id,sum(traffic) DESC ;
insder overwrite table table3 SELECT ip,id,type,count(ip) as ips FROM `table` GROUP BY ip,id,type ORDER BY count(ip) DESC;

  处理好数据后还需要将Hive里的表导入到mysql里,导出语句如下:

sqoop export \
--connect jdbc:mysql://node1:3306/hive2 \
--username root \
--password 20204112 \
--table table3 \
--num-mappers 1 \
--export-dir /user/hive/warehouse/hive2.db/table3 \
--input-fields-terminated-by ","

  三张表都要执行以上操作。

  接下来就是Java以及Html代码:

  Dao:

package org.example.Dao;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;

import org.example.DBUtils.DBUtil;
import org.example.Bean.HiveBean;

public class HiveDao {

    public List<HiveBean> table1() {
        String sql1 = "SELECT traffic,id,times FROM `table1` WHERE type = 'video' LIMIT 10";
        List<HiveBean> list = new ArrayList<>();
        Connection conn = DBUtil.getConn();
        Statement state = null;
        ResultSet rs = null;

        try {
            state = conn.createStatement();
            rs = state.executeQuery(sql1);
            HiveBean bean = null;
            while (rs.next()) {

                    String traffic = rs.getString("traffic");
                    String id = rs.getString("id");
                    String times = rs.getString("times");
                    System.out.println(traffic);
                    System.out.println(id);
                    System.out.println(times);
                    bean = new HiveBean(traffic,id,times);
                    list.add(bean);

            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            DBUtil.close(conn, state, rs);
        }


        return list;
    }

    public List<HiveBean> table1_1() {
        String sql = "SELECT traffic,id,times FROM `table1` WHERE type = 'article' LIMIT 10";
        List<HiveBean> list = new ArrayList<>();
        Connection conn = DBUtil.getConn();
        Statement state = null;
        ResultSet rs = null;

        try {
            state = conn.createStatement();
            rs = state.executeQuery(sql);
            HiveBean bean = null;
            while (rs.next()) {
                String traffic = rs.getString("traffic");
                String id = rs.getString("id");
                String times = rs.getString("times");
                System.out.println(traffic);
                System.out.println(id);
                System.out.println(times);
                bean = new HiveBean(traffic,id,times);
                list.add(bean);

            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            DBUtil.close(conn, state, rs);
        }

        return list;
    }

    public List<HiveBean> table2() {
        String sql = "SELECT ip,ips FROM `table3` LIMIT 10";
        List<HiveBean> list = new ArrayList<>();
        Connection conn = DBUtil.getConn();
        Statement state = null;
        ResultSet rs = null;

        try {
            state = conn.createStatement();
            rs = state.executeQuery(sql);
            HiveBean bean = null;
            while (rs.next()) {

                String ip = rs.getString("ip");
                String ips = rs.getString("ips");
                System.out.println(ip);
                System.out.println(ips);
                bean = new HiveBean(ip,ips);
                list.add(bean);

            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            DBUtil.close(conn, state, rs);
        }

        return list;
    }



    public List<HiveBean> table3() {
        String sql = "SELECT traffic,id,type FROM `table1` LIMIT 10";
        List<HiveBean> list = new ArrayList<>();
        Connection conn = DBUtil.getConn();
        Statement state = null;
        ResultSet rs = null;

        try {
            state = conn.createStatement();
            rs = state.executeQuery(sql);
            HiveBean bean = null;
            while (rs.next()) {

                String traffic = rs.getString("traffic");
                String id = rs.getString("id");
                String type = rs.getString("type");
                System.out.println(traffic);
                System.out.println(id);
                System.out.println(type);
                bean = new HiveBean(traffic,id,type);
                list.add(bean);

            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            DBUtil.close(conn, state, rs);
        }

        return list;
    }
}

  Bean:

package org.example.Bean;

public class HiveBean {

    private String ip;

    private String id;

    private String traffic;

    private String type;

    private String ips;

    private String times;


    public HiveBean() {
        super();
    }

    public HiveBean(String id) {
        super();
        this.id = id;
    }

    public HiveBean(String ip, String ips) {
        super();
        this.id = ip;
        this.ips = ips;
    }

    public HiveBean(String traffic, String id, String times) {
        super();
        this.traffic = traffic;
        this.id = id;
        this.times = times;
    }

    public HiveBean(String ip, String id, String type, String ips) {
        super();
        this.ip = ip;
        this.id = id;
        this.type = type;
        this.ips = ips;
    }

    public String getIp() {
        return ip;
    }

    public void setIp(String ip) {
        this.ip = ip;
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getTraffic() {
        return traffic;
    }

    public void setTraffic(String traffic) {
        this.traffic = traffic;
    }

    public String getType() {
        return type;
    }

    public void setType(String type) {
        this.type = type;
    }

    public String getIps() {
        return ips;
    }

    public void setIps(String ips) {
        this.ips = ips;
    }

    public String getTimes() {
        return times;
    }

    public void setTimes(String times) {
        this.times = times;
    }

    @Override
    public String toString() {
        return "HiveBean{" +
                "ip='" + ip + '\'' +
                ", id='" + id + '\'' +
                ", traffic='" + traffic + '\'' +
                ", type='" + type + '\'' +
                ", ips='" + ips + '\'' +
                ", times='" + times + '\'' +
                '}';
    }
}

  DBUtil:

package org.example.DBUtils;

import java.io.*;
import java.util.Properties;
import java.sql.*;

public class DBUtil {
    private static String driver;
    private static String url;
    private static String user;
    private static String pass;

    static {
        //读取db.properties文件的内容
        InputStream is = DBUtil.class.getClassLoader().getResourceAsStream("db.properties");
        //创建properties类型的对象
        Properties p = new Properties();
        try {
            p.load(is);
            driver = p.getProperty("driver");
            url = p.getProperty("url");
            user = p.getProperty("username");
            pass = p.getProperty("password");
            //加载数据库的驱动
            Class.forName(driver);
            System.out.println("驱动加载成功");
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }


    public static Connection getConn() {

        Connection conn = null;
        try {
            //连接数据库
            conn = DriverManager.getConnection(url, user, pass);
            System.out.println("数据库连接成功");
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("数据库连接失败");
        }
        return conn;
    }

    public static void close(PreparedStatement ps, Connection conn) {
        if (ps != null) {
            try {
                ps.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }

        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    public static void close(ResultSet result, PreparedStatement ps, Connection conn) {
        if (result != null) {
            try {
                result.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }

        if (ps != null) {
            try {
                ps.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }

        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }


    public static void close(Connection conn, Statement state, ResultSet result) {
        try {
            if (result != null) {
                result.close();
                result = null;
            }
            if (state != null) {
                state.close();
                state = null;
            }

            if (conn != null) {
                conn.close();
                conn = null;
            }

        } catch (SQLException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }

    public static void main(String[] args) throws SQLException {
        Connection conn = getConn();
        Statement state = conn.createStatement();
        ResultSet result = state.executeQuery("select id,type from table1 limit 10");
        while (result.next()) {
            System.out.print(result.getString("type") + "     ");
            System.out.print(result.getString("id"));

        }

    }

}


  Servlet:

package org.example.Servlet;

import java.io.IOException;
import java.util.List;

import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.example.Dao.HiveDao;
import org.example.Bean.HiveBean;

@WebServlet("/HiveServlet")
public class HiveServlet extends HttpServlet {

    /**
     * 特有id号
     */
    private static final long serialVersionUID = 1L;
    HiveDao dao = new HiveDao();
    /**
     * 方法选择
     * @return
     * @throws IOException
     * @throws ServletException
     */
    protected void service(HttpServletRequest req,HttpServletResponse resp) throws ServletException, IOException
    {
        req.setCharacterEncoding("utf-8");
        String method = req.getParameter("method");
        if("table1".equals(method)) {
            table1(req,resp);
        }else if("table1_1".equals(method)) {
            table1_1(req,resp);
        }else if("table2".equals(method)) {
            table2(req,resp);
        }else if("table3".equals(method)) {
            table3(req,resp);
        }
    }
    private void table1(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
        // TODO Auto-generated method stub
        req.setCharacterEncoding("utf-8");
        List<HiveBean> table1 = dao.table1();
        req.setAttribute("table1", table1);
        req.getRequestDispatcher("table1.jsp").forward(req, resp);

    }

    private void table1_1(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
        req.setCharacterEncoding("utf-8");
        List<HiveBean> table1_1 = dao.table1_1();
        req.setAttribute("table1_1",table1_1);
        req.getRequestDispatcher("table1.jsp").forward(req, resp);
    }

    private void table2(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
        req.setCharacterEncoding("utf-8");
        List<HiveBean> table2 = dao.table2();
        req.setAttribute("table2", table2);
        req.getRequestDispatcher("table2.jsp").forward(req, resp);
    }

    private void table3(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
        req.setCharacterEncoding("utf-8");
        List<HiveBean> table3 = dao.table3();
        req.setAttribute("table3", table3);
        req.getRequestDispatcher("table3.jsp").forward(req, resp);
    }
}

  index.jsp:

<%@ page language="java" contentType="text/html; charset=UTF-8"
         pageEncoding="UTF-8"%>
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>查询</title>
    <script src="layui/jquery.min.js"></script>

    <style>
        .a{
            margin-bottom: 20px;
        }
        .b{
            margin-top:45px;
            margin-bottom: 20px;
        }
        .c{
            width:300px;
            height:240px;
            background:rgb(195,195,195);
            position:absolute;
            top:30%;
            left:50%;
            /*transform:translaet(-50%,-50%);*/
        }
        .d{
            font-size:5px;
            color:black;
            font-family:"宋体";
        }
        .e{
            width:240px;
            height:30px;
            margin-left:30px;
            background-color:white;
        }
        .f{
            margin-left:30px;
            margin-top:45px;
            margin-bottom: 20px;
        }
        .g{
            margin-left:30px;
            margin-bottom: 20px;
        }
    </style>
</head>
<body>
<div class="c">
    <form action="HiveServlet?method=table1"  method="post"  onsubmit="return check()">

        <div>
            <input type="submit" class="e" value="table1">
        </div>
    </form>

    <form action="HiveServlet?method=table2"  method="post"  onsubmit="return check()">

        <div>
            <input type="submit" class="e" value="table2">
        </div>
    </form>

    <form action="HiveServlet?method=table3"  method="post"  onsubmit="return check()">

        <div>
            <input type="submit" class="e" value="table3">
        </div>
    </form>
</div>


</body>
</html>

  table1.jsp:

<%--
  Created by IntelliJ IDEA.
  User: 16475
  Date: 2022/10/14
  Time: 21:08
  To change this template use File | Settings | File Templates.
--%>
<%@ page contentType="text/html;charset=UTF-8" language="java" %>
<%@taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
<html>
<head>
    <title>最受欢迎的文章TOP10</title>
    <script src="layui/jquery.min.js"></script>
    <script src="echarts/dist/echarts.js"></script>
    <style>
        *{margin:0;padding:0}
        html,body{
            width:auto;
            height:auto;
        }
        #main1{
            width:1500px;
            height:700px;
            margin:auto;
            border:1px solid #ddd;
        }
        #main2{
            width:1500px;
            height:700px;
            margin:800px;
            border:1px solid #ddd;
        }
        /*默认长宽比0.75*/
    </style>
</head>
<body>
<%
    Object message = request.getAttribute("message");
    if(message!=null && !"".equals(message)){

%>
<script type="text/javascript">
    alert("<%=request.getAttribute("message")%>");
</script>
<%} %>
<div id="main1">

</div>

<div id="main2">

</div>
<script type="text/javascript">

    var myChart = echarts.init(document.getElementById('main1'));

    var arr1 = new Array();
    var arr2 = new Array();

    var index1 = 0;
    var index2 = 0;

    <c:forEach items="${table1}" var="table1">
    arr1[index1++] = ${table1.times};
    </c:forEach>

    <c:forEach items="${table1_1}" var="table1_1">
    arr2[index2++] = ${table1_1.times};
    </c:forEach>
    var option1 = {
        title: {
            text: '最受欢迎的文章TOP10'
        },
        tooltip: {
        },
        legend: {
            data:[{name:"video",icon:"circle"},{name:"article",icon:"circle"}]
        },
        xAxis : [
            {
                type : 'category',
                data :[
                    <c:forEach items="${table1}" var="table1">
                    ["${table1.id}"],
                    </c:forEach>
                ],
                <%--data : [--%>
                <%--    name="article",--%>
                <%--    <c:forEach items="${table1_1}" var="table1_1">--%>
                <%--    ["${table1_1.id}"],--%>
                <%--    </c:forEach>--%>
                <%--]--%>
            }
        ],
        yAxis : [
            {
                type : 'value'
            }
        ],
        series : [
            {
                name:'video',
                type:'bar',
                data: arr1
            }
        ],
        // series : [
        //     {
        //         name:'article',
        //         type:'bar',
        //         data: arr2
        //     }
        // ]
    };
    <%--var myChart = echarts.init(document.getElementById('main2'));--%>
    <%--var option2 = {--%>
    <%--    title: {--%>
    <%--        text: '最受欢迎的文章TOP10'--%>
    <%--    },--%>
    <%--    tooltip: {--%>
    <%--    },--%>
    <%--    legend: {--%>
    <%--        data:[{name:"video",icon:"circle"},{name:"article",icon:"circle"}]--%>
    <%--    },--%>
    <%--    xAxis : [--%>
    <%--        {--%>
    <%--            type : 'category',--%>
    <%--            data : [--%>
    <%--                name="article",--%>
    <%--                <c:forEach items="${table1_1}" var="table1_1">--%>
    <%--                ["${table1_1.id}"],--%>
    <%--                </c:forEach>--%>
    <%--            ]--%>
    <%--        }--%>
    <%--    ],--%>
    <%--    yAxis : [--%>
    <%--        {--%>
    <%--            type : 'value'--%>
    <%--        }--%>
    <%--    ],--%>
    <%--    series : [--%>
    <%--        {--%>
    <%--            name:'article',--%>
    <%--            type:'bar',--%>
    <%--            data: arr2--%>
    <%--        }--%>
    <%--    ]--%>
    <%--};--%>
    myChart.setOption(option1);
    myChart.setOption(option2);
    // myChart.on('click', function (params) {
    //     var url = "HiveServlet?method=searchTime&day_id=" + params.name;
    //     window.location.href = url;
    // });

</script>
</body>
</html>

  table2.jsp:

<%--
  Created by IntelliJ IDEA.
  User: 16475
  Date: 2022/10/14
  Time: 21:08
  To change this template use File | Settings | File Templates.
--%>
<%@ page contentType="text/html;charset=UTF-8" language="java" %>
<%@taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
<html>
<head>
    <title>年度汇总</title>
    <script src="layui/jquery.min.js"></script>
    <script src="echarts/dist/echarts.js"></script>
    <style>
        *{margin:0;padding:0}
        html,body{
            width:auto;
            height:auto;
        }
        #main{
            width:1500px;
            height:700px;
            margin:auto;
            border:1px solid #ddd;
        }
        /*默认长宽比0.75*/
    </style>
</head>
<body>
<%
    Object message = request.getAttribute("message");
    if(message!=null && !"".equals(message)){

%>
<script type="text/javascript">
    alert("<%=request.getAttribute("message")%>");
</script>
<%} %>
<div id="main">

</div>
<script type="text/javascript">

    var myChart = echarts.init(document.getElementById('main'));
    var arr = new Array();
    var index = 0;
    <c:forEach items="${table2}" var="table2">
    arr[index++] = ${table2.ips};
    </c:forEach>

    var option = {
        title: {
            text: '城市排名'
        },
        tooltip: {
        },
        legend: {
            data:['城市排名']
        },
        xAxis : [
            {
                type : 'category',
                data : [
                    <c:forEach items="${table2}" var="table2">
                    ["${table2.ip}"],
                    </c:forEach>
                ]
            }
        ],
        yAxis : [
            {
                type : 'value'
            }
        ],
        series : [
            {
                name:'访问次数',
                type:'bar',
                data: arr
            }
        ]
    };
    myChart.setOption(option);
    // myChart.on('click', function (params) {
    //     var url = "HiveServlet?method=searchTime&day_id=" + params.name;
    //     window.location.href = url;
    // });

</script>
</body>
</html>

  table3.jsp:

<%--
  Created by IntelliJ IDEA.
  User: 16475
  Date: 2022/10/14
  Time: 21:08
  To change this template use File | Settings | File Templates.
--%>
<%@ page contentType="text/html;charset=UTF-8" language="java" %>
<%@taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
<html>
<head>
    <title>年度汇总</title>
    <script src="layui/jquery.min.js"></script>
    <script src="echarts/dist/echarts.js"></script>
    <style>
        *{margin:0;padding:0}
        html,body{
            width:auto;
            height:auto;
        }
        #main{
            width:1500px;
            height:700px;
            margin:auto;
            border:1px solid #ddd;
        }
        /*默认长宽比0.75*/
    </style>
</head>
<body>
<%
    Object message = request.getAttribute("message");
    if(message!=null && !"".equals(message)){

%>
<script type="text/javascript">
    alert("<%=request.getAttribute("message")%>");
</script>
<%} %>
<div id="main">

</div>
<script type="text/javascript">

    var myChart = echarts.init(document.getElementById('main'));
    var arr = new Array();
    var index = 0;
    <c:forEach items="${table3}" var="table3">
    arr[index++] = ${table3.traffic};
    </c:forEach>

    var option = {
        title: {
            text: '排名'
        },
        tooltip: {
        },
        legend: {
            data:['排名']
        },
        xAxis : [
            {
                type : 'category',
                data : [
                    <c:forEach items="${table3}" var="table3">
                    ["${table3.id}"],
                    </c:forEach>
                ]
            }
        ],
        yAxis : [
            {
                type : 'value'
            }
        ],
        series : [
            {
                name:'id',
                type:'bar',
                data: arr
            }
        ]
    };
    myChart.setOption(option);
    // myChart.on('click', function (params) {
    //     var url = "HiveServlet?method=searchTime&day_id=" + params.name;
    //     window.location.href = url;
    // });

</script>
</body>
</html>

  运行结果如下所示:

image-20221020130628014

image-20221020130642742

image-20221020130652345

image-20221020130700952

posted @ 2022-10-20 13:08  信2005-2刘海涛  阅读(95)  评论(0编辑  收藏  举报