日志数据分析和数据的可视化处理
数据分析:在HIVE统计下列数据。
(1)统计最受欢迎的视频/文章的Top10访问次数 (video/article)
(2)按照地市统计最受欢迎的Top10课程 (ip)
(3)按照流量统计最受欢迎的Top10课程 (traffic)
1、insert overwrite table data1_1 select count(*),type,id from data0 group by id order by count(*) desc limit 0,10;
表的结构大致如下:建表时记得需要修改为按逗号’ , ‘ 进行分隔读取,如下:
Create table test2(day_id varchar(30), sale_nbr varchar(30), buy_nbr varchar(30), cnt varchar(30), round varchar(30)) row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde' WITH SERDEPROPERTIES ( "separatorChar"="," ) STORED AS TEXTFILE;
2、create table if not exists data_ip2 comment "ip" as select ip, id, count(traffic) as cnt_traffic from data2 group by ip,id order by cnt_traffic desc ;
这个也是这次测试中比较麻烦的一步了,题目中要求按照地势求出最受欢迎的top10,按照每个ip相同的地方算一个地势,工作量非常大。所以后面的可视化阶段我设置了点击事件用来展示。下图展示的是所有地势id的流量出现次数。
3、 insert overwrite table data1_3 select sum(traffic),type,id from data0 group by type,id order by sum(traffic) desc limit 0,10;
表三和表二基本一致。
到这里要的数据都展示出来了,下面就要开始可视化的操作了:
表一圆柱图:
<!DOCTYPE html> <html> <head> <meta charset="utf-8" /> <title>圆柱图</title> <!-- 引入刚刚下载的 ECharts 文件 --> <script src="JS/echarts.js"></script> <script src="JS/jquery-3.1.1.js"></script><!--js文件在最后都有哦--> <script src="https://cdn.jsdelivr.net/npm/echarts-gl/dist/echarts-gl.min.js"></script> </head> <body> <div id="main" class="list-main"> <!-- 为 ECharts 准备一个具备大小(宽高)的 DOM --> <div id="areaone" style="width: 1500px;height:1500px;"></div> <script type="text/javascript"> var count0 = [];//定义数组用来接受后台的数据,数组名随意啦 var type0 = []; var id0 = []; var url = "selectdata1"; <!--使用ajax动态获取数据,将获取的数据放到数组中--> $.ajax({ async: false, type: "GET", url: url, dataType: 'json', contentType: "application/json;charset=UTF-8", success: function(result){ console.log(result); for (var i = 0; i < result.length; i++) { count0.push(result[i].count0); console.log(result[i].count0); type0.push(result[i].type0); console.log(result[i].type0); id0.push(result[i].id0); console.log(result[i].id0); } } }); //下面就是官网的数据了进行一些简单的修改就可以了 var myChartone = echarts.init(document.getElementById('areaone')); // 指定图表的配置项和数据 var optionone = { color: ['#c23531','#2f4554', '#61a0a8'], title: { text: '柱状图' }, tooltip: {}, legend: { data: ['video'], orient: 'horizontal', itemGap: 40, textStyle: { color: 'blue', fontSize: '20px', fontWeight: 700 } }, xAxis: { data: id0//x轴 }, yAxis: {//y轴不用设置(2d) }, series: [{ name: 'count', type: 'bar', data: count0//数据 }] }; myChartone.setOption(optionone); </script> </div> </body> </html>
表二饼图
<!DOCTYPE html> <html> <head> <meta charset="utf-8" /> <title>圆柱图</title> <!-- 引入刚刚下载的 ECharts 文件 --> <script src="JS/echarts.js"></script> <script src="JS/jquery-3.1.1.js"></script><!--js文件在最后都有哦--> <script src="https://cdn.jsdelivr.net/npm/echarts-gl/dist/echarts-gl.min.js"></script> </head> <body> <div id="main" class="list-main"> <!-- 为 ECharts 准备一个具备大小(宽高)的 DOM --> <div id="areaone1" style="width: 2000px;height:1000px;"></div> <div id="areaone" style="width: 2000px;height:1000px;"></div> <script type="text/javascript"> var ip0 = [];//定义数组用来接受后台的数据,数组名随意啦 var sumtraffic = []; var id0=[]; var counttraffic=[]; var str=new Array(); var url = "selectdata2"; <!--使用ajax动态获取数据,将获取的数据放到数组中--> $.ajax({ async: false, type: "GET", url: url, dataType: 'json', contentType: "application/json;charset=UTF-8", success: function(result){ for (var i = 0; i < result.length; i++) { ip0.push(result[i].ip0); sumtraffic.push(result[i].sumtraffic); str.push({name:result[i].sumtraffic,value:result[i].ip0}) } } }); var chartDom = document.getElementById('areaone'); var myChart = echarts.init(chartDom); var option; option = { title: { text: '地市Top10课程', left: 'center' }, tooltip: { trigger: 'item' }, legend: { orient: 'vertical', left: 'left' }, series: [ { name: 'ip', type: 'pie', radius: '50%', data:str, emphasis: { itemStyle: { shadowBlur: 10, shadowOffsetX: 0, shadowColor: 'rgba(0, 0, 0, 0.5)' } } } ] }; //饼图点击事件 myChart.on("click", pieConsole); function pieConsole(param) { name=option.series[param.seriesIndex].data[param.dataIndex].name; value=option.series[param.seriesIndex].data[param.dataIndex].value; var str1=new Array(); $.ajax({ async: false, type: "GET", url: "Servletiii", data: {name1:name,jk:"1"}, dataType: 'json', contentType: "application/json;charset=UTF-8", success: function(result){ console.log(result); for (var i = 0; i < result.length; i++) { id0.push(result[i].id0); counttraffic.push(result[i].counttraffic); str1.push({name:result[i].counttraffic,value:result[i].id0}) } } }); var chartDom = document.getElementById('areaone1'); var myChart1 = echarts.init(chartDom); var option1; option1 = { title: { text: '最受欢迎的Top10课程', left: 'center' }, tooltip: { trigger: 'item' }, legend: { orient: 'vertical', left: 'left' }, series: [ { name: 'Access From', type: 'pie', radius: '50%', data:str1, emphasis: { itemStyle: { shadowBlur: 10, shadowOffsetX: 0, shadowColor: 'rgba(0, 0, 0, 0.5)' } } } ] }; option && myChart1.setOption(option1); } option && myChart.setOption(option); </script> </div> </body> </html>
表3折线图
<!DOCTYPE html> <html> <head> <meta charset="utf-8" /> <title>条形图</title> <!-- 引入刚刚下载的 ECharts 文件 --> <script src="JS/echarts.js"></script> <script src="JS/jquery-3.1.1.js"></script> <script src="https://cdn.jsdelivr.net/npm/echarts-gl/dist/echarts-gl.min.js"></script> </head> <body> <div id="main" class="list-main"> <!-- 为 ECharts 准备一个具备大小(宽高)的 DOM --> <div id="areaone1" style="width: 2000px;height:1000px;"></div> <script type="text/javascript"> var traffic0 = []; var type0 = []; var id0 = []; var url = "selectdata3"; <!--使用ajax动态获取数据,将获取的数据放到数组中--> $.ajax({ async: false, type: "GET", url: url, dataType: 'json', contentType: "application/json;charset=UTF-8", success: function(result){ console.log(result); for (var i = 0; i < result.length; i++) { traffic0.push(result[i].traffic0); type0.push(result[i].type0); id0.push(result[i].id0); console.log(result[i].traffic0) } } }); var chartDom = document.getElementById('areaone1'); var myChart = echarts.init(chartDom); var option; // prettier-ignore const data = [] for(var i=0;i<id0.length;i++) { data.push([id0[i],traffic0[i]]); // console.log(data); } const test.data1 = [] for(var i=0;i<id0.length;i++) { data1.push([traffic0[i],traffic0[i]]); // console.log(data); } const dateList = data.map(function (item) { return item[0]; }); const valueList = data1.map(function (item) { return item[1]; }); option = { // Make gradient line here visualMap: [ { show: false, type: 'continuous', seriesIndex: 0, min: 0, max: 400 }, { show: false, type: 'continuous', seriesIndex: 1, dimension: 0, min: 0, max: dateList.length - 1 } ], title: [ { left: 'center', text: '流量统计最受欢迎的Top10课程 ' }, { top: '55%', left: 'center', } ], tooltip: { trigger: 'axis' }, xAxis: [ { data: dateList }, { data: dateList, gridIndex: 1 } ], yAxis: [ {}, { gridIndex: 1 } ], grid: [ { bottom: '60%' }, { top: '60%' } ], series: [ { type: 'line', name:'访问次数', showSymbol: false, data: valueList }, { } ] }; option && myChart.setOption(option); </script> </div> </body> </html>
后台的话我就拿表二举例吧其余的基本一致:
servlet
package Servlet; import Bean.bean; import Bean.bean01; import Bean.bean02; import Bean.bean03; import Dao.select; import com.alibaba.fastjson.JSON; import javax.servlet.*; import javax.servlet.http.*; import javax.servlet.annotation.*; import java.io.IOException; import java.sql.SQLException; import java.util.ArrayList; import java.util.HashMap; import java.util.Map; import javax.servlet.ServletException; import javax.servlet.annotation.WebServlet; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import java.io.IOException; import java.sql.SQLException; import java.util.ArrayList; @WebServlet(name = "selectdata2", value = "/selectdata2") public class selectdata2 extends HttpServlet { @Override protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { // TODO Auto-generated method stub doPost(request,response); } @Override protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { // TODO Auto-generated method stub response.setContentType("text/html;charset=utf-8"); // TODO Auto-generated method stub request.setCharacterEncoding("utf-8"); ArrayList<bean02> book = new ArrayList<bean02>(); select dao=new select(); try { dao.selectdata2(book); System.out.println("1"); } catch (ClassNotFoundException e) { e.printStackTrace(); System.out.println("2"); } catch (SQLException e) { e.printStackTrace(); System.out.println("3"); } System.out.println("*****************"); String json = JSON.toJSONString(book); response.getWriter().write(json); } }
Dao方法
public void selectdata2(ArrayList<bean02> book) throws ClassNotFoundException, SQLException { Connection connection= Util.util.getConnection(); String sql1="SELECT ip0,sumtraffic FROM data2 ORDER BY CAST(sumtraffic AS UNSIGNED) DESC limit 10"; // String sql1="SELECT * FROM text3_1 ORDER BY RAND() LIMIT 100"; PreparedStatement pre=null; PreparedStatement ps=connection.prepareStatement(sql1); ResultSet rs = ps.executeQuery();//获取结果集 while(rs.next()) { bean02 A=new bean02(); A.setSumtraffic(rs.getString(1)); A.setIp0(rs.getString(2)); // A.setRound(rs.getString(4)); book.add(A); } }