Blueherb In solitude, where we are least alone

4.17号自学成果

热词统计cvpr2019

<%@ page language="java" contentType="text/html; charset=UTF-8"
    pageEncoding="UTF-8"%>
<%@ taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
<!DOCTYPE html>
<html>
<head>
<%--
  Resources for the word-cloud page: Bootstrap CSS/JS, jQuery 3.4.1, ECharts,
  the china.js map script and the echarts-wordcloud extension.
  NOTE(review): ECharts is included twice (local echarts.js and the CDN
  echarts.simple.js 3.7.0). The later script presumably replaces the global
  `echarts` object that china.js registered against; confirm which build is
  intended and drop the other.
--%>
<meta charset="UTF-8">
<title>Insert title here</title>
<link rel="stylesheet" href="css/bootstrap.min.css" type="text/css" />
<script src="jquery-3.4.1.js" type="text/javascript"></script>
<script type="text/javascript" src="echarts.js"></script>
<script type="text/javascript" src="js/china.js"></script>
<script src="js/bootstrap.min.js" type="text/javascript"></script>
<script src='https://cdn.bootcss.com/echarts/3.7.0/echarts.simple.js'></script>
<script src='js/echarts-wordcloud.js'></script>
</head>
<body>
<div id="main" style="width: 100%;height: 400px"></div>
<div>
  <%--
    Lists the papers in the request attribute "list", one linked title per row.
    Both the link and the title originate from crawled pages, so they are
    written through c:out (XML-escaped by default) instead of raw EL to keep
    markup in the data from being injected into the page.
  --%>
  <table class="table" style="width: 100%;align-content: center;" >
    <tr>
      <th align="center">论文连接</th>
    </tr>
    <c:forEach var="item" items="${list}">
      <tr>
        <td><a href="<c:out value='${item.lianjie}'/>"><c:out value="${item.title}"/></a></td>
      </tr>
    </c:forEach>
  </table>
</div>
<script>
  // Renders a word cloud of keywords returned by PaperServlet_ into #main and
  // navigates to ClickServlet when a word is clicked.
  var chart = echarts.init(document.getElementById('main'));
  var dt = [];      // raw rows from the servlet; stays empty if the request fails
  var mydata = [];  // rows reduced to the {name, value} pairs the series reads
  var option;

  /**
   * Builds the ECharts option for a word-cloud series.
   * @param {Array} words items of the form {name, value}
   * @returns {Object} option object for chart.setOption
   */
  function buildOption(words) {
    return {
      tooltip: {},
      series: [ {
        type: 'wordCloud',
        gridSize: 2,
        sizeRange: [20, 50],
        rotationRange: [-90, 90],
        shape: 'pentagon',
        width: 600,
        height: 300,
        drawOutOfBound: true,
        textStyle: {
          normal: {
            // Random dark colour per word (each channel in 0..160).
            color: function () {
              return 'rgb(' + [
                Math.round(Math.random() * 160),
                Math.round(Math.random() * 160),
                Math.round(Math.random() * 160)
              ].join(',') + ')';
            }
          },
          emphasis: {
            shadowBlur: 10,
            shadowColor: '#333'
          }
        },
        data: words
      } ]
    };
  }

  // The request is asynchronous and the chart is drawn from the success
  // callback: a synchronous XHR blocks the page, and reading the result after
  // a failed request used to throw on `dt.length` because dt was undefined.
  $.ajax({
    url : "PaperServlet_",
    type : "POST",
    dataType : "json",
    success : function(data) {
      dt = Array.isArray(data) ? data : [];
      mydata = dt.map(function (row) {
        return { name: row.name, value: row.value };
      });
      option = buildOption(mydata);
      chart.setOption(option);
    },
    error : function() {
      alert("请求失败");
    }
  });

  chart.on('click', function (params) {
    // Encode the word so characters such as '&', '#' or spaces survive the query string.
    window.location.href = "ClickServlet?geunjian=" + encodeURIComponent(params.name);
  });

  // Call resize through the chart so `this` is always the chart instance.
  window.onresize = function () {
    chart.resize();
  };
</script>
</body>
</html>
package utils;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import dao.dao;
import entity.Cvf;



/**
 * Crawler that parses a paper listing with Jsoup, downloads every linked paper page from
 * openaccess.thecvf.com, extracts its abstract and most frequent word, and stores the result
 * through {@code dao}.
 *
 * @author 张志伟
 */
public class Jsouputil {

    /** Prefix that the relative paper links found in the listing are appended to. */
    private static final String BASE_URL = "http://openaccess.thecvf.com/";

    /*
     * Timeouts in milliseconds. The previous inline products (10000*10000 and
     * 100000*1000000) were ~27 hours and a silently overflowing int respectively.
     */
    private static final int CONNECT_TIMEOUT_MS = 10000;
    private static final int CONNECTION_REQUEST_TIMEOUT_MS = 10000;
    private static final int SOCKET_TIMEOUT_MS = 100000;

    /** One or more consecutive non-word characters (anything outside [a-zA-Z_0-9]). */
    private static final Pattern NON_WORD = Pattern.compile("[\\W]+");

    /** Words that are never reported as a keyword. */
    private static final Set<String> STOP_WORDS = Collections.unmodifiableSet(
            new HashSet<String>(Arrays.asList(
                    "a", "the", "to", "and", "in", "of", "our", "your",
                    "we", "is", "on", "for", "that", "an", "are")));

    /**
     * Fetches the listing HTML via {@code HttpClientPool.doGet}, selects every
     * {@code div#content dl dt.ptitle} entry and, for each one, downloads the paper page,
     * derives abstract and keyword, and persists a {@code Cvf} record with {@code dao.add}.
     *
     * <p>The HTTP client (and with it the connection manager created here) is closed when the
     * method finishes, whether normally or by exception.
     *
     * @throws Exception if fetching, parsing or storing any paper fails; the crawl stops at the
     *     first failure
     */
    public static void testSelector() throws Exception {
        HttpClientPool httpClientPool = new HttpClientPool();
        PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();
        // One client for the whole crawl instead of one (never closed) client per paper.
        CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();
        try {
            String content = httpClientPool.doGet(cm);
            Document doc = Jsoup.parse(content);

            // [attr=value] selectors: the title entries inside the content list.
            Elements elements =
                    doc.select("div[id=content]").select("dl").select("dt[class=ptitle]");
            System.out.println(elements.toString());

            RequestConfig config = RequestConfig.custom()
                    .setConnectTimeout(CONNECT_TIMEOUT_MS)
                    .setConnectionRequestTimeout(CONNECTION_REQUEST_TIMEOUT_MS)
                    .setSocketTimeout(SOCKET_TIMEOUT_MS)
                    .build();

            dao paperDao = new dao();
            for (Element ele : elements) {
                paperDao.add(fetchPaper(httpClient, config, ele));
            }
        } finally {
            httpClient.close();
        }
    }

    /**
     * Downloads one paper page and builds its record.
     *
     * <p>Abstract and keyword stay {@code null} when the final response status is not 200 or
     * the response carries no body; the status code is printed in that case.
     */
    private static Cvf fetchPaper(CloseableHttpClient httpClient, RequestConfig config, Element ele)
            throws Exception {
        String cname = ele.select("a").text();
        System.out.println(cname);
        String chref = BASE_URL.concat(ele.select("a").attr("href"));
        System.out.println(chref);

        String cabstract = null;
        String ckeyword = null;

        HttpGet httpGet = new HttpGet(new URIBuilder(chref).build());
        httpGet.setConfig(config);
        CloseableHttpResponse response = httpClient.execute(httpGet);
        try {
            // The client is built with default settings, so redirects are followed and the
            // status seen here is the one of the final page.
            int status = response.getStatusLine().getStatusCode();
            if (status == 200 && response.getEntity() != null) {
                // Parse the body that was just downloaded rather than fetching the URL a
                // second time; a null charset lets Jsoup detect the encoding itself.
                Document document = Jsoup.parse(response.getEntity().getContent(), null, chref);
                cabstract = document.select("div[id=abstract]").text();
                System.out.println("已获取摘要");
                // The separating space keeps the last title word from fusing with the first
                // word of the abstract.
                ckeyword = keyword(strTostrArray(cname + " " + cabstract));
            } else {
                System.out.println(status);
            }
        } finally {
            response.close();
        }
        return new Cvf(cname, chref, cabstract, ckeyword);
    }

    /**
     * Splits text into lower-case words.
     *
     * <p>The text is lower-cased, every run of non-word characters is replaced by a single
     * space, and the result is split on spaces. Leading separators no longer produce an empty
     * first token.
     *
     * @param str text to split; may be {@code null}
     * @return the words in order of appearance; an empty array when {@code str} is
     *     {@code null} or contains no word characters
     */
    public static String[] strTostrArray(String str) {
        if (str == null) {
            return new String[0];
        }
        String normalized =
                NON_WORD.matcher(str.toLowerCase(Locale.ROOT)).replaceAll(" ").trim();
        if (normalized.isEmpty()) {
            return new String[0];
        }
        return normalized.split(" ");
    }

    /**
     * Returns the most frequent word of {@code strs}, ignoring stop words, empty strings and
     * {@code null} entries. When several words share the highest count, the one that appears
     * first in {@code strs} wins. The result is also printed to standard output.
     *
     * @param strs words to count; may be {@code null}
     * @return the most frequent word, or {@code null} when no countable word exists
     */
    public static String keyword(String[] strs) {
        String maxStr = null;
        int maxValue = 0;

        if (strs != null) {
            // Insertion-ordered so that ties resolve to the first word seen.
            Map<String, Integer> counts = new LinkedHashMap<String, Integer>();
            for (String s : strs) {
                if (s == null || s.isEmpty() || STOP_WORDS.contains(s)) {
                    continue;
                }
                Integer previous = counts.get(s);
                counts.put(s, previous == null ? 1 : previous + 1);
            }
            for (Map.Entry<String, Integer> entry : counts.entrySet()) {
                if (entry.getValue() > maxValue) {
                    maxValue = entry.getValue();
                    maxStr = entry.getKey();
                }
            }
        }

        System.out.println("出现最多的单词是:" + maxStr + "出现了" + maxValue + "");
        return maxStr;
    }
}

 

posted @ 2020-04-17 16:31  帅气的小土豆  阅读(109)  评论(0编辑  收藏  举报