热词统计
dao.java
package dao;

import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

import entity.Cvf;
import utils.DBUtil;

/**
 * Data-access object for the {@code cvpr} table: inserts paper records and
 * reads them back, either all of them or only those with a given keyword.
 *
 * <p>All database work is delegated to {@link DBUtil}. The query methods never
 * throw; on failure the stack trace is printed and the rows collected so far
 * (possibly none) are returned.
 */
public class dao {

    /**
     * Inserts one paper record into the {@code cvpr} table.
     *
     * @param cvf record supplying the name, link, abstract and keyword columns
     * @return whatever {@code DBUtil.executeUpdate} reports for the insert
     */
    public boolean add(Cvf cvf) {
        String sql = "insert into cvpr(cname,chref,cabstract,ckeyword) values (?,?,?,?)";
        Object[] params = {cvf.getCname(), cvf.getChref(), cvf.getCabstract(), cvf.getCkeyword()};
        return DBUtil.executeUpdate(sql, params);
    }

    /**
     * Loads every row of the {@code cvpr} table.
     *
     * @return the rows as {@code Cvf} objects; empty (never {@code null}) when
     *         the query fails or the table has no rows
     */
    public List<Cvf> Query() {
        return fetch("select * from cvpr ", new Object[] {}, null);
    }

    /**
     * Loads the rows of the {@code cvpr} table whose {@code ckeyword} column
     * equals the given key.
     *
     * @param key keyword to match; it is also the keyword stored in each
     *            returned {@code Cvf}
     * @return the matching rows; empty (never {@code null}) when the query
     *         fails or nothing matches
     */
    public List<Cvf> Query(String key) {
        return fetch("select * from cvpr where ckeyword=? ", new Object[] {key}, key);
    }

    /**
     * Runs a select against {@code cvpr} and maps each row to a {@code Cvf}.
     *
     * @param sql             statement text with {@code ?} placeholders
     * @param params          values for the placeholders
     * @param keywordOverride when non-null, used as the keyword of every
     *                        mapped row instead of reading the column
     * @return the mapped rows, possibly empty
     */
    private List<Cvf> fetch(String sql, Object[] params, String keywordOverride) {
        List<Cvf> cvfs = new ArrayList<>();
        ResultSet rs = null;
        try {
            rs = DBUtil.executeQuery(sql, params);
            // executeQuery reports failure by returning null; nothing to read then.
            if (rs != null) {
                while (rs.next()) {
                    int id = rs.getInt("id");
                    String cname = rs.getString("cname");
                    String chref = rs.getString("chref");
                    String cabstract = rs.getString("cabstract");
                    String ckeyword =
                            keywordOverride != null ? keywordOverride : rs.getString("ckeyword");
                    cvfs.add(new Cvf(id, cname, chref, cabstract, ckeyword));
                }
            }
        } catch (SQLException | RuntimeException e) {
            e.printStackTrace();
        } finally {
            // Close in reverse order of opening. Each resource is closed on its
            // own so a failure on one does not leave the others open.
            closeAndLog(rs);
            closeAndLog(DBUtil.pstmt);
            closeAndLog(DBUtil.connection);
        }
        return cvfs;
    }

    /** Closes the resource if it is non-null, printing (not propagating) any failure. */
    private static void closeAndLog(AutoCloseable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
Cvf.java
package dao;

import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

import entity.Cvf;
import utils.DBUtil;

// NOTE(review): this listing sits under the "Cvf.java" heading but declares the
// dao class in package dao; the source of entity.Cvf is not shown here.

/**
 * Data-access object for the {@code cvpr} table: inserts paper records and
 * reads them back, either all of them or only those with a given keyword.
 *
 * <p>All database work is delegated to {@link DBUtil}. The query methods never
 * throw; on failure the stack trace is printed and the rows collected so far
 * (possibly none) are returned.
 */
public class dao {

    /**
     * Inserts one paper record into the {@code cvpr} table.
     *
     * @param cvf record supplying the name, link, abstract and keyword columns
     * @return whatever {@code DBUtil.executeUpdate} reports for the insert
     */
    public boolean add(Cvf cvf) {
        String sql = "insert into cvpr(cname,chref,cabstract,ckeyword) values (?,?,?,?)";
        Object[] params = {cvf.getCname(), cvf.getChref(), cvf.getCabstract(), cvf.getCkeyword()};
        return DBUtil.executeUpdate(sql, params);
    }

    /**
     * Loads every row of the {@code cvpr} table.
     *
     * @return the rows as {@code Cvf} objects; empty (never {@code null}) when
     *         the query fails or the table has no rows
     */
    public List<Cvf> Query() {
        return fetch("select * from cvpr ", new Object[] {}, null);
    }

    /**
     * Loads the rows of the {@code cvpr} table whose {@code ckeyword} column
     * equals the given key.
     *
     * @param key keyword to match; it is also the keyword stored in each
     *            returned {@code Cvf}
     * @return the matching rows; empty (never {@code null}) when the query
     *         fails or nothing matches
     */
    public List<Cvf> Query(String key) {
        return fetch("select * from cvpr where ckeyword=? ", new Object[] {key}, key);
    }

    /**
     * Runs a select against {@code cvpr} and maps each row to a {@code Cvf}.
     *
     * @param sql             statement text with {@code ?} placeholders
     * @param params          values for the placeholders
     * @param keywordOverride when non-null, used as the keyword of every
     *                        mapped row instead of reading the column
     * @return the mapped rows, possibly empty
     */
    private List<Cvf> fetch(String sql, Object[] params, String keywordOverride) {
        List<Cvf> cvfs = new ArrayList<>();
        ResultSet rs = null;
        try {
            rs = DBUtil.executeQuery(sql, params);
            // executeQuery reports failure by returning null; nothing to read then.
            if (rs != null) {
                while (rs.next()) {
                    int id = rs.getInt("id");
                    String cname = rs.getString("cname");
                    String chref = rs.getString("chref");
                    String cabstract = rs.getString("cabstract");
                    String ckeyword =
                            keywordOverride != null ? keywordOverride : rs.getString("ckeyword");
                    cvfs.add(new Cvf(id, cname, chref, cabstract, ckeyword));
                }
            }
        } catch (SQLException | RuntimeException e) {
            e.printStackTrace();
        } finally {
            // Close in reverse order of opening. Each resource is closed on its
            // own so a failure on one does not leave the others open.
            closeAndLog(rs);
            closeAndLog(DBUtil.pstmt);
            closeAndLog(DBUtil.connection);
        }
        return cvfs;
    }

    /** Closes the resource if it is non-null, printing (not propagating) any failure. */
    private static void closeAndLog(AutoCloseable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
ListServlet.java
package servlet;

import java.io.IOException;
import java.util.List;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import dao.dao;
import entity.Cvf;

/**
 * Looks up the papers stored for the request's {@code keyword} parameter and
 * forwards them to {@code list.jsp} in the {@code cvfs} request attribute.
 */
public class ListServlet extends HttpServlet {

    /**
     * Handles the keyword lookup.
     *
     * @param request  carries the {@code keyword} parameter
     * @param response response handed on to {@code list.jsp}
     */
    @Override
    protected void doGet(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        // Fix the encodings up front to avoid garbled text; the request
        // encoding is set before the parameter is read.
        request.setCharacterEncoding("utf-8");
        final String keyword = request.getParameter("keyword");
        response.setCharacterEncoding("utf-8");
        response.setContentType("text/html;charset=utf-8");

        final dao paperDao = new dao();
        final List<Cvf> matches = paperDao.Query(keyword);
        System.out.println(matches);

        request.setAttribute("cvfs", matches);
        request.getRequestDispatcher("list.jsp").forward(request, response);
    }

    /** POST is served exactly like GET. */
    @Override
    protected void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        doGet(request, response);
    }
}
QueryServlet.java
package servlet;

import java.io.IOException;
import java.util.List;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import dao.dao;
import entity.Cvf;
import utils.Jsouputil;

/**
 * Loads every stored paper and forwards the list to {@code show.jsp} in the
 * {@code cvfs} request attribute.
 */
public class QueryServlet extends HttpServlet {

    /**
     * Handles the "show everything" request.
     *
     * @param request  request that receives the {@code cvfs} attribute
     * @param response response handed on to {@code show.jsp}
     */
    @Override
    protected void doGet(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        // Fix the encodings up front to avoid garbled text.
        request.setCharacterEncoding("utf-8");
        response.setCharacterEncoding("utf-8");
        response.setContentType("text/html;charset=utf-8");

        // Crawling the data is disabled here; it used to be triggered with:
        //   try {
        //       Jsouputil.testSelector();
        //   } catch (Exception e) {
        //       e.printStackTrace();
        //   }

        final dao paperDao = new dao();
        final List<Cvf> allPapers = paperDao.Query();
        System.out.println(allPapers);

        request.setAttribute("cvfs", allPapers);
        request.getRequestDispatcher("show.jsp").forward(request, response);
    }

    /** POST is served exactly like GET. */
    @Override
    protected void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        doGet(request, response);
    }
}
DBUtil.java
package utils;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

/**
 * Minimal JDBC helper that opens a fresh MySQL connection for every statement.
 *
 * <p>The connection, statement and result set of the most recent call are kept
 * in public static fields so that callers of {@link #executeQuery} can close
 * them once they have finished reading.
 *
 * <p>NOTE(review): because that state lives in static fields which every call
 * reassigns, concurrent calls overwrite each other's connection and statement.
 */
public class DBUtil {

    // Database URL and account.
    // If loading the driver or connecting fails, this is usually the place to look.
    public static String URL = "jdbc:mysql://localhost:3306/test?useUnicode=true&characterEncoding=GB18030&useSSL=false&serverTimezone=GMT&allowPublicKeyRetrieval=true";
    // NOTE(review): credentials are hard-coded in source; consider external configuration.
    private static final String UNAME = "root";
    private static final String UPWD = "1234";

    public static PreparedStatement pstmt = null;
    public static ResultSet rs = null;
    public static Connection connection = null;

    /**
     * Executes an insert, update or delete statement and releases the
     * statement and connection before returning.
     *
     * @param sql    statement text with {@code ?} placeholders
     * @param params placeholder values in order; {@code null} means no parameters
     * @return {@code true} if at least one row was affected; {@code false} if
     *         no row was affected or the statement failed (the stack trace is
     *         printed in that case)
     */
    public static boolean executeUpdate(String sql, Object[] params) {
        boolean flag = false;
        try {
            prepare(sql, params);
            // The return value is the number of rows inserted, updated or deleted.
            int count = pstmt.executeUpdate();
            if (count > 0) {
                System.out.println("操作成功!!!");
                flag = true;
            }
        } catch (ClassNotFoundException | SQLException | RuntimeException e) {
            e.printStackTrace();
        } finally {
            closeStatementAndConnection();
        }
        return flag;
    }

    /**
     * Executes a select statement.
     *
     * <p>On success the statement and connection stay open, because the
     * returned result set depends on them; the caller closes the result set,
     * {@link #pstmt} and {@link #connection} when done. On failure they are
     * closed here, since there is nothing left for the caller to read.
     *
     * @param sql    statement text with {@code ?} placeholders
     * @param params placeholder values in order; {@code null} means no parameters
     * @return the open result set, or {@code null} if the query failed (the
     *         stack trace is printed in that case)
     */
    public static ResultSet executeQuery(String sql, Object[] params) {
        try {
            prepare(sql, params);
            rs = pstmt.executeQuery();
            return rs;
        } catch (ClassNotFoundException | SQLException | RuntimeException e) {
            e.printStackTrace();
            closeStatementAndConnection();
            return null;
        }
    }

    /**
     * Loads the driver, opens a connection and prepares the statement with its
     * parameters bound, storing both in the static fields.
     */
    private static void prepare(String sql, Object[] params)
            throws ClassNotFoundException, SQLException {
        // Forget the previous call's handles so a failure below never leads to
        // closing (or reusing) stale objects.
        pstmt = null;
        connection = null;

        // a. Load the concrete driver class.
        Class.forName("com.mysql.cj.jdbc.Driver");
        // b. Connect to the database.
        connection = DriverManager.getConnection(URL, UNAME, UPWD);
        pstmt = connection.prepareStatement(sql);
        if (params != null) {
            for (int i = 0; i < params.length; i++) {
                pstmt.setObject(i + 1, params[i]);
            }
        }
    }

    /** Closes the current statement, then the connection (reverse order of opening). */
    private static void closeStatementAndConnection() {
        closeAndLog(pstmt);
        closeAndLog(connection);
    }

    /** Closes the resource if it is non-null, printing (not propagating) any failure. */
    private static void closeAndLog(AutoCloseable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
HttpClientPool.java
package utils;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;

import org.apache.http.HttpEntity;
import org.apache.http.NameValuePair;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;

import com.alibaba.fastjson.JSONObject;

/**
 * Downloads the CVPR 2019 listing page through an HttpClient connection pool.
 */
public class HttpClientPool {

    /** Page fetched by both {@link #doGet} and {@link #doPost}. */
    private static final String LISTING_URL = "http://openaccess.thecvf.com/CVPR2019.py#";

    /** Connect and connection-request timeout used for GET, in milliseconds. */
    private static final int GET_TIMEOUT_MS = 10000 * 10000;

    /**
     * Socket timeout used for GET, in milliseconds. The value previously
     * written here, {@code 100000*1000000}, does not fit in an {@code int} and
     * silently wrapped around; the largest representable timeout is used
     * instead.
     */
    private static final int GET_SOCKET_TIMEOUT_MS = Integer.MAX_VALUE;

    /**
     * Creates and configures a pooling connection manager (100 connections in
     * total, 10 per host).
     *
     * <p>NOTE(review): the manager is neither returned nor stored, so this
     * method currently has no lasting effect.
     */
    public static void HttpClientPool() {
        PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();
        cm.setMaxTotal(100);
        cm.setDefaultMaxPerRoute(10);
        // Requests would be issued through the manager here:
        // doGet(cm);
        // doPost(cm);
    }

    /**
     * POSTs a form with a single empty field to the listing page.
     *
     * @param cm connection manager to take the connection from
     * @return the response body when the status is 200; {@code null} for any
     *         other status or when the request fails (the stack trace is
     *         printed in that case)
     * @throws Exception if the form entity cannot be built
     */
    public static String doPost(PoolingHttpClientConnectionManager cm) throws Exception {
        CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();

        HttpPost httpPost = new HttpPost(LISTING_URL);
        System.out.println("发起请求的信息:" + httpPost);

        // Form parameters for the POST body, encoded with the given charset.
        List<NameValuePair> params = new ArrayList<NameValuePair>();
        params.add(new BasicNameValuePair("", ""));
        httpPost.setEntity(new UrlEncodedFormEntity(params, "utf8"));
        httpPost.setConfig(requestConfig(10000, 50000, 1000 * 1000));

        String content = null;
        // The response is closed automatically; the client itself stays open
        // because its connections belong to the caller's manager.
        try (CloseableHttpResponse response = httpClient.execute(httpPost)) {
            if (response.getStatusLine().getStatusCode() == 200) {
                HttpEntity httpEntity = response.getEntity();
                if (httpEntity != null) {
                    content = EntityUtils.toString(httpEntity, "utf8");
                    System.out.println(content.length());
                }
            } else {
                System.out.println("请求失败" + response);
            }
        } catch (IOException | RuntimeException e) {
            e.printStackTrace();
        }
        return content;
    }

    /**
     * GETs the listing page.
     *
     * @param cm connection manager to take the connection from
     * @return the response body when the status is 200; {@code null} for any
     *         other status or when the request fails (the stack trace is
     *         printed in that case)
     * @throws Exception if the request URI cannot be built
     */
    public static String doGet(PoolingHttpClientConnectionManager cm) throws Exception {
        CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();

        // Query parameters could be added to the builder before build().
        URIBuilder uribuilder = new URIBuilder(LISTING_URL);
        HttpGet httpGet = new HttpGet(uribuilder.build());
        System.out.println("发起请求的信息:" + httpGet);
        httpGet.setConfig(requestConfig(GET_TIMEOUT_MS, GET_TIMEOUT_MS, GET_SOCKET_TIMEOUT_MS));

        String content = null;
        // The response is closed automatically; the client itself stays open
        // because its connections belong to the caller's manager.
        try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
            if (response.getStatusLine().getStatusCode() == 200) {
                HttpEntity httpEntity = response.getEntity();
                if (httpEntity != null) {
                    content = EntityUtils.toString(httpEntity, "utf8");
                }
            }
        } catch (IOException | RuntimeException e) {
            e.printStackTrace();
        }
        return content;
    }

    /**
     * Builds a request configuration from three timeouts, all in milliseconds.
     *
     * @param connectMs maximum time to establish the connection
     * @param requestMs maximum time to obtain a connection from the manager
     * @param socketMs  maximum time to wait for data
     */
    private static RequestConfig requestConfig(int connectMs, int requestMs, int socketMs) {
        return RequestConfig.custom()
                .setConnectTimeout(connectMs)
                .setConnectionRequestTimeout(requestMs)
                .setSocketTimeout(socketMs)
                .build();
    }
}
Jsouputil.java
package utils;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import dao.dao;
import entity.Cvf;

/**
 * Crawls the CVPR listing with jsoup: for every paper it extracts the title,
 * link and abstract, derives the most frequent word as the keyword, and stores
 * the record through {@code dao}.
 */
public class Jsouputil {

    /** Prefix prepended to the relative paper links found on the listing page. */
    private static final String SITE_ROOT = "http://openaccess.thecvf.com/";

    /** Connect and connection-request timeout, in milliseconds. */
    private static final int TIMEOUT_MS = 10000 * 10000;

    /**
     * Socket timeout, in milliseconds. The value previously written here,
     * {@code 100000*1000000}, does not fit in an {@code int} and silently
     * wrapped around; the largest representable timeout is used instead.
     */
    private static final int SOCKET_TIMEOUT_MS = Integer.MAX_VALUE;

    /** Words that are never reported as the keyword. */
    private static final Set<String> STOP_WORDS = new HashSet<String>(Arrays.asList(
            "a", "the", "to", "and", "in", "of", "our", "your", "we", "is", "on", "for",
            "that", "an", "are"));

    /**
     * Downloads the listing page, then each paper page, and stores one record
     * per paper.
     *
     * <p>A paper whose page does not answer with status 200 is stored with a
     * {@code null} abstract and keyword, and the status code is printed.
     *
     * @throws Exception if the listing page cannot be downloaded, or if
     *                   requesting or parsing a paper page fails; the crawl
     *                   stops at that paper
     */
    public static void testSelector() throws Exception {
        PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();
        try {
            // doGet returns null when the download failed.
            String content = HttpClientPool.doGet(cm);
            if (content == null) {
                throw new IllegalStateException("listing page could not be downloaded");
            }

            Document doc = Jsoup.parse(content);
            // Attribute selectors: the paper titles inside the content list.
            Elements elements =
                    doc.select("div[id=content]").select("dl").select("dt[class=ptitle]");
            System.out.println(elements.toString());

            dao paperDao = new dao();
            // One client and one request configuration serve every paper page.
            CloseableHttpClient httpClient =
                    HttpClients.custom().setConnectionManager(cm).build();
            RequestConfig config = RequestConfig.custom()
                    .setConnectTimeout(TIMEOUT_MS)
                    .setConnectionRequestTimeout(TIMEOUT_MS)
                    .setSocketTimeout(SOCKET_TIMEOUT_MS)
                    .build();

            for (Element ele : elements) {
                String cname = ele.select("a").text();
                System.out.println(cname);
                String chref = SITE_ROOT.concat(ele.select("a").attr("href"));
                System.out.println(chref);
                String cabstract = null;
                String ckeyword = null;

                HttpGet httpGet = new HttpGet(new URIBuilder(chref).build());
                httpGet.setConfig(config);

                // try-with-resources closes the response even when parsing throws.
                try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
                    int status = response.getStatusLine().getStatusCode();
                    if (status == 200 && response.getEntity() != null) {
                        // Parse the body that was just received instead of
                        // downloading the same page a second time. A null
                        // charset lets jsoup detect the encoding itself.
                        Document document =
                                Jsoup.parse(response.getEntity().getContent(), null, chref);
                        cabstract = document.select("div[id=abstract]").text();
                        System.out.println("已获取摘要");
                        // The separating space keeps the last title word and
                        // the first abstract word from merging into one token.
                        ckeyword = keyword(strTostrArray(cname + " " + cabstract));
                    } else {
                        System.out.println(status);
                    }
                }

                paperDao.add(new Cvf(cname, chref, cabstract, ckeyword));
            }
        } finally {
            // Release the pooled connections held by this crawl.
            cm.shutdown();
        }
    }

    /**
     * Splits text into lower-case words.
     *
     * <p>Every run of non-word characters (regex {@code \W}) is treated as a
     * separator.
     *
     * @param str text to split; may be {@code null}
     * @return the words in order of appearance; an empty array when the input
     *         is {@code null} or contains no word characters
     */
    public static String[] strTostrArray(String str) {
        if (str == null) {
            return new String[0];
        }
        // Locale.ROOT keeps the lower-casing independent of the default locale.
        String lowered = str.toLowerCase(Locale.ROOT);
        // \W matches any single non-word character.
        String regex = "[\\W]+";
        // Trimming removes the separator a leading or trailing non-word
        // character would leave behind, so no empty token is produced.
        String normalized = lowered.replaceAll(regex, " ").trim();
        if (normalized.isEmpty()) {
            return new String[0];
        }
        return normalized.split(" ");
    }

    /**
     * Finds the most frequent word, ignoring the stop words and empty tokens.
     *
     * <p>When several words share the highest count, the one that appears
     * first in the input wins.
     *
     * @param strs words to count; may be {@code null}
     * @return the most frequent word, or {@code null} when there is none
     */
    public static String keyword(String[] strs) {
        // Insertion-ordered so that ties resolve to the earliest word.
        Map<String, Integer> counts = new LinkedHashMap<String, Integer>();
        if (strs != null) {
            for (String s : strs) {
                if (s == null || s.isEmpty() || STOP_WORDS.contains(s)) {
                    continue;
                }
                counts.merge(s, 1, Integer::sum);
            }
        }

        String maxStr = null; // most frequent word so far
        int maxValue = 0;     // its number of occurrences
        for (Map.Entry<String, Integer> e : counts.entrySet()) {
            if (e.getValue() > maxValue) {
                maxValue = e.getValue();
                maxStr = e.getKey();
            }
        }
        System.out.println("出现最多的单词是:" + maxStr + "出现了" + maxValue + "次");
        return maxStr;
    }
}