本次实验为结对实验,由我和蒲煜凡共同完成。分工如下:我负责数据爬取,蒲煜凡负责热词云的构建。实验过程较为顺利。

我负责的部分遇到了一个问题:爬取的数据写入数据库时报错。经分析,原因是爬取到的内容过长,超出了数据库字段设计的长度;调大相应字段的长度后,问题解决。

代码如下

Yiqing

package com.jdbc.bean;

 

/**
 * Plain bean that carries a single title string.
 */
public class Yiqing {

    /** The title text held by this bean. */
    private String title;

    /**
     * Creates a bean holding the given title.
     *
     * @param title the title text to store
     */
    public Yiqing(String title) {
        this.title = title;
    }

    /**
     * Replaces the stored title.
     *
     * @param title the new title text
     */
    public void setTitle(String title) {
        this.title = title;
    }

    /**
     * Returns the stored title.
     *
     * @return the current title text
     */
    public String getTitle() {
        return this.title;
    }
}

yiqidao

package com.jdbc.dao;

 

import java.sql.Connection;

import java.sql.ResultSet;

import java.sql.SQLException;

import java.sql.Statement;

import java.util.ArrayList;

import java.util.List;

 

import com.jdbc.bean.Yiqing;

import com.jdbc.util.BaseConnection;

 

public class yiqidao {

 

public static List<Yiqing> find2 ()

{

List<Yiqing> list = new ArrayList<Yiqing>();

Connection conn = BaseConnection.getConnection();

Statement statement = null;

String sql = "SELECT * FROM lunwen ";  

        ResultSet rs = null;

        try {

         statement = conn.createStatement();

         rs = statement.executeQuery(sql);

         Yiqing yiqing = null;

         while(rs.next())

         {

         String title = rs.getString(4);

         yiqing = new Yiqing(title);

         list.add(yiqing);

         } 

        }catch (SQLException e) {

         e.printStackTrace();

         }finally

     {

     BaseConnection.close(rs, statement, conn);

     }

     return list;

        }

}

yiqingServlet

package com.jdbc.servlet;

 

import java.io.IOException;

import java.util.List;

 

import javax.servlet.ServletException;

import javax.servlet.annotation.WebServlet;

import javax.servlet.http.HttpServlet;

import javax.servlet.http.HttpServletRequest;

import javax.servlet.http.HttpServletResponse;

 

import com.jdbc.bean.Yiqing;

import com.jdbc.dao.yiqidao;

 

 

@WebServlet("/yiqingServlet")

public class yiqingServlet extends HttpServlet {

private static final long serialVersionUID = 1L;

 

    public  yiqingServlet() {

        super();

    }

protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {

response.setContentType("text/html;charset=UTF-8");

request.setCharacterEncoding("UTF-8");

 

String method = request.getParameter("method");

//System.out.print(method);

if(method.equals("pc"))

{

add(request,response);

}

}

private void add(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {

response.setContentType("text/html;charset=UTF-8");

request.setCharacterEncoding("UTF-8");

List<Yiqing> list =  yiqidao.find2();

System.out.println(list);

request.setAttribute("list", list);

request.getRequestDispatcher("ciyuntu.jsp").forward(request,response);

}

protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {

// TODO Auto-generated method stub

doGet(request, response);

 

}

}

BaseConnection

package com.jdbc.util;

 

 

import java.sql.Connection;

import java.sql.DriverManager;

import java.sql.ResultSet;

import java.sql.SQLException;

import java.sql.Statement;

 

/**
 * Static JDBC helpers: opening a connection to the MySQL database and closing
 * JDBC resources without propagating close failures.
 */
public class BaseConnection {

    /**
     * Loads the MySQL driver class and opens a connection with the settings
     * defined in this method.
     *
     * <p>NOTE(review): the URL, user and password are hard-coded here; consider
     * moving them to external configuration.
     *
     * @return the open connection, or {@code null} if loading the driver or
     *         connecting failed (the exception is printed)
     */
    public static Connection getConnection() {
        final String driver = "com.mysql.cj.jdbc.Driver";
        final String url = "jdbc:mysql://localhost:3306/mydate?serverTimezone=UTC&characterEncoding=utf8&useSSL=true";
        final String user = "root";
        final String password = "123456";

        try {
            Class.forName(driver);
            return DriverManager.getConnection(url, user, password);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Closes the statement and then the connection. {@code null} arguments are
     * skipped, and a {@link SQLException} from one close is printed without
     * preventing the next one.
     *
     * @param state the statement to close, may be {@code null}
     * @param conn  the connection to close, may be {@code null}
     */
    public static void close(Statement state, Connection conn) {
        try {
            if (state != null) {
                state.close();
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }

        try {
            if (conn != null) {
                conn.close();
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Closes the result set, then the statement, then the connection.
     * {@code null} arguments are skipped, and a {@link SQLException} from one
     * close is printed without preventing the remaining ones.
     *
     * @param rs    the result set to close, may be {@code null}
     * @param state the statement to close, may be {@code null}
     * @param conn  the connection to close, may be {@code null}
     */
    public static void close(ResultSet rs, Statement state, Connection conn) {
        try {
            if (rs != null) {
                rs.close();
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }

        // The remaining two resources are released exactly as in the two-argument overload.
        close(state, conn);
    }
}

Lwpq:

import requests

import pymysql

from bs4 import BeautifulSoup

 

# Crawl the CVPR 2019 open-access index, fetch each paper's abstract page and
# store one row per paper (title, link, abstract) in the MySQL table `lunwen`.

# Keyword arguments are used throughout: PyMySQL 1.0+ no longer accepts
# positional arguments to connect().
# NOTE(review): credentials are hard-coded; consider moving them to configuration.
db = pymysql.connect(host='127.0.0.1',
                     port=3306,
                     user='root',
                     password='mysjz',
                     database='qsly',
                     charset='utf8')

# Browser-like User-Agent sent with every HTTP request below.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
}

# Seconds to wait for each HTTP response, so one stalled request cannot hang
# the whole crawl.
timeout = 30

url = "http://openaccess.thecvf.com/CVPR2019.py"
html = requests.get(url, headers=headers, timeout=timeout)
soup = BeautifulSoup(html.content, 'html.parser')

# Collect the <a> elements whose text is exactly "pdf"; each href is used
# below to derive a paper name.
pdfs = soup.find_all(name="a", string="pdf")

lis = []
for pdf in pdfs:
    # e.g. ".../Some_Name_CVPR_2019_paper.pdf" -> "Some_Name"
    pdf_name = pdf["href"].split('/')[-1]
    name = pdf_name.split('.')[0].replace("_CVPR_2019_paper", "")
    link = "http://openaccess.thecvf.com/content_CVPR_2019/html/" + name + "_CVPR_2019_paper.html"

    # Start from an empty abstract for every paper so that a page without an
    # abstract (or a failed request) never inherits the previous paper's text.
    jianjie = ""
    try:
        html1 = requests.get(link, headers=headers, timeout=timeout)
        soup1 = BeautifulSoup(html1.content, 'html.parser')
        weizhi = soup1.find('div', attrs={'id': 'abstract'})
        if weizhi:
            jianjie = weizhi.get_text()
    except requests.RequestException as exc:
        # Keep crawling; this paper is stored with an empty abstract.
        print("request failed for", link, exc)
    print("ok")

    lis.append({
        'title': name,
        'link': link,
        'abstract': jianjie,
    })

# Every record has the same three keys, so the statement is built once.
# Values are passed separately and escaped by the driver.
res_sql = ("insert into lunwen(`title`, `link`, `abstract`) "
           "values(%(title)s, %(link)s, %(abstract)s)")
print(res_sql)

cursor = db.cursor()
try:
    for info in lis:
        cursor.execute(res_sql, info)  # the dict supplies the named parameters
        db.commit()
        print("ok")
finally:
    # Release the cursor and the connection even if an insert fails.
    cursor.close()
    db.close()

 

 

实验结果如下:

数据库界面

 

 

web界面