这一周,建民哥给我们进行了课堂测验冲刺,冲刺的内容是论文爬虫系统。

第一阶段是进行论文的增删改查以及爬虫爬取网页中的论文数据,经过了几天的努力终于完成了,代码截图如下:

 

 

论文添加:

 

 

论文修改:

 

 

论文部分查询:

 

 

爬虫部分代码:

package bai.utils;

import bai.pojo.Essay;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

/**
 * Scrapes the CVPR 2020 open-access listing page and converts the BibTeX
 * reference of every listed paper into an {@link Essay}.
 *
 * <p>Usage: call {@link #PaQu()} to download the page, then {@link #dispose()}
 * to build the list of essays from the downloaded elements.
 */
public class pa {
    /** Listing page that is downloaded by {@link #PaQu()}. */
    private static final String SOURCE_URL = "https://openaccess.thecvf.com/CVPR2020?day=2020-06-18";

    /** Timeout, in milliseconds, for fetching and parsing the listing page. */
    private static final int TIMEOUT_MILLIS = 30000;

    /** {@code elements}: nodes with class "bibref"; {@code elements1}: nodes with class "ptitle". */
    Elements elements,elements1;

    /** Result list filled by {@link #dispose()}; the same instance is returned on every call. */
    List<Essay> essays=new ArrayList<>();

    /**
     * Downloads the listing page and stores its "bibref" and "ptitle" elements
     * for a later call to {@link #dispose()}.
     *
     * @throws IOException if the page cannot be fetched, or if it does not
     *                     contain an element with id "content"
     */
    public void PaQu() throws IOException {
        System.out.println("1");
        Document document = Jsoup.parse(new URL(SOURCE_URL), TIMEOUT_MILLIS);
        Element content = document.getElementById("content");
        if (content == null) {
            // getElementById returns null when the id is absent; fail with a
            // clear message instead of a NullPointerException further down.
            throw new IOException("No element with id \"content\" found at " + SOURCE_URL);
        }
        elements = content.getElementsByClass("bibref");
        elements1 = content.getElementsByClass("ptitle");
        System.out.println("2");
        System.out.println(elements.text());
    }

    /**
     * Builds the essay list from the elements stored by {@link #PaQu()}.
     *
     * <p>The i-th "bibref" element is paired with the i-th "ptitle" element,
     * whose first link supplies the essay address. Entries whose BibTeX text
     * lacks any of author, title, booktitle, month or year are skipped without
     * shifting that pairing. The list is rebuilt from scratch on every call, so
     * calling this method repeatedly does not produce duplicates.
     *
     * @return the list of parsed essays (the {@code essays} field)
     * @throws IllegalStateException if {@link #PaQu()} has not completed successfully
     */
    public List<Essay> dispose(){
        if (elements == null || elements1 == null) {
            throw new IllegalStateException("PaQu() must complete successfully before dispose() is called");
        }
        essays.clear();
        for (int i = 0; i < elements.size(); i++) {
            Essay essay = parseEntry(elements.get(i).text());
            if (essay == null) {
                continue;
            }
            if (i < elements1.size()) {
                String address = elements1.get(i).getElementsByTag("a").attr("href");
                essay.setAddress(address);
            }
            essays.add(essay);
        }
        return essays;
    }

    /**
     * Converts one BibTeX entry into an essay.
     *
     * @param bibtex text of a single BibTeX entry
     * @return the essay, or {@code null} if a required field is missing
     */
    private static Essay parseEntry(String bibtex) {
        String author = fieldValue(bibtex, "author");
        String title = fieldValue(bibtex, "title");
        String bookTitle = fieldValue(bibtex, "booktitle");
        String month = fieldValue(bibtex, "month");
        String year = fieldValue(bibtex, "year");
        if (author == null || title == null || bookTitle == null || month == null || year == null) {
            return null;
        }
        Essay essay = new Essay();
        essay.setAuthor(author);
        essay.setTitle(title);
        essay.setBookTitle(bookTitle);
        // The date is the month immediately followed by the year, e.g. "June2020".
        essay.setDate(month + year);
        return essay;
    }

    /**
     * Returns the brace-delimited value of a named field in a BibTeX entry.
     *
     * <p>Fields are read in order as {@code name = {value},} so an '=' or a
     * field name occurring inside a value cannot be mistaken for a field.
     *
     * @param entry text of a single BibTeX entry
     * @param name  field name to look up (case-insensitive)
     * @return the text between the field's outer braces, or {@code null} if the
     *         field is absent or the entry is malformed
     */
    private static String fieldValue(String entry, String name) {
        int entryOpen = entry.indexOf('{');
        if (entryOpen < 0) {
            return null;
        }
        // The citation key runs from the entry's opening brace to the first comma.
        int separator = entry.indexOf(',', entryOpen);
        while (separator >= 0) {
            int equalsAt = entry.indexOf('=', separator);
            if (equalsAt < 0) {
                return null;
            }
            String key = entry.substring(separator + 1, equalsAt).trim();
            int open = equalsAt + 1;
            while (open < entry.length() && Character.isWhitespace(entry.charAt(open))) {
                open++;
            }
            if (open >= entry.length() || entry.charAt(open) != '{') {
                return null;
            }
            int close = matchingBrace(entry, open);
            if (close < 0) {
                return null;
            }
            if (key.equalsIgnoreCase(name)) {
                return entry.substring(open + 1, close);
            }
            separator = entry.indexOf(',', close);
        }
        return null;
    }

    /**
     * Finds the closing brace that matches the opening brace at {@code open}.
     *
     * @param text text to scan
     * @param open index of an opening brace in {@code text}
     * @return index of the matching closing brace, or -1 if the braces are unbalanced
     */
    private static int matchingBrace(String text, int open) {
        int depth = 0;
        for (int i = open; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c == '{') {
                depth++;
            } else if (c == '}') {
                depth--;
                if (depth == 0) {
                    return i;
                }
            }
        }
        return -1;
    }
}