博主首页

爬取网站记录,爬取动态生成网页,java爬取动态生成网页

        <!-- Maven dependency on jsoup, which supplies the Jsoup, Document and Element
             classes used by the main method in this post. -->
        <!-- NOTE(review): 1.13.1 is pinned here; check whether a newer jsoup release
             should be used before copying this into a real project. -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.13.1</version>
        </dependency>
    /**
     * Fetches a single chapter page, cuts the chapter text out of the serialized
     * {@code <body>} markup, and prints it to standard output in 50-character
     * lines, each followed by a blank line.
     *
     * <p>The chapter text is taken to be everything between the opening
     * {@code <div id="content">} tag and the first following occurrence of
     * {@code read4()}. Non-breaking-space entities, {@code <br>} tags, newlines
     * and plain spaces are stripped from that slice before printing.
     *
     * <p>jsoup only parses the HTML returned by the server; it does not run
     * JavaScript, so content that the page builds client-side is not visible here.
     *
     * @param args ignored
     * @throws IllegalStateException if either marker is missing from the page
     * @throws Exception if the page cannot be fetched or parsed
     */
    public static void main(String[] args) throws Exception {
        final String startMarker = "<div id=\"content\">";
        final String endMarker = "read4()";
        final int lineWidth = 50;

        Document document = Jsoup.connect("https://www.xbiquge.la/29/29770/14341237.html").get();
        String html = document.body().toString();

        // Fail with a readable message instead of a StringIndexOutOfBoundsException
        // when the site layout changes or an error page is returned.
        int markerPos = html.indexOf(startMarker);
        if (markerPos < 0) {
            throw new IllegalStateException("Start marker not found in page: " + startMarker);
        }
        // Start after the opening tag so the tag itself is not printed as chapter text.
        int start = markerPos + startMarker.length();
        int end = html.indexOf(endMarker, start);
        if (end < 0) {
            throw new IllegalStateException("End marker not found after content: " + endMarker);
        }

        // NOTE(review): any markup sitting between the chapter text and "read4()"
        // is kept as-is - confirm against the live page whether more trimming is needed.
        // These are literal replacements, so String.replace is used rather than the
        // regex-based replaceAll.
        String text = html.substring(start, end)
                .replace("&nbsp;", "")
                .replace("<br>", "")
                .replace("\n", "")
                .replace(" ", "");

        // Walk the text by index instead of re-copying the remainder on every pass.
        // A chunk is printed only while more than lineWidth characters remain, so the
        // final line holds between 1 and lineWidth characters (or is empty for empty text).
        int pos = 0;
        while (text.length() - pos > lineWidth) {
            System.out.println(text.substring(pos, pos + lineWidth));
            System.out.println();
            pos += lineWidth;
        }
        System.out.println(text.substring(pos));
    }

 

posted @ 2021-07-05 17:22  笑~笑  阅读(117)  评论(1)  编辑  收藏  举报