Jsoup实现下载layui离线文档

 在银行做项目开发时,不允许连接外网,每次都要经过一番很复杂的操作才能访问官方网站查看文档。想找一份离线的本地版文档也没有找到,所以还是自己动手用 Jsoup 把文档爬到本地更靠谱。

 1 package com.bcc.customer.utils;
 2 
 3 import java.io.File;
 4 import java.io.IOException;
 5 
 6 import org.easitline.common.utils.kit.FileKit;
 7 import org.jsoup.Connection;
 8 import org.jsoup.Jsoup;
 9 import org.jsoup.nodes.Document;
10 import org.jsoup.nodes.Element;
11 import org.jsoup.select.Elements;
12 
13 public class Download {
14 
15     public static void loadPage(String url){
16         Connection connection=Jsoup.connect("https://www.layui.com"+url);
17         try {
18             String[] u=url.split("/");
19             String name=u[u.length-1];
20             String f=u[u.length-2];
21             connection.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36");
22             connection.header("referer","https://www.layui.com/doc/base/infrastructure.html");
23             connection.cookie("cookie", "Hm_lvt_d214947968792b839fd669a4decaaffc=1567350006,1567437784,1567518319,1567518344; Hm_lpvt_d214947968792b839fd669a4decaaffc=1567518950");
24             Document document = connection.get();
25             String title=document.selectFirst("title").text();
26             String html=document.html();
27             html=html.replaceAll("<script async src=\"https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js\"></script>","");
28             html=html.replaceAll("s.parentNode.insertBefore(hm, s);","");
29             html=html.replaceAll("//res.layui.com","/layui-doc");
30             html=html.replaceAll("//res.layui.com/staitc","/layui-doc/static");
31             html=html.replaceAll("/doc","/layui-doc/html");
32             
33             String p="D:/develop/layui-doc/html/"+f;
34             File file=new File(p);
35             if(!file.exists()){
36                 file.mkdirs();
37             }
38             FileKit.saveToFile(html, p+"/"+name);
39         } catch (IOException e) {
40             e.printStackTrace();
41         }
42     }
43     public static void main(String[] args) {
44         
45         Connection connection=Jsoup.connect("https://www.layui.com/doc/base/element.html");
46         try {
47             Document document=connection.get();
48             Elements urls=document.select(".site-tree a");
49             for(Element ele:urls){
50                 String url=ele.attr("href");
51                 System.out.println(url);
52                 loadPage(url);
53             }
54             
55         } catch (IOException e) {
56             e.printStackTrace();
57         }
58     }
59 
60 }

 

 1 public static void saveToFile(InputStream in, String fileName) throws IOException {
 3         FileOutputStream out=null;
 4         try {
 5             fileName=sysPath(fileName);
 6             File outFile = new File(fileName);
 7             if (!outFile.exists()) {
 8                 outFile.createNewFile();
 9             }
10             out = new FileOutputStream(fileName);
11             byte buffer[] = new byte[1024];
12             int read = -1;
13             while ((read = in.read(buffer, 0, 1024)) != -1) {
14                 out.write(buffer, 0, read);
15             }
16             out.flush();
17             
18         } catch (IOException e) {
19             throw e;
20         } catch (Throwable e) {
21             throw new RuntimeException("文件写入错误:fileName="+fileName, e);
22         } finally {
23             if (out != null) {
24                 try {
25                     out.close();
26                 } catch (Exception e1) {
27                 }}
28             if (in != null) {
29                     try {
30                         in.close();
31                     } catch (Exception e1) {
32                     }
33             }
34         }
35     }

 

依赖的 jsoup(1.12.1)jar 包下载地址:https://repo1.maven.org/maven2/org/jsoup/jsoup/1.12.1/jsoup-1.12.1.jar

 

posted @ 2019-10-21 17:32  chenxiaofeng  阅读(1465)  评论(0编辑  收藏  举报