jvppeteer 工具

  1. 依赖
<dependency>
  <groupId>io.github.fanyong920</groupId>
  <artifactId>jvppeteer</artifactId>
  <version>1.1.5</version>
</dependency>
  2. 工具

package org.xiaog.news.util;

import com.ruiyun.jvppeteer.core.Puppeteer;
import com.ruiyun.jvppeteer.core.browser.Browser;
import com.ruiyun.jvppeteer.core.page.Page;
import com.ruiyun.jvppeteer.options.LaunchOptions;
import com.ruiyun.jvppeteer.options.LaunchOptionsBuilder;
import com.ruiyun.jvppeteer.options.Viewport;
import com.ruiyun.jvppeteer.protocol.network.Cookie;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;

import java.io.File;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

@Slf4j
public class ChromeUtil {

    @Getter
    private static Browser browser;

    private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36";

    private static final String USER_DATA_DIR = "C:/ProgramData/ChromeData";

    @Getter
    private static String cookie;

    public static void openBrowser() {
        openBrowser(isWindows() ? USER_DATA_DIR : "/home/chrome/ChromeData");
    }

    public static void openBrowser(String userDataDir) {
        try {
            String exePath = isWindows() ? getChromeExePath() : "/usr/bin/google-chrome";
            ArrayList<String> argList = new ArrayList<>();
            Viewport viewport = new Viewport();
            viewport.setWidth(1920);
            viewport.setHeight(1024);
            viewport.setHasTouch(true);

            argList.add("--no-sandbox");
            argList.add("--disable-setuid-sandbox");
            argList.add("--disable-infobars");
//            argList.add("--incognito"); // 无痕模式
            argList.add("--disable-gpu");
            argList.add("--disable-web-security");
            argList.add("--disable-dev-shm-usage");
            argList.add("--ignore-certificate-errors");
            argList.add("--allow-running-insecure-content");
            argList.add("--mute-audio");
            argList.add("--start-maximized");


            LaunchOptionsBuilder builder = new LaunchOptionsBuilder();
            builder.withArgs(argList)
                    .withHeadless(!isWindows())
                    .withViewport(viewport)
                    .withIgnoreDefaultArgs(Arrays.asList("--enable-automation"))
                    .withIgnoreHTTPSErrors(true)
                    .withExecutablePath(exePath);
            if (userDataDir != null) {
                builder = builder.withUserDataDir(userDataDir);
            }
            LaunchOptions options = builder.build();
            browser = Puppeteer.launch(options);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    public static void openPureBrowser() {
        openBrowser(null);
    }

    private static boolean isWindows() {
        String osName = System.getProperty("os.name").toLowerCase();
        return osName.contains("windows");
    }

    public static void closeBrowser() {
        if (browser != null) {
            try {
                browser.close();
            }catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    public static void preHandler(Page page) {
        page.setUserAgent(USER_AGENT);
        page.setCacheEnabled(true);
        page.evaluateOnNewDocument("() => {const newProto = navigator.__proto__;delete newProto.webdriver; navigator.__proto__ = newProto;}");
        page.evaluateOnNewDocument("() => {window.chrome = {};window.chrome.app = {InstallState: 'hehe',RunningState: 'haha',getDetails: 'xixi',getIsInstalled: 'ohno',};window.chrome.csi = function () {};window.chrome.loadTimes = function () {};window.chrome.runtime = function () {};}");
        page.evaluateOnNewDocument("() => {Object.defineProperty(navigator, 'userAgent', {get: () =>'"+USER_AGENT+"',});}");
        page.evaluateOnNewDocument("() => {Object.defineProperties(navigator,{ webdriver:{ get: () => undefined } }) }");
        page.evaluateOnNewDocument("() => {Object.defineProperty(navigator, 'plugins', {get: () => [{0: {type: 'application/x-google-chrome-pdf',suffixes: 'pdf',description: 'Portable Document Format',enabledPlugin: Plugin,},description: 'Portable Document Format',filename: 'internal-pdf-viewer',length: 1,name: 'Chrome PDF Plugin',},{0: {type: 'application/pdf',suffixes: 'pdf',description: '',enabledPlugin: Plugin,},description: '',filename: 'mhjfbmdgcfjbbpaeojofohoefgiehjai',length: 1,name: 'Chrome PDF Viewer',},{0: {type: 'application/x-nacl',suffixes: '',description: 'Native Client Executable',enabledPlugin: Plugin,},1: {type: 'application/x-pnacl'," +
                "suffixes: '',description: 'Portable Native Client Executable',enabledPlugin: Plugin,},description: '',filename: 'internal-nacl-plugin',length: 2,name: 'Native Client',},],});}");
        page.evaluateOnNewDocument("() => {const originalQuery = window.navigator.permissions.query; window.navigator.permissions.query = (parameters) =>parameters.name === 'notifications'  ?  Promise.resolve({ state: Notification.permission })   :  originalQuery(parameters);}");
        page.evaluateOnNewDocument("() => {const getParameter = WebGLRenderingContext.getParameter;WebGLRenderingContext.prototype.getParameter = function (parameter) {if (parameter === 37445) {return 'Intel Inc.';}if (parameter === 37446) {return 'Intel(R) Iris(TM) Graphics 6100';}return getParameter(parameter);};}");
        page.evaluateOnNewDocument("() =>{ Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] }); }");
    }

    public static String getPage(String url) {
        String html = null;
        Page page = null;
        try {
            page = browser.newPage();
            preHandler(page);
            page.goTo(url);
            setCookie(page.cookies());
            html = page.content();
        }catch (Exception e) {
            e.printStackTrace();
        }finally {
            if (page != null) {
                try {
                    page.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
        return html;
    }

    public static String getPage(String url,long delay) {
        String html = null;
        Page page = null;
        try {
            page = browser.newPage();
            preHandler(page);
            page.goTo(url);
            Thread.sleep(delay);
            page.evaluate("() => window.stop()");
            setCookie(page.cookies());
            html = page.content();
        }catch (Exception e) {
            e.printStackTrace();
        }finally {
            if (page != null) {
                try {
                    page.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
        return html;
    }

    public static void setCookie(List<Cookie> cookieList) {
        cookie = cookieList.stream()
                .map(x -> x.getName()+"="+x.getValue())
                .collect(Collectors.joining("; "));
    }

    public static String getPage(String url,String waitElementSelector) {
        String html = null;
        Page page = null;
        try {
            page = browser.newPage();
            preHandler(page);
            page.goTo(url);
            page.waitForSelector(waitElementSelector);
            setCookie(page.cookies());
            html = page.content();
        }catch (Exception e) {
            e.printStackTrace();
        }finally {
            if (page != null) {
                try {
                    page.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
        return html;
    }

    public static String getPageByScroll(String url,int delay,int scrollNum,long scrollDelay) {
        String html = null;
        Page page = null;
        try {
            page = browser.newPage();
            preHandler(page);
            page.goTo(url);
            Thread.sleep(delay);
            setCookie(page.cookies());
            long half = scrollDelay / 2;
            for (int i = 0; i < scrollNum; i++) {
                Thread.sleep(half);
                page.evaluate("() => window.scrollTo({top: document.body.scrollHeight, behavior:\"smooth\"})");
                Thread.sleep(half);
                page.evaluate("() => window.scrollTo({top: 0, behavior:\"smooth\"})");
            }
            html = page.content();
        }catch (Exception e) {
            e.printStackTrace();
        }finally {
            if (page != null) {
                try {
                    page.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
        return html;
    }

    public static String getPageByScroll(String url,int scrollNum,long scrollDelay,String waitElementSelector) {
        String html = null;
        Page page = null;
        try {
            page = browser.newPage();
            preHandler(page);
            page.goTo(url);
            page.waitForSelector(waitElementSelector);
            setCookie(page.cookies());
            long half = scrollDelay / 2;
            for (int i = 0; i < scrollNum; i++) {
                Thread.sleep(half);
                page.evaluate("() => window.scrollTo({top: document.body.scrollHeight, behavior:\"smooth\"})");
                Thread.sleep(half);
                page.evaluate("() => window.scrollTo({top: 0, behavior:\"smooth\"})");
            }
            html = page.content();
        }catch (Exception e) {
            e.printStackTrace();
        }finally {
            if (page != null) {
                try {
                    page.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
        return html;
    }

    public static Object executeJs(String jsCode) {
        try {
            Page page = browser.pages().get(0);
            return page.evaluate(jsCode);
        }catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    private static String getChromeExePath() {
        File exeFile = Paths.get(System.getProperty("user.home"),"\\AppData\\Local\\Google\\Chrome\\Application\\chrome.exe").toFile();
        if (exeFile.exists()) {
            return exeFile.getAbsolutePath();
        }
        return "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe";
    }
}

posted @ 2024-04-28 15:40  小小爬虫  阅读(105)  评论(0编辑  收藏  举报