jvppeteer 工具
- 依赖
<dependency>
<groupId>io.github.fanyong920</groupId>
<artifactId>jvppeteer</artifactId>
<version>1.1.5</version>
</dependency>
- 工具
package org.xiaog.news.util;
import com.ruiyun.jvppeteer.core.Puppeteer;
import com.ruiyun.jvppeteer.core.browser.Browser;
import com.ruiyun.jvppeteer.core.page.Page;
import com.ruiyun.jvppeteer.options.LaunchOptions;
import com.ruiyun.jvppeteer.options.LaunchOptionsBuilder;
import com.ruiyun.jvppeteer.options.Viewport;
import com.ruiyun.jvppeteer.protocol.network.Cookie;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import java.io.File;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
@Slf4j
public class ChromeUtil {
@Getter
private static Browser browser;
private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36";
private static final String USER_DATA_DIR = "C:/ProgramData/ChromeData";
@Getter
private static String cookie;
public static void openBrowser() {
openBrowser(isWindows() ? USER_DATA_DIR : "/home/chrome/ChromeData");
}
public static void openBrowser(String userDataDir) {
try {
String exePath = isWindows() ? getChromeExePath() : "/usr/bin/google-chrome";
ArrayList<String> argList = new ArrayList<>();
Viewport viewport = new Viewport();
viewport.setWidth(1920);
viewport.setHeight(1024);
viewport.setHasTouch(true);
argList.add("--no-sandbox");
argList.add("--disable-setuid-sandbox");
argList.add("--disable-infobars");
// argList.add("--incognito"); // 无痕模式
argList.add("--disable-gpu");
argList.add("--disable-web-security");
argList.add("--disable-dev-shm-usage");
argList.add("--ignore-certificate-errors");
argList.add("--allow-running-insecure-content");
argList.add("--mute-audio");
argList.add("--start-maximized");
LaunchOptionsBuilder builder = new LaunchOptionsBuilder();
builder.withArgs(argList)
.withHeadless(!isWindows())
.withViewport(viewport)
.withIgnoreDefaultArgs(Arrays.asList("--enable-automation"))
.withIgnoreHTTPSErrors(true)
.withExecutablePath(exePath);
if (userDataDir != null) {
builder = builder.withUserDataDir(userDataDir);
}
LaunchOptions options = builder.build();
browser = Puppeteer.launch(options);
} catch (Exception e) {
e.printStackTrace();
}
}
public static void openPureBrowser() {
openBrowser(null);
}
private static boolean isWindows() {
String osName = System.getProperty("os.name").toLowerCase();
return osName.contains("windows");
}
public static void closeBrowser() {
if (browser != null) {
try {
browser.close();
}catch (Exception e) {
e.printStackTrace();
}
}
}
public static void preHandler(Page page) {
page.setUserAgent(USER_AGENT);
page.setCacheEnabled(true);
page.evaluateOnNewDocument("() => {const newProto = navigator.__proto__;delete newProto.webdriver; navigator.__proto__ = newProto;}");
page.evaluateOnNewDocument("() => {window.chrome = {};window.chrome.app = {InstallState: 'hehe',RunningState: 'haha',getDetails: 'xixi',getIsInstalled: 'ohno',};window.chrome.csi = function () {};window.chrome.loadTimes = function () {};window.chrome.runtime = function () {};}");
page.evaluateOnNewDocument("() => {Object.defineProperty(navigator, 'userAgent', {get: () =>'"+USER_AGENT+"',});}");
page.evaluateOnNewDocument("() => {Object.defineProperties(navigator,{ webdriver:{ get: () => undefined } }) }");
page.evaluateOnNewDocument("() => {Object.defineProperty(navigator, 'plugins', {get: () => [{0: {type: 'application/x-google-chrome-pdf',suffixes: 'pdf',description: 'Portable Document Format',enabledPlugin: Plugin,},description: 'Portable Document Format',filename: 'internal-pdf-viewer',length: 1,name: 'Chrome PDF Plugin',},{0: {type: 'application/pdf',suffixes: 'pdf',description: '',enabledPlugin: Plugin,},description: '',filename: 'mhjfbmdgcfjbbpaeojofohoefgiehjai',length: 1,name: 'Chrome PDF Viewer',},{0: {type: 'application/x-nacl',suffixes: '',description: 'Native Client Executable',enabledPlugin: Plugin,},1: {type: 'application/x-pnacl'," +
"suffixes: '',description: 'Portable Native Client Executable',enabledPlugin: Plugin,},description: '',filename: 'internal-nacl-plugin',length: 2,name: 'Native Client',},],});}");
page.evaluateOnNewDocument("() => {const originalQuery = window.navigator.permissions.query; window.navigator.permissions.query = (parameters) =>parameters.name === 'notifications' ? Promise.resolve({ state: Notification.permission }) : originalQuery(parameters);}");
page.evaluateOnNewDocument("() => {const getParameter = WebGLRenderingContext.getParameter;WebGLRenderingContext.prototype.getParameter = function (parameter) {if (parameter === 37445) {return 'Intel Inc.';}if (parameter === 37446) {return 'Intel(R) Iris(TM) Graphics 6100';}return getParameter(parameter);};}");
page.evaluateOnNewDocument("() =>{ Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] }); }");
}
public static String getPage(String url) {
String html = null;
Page page = null;
try {
page = browser.newPage();
preHandler(page);
page.goTo(url);
setCookie(page.cookies());
html = page.content();
}catch (Exception e) {
e.printStackTrace();
}finally {
if (page != null) {
try {
page.close();
} catch (Exception e) {
e.printStackTrace();
}
}
}
return html;
}
public static String getPage(String url,long delay) {
String html = null;
Page page = null;
try {
page = browser.newPage();
preHandler(page);
page.goTo(url);
Thread.sleep(delay);
page.evaluate("() => window.stop()");
setCookie(page.cookies());
html = page.content();
}catch (Exception e) {
e.printStackTrace();
}finally {
if (page != null) {
try {
page.close();
} catch (Exception e) {
e.printStackTrace();
}
}
}
return html;
}
public static void setCookie(List<Cookie> cookieList) {
cookie = cookieList.stream()
.map(x -> x.getName()+"="+x.getValue())
.collect(Collectors.joining("; "));
}
public static String getPage(String url,String waitElementSelector) {
String html = null;
Page page = null;
try {
page = browser.newPage();
preHandler(page);
page.goTo(url);
page.waitForSelector(waitElementSelector);
setCookie(page.cookies());
html = page.content();
}catch (Exception e) {
e.printStackTrace();
}finally {
if (page != null) {
try {
page.close();
} catch (Exception e) {
e.printStackTrace();
}
}
}
return html;
}
public static String getPageByScroll(String url,int delay,int scrollNum,long scrollDelay) {
String html = null;
Page page = null;
try {
page = browser.newPage();
preHandler(page);
page.goTo(url);
Thread.sleep(delay);
setCookie(page.cookies());
long half = scrollDelay / 2;
for (int i = 0; i < scrollNum; i++) {
Thread.sleep(half);
page.evaluate("() => window.scrollTo({top: document.body.scrollHeight, behavior:\"smooth\"})");
Thread.sleep(half);
page.evaluate("() => window.scrollTo({top: 0, behavior:\"smooth\"})");
}
html = page.content();
}catch (Exception e) {
e.printStackTrace();
}finally {
if (page != null) {
try {
page.close();
} catch (Exception e) {
e.printStackTrace();
}
}
}
return html;
}
public static String getPageByScroll(String url,int scrollNum,long scrollDelay,String waitElementSelector) {
String html = null;
Page page = null;
try {
page = browser.newPage();
preHandler(page);
page.goTo(url);
page.waitForSelector(waitElementSelector);
setCookie(page.cookies());
long half = scrollDelay / 2;
for (int i = 0; i < scrollNum; i++) {
Thread.sleep(half);
page.evaluate("() => window.scrollTo({top: document.body.scrollHeight, behavior:\"smooth\"})");
Thread.sleep(half);
page.evaluate("() => window.scrollTo({top: 0, behavior:\"smooth\"})");
}
html = page.content();
}catch (Exception e) {
e.printStackTrace();
}finally {
if (page != null) {
try {
page.close();
} catch (Exception e) {
e.printStackTrace();
}
}
}
return html;
}
public static Object executeJs(String jsCode) {
try {
Page page = browser.pages().get(0);
return page.evaluate(jsCode);
}catch (Exception e) {
e.printStackTrace();
}
return null;
}
private static String getChromeExePath() {
File exeFile = Paths.get(System.getProperty("user.home"),"\\AppData\\Local\\Google\\Chrome\\Application\\chrome.exe").toFile();
if (exeFile.exists()) {
return exeFile.getAbsolutePath();
}
return "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe";
}
}
不积跬步，无以至千里。