首图 首都图书馆 国家图书馆 自动查借阅书籍 脚本puppeteer汇总

(15条消息) puppeteer_彭争杰的博客-CSDN博客

api: https://github.com/puppeteer/puppeteer/blob/main/docs/api.md

examples: https://github.com/puppeteer/puppeteer/tree/main/examples

 阮一峰:js 异步操作 - JavaScript 教程 - 网道 (wangdoc.com) 

      Promise 对象 - ECMAScript 6入门 (ruanyifeng.com)

 Node: Puppeteer + 图像识别 实现百度指数爬虫 - 掘金 (juejin.cn)

Puppeteer 用来做爬虫太 Low 了!但用在这里很合适! - 掘金 (juejin.cn)

使用puppeteer控制浏览器中视频播放 - 掘金 (juejin.cn)

奶奶都能轻松入门的 Puppeteer 教程 - 掘金 (juejin.cn)

 

cnpm install puppeteer-core   默认不自带浏览器

cnpm install puppeteer -S

国家图书馆 借书查询 国图

const puppeteer = require('puppeteer');
/**
 * National Library of China (opac.nlc.cn): sign in with every reader card in
 * `names` and print the books that card currently has on loan.
 *
 * Every page is closed in a `finally` block and the browser is closed even
 * when a card fails, so no Chrome process is left behind.
 */
(async () => {
  // Password typed into the `bor_verification` field for every card (placeholder value).
  const secret = "808080";

  // Reader card number -> display name. Map keys are unique, so a repeated
  // card number replaces the earlier entry.
  const names = new Map();
  names.set("8888888888888888", "user1");
  names.set("8888888888888888", "user2");

  // Link on the account page whose text is the number of books on loan.
  const loanCountLink = '#history > a:nth-child(1) > table > tbody > tr:nth-child(1) > td.td1 > a';

  const browser = await puppeteer.launch({
    // CHROME_PATH overrides the default install location below.
    executablePath: process.env.CHROME_PATH || "C:\\Program Files\\Google\\Chrome Dev\\Application\\chrome.exe",
    headless: true,
    args: ['--start-maximized'],
    defaultViewport: { width: 1800, height: 1000 },
    slowMo: 0
  });

  /**
   * Sign in with one reader card and print its current loans.
   *
   * @param {string} card - Reader card number typed into the `bor_id` field.
   * @param {string} name - Display name, only used for logging.
   * @returns {Promise<Array<Object>>} One object per borrowed book (title and
   *   due date); an empty array when nothing is on loan or the lookup failed.
   */
  async function visitNationalLibrary(card, name) {
    const page = await browser.newPage();
    // To see console output produced inside the page, attach a listener:
    // page.on('console', msg => console.log('- [Browser Console Log]', msg.text()));
    console.log('---------------------------');
    console.log(card + ' ' + name);
    try {
      // timeout: 0 disables the navigation timeout.
      await page.goto("http://opac.nlc.cn/F", {
        waitUntil: 'load',
        timeout: 0
      });
      await page.waitForXPath("/html/body/form/center/div/table/tbody/tr/td[2]/input");
      await page.type("input[name=bor_id]", card, { delay: 100 });
      await page.type("input[name=bor_verification]", secret);
      await page.$eval('form[name=form1]', form => form.submit());
      await page.waitForXPath('//*[@id="history"]/a[1]/table/tbody/tr[1]/td[2]/a');

      // Only open the loan list when the counter link shows a positive number.
      const hasLoans = await page.evaluate((selector) => {
        const link = document.querySelector(selector);
        return link !== null && Number.parseInt(link.textContent.trim(), 10) > 0;
      }, loanCountLink);
      if (!hasLoans) {
        console.log("No Book.");
        return [];
      }

      await page.click(loanCountLink);
      await page.waitForSelector('#baseinfo > center > table:nth-child(6) > tbody > tr.tr1 > th:nth-child(5)');
      const loans = await page.evaluate(() => {
        // tr:nth-child(n+2) skips the first row of the table body.
        const rows = Array.from(document.querySelectorAll('#baseinfo > center > table:nth-child(6) > tbody > tr:nth-child(n+2)'));
        return rows
          .map(tr => Array.from(tr.querySelectorAll('td')))
          .filter(cells => cells.length > 0)
          .map(cells => ({
            "题名": cells[3]?.innerText.trim(),
            "应还日期": cells[5]?.innerText.trim(),
          }));
      });
      console.log(loans); // print the borrowed books
      return loans;
    } catch (err) {
      console.error(err);
      return [];
    } finally {
      // Runs on every path, including the early "No Book." return.
      await page.close();
    }
  }

  try {
    for (const [card, name] of names) {
      await visitNationalLibrary(card, name);
    }
  } finally {
    await browser.close();
  }
})().catch(e => { console.error(e) });

首都图书馆自动登录查询脚本

需要安装 tesseract.js

cnpm install tesseract.js (用淘宝国内源 cnpm)

const puppeteer = require('puppeteer');
const fs = require('fs');
const Tesseract = require('tesseract.js');
// Browser executable handed to puppeteer.launch(). Set the CHROME_PATH
// environment variable to use a different browser, for example
//   "C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe"
// `||` rather than `??`: an empty CHROME_PATH must also fall back to the default.
const chrome_path = process.env.CHROME_PATH || "C:\\Program Files\\Google\\Chrome Dev\\Application\\chrome.exe";
/**
 * Solve the login captcha with OCR and submit the login form, retrying up to
 * five times.
 *
 * Each attempt downloads the captcha image inside the page, runs Tesseract on
 * it, types the recognised code and presses Enter. An attempt counts as failed
 * when a visible `.error` element is present afterwards; a missing or hidden
 * `.error` element is taken to mean the login went through.
 *
 * The form is submitted with an explicit Enter key press. Previously the raw
 * OCR text was typed as-is, so submission depended on that text happening to
 * end in a newline (presumably typed by Puppeteer as Enter - confirm against
 * the Puppeteer keyboard documentation).
 *
 * An alternative to the fixed delay is to race `page.waitForNavigation()`
 * against `page.waitForSelector('.error')` and branch on whichever settles
 * first.
 *
 * @param {object} page - Puppeteer page showing the login form, with user name
 *   and password already filled in.
 * @returns {Promise<boolean>} true when an attempt ended without a visible
 *   `.error` element, false when every attempt failed.
 */
async function handleCaptcha(page) {
  const maxAttempts = 5;
  const settleDelayMs = 2000;
  const captchaImageSelector = '#loginform-verifycode-image';
  const captchaInputSelector = "input[id=loginform-verifycode]";

  // Clicking the captcha image requests a new one; give it time to load.
  const refreshCaptcha = async () => {
    await page.click(captchaImageSelector);
    await page.waitForTimeout(settleDelayMs);
  };

  let loginSuccess = false;
  for (let attempt = 0; attempt < maxAttempts && !loginSuccess; attempt++) {
    // Download the captcha with fetch() inside the page so the request carries
    // the page's cookies, then ship the bytes back as a plain array.
    const captchaSrc = await page.$eval(captchaImageSelector, img => img.src);
    const bytes = await page.evaluate(async (src) => {
      const response = await fetch(src);
      const arrayBuffer = await response.arrayBuffer();
      return Array.from(new Uint8Array(arrayBuffer));
    }, captchaSrc);

    const { data: { text } } = await Tesseract.recognize(
      Buffer.from(bytes), // image data as a Node.js Buffer
      'eng',
      {
        //logger: info => console.log(info) // optional progress logging
      }
    );
    console.log('识别结果:', text);

    // The OCR text carries line breaks and stray spaces that are not part of the code.
    const code = text.replace(/\s+/g, '');
    if (code === '') {
      console.log("识别结果为空,马上重试!");
      // Re-reading the same image would most likely be empty again.
      await refreshCaptcha();
      continue;
    }

    await page.waitForXPath("/html/body/div/div[2]/div/form/div[3]/div/div[1]/input");
    // Clear whatever a previous attempt typed.
    await page.$eval(captchaInputSelector, input => { input.value = ""; });
    await page.type(captchaInputSelector, code, { delay: 10 });
    await page.keyboard.press('Enter');

    // Give the site time to validate before looking for an error message.
    await page.waitForTimeout(settleDelayMs);
    const errorElement = await page.$('.error');
    const errorShown = errorElement !== null
      && (await page.evaluate(el => el.style.display, errorElement)) !== 'none';

    if (!errorShown) {
      // No .error element (the page has moved on) or it is hidden: accepted.
      loginSuccess = true;
      continue;
    }

    console.log('验证码不正确,请刷新后重试');
    await refreshCaptcha();
    if (attempt + 1 < maxAttempts) {
      console.log(`登录失败,已刷新。正在尝试第 ${attempt + 2} 次...`);
    }
  }
  console.log(`验证码验证结果:`, loginSuccess);
  return loginSuccess;
}
/**
 * Print the loans table of the current page to the Node.js console.
 *
 * Reads every row of `#w0 table.table`, taking the title from the second cell
 * and the due date from the fourth; a missing cell yields null.
 *
 * @param {object} page - Puppeteer page showing the loans table.
 * @returns {Promise<void>}
 */
async function showTable(page) {
  const loans = await page.$$eval('#w0 table.table tbody tr', (tableRows) => {
    // This callback runs inside the browser page, so it must be self-contained.
    const result = [];
    for (const tr of tableRows) {
      const titleCell = tr.querySelector('td:nth-child(2)');
      const dueDateCell = tr.querySelector('td:nth-child(4)');
      result.push({
        title: titleCell ? titleCell.innerText : null,
        dueDate: dueDateCell ? dueDateCell.innerText : null,
      });
    }
    return result;
  });
  console.log(loans);
}
//----------------- main ------------------------------------
/**
 * Capital Library (www.clcn.net.cn): sign in with every reader card in
 * `names`, print its current loans and sign out again.
 *
 * A failure for one card is logged and the next card is still processed; the
 * process exit code is set to 1 when any card failed. Pages and the browser
 * are always closed.
 */
(async () => {
  const loginUrl = 'https://www.clcn.net.cn/user/auth/login';
  // Password typed for every card (placeholder value).
  const password = '123456';
  const loansButtonSelector = "#borInfo > div.borrows > div > div:nth-child(1) > ul > button";
  const exit_selector = '#container > div.container.user > div > div.userinfo > div > div.col-lg-4.col-md-4.col-sm-4.col-xs-12.userinfo-btn > form > button';

  // Reader card number -> display name.
  const names = new Map();
  names.set("88888", "name0");
  names.set("9999", "name1");

  const browser = await puppeteer.launch({
    executablePath: chrome_path,
    headless: true,
    args: ['--start-maximized', '--no-sandbox', '--disable-setuid-sandbox'],
    defaultViewport: { width: 1800, height: 1000 },
    slowMo: 0,
    //dumpio: true,
    timeout: 60000,
  });

  /**
   * Sign out so the next card starts from the login form. Failures are only
   * logged so they cannot hide an earlier, more relevant error.
   */
  async function signOut(page) {
    try {
      await page.click(exit_selector);
    } catch (err) {
      console.error('Sign-out failed:', err);
    }
  }

  /**
   * Log in with one card, print its loans and sign out.
   *
   * @param {string} card - Reader card number typed into the user name field.
   * @param {string} name - Display name, only used for logging.
   * @returns {Promise<boolean>} true when the loans were printed, false when
   *   login or the lookup failed.
   */
  async function visit(card, name) {
    const page = await browser.newPage();
    console.log(card + ' ' + name);
    try {
      await page.goto(loginUrl, {
        waitUntil: 'domcontentloaded',
        timeout: 10000
      });
      await page.waitForXPath("/html/body/div/div[2]/div/form/div[1]/input");
      const inputElement = await page.$('input[name="LoginForm[username]"]');
      await inputElement.type(card, { delay: 10 });
      await page.waitForXPath("/html/body/div/div[2]/div/form/div[2]/div/input");
      await page.type("input[id=loginform-password]", password, { delay: 10 });

      const loginSuccess = await handleCaptcha(page);
      if (!loginSuccess) {
        // Still on the login form: there is no sign-out button to click.
        console.error(`Login failed for card ${card}; skipping.`);
        return false;
      }

      try {
        await page.waitForSelector(loansButtonSelector);
        await page.click(loansButtonSelector);
        await page.waitForXPath("/html/body/div/div[2]/div/div[3]/div[2]/div/table");
        await showTable(page);
      } finally {
        // Sign out even when reading the table failed.
        await signOut(page);
      }
      return true;
    } catch (err) {
      console.error(err);
      return false;
    } finally {
      await page.close();
    }
  }

  try {
    console.log();
    for (const [card, name] of names) {
      const succeeded = await visit(card, name);
      if (!succeeded) {
        process.exitCode = 1;
      }
    }
  } finally {
    await browser.close();
  }
})().catch(err => {
  console.error(err);
  process.exit(1);
});

https://datacadamia.com/web/dom/innerhtml

首都图书馆的老接口:

const puppeteer = require('puppeteer');
const fs = require('fs');
const Tesseract = require('tesseract.js');
// Browser executable handed to puppeteer.launch(). Set the CHROME_PATH
// environment variable to use a different browser, for example
//   "C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe"
// `||` rather than `??`: an empty CHROME_PATH must also fall back to the default.
const chrome_path = process.env.CHROME_PATH || "C:\\Program Files\\Google\\Chrome Dev\\Application\\chrome.exe";
/**
 * Open "My Account" on the old Primo interface and print the current loans.
 *
 * Each loan is printed as an object keyed by the table's own header labels
 * (columns 3, 5 and 8: title, due date and location); `title`, `dueDate` and
 * `location` are used as keys when a header cell is missing.
 *
 * @param {object} page - Puppeteer page of a signed-in session.
 * @param {object} [options]
 * @param {number} [options.rowTimeoutMs=5000] - How long to wait for the first
 *   loan row before concluding that nothing is on loan.
 * @returns {Promise<void>}
 */
async function showTable(page, { rowTimeoutMs = 5000 } = {}) {
  const myprofile = '#exlidMyAccount > a';
  const result_table = '#LoansTable';
  const table_fullpath = '#LoansTable > tbody > tr';

  await page.waitForSelector(myprofile);
  await page.click(myprofile);
  await page.waitForSelector(result_table);

  // Presumably an account without loans never renders a body row, so a timeout
  // here is treated as "no loans" rather than as a failure - TODO confirm.
  try {
    await page.waitForSelector(table_fullpath, { timeout: rowTimeoutMs });
  } catch (err) {
    if (err.name !== 'TimeoutError') {
      throw err;
    }
  }

  const data = await page.$$eval(table_fullpath, rows => {
    // IMPORTANT: this callback runs inside the browser page. console.log output
    // goes to the page's console (attach page.on('console', ...) to see it) and
    // Node.js breakpoints are never hit here.
    const headerRow = document.querySelector('#LoansTable > thead > tr');
    const headerCells = headerRow ? Array.from(headerRow.querySelectorAll('th')) : [];
    const labelAt = (index, fallback) => headerCells[index]?.innerText.trim() || fallback;
    const textOf = (row, selector) => {
      const cell = row.querySelector(selector);
      return cell ? cell.innerText : null;
    };
    const titleKey = labelAt(2, 'title');
    const dueDateKey = labelAt(4, 'dueDate');
    const locationKey = labelAt(7, 'location');

    return rows.map(row => ({
      [titleKey]: textOf(row, '[id^="titleSTL"]'),
      [dueDateKey]: textOf(row, '[id^="dueDateSTL"]'),
      [locationKey]: textOf(row, '[id^="locationSTL"]'),
    }));
  });

  if (data.length === 0) {
    console.log("=== No table. You don't borrow the book. ===");
    return;
  }
  console.log('total books:' + data.length);
  console.log(data);
}
//----------------- main ------------------------------------
// Declared at file level so the browser can be closed from the cleanup path below.
let browser;
// Login page of the old Primo interface. The trailing semicolon matters:
// without it the parenthesised async function below is parsed as a call on
// this string and the script dies with a TypeError.
const url = 'https://primo.clcn.net.cn/primo_library/libweb/action/loginpage.do?targetURL=https%3a%2f%2fprimo.clcn.net.cn%2fprimo_library%2flibweb%2faction%2fsearch.do%3fvid%3dST%26amp%3bdscnt%3d0%26amp%3bdstmp%3d1705632515141%26amp%3binitializeIndex%3dtrue&isMobile=false';
// New interface, for reference: https://www.clcn.net.cn/user/my/index

/**
 * Capital Library, old Primo interface: sign in with every reader card in
 * `names`, print its loans and sign out. The browser is closed exactly once,
 * and the exit code is non-zero only when the run itself failed.
 */
(async () => {
  // Password typed for every card (placeholder value).
  const password = '888888';
  const loginButtonSelector = "body > div.EXLPRMLoginCard.layout-align-center-start.layout-row > div.EXLPRMLoginColumn.layout-column.flex-xs-100.flex-sm-100.flex-25 > form > md-card > div.md-actions.layout-align-end-center.layout-row > a.EXLPRMLoginButtonSubmit.md-button.md-ink-ripple";
  const exit_selector = '#exlidSignOut > a';

  // Reader card number -> display name.
  const names = new Map();
  names.set("000000000000", "zhang san");
  names.set("888888888888", "li si");

  /**
   * Log in with one card, print its loans and sign out. Errors are logged and
   * swallowed so the remaining cards are still processed.
   *
   * @param {string} card - Reader card number typed into the user name field.
   * @param {string} name - Display name, only used for logging.
   * @returns {Promise<void>}
   */
  async function visit(card, name) {
    const page = await browser.newPage();
    let signedIn = false;
    try {
      console.log(card + ' ' + name);
      await page.goto(url, {
        waitUntil: 'domcontentloaded',
        timeout: 10000
      });
      // Login form.
      await page.waitForXPath("/html/body/div[2]/div[2]/form/md-card/md-card-content/md-input-container[1]/input");
      const inputElement = await page.$('input[name="username"]');
      await inputElement.type(card, { delay: 10 });
      await page.type("input[id=input_1]", password, { delay: 10 });
      await page.click(loginButtonSelector);
      // Presumably this link only exists once the login succeeded - TODO confirm.
      await page.waitForXPath("/html/body/div[1]/div[2]/div[2]/ul/li[3]/a");
      signedIn = true;
      await showTable(page);
    } catch (error) {
      console.error(error);
    } finally {
      if (signedIn) {
        // Sign out even when reading the table failed, so the next card starts
        // from the login form.
        try {
          await page.click(exit_selector);
        } catch (error) {
          console.error(error);
        }
      }
      await page.close();
    }
  }

  try {
    browser = await puppeteer.launch({
      executablePath: chrome_path,
      headless: false,
      args: ['--no-sandbox', '--disable-setuid-sandbox'],//'--start-maximized',
      defaultViewport: { width: 1800, height: 1000 },
      slowMo: 0,
      //dumpio: true,
      timeout: 60000,
    });
    for (const [card, name] of names) {
      await visit(card, name);
    }
    console.log("https://www.clcn.net.cn/user/my/index");
  } finally {
    // browser is still undefined when launch() itself failed.
    if (browser) {
      await browser.close();
    }
  }
})().catch(err => {
  console.error(err);
  process.exitCode = 1;
});

完全替换元素内容:

// Assigning innerHTML with a plain `=` discards the element's existing children
// and replaces them with the parsed fragment; `+=` would append to them instead.
let htmlFragment = "<p>Replacing the whole body node content with a paragraph</p>";
document.body.innerHTML = htmlFragment;

通过 innerHTML 插入的 script 元素不会被执行

DOM - insertAdjacentElement / insertAdjacentHTML 比 appendChild 更灵活(可以指定插入位置);用 createElement 创建后再插入的 script 会被执行

https://datacadamia.com/web/dom/insertadjacent

// Build a <script> element in code and insert it with position 'beforeend',
// i.e. as the last child of <body>. Despite its name, `bodySibling` therefore
// ends up inside <body>, not next to it. Unlike markup assigned through
// innerHTML, a script element created this way is run when it is inserted.
let bodySibling = document.createElement('script');
bodySibling.text = 'console.log("Hello World !");';
document.body.insertAdjacentElement('beforeend', bodySibling);

插入element

// Create a paragraph and insert it with position 'afterbegin', i.e. as the
// first child of <body>.
let pSibling = document.createElement('p');
pSibling.innerText = 'A paragraph';
document.body.insertAdjacentElement('afterbegin', pSibling);

插入html

// Position 'afterend' places the parsed markup directly after the <body>
// element itself, as its next sibling, rather than inside it.
document.body.insertAdjacentHTML('afterend', '<p>Body Sibling HTML</p>');

注入js

https://www.tabnine.com/code/javascript/functions/puppeteer/Page/%2524eval

/**
 * Open WhatsApp Web in a visible browser, wait for the chat icon that marks a
 * logged-in session, then inject two local scripts into the page.
 *
 * The profile is stored in ./ChromeSession under the current working
 * directory. The browser is left open on purpose.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const sessionDir = path.join(process.cwd(), "ChromeSession");
  const browser = await puppeteer.launch({ headless: false, userDataDir: sessionDir });
  const page = await browser.newPage();

  // timeout: 0 disables the timeout for both the navigation and the wait.
  const navigationOptions = { waitUntil: 'networkidle0', timeout: 0 };
  await page.goto('https://web.whatsapp.com', navigationOptions);

  const waitOptions = { polling: 1000, timeout: 0 };
  await page.waitForSelector('*[data-icon=chat]', waitOptions);
  console.log("Logged in!");

  // Inject the helper scripts that live next to this file, in this order.
  for (const scriptName of ["WAPI.js", "inject.js"]) {
    const scriptPath = path.join(__dirname, scriptName);
    await page.addScriptTag({ path: require.resolve(scriptPath) });
  }
  //await browser.close();
}

获取元素用 page.$ 或 page.evaluate

https://helloworldmaster.com/article/get-a-dom-element-using-puppeteer

posted @   Bigben  阅读(161)  评论(0)  编辑  收藏  举报
相关博文:
阅读排行:
· DeepSeek 开源周回顾「GitHub 热点速览」
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· AI与.NET技术实操系列(二):开始使用ML.NET
· 单线程的Redis速度为什么快?
历史上的今天:
2019-04-11 Spring Boot整合Elasticsearch
点击右上角即可分享
微信分享提示