Puppeteer 前端截图 爬虫

https://zhuanlan.zhihu.com/p/76237595

 

https://juejin.cn/post/6882332163052994574

 

const puppeteer = require('puppeteer');
const fs = require('fs');
const path = require('path');

/**
 * Adds a `hashCode()` method to every string.
 *
 * Walks the string one UTF-16 code unit at a time and folds each unit into
 * an accumulator as `acc * 127 + unit`, wrapping to a signed 32-bit integer
 * after every step.
 *
 * @returns {number} A signed 32-bit integer; 0 for the empty string.
 */
String.prototype.hashCode = function () {
  const unitCount = this.length;
  let acc = 0;
  let pos = 0;
  while (pos < unitCount) {
    // (acc << 7) - acc equals acc * 127; `| 0` wraps the sum to int32.
    acc = ((acc << 7) - acc + this.charCodeAt(pos)) | 0;
    pos += 1;
  }
  return acc;
};

/**
 * Opens `url` in a headless browser and saves every response whose URL
 * matches `upload.../<name>.(jpg|png|svg)` into `images/<hash>/`, where
 * `<hash>` is `url.hashCode()`. The page URL itself is stored in
 * `images/<hash>/url` so the directory can be traced back to its source.
 *
 * @param {string} url - Address of the page to load.
 * @returns {Promise<void>} Resolves once navigation has finished, all image
 *   downloads that started during navigation are settled, and the browser
 *   has been closed.
 * @throws Rejects if the output directory cannot be created, the browser
 *   cannot be launched, or navigation fails. Failures while saving the
 *   `url` file or an individual image are logged and do not reject.
 */
async function getWelfareImage(url) {
  console.log("start..." + url);
  const hash = url.hashCode();
  const outputDir = path.join('images', String(hash));

  // The directory must exist before anything is written into it, so wait
  // for mkdir instead of firing it off with a callback.
  await fs.promises.mkdir(outputDir, { recursive: true });
  try {
    await fs.promises.writeFile(path.join(outputDir, 'url'), url);
  } catch (err) {
    // Best effort: losing the bookkeeping file should not stop the download.
    console.log(err);
  }

  const browser = await puppeteer.launch();
  // Downloads triggered by 'response' events; awaited before closing the
  // browser so in-flight bodies are not lost.
  const pendingSaves = [];
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1280, height: 926 });

    const saveImage = async (response) => {
      const matches = /upload.*\/([^\/]*)\.(jpg|png|svg)$/.exec(response.url());
      if (!matches) {
        return;
      }
      const [, name, extension] = matches;
      try {
        const buffer = await response.buffer();
        await fs.promises.writeFile(
          path.join(outputDir, `image-${name}.${extension}`),
          buffer,
        );
      } catch (err) {
        // One unreadable response must not abort the remaining downloads.
        console.log(err);
      }
    };
    page.on('response', (response) => {
      pendingSaves.push(saveImage(response));
    });

    await page.goto(url, {
      waitUntil: 'networkidle2',
      timeout: 0,
    });
    await Promise.all(pendingSaves);

    console.log("end..." + url);
  } finally {
    // Always release the browser process, even when navigation fails.
    await browser.close();
  }
}


/**
 * Loads the forum index page, collects every `<a>` whose `href` attribute
 * starts with `thread-index-fid-`, and runs `getWelfareImage` on each of
 * those links, one after another.
 *
 * @returns {Promise<void>} Resolves after every collected link has been
 *   processed and the index browser has been closed.
 * @throws Rejects if the `images` directory cannot be created, the index
 *   page fails to load, or any `getWelfareImage` call rejects (remaining
 *   links are then skipped).
 */
async function getAllThreadList() {
  console.log("start...");
  await fs.promises.mkdir("./images", { recursive: true });

  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    // Navigate first and then scrape, rather than scraping inside a 'load'
    // listener: this way the returned promise covers the whole crawl and
    // errors propagate to the caller.
    await page.goto('http://www.btbtt17.com/xxxx', {
      waitUntil: 'load',
      timeout: 0,
    });
    console.log('Page loaded!');

    const urls = await page.evaluate(() => {
      // Runs inside the page, so this log goes to the browser console.
      console.log('Page evaluate!');
      // The attribute selector skips anchors without an href, which would
      // otherwise yield null from getAttribute.
      const links = document.querySelectorAll('a[href^="thread-index-fid-"]');
      return Array.from(links, (link) => link.getAttribute('href'));
    });
    console.log(urls);

    for (const threadPath of urls) {
      await getWelfareImage('http://www.btbtt17.com/' + threadPath);
    }
  } finally {
    // Close the index browser whether or not the crawl succeeded.
    await browser.close();
  }
  console.log("end...");
}

// Alternative entry point: uncomment to crawl every thread linked from the
// forum index instead of the explicit list below.
// getAllThreadList()

// Pages to download images from; fill in before running.
const urls = [
];
for (const pageUrl of urls) {
  // Each page is started without waiting for the previous one. The catch
  // keeps one failing page from surfacing as an unhandled rejection.
  getWelfareImage(pageUrl).catch((err) => {
    console.log(err);
  });
}

 

posted on 2022-09-26 15:19  阿笨猫  阅读(89)  评论(0)  编辑  收藏  举报