node抓取图片
'use strict';

const https = require('https');
const http = require('http');
const fs = require('fs');
const path = require('path');
const cheerio = require('cheerio');
const request = require('request');

// Only <img> elements whose src contains this marker are downloaded.
const IMAGE_URL_MARKER = 'https://img.3dmgame.com/uploads/images/news';

// Pages to scrape. `title` is the output directory; relative paths are
// resolved against the current working directory.
const list1 = [
  { url: 'https://www.3dmgame.com/gl/3748911.html', title: './img/战士' },
  { url: 'https://www.3dmgame.com/gl/3749617.html', title: './img/猎人' },
  { url: 'https://www.3dmgame.com/gl/3749938.html', title: './img/机器人' },
];

/**
 * Builds the local file name for a downloaded image.
 *
 * The name keeps the image's real extension (falling back to `.png` when the
 * URL has none) and the last six characters of its base name, so the stem can
 * never contain a path separator.
 *
 * @param {string} imgUrl - Image URL taken from an `src` attribute.
 * @param {number} sequence - 1-based position among the images saved for the page.
 * @returns {string} `<sequence>_<stem><extension>`.
 */
function buildImageFilename(imgUrl, sequence) {
  const ext = path.extname(imgUrl);
  const stem = path.basename(imgUrl, ext).slice(-6);
  return `${sequence}_${stem}${ext || '.png'}`;
}

/**
 * Streams one image to disk and logs the outcome.
 *
 * @param {string} imgUrl - Image URL to download.
 * @param {string} destPath - File path to write the image to.
 * @returns {void}
 */
function downloadImage(imgUrl, destPath) {
  const readStream = request(imgUrl);
  const writeStream = fs.createWriteStream(destPath);

  readStream.on('end', () => {
    console.log('文件下载成功');
  });
  readStream.on('error', (err) => {
    console.error(`图片下载失败: ${imgUrl}`, err);
    // Close the file handle; otherwise a failed download leaks the descriptor.
    writeStream.destroy();
  });

  writeStream.on('finish', () => {
    console.log('文件写入成功');
  });
  writeStream.on('error', (err) => {
    console.error(`文件写入失败: ${destPath}`, err);
  });

  readStream.pipe(writeStream);
}

/**
 * Parses a page and downloads every matching image into `title`.
 *
 * An image matches when its `src` contains IMAGE_URL_MARKER and its extension
 * is not `.jpg`.
 *
 * @param {string} html - Page markup.
 * @param {string} title - Existing directory that receives the images.
 * @returns {void}
 */
function saveImages(html, title) {
  const $ = cheerio.load(html);
  const images = $('img');
  console.error(images.length);

  let savedCount = 0;
  images.each((_index, el) => {
    const imgUrl = $(el).attr('src');
    if (!imgUrl || !imgUrl.includes(IMAGE_URL_MARKER)) {
      return;
    }

    const ext = path.extname(imgUrl);
    console.error(`ext=${ext}`);
    if (ext === '.jpg') {
      return;
    }

    console.error(imgUrl);
    savedCount += 1;
    downloadImage(imgUrl, path.join(title, buildImageFilename(imgUrl, savedCount)));
  });
}

/**
 * Fetches an HTML page and saves its matching images under `title`.
 *
 * The directory is created synchronously (including parents) before the
 * request starts. Responses that are not `200` with a `text/html` content
 * type are logged and discarded.
 *
 * @param {string} url - Page URL; `http://` URLs use the http client, anything else https.
 * @param {string} title - Output directory for the downloaded images.
 * @returns {void}
 */
const getImg = (url, title) => {
  fs.mkdirSync(title, { recursive: true });

  const client = url.startsWith('http://') ? http : https;
  client
    .get(url, (res) => {
      const { statusCode } = res;
      const contentType = res.headers['content-type'];
      console.log(statusCode, contentType);

      let err = null;
      if (statusCode !== 200) {
        err = new Error('请求状态错误');
      } else if (!/^text\/html/.test(contentType)) {
        err = new Error('请求类型错误');
      }
      if (err) {
        console.log(err);
        res.resume(); // Discard the body so the connection can be released.
        return;
      }

      // Let the stream decode UTF-8 so a multi-byte character split across
      // two chunks is not corrupted.
      res.setEncoding('utf8');
      let html = '';
      res.on('data', (chunk) => {
        html += chunk;
      });
      res.on('error', (resErr) => {
        console.log('请求错误', resErr);
      });
      res.on('end', () => {
        saveImages(html, title);
        console.log('数据传输完毕');
      });
    })
    .on('error', (err) => {
      console.log('请求错误', err);
    });
};

for (const { url, title } of list1) {
  getImg(url, title);
}
这段脚本先抓取网页上所有图片的地址,再按图片来源和扩展名进行过滤,最后把符合条件的图片下载保存到本地。
posted on 2021-09-08 10:08 gongzhuiau 阅读(77) 评论(0) 编辑 收藏 举报