背景
- 这个脚本用于从保存在 `./html` 目录下的话题页面 HTML 文件中提取文章链接,并把每个文件的链接列表(每行一个)写入 `./html/目录/` 下的同名文件
const fs = require('fs');
const cheerio = require('cheerio');
// Directory that holds the saved topic-page HTML files. The extracted link
// lists are written to its `目录` sub-directory, one output file per input file.
let path = `./html`;

/**
 * Tells whether a directory entry should be processed as an HTML file.
 *
 * Checks the real extension (the end of the name) instead of the second
 * dot-separated segment, so `a.b.html` is accepted and `a.html.bak` is not.
 *
 * @param {string} fileName - Entry name as returned by `fs.readdir`.
 * @returns {boolean} `true` when the name ends with `.html`.
 */
function isHtmlFile(fileName) {
  return fileName.endsWith('.html');
}

/**
 * Collects the `data-link` attribute of every article list item on a page.
 *
 * @param {Function} $ - Document handle returned by `cheerio.load`.
 * @returns {string[]} The link values in document order. Items without a
 *   `data-link` attribute are skipped, because writing `undefined` to a
 *   stream throws.
 */
function collectArticleLinks($) {
  const articleItemSelector =
    '#js_content_overlay > div.album.js_album_container.album-rich_media_area_primary_full > div > div.album__content.js_album_bd > ul li';
  const links = [];
  $(articleItemSelector).each((index, element) => {
    const link = $(element).attr('data-link');
    if (typeof link === 'string') {
      links.push(link);
    }
  });
  return links;
}

// Scan the input directory and, for each HTML file, write its article links
// (one per line) to a file of the same name under `<path>/目录/`.
fs.readdir(path, (err, files) => {
  if (err) {
    console.error(`Failed to read directory ${path}:`, err);
    return;
  }

  // Make sure the output directory exists; otherwise every write stream
  // below would fail with ENOENT.
  const outputDir = path + '/目录';
  try {
    fs.mkdirSync(outputDir, { recursive: true });
  } catch (mkdirErr) {
    console.error(`Failed to create directory ${outputDir}:`, mkdirErr);
    return;
  }

  files.forEach((file) => {
    console.log(file);
    if (!isHtmlFile(file)) {
      return;
    }

    fs.readFile(path + '/' + file, 'utf-8', (readErr, data) => {
      if (readErr) {
        console.error(`Failed to read ${file}:`, readErr);
        return;
      }

      // 1. Extract the table of contents (the article links).
      const links = collectArticleLinks(cheerio.load(data));

      const writeStream = fs.createWriteStream(outputDir + '/' + file, 'utf-8');
      // Without a listener, a stream 'error' event would crash the process.
      writeStream.on('error', (writeErr) => {
        console.error(`Failed to write links for ${file}:`, writeErr);
      });
      links.forEach((link) => {
        writeStream.write(`${link}\n`);
      });
      writeStream.end();
    });
  });
});