背景
- 先将微信公众号目录页面的源码提取出来,再从中提取文章链接,最后批量下载这些链接对应的内容。
const fs = require('fs');
const cheerio = require('cheerio');
// Directory that holds the saved page sources (*.txt); the extracted link
// lists are written to the `目录` sub-directory underneath it.
const path = './html';

/**
 * Tell whether a directory entry is a `.txt` source file.
 *
 * Looks at the real extension (text after the LAST dot), so `2020.01.txt`
 * is accepted and `a.txt.bak` is rejected.
 *
 * @param {string} file - Bare file name as returned by `fs.readdir`.
 * @returns {boolean} `true` when the name ends with `.txt`.
 */
function isTxtFile(file) {
  return file.endsWith('.txt');
}

/**
 * Read one saved page source, collect the `hrefs` attribute of every
 * `#js_history_list h4` element and write them, one per line, to a file of
 * the same name inside `<path>/目录/`.
 *
 * Failures are logged and only abort the current file.
 *
 * @param {string} file - Bare file name inside `path`.
 * @returns {void}
 */
function writeLinkList(file) {
  const sourceFile = `${path}/${file}`;
  fs.readFile(sourceFile, 'utf-8', (readErr, data) => {
    if (readErr) {
      console.error(`Failed to read ${sourceFile}:`, readErr);
      return;
    }

    const $ = cheerio.load(data);
    const targetFile = `${path}/目录/${file}`;
    const writeStream = fs.createWriteStream(targetFile, 'utf-8');
    // Without a listener, a stream 'error' event would terminate the process.
    writeStream.on('error', (writeErr) => {
      console.error(`Failed to write ${targetFile}:`, writeErr);
    });

    // 1. Extract the table of contents.
    // NOTE(review): the link is read from the `hrefs` attribute (not `href`),
    // exactly as before — presumably that is how the saved page stores it;
    // confirm against a sample page.
    $('#js_history_list h4').each((index, ele) => {
      const link = $(ele).attr('hrefs');
      // `attr` yields undefined for a missing attribute, and
      // `write(undefined)` throws, so such headings are skipped.
      if (typeof link !== 'string') {
        return;
      }
      writeStream.write(`${link}\n`);
    });
    writeStream.end();
  });
}

fs.readdir(path, (readdirErr, files) => {
  if (readdirErr) {
    console.error(`Failed to list ${path}:`, readdirErr);
    return;
  }

  // Make sure the output directory exists before any write stream is opened.
  const outputDir = `${path}/目录`;
  fs.mkdir(outputDir, { recursive: true }, (mkdirErr) => {
    if (mkdirErr) {
      console.error(`Failed to create ${outputDir}:`, mkdirErr);
      return;
    }

    files.forEach((file) => {
      console.log(file);
      if (isTxtFile(file)) {
        writeLinkList(file);
      }
    });
  });
});