axios+cheerio抓一本小说
axiosNovel.mjs
import axios from 'axios';
import fs from 'fs-extra';
import path from 'path';
import * as cheerio from 'cheerio';

const instance = axios.create();

/** Matches link titles such as "第12章 ..." and captures the chapter number. */
const CHAPTER_TITLE = /第(\d+)章/;

/**
 * Collects the numbered chapter links found under `#list` on the index page.
 *
 * Links whose text does not match CHAPTER_TITLE, or that carry no `href`,
 * are skipped. When two links share a chapter number the later one wins.
 *
 * @param {Function} $ - cheerio instance loaded with the index page.
 * @param {string} baseUrl - URL of the index page; hrefs are resolved against it.
 * @returns {Array<[number, {title: string, href: string}]>} `[chapterNo, chapter]`
 *   pairs sorted by ascending chapter number.
 */
function collectChapters($, baseUrl) {
  // A Map keyed by chapter number avoids a sparse array with holes and still
  // lets a repeated chapter number overwrite the earlier entry.
  const chapters = new Map();
  $('#list a').each((index, ele) => {
    const title = $(ele).text();
    const matched = title.match(CHAPTER_TITLE);
    const href = $(ele).attr('href');
    if (!matched || !href) {
      return;
    }
    chapters.set(Number.parseInt(matched[1], 10), {
      title,
      // Resolve with URL semantics instead of filesystem path joining, so both
      // root-absolute ("/ml/1/2.html") and relative ("2.html") hrefs work and
      // the result does not depend on the OS path separator.
      href: new URL(href, baseUrl).href,
    });
  });
  return [...chapters.entries()].sort(([a], [b]) => a - b);
}

/**
 * Downloads the novel's index page, then every numbered chapter in order, and
 * writes the result to `<novel name>.json` in the current working directory.
 *
 * Chapters are fetched one at a time. Any failed request rejects the returned
 * promise and nothing is written.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const data = {
    url: atob('aHR0cHM6Ly93d3cuM3l0Lm9yZy9tbC8xODk4NDEv'),
    name: '',
    author: '',
    tags: [],
    intro: '',
    list: [],
  };

  const res = await instance.get(data.url);
  const $ = cheerio.load(res.data);
  data.name = $('#info h1').text();
  data.author = $('#info > p:nth-child(2) > a').text();
  data.intro = $('#intro').text();
  console.log(data);

  for (const [chapterNo, chapter] of collectChapters($, data.url)) {
    console.log(chapterNo, chapter.href);
    const page = await instance.get(chapter.href);
    const $page = cheerio.load(page.data);
    data.list.push({
      title: $page('.bookname > h1').text(),
      // Raw inner HTML of the chapter body; null when `#content` is absent.
      info: $page('#content').html(),
    });
  }

  fs.writeFileSync(`${data.name}.json`, JSON.stringify(data, null, 2));
}

main().catch((err) => {
  // Surface the failure and exit non-zero instead of leaving a floating promise.
  console.error(err);
  process.exitCode = 1;
});
buildTxt.mjs
import glob from 'glob';
import fs from 'fs-extra';

/** Named entities decoded by htmlToText; `&amp;` is handled separately, last. */
const NAMED_ENTITIES = { nbsp: ' ', lt: '<', gt: '>', quot: '"', apos: "'" };

/**
 * Converts the scraped HTML fragments to plain text.
 *
 * Every opening/closing `<p>` or `<br>` tag (any case, with or without
 * attributes or a self-closing slash) becomes a newline, then numeric and a
 * few common named entities are decoded.
 *
 * @param {string} html - text that may contain `<p>`/`<br>` tags and entities.
 * @returns {string} the text with those tags and entities replaced.
 */
function htmlToText(html) {
  return html
    .replace(/<\/?(?:p|br)\b[^>]*>/gi, '\n')
    .replace(/&#(x[0-9a-f]+|\d+);/gi, (whole, code) => {
      const point =
        code[0] === 'x' || code[0] === 'X'
          ? Number.parseInt(code.slice(1), 16)
          : Number.parseInt(code, 10);
      // Leave out-of-range references untouched rather than throwing.
      return point <= 0x10ffff ? String.fromCodePoint(point) : whole;
    })
    .replace(/&(nbsp|lt|gt|quot|apos);/g, (whole, name) => NAMED_ENTITIES[name])
    // Decode `&amp;` last so "&amp;lt;" ends up as the literal text "&lt;".
    .replace(/&amp;/g, '&');
}

/**
 * Turns every `src/*.json` file into a `.txt` file next to it.
 *
 * Each output starts with url, name, author and intro separated by blank
 * lines, followed by every chapter's title and body.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const files = glob.sync('src/*.json');
  console.log(files);

  for (const file of files) {
    const data = fs.readJsonSync(file);
    const header = [data.url, data.name, data.author, data.intro].join('\n\n');
    // `info` is null when the chapter body was not found while scraping;
    // emit an empty body instead of the literal text "null".
    const chapters = (data.list ?? []).map(
      ({ title, info }) => `\n${title}\n${info ?? ''}`,
    );
    const txt = htmlToText(header + chapters.join(''));
    // Anchor on the extension so a ".json" elsewhere in the path is untouched.
    fs.writeFileSync(file.replace(/\.json$/, '.txt'), txt);
  }
}

main().catch((err) => {
  // Surface the failure and exit non-zero instead of leaving a floating promise.
  console.error(err);
  process.exitCode = 1;
});