axios+cheerio抓一本小说

axiosNovel.mjs

import axios from 'axios';
import fs from 'fs-extra';
import path from 'path';
import * as cheerio from 'cheerio'
// Shared axios client created without custom configuration; main() issues
// every HTTP request (index page and chapter pages) through it.
const instance = axios.create();


/**
 * Scrape one novel and save it as `<name>.json` in the current directory.
 *
 * Steps:
 *  1. Download the index page whose address is stored base64-encoded below.
 *  2. Read the book name, author and intro from the index page.
 *  3. Collect every link under `#list` whose text contains "第<number>章",
 *     keyed by that chapter number (a later link with the same number
 *     replaces an earlier one; links without a number are ignored).
 *  4. Download the chapters one at a time, in ascending chapter number, and
 *     store each chapter's heading and the raw HTML of `#content`.
 *  5. Write the collected data as pretty-printed JSON.
 *
 * `tags` is part of the output shape but is never filled in here.
 *
 * @returns {Promise<void>} Resolves once the JSON file has been written;
 *     rejects if any request or the file write fails.
 */
async function main(){
    const data={
        url:atob('aHR0cHM6Ly93d3cuM3l0Lm9yZy9tbC8xODk4NDEv'),
        name:'',
        author:'',
        tags:[],
        intro:'',
        list:[]
    }
    const res=await instance.get(data.url)
    const $ = cheerio.load(res.data)
    data.name=$('#info h1').text()
    data.author=$('#info > p:nth-child(2) > a').text()
    data.intro=$('#intro').text()
    console.log(data)

    // Sparse array indexed by chapter number, so iterating it in index order
    // yields the chapters sorted regardless of their order on the page.
    const nList=[]
    $('#list a').each((_index,ele)=>{
        const title=$(ele).text()
        const href=$(ele).attr('href')
        const hasM=title.match(/第(\d+)章/)
        // Skip links that are not numbered chapters or that have no target.
        if(!hasM||!href){
            return
        }
        const inx=Number.parseInt(hasM[1],10)
        nList[inx]={
            title,
            // Resolve against the index page URL. This handles root-relative,
            // document-relative and absolute hrefs alike, and does not depend
            // on the OS path separator the way path.join() does.
            href:new URL(href,data.url).href
        }
    })

    // Start at 0 so a chapter numbered "第0章" is not silently dropped, and
    // stop at length-1, the highest index that can hold a chapter.
    for(let i=0;i<nList.length;i++){
        const chapter=nList[i]
        if(!chapter){continue}
        console.log(i,chapter.href)
        const chapterRes=await instance.get(chapter.href)
        const $chapter = cheerio.load(chapterRes.data)
        data.list.push({
            title:$chapter('.bookname > h1').text(),
            info:$chapter('#content').html()
        })
    }
    fs.writeFileSync(data.name+'.json',JSON.stringify(data,null,2))
}
// Run the scraper. The returned promise is handled explicitly so a failed
// request or file write is reported and the process exits with a failure code.
main().catch((err)=>{
    console.error(err)
    process.exitCode=1
});

 

buildTxt.mjs

import glob from 'glob'
import fs from 'fs-extra';

/**
 * Convert every scraped novel matching `src/*.json` into a plain-text file
 * written next to it (same path, `.txt` extension).
 *
 * The text starts with url, name, author and intro separated by blank lines,
 * followed by each chapter's title and body. Paragraph and line-break tags
 * in the text are replaced by newlines; any other markup is left untouched.
 *
 * @returns {Promise<void>} Resolves after all files have been written.
 */
async function main(){
    const files=glob.sync('src/*.json')

    console.log(files)
    for(const file of files){
        const data=fs.readJsonSync(file)
        const header=[data.url,data.name,data.author,data.intro].join('\n\n')
        // A missing chapter list or a null chapter body (no content scraped)
        // becomes empty text instead of crashing or printing "null".
        const chapters=(data.list??[]).map(({title,info})=>`\n${title}\n${info??''}`)
        // Matches <p>, </p>, <br>, </br> plus the self-closing and upper-case
        // spellings (<br/>, <br />, <BR>).
        const txt=(header+chapters.join('')).replace(/<\/?(?:p|br)\s*\/?>/gi,'\n')
        // Swap only the trailing extension; a plain string replace would hit
        // the first ".json" anywhere in the path.
        fs.writeFileSync(file.replace(/\.json$/i,'.txt'),txt)
    }

}

// Run the conversion. The returned promise is handled explicitly so a read or
// write failure is reported and the process exits with a failure code.
main().catch((err)=>{
    console.error(err)
    process.exitCode=1
})

 

posted @ 2024-08-19 20:30  无工时代  阅读(2)  评论(0)  编辑  收藏  举报