使用 superagent 和 cheerio 爬取、解析网页(nodejs)

安装依赖:

npm install superagent cheerio --save

示例代码(使用 Promise 和 async/await,以同步风格书写异步请求):

const superagent = require('superagent')
const cheerio = require('cheerio')
const fs = require('fs')

/**
 * Fetches the page at https://a.b.c.cn/ and collects the text of every <a>
 * element found under the element with id `blk_cjkjqcfc_011`.
 *
 * Link texts that are empty or end with '|' are skipped, and every newline
 * character is stripped from the texts that are kept.
 *
 * @returns {Promise<string[]>} Resolves with the collected link texts, in the
 *   order cheerio's `.each` visits the matched elements. Rejects with an
 *   Error whose message is 'error' and whose `cause` is the error reported by
 *   superagent.
 */
function getNews() {
  return new Promise((resolve, reject) => {
    superagent.get('https://a.b.c.cn/').end((err, res) => {
      if (err) {
        // Reject with a real Error (same message text as before) and keep the
        // underlying failure attached so callers can see why the request failed.
        reject(new Error('error', { cause: err }))
        return
      }

      const $ = cheerio.load(res.text)
      const titles = []
      $('#blk_cjkjqcfc_011  a').each((index, item) => {
        const text = $(item).text()
        // Keep only non-empty link texts that do not end with '|'.
        if (text && !text.endsWith('|')) {
          titles.push(text.replaceAll('\n', ''))
        }
      })
      resolve(titles)
    })
  })
}

/**
 * Script entry point: waits for getNews() to settle and prints the resolved
 * value to stdout. A rejection from getNews() propagates to the caller.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const headlines = await getNews()
  console.log(headlines)
}

// Run the script. Report any failure and signal it through the exit code
// instead of leaving the rejection of the returned promise unhandled.
main().catch((err) => {
  console.error(err)
  process.exitCode = 1
})
posted @ 2023-02-19 23:01  egu0o  阅读(82)  评论(0)  编辑  收藏  举报