Node.js 爬虫抓取节点信息

以博客园新闻为例

// Dependencies: express serves the HTTP endpoint, superagent performs the
// outbound HTTP request, and cheerio parses the fetched HTML.
var express = require('express');
var cheerio = require('cheerio');
var superagent = require('superagent');
// Express application instance that the route and listener below attach to.
var app = express();

// GET / — fetch the cnblogs news front page, collect every link found inside
// a `.news_entry` element, and send the resulting list back to the client.
app.get('/', function (req, res, next) {
  // To let pages served from other origins read this response, enable:
  // res.header("Access-Control-Allow-Origin", "*");
  superagent.get('https://news.cnblogs.com/').end(function (fetchErr, page) {
    // Forward a failed fetch to the next Express handler instead of replying.
    if (fetchErr) {
      return next(fetchErr);
    }

    var $ = cheerio.load(page.text);

    // Build one { text, href } record per matched anchor, in document order.
    var links = $('.news_entry a')
      .map(function (position, anchor) {
        var $anchor = $(anchor);
        return {
          text: $anchor.text(),
          href: $anchor.attr('href')
        };
      })
      .get();

    res.send(links);
  });
});

// Start the server on port 3000; the callback logs a message when invoked by
// app.listen.
app.listen(3000, function onListening() {
  console.log('app is now listen at port 3000');
});

 

posted @ 2017-12-08 17:43  李元夕cool  阅读(315)  评论(0)  编辑  收藏  举报