Node.js抓取新浪新闻标题

"use strict";

let cheerio = require("cheerio");
let http = require("http");
let iconv = require("iconv-lite");

// Sina world-news page whose link titles are printed by this script.
const mainUrl = "http://news.sina.com.cn/world/";

/**
 * Prints a failure message to stderr and marks the process as failed,
 * so a broken fetch is visible to the caller instead of ending silently.
 *
 * @param {string} message - Human-readable description of the failure.
 */
function reportFailure(message) {
    console.error(message);
    process.exitCode = 1;
}

// Fetch the page, collect the raw body, then print "title|url" for every
// link found under elements with class "content".
http.get(mainUrl, (sres) => {
    // http.get does not follow redirects; anything but 200 means the body
    // is not the page we want to parse.
    if (sres.statusCode !== 200) {
        sres.resume(); // drain the response so the socket is released
        reportFailure(`Request to ${mainUrl} failed with HTTP status ${sres.statusCode}`);
        return;
    }

    // Collect raw Buffers and decode once at the end, so multi-byte
    // characters split across chunk boundaries are decoded correctly.
    const chunks = [];
    sres.on('data', (chunk) => {
        chunks.push(chunk);
    });

    sres.on('error', (err) => {
        reportFailure(`Error while reading response from ${mainUrl}: ${err.message}`);
    });

    sres.on('end', () => {
        const html = iconv.decode(Buffer.concat(chunks), 'utf8');
        const $ = cheerio.load(html, {decodeEntities: false});

        $('.content a').each((_idx, element) => {
            const ele = $(element);
            // text()/attr() may yield an empty or missing value; normalize to ''.
            const title = (ele.text() || '').trim();
            const url = (ele.attr('href') || '').trim();

            // Keep only links with a title longer than 4 characters
            // (presumably to drop short navigation labels) and a non-empty
            // href that does not contain "javascript" (e.g. javascript: links).
            if (title.length > 4 && url.length > 0 && !url.includes('javascript')) {
                console.log(`${title}|${url}`);
            }
        });
    });
}).on('error', (err) => {
    // Without this listener a DNS/connection failure would be thrown as an
    // unhandled 'error' event and crash the process with a stack trace.
    reportFailure(`Request to ${mainUrl} failed: ${err.message}`);
});

 

posted on 2018-04-10 22:02  尼古拉斯bug  阅读(159)  评论(0)  编辑  收藏  举报

导航