Node.js + Express + cheerio + request 爬虫

const express = require('express')
const cheerio = require('cheerio')
const request = require("request")
const app = express()

// Register body parsers before the routes; without them req.body is
// undefined in the POST handler below.
app.use(express.json())
app.use(express.urlencoded({extended: false}))

/**
 * GET /:key
 * Searches Baidu for the keyword given in the path and lets the spider
 * write the result-list HTML to the response.
 */
app.get("/:key", function (req, res) {
    const spider = new Spider()
    console.log(req.params.key)
    // The keyword is re-encoded before being placed in the query string so
    // that characters such as '&', '#', spaces or CJK text cannot break or
    // truncate the `wd` parameter.
    const searchUrl = "http://www.baidu.com/s?wd=" + encodeURIComponent(req.params.key)
    spider.fetch(searchUrl, (err, $) => {
        spider.parse(err, $, res)
    })
})

/**
 * POST /postData
 * Echoes the parsed request body back to the client. Note this route is
 * POST rather than GET and relies on the body parsers registered above.
 */
app.post('/postData', function (req, res) {
    const result = req.body
    res.send(result)
})

// Start the HTTP server on port 3000.
app.listen(3000, () => {
    console.log("开启服务,端口3000")
})


/**
 * Callback-style page fetcher: downloads a page with `request`, loads it
 * into cheerio, and writes the extracted fragment to an Express response.
 */
class Spider {
    /**
     * Download `url` and hand a cheerio document to `callback`.
     *
     * @param {string} url - Address of the page to download.
     * @param {function(?Error, Function): void} callback - Called once with
     *   `(null, $)` on HTTP 200, or with `(error, $)` where `$` wraps an
     *   empty <body> when the request failed or the status was not 200.
     */
    fetch(url, callback) {
        // encoding: null makes `request` deliver the body as a Buffer; it is
        // converted to a string by the concatenation below.
        request({url: url, encoding: null}, (err, response, body) => {
            if (err) {
                callback(err, cheerio.load('<body></body>'))
                return
            }
            if (response.statusCode !== 200) {
                // A non-200 answer is a failure too; report it instead of
                // passing an empty document off as a successful result.
                const statusError = new Error('Unexpected HTTP status ' + response.statusCode + ' for ' + url)
                callback(statusError, cheerio.load('<body></body>'))
                return
            }
            callback(null, cheerio.load('<body>' + body + '</body>'))
        })
    }

    /**
     * Send the HTML inside `#content_left` to the client, or an error
     * response when the fetch failed.
     *
     * @param {?Error} err - Error reported by `fetch`, or null on success.
     * @param {Function} $ - cheerio document produced by `fetch`.
     * @param {object} res - Express response object.
     */
    parse(err, $, res) {
        if (err) {
            // Always answer the client; otherwise the HTTP request would
            // stay open until it times out.
            console.error(err)
            res.status(502).send('Failed to fetch the requested page')
            return
        }
        const result = $('body').find("#content_left").html()
        res.send(result)
    }

}

封装 request:基于 Promise 的 Spider 版本(可替换上面的回调版本)

/**
 * Promise-based wrapper around `request`.
 */
class Spider {
    /**
     * Perform an HTTP request and resolve with the response body.
     *
     * @param {string} url - Address to request.
     * @param {string} [method="GET"] - HTTP method. "POST" sends the body
     *   "{}" together with a cookie header; any other value sends only the
     *   user-agent header.
     * @returns {Promise<*>} Resolves with the body on HTTP 200. Rejects with
     *   the transport error, or with an Error naming the status code when
     *   the response status is not 200.
     */
    fetch(url, method = "GET") {
        const option = method === "POST"
            ? {
                url: url,
                method: method,
                headers: {
                    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36',
                    // NOTE(review): the session id is empty here and looks
                    // like a placeholder to be filled in by the user.
                    'cookie': 'JUTE_SESSION_ID='
                },
                body: "{}"
            }
            : {
                url: url,
                method: method,
                headers: {
                    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.128 Safari/537.36",
                },
            }

        return new Promise((resolve, reject) => {
            request(option, (error, response, body) => {
                // Settle the promise on every path; leaving it pending would
                // make callers wait forever on a failed request.
                if (error) {
                    reject(error)
                    return
                }
                if (response.statusCode !== 200) {
                    reject(new Error('Unexpected HTTP status ' + response.statusCode + ' for ' + url))
                    return
                }
                resolve(body)
            })
        })
    }
}

posted @ 2019-11-12 10:13  公众号python学习开发  阅读(323)  评论(0)  编辑  收藏  举报