node爬虫技术初探

// Load the http and fs modules
var http = require('http');
var fs = require('fs');

// Target resource: a JPEG image served over plain HTTP.
// Alternative target (an HTML page) kept for reference:
// var opt = {
//     hostname: 'nodejs.cn',
//     path: '/download/',
// };
var opt = {
    hostname: 'puui.qpic.cn',
    path: '/video_caps/0/i07552ruffw.q4.jpg/0',
};

// Download the resource and save the complete body to aaa.jpg.
http.get(opt, res => {
    // Keep the raw Buffer chunks; concatenating them as strings would
    // decode the binary image data as text and corrupt it.
    const chunks = [];
    res.on('data', chunk => {
        chunks.push(chunk);
    });
    res.on('end', () => {
        const imgBuffer = Buffer.concat(chunks);
        // No encoding argument: it is ignored when the data is a Buffer.
        // The callback is required; fs.writeFile throws without one on
        // current Node.js versions.
        fs.writeFile('aaa.jpg', imgBuffer, err => {
            if (err) {
                console.error('Failed to write aaa.jpg:', err);
            }
        });
    });
}).on('error', err => {
    // Without this listener a failed request (DNS, refused connection, ...)
    // would surface as an unhandled 'error' event and terminate the process.
    console.error('Request failed:', err);
});

封装

// Load the fs and url modules
var fs = require('fs');
const url = require('url');

function GetUrl(getUrl, success) {
    var urlObj = url.parse(getUrl);
    var http = '';
    console.log(urlObj,88);
    if (urlObj.protocol === 'http:') {
        http = require('http');
    } else if (urlObj.protocol === 'htts:') {
        http = require('https');
    }
    let req = http.get({
        hostname: urlObj.hostname,
        path: urlObj.path
    }, (res) => {
        var arr = [];
        res.on('data', buffer => {
            arr.push(buffer);
        });
        res.on('end', () => {
            // 图片需要Buffer转码
            let imgBuffer = Buffer.concat(arr);
            success && success(imgBuffer);
        });
    });
    req.end();
    // req.on('error');
}
// Example usage: fetch the image and save it to bbb.jpg.
GetUrl('http://puui.qpic.cn/video_caps/0/i07552ruffw.q4.jpg/0', data => {
    // `data` is a Buffer, so no encoding is passed (it would be ignored).
    // The callback is required; fs.writeFile throws without one on current
    // Node.js versions.
    fs.writeFile('bbb.jpg', data, err => {
        if (err) {
            console.error('Failed to write bbb.jpg:', err);
        }
    });
});
posted @ 2018-10-20 19:01  福小松  阅读(142)  评论(0)  编辑  收藏  举报