Node、MySQL爬虫

const http = require('http');
const cheerio = require('cheerio');
const fs = require('fs');
const mysql = require('mysql');
const child_process = require('child_process');
// Course-listing page URLs; startGetHtml appends to this array.
const urls = [];
// Scraped course rows; parseHtml stores [label, title, url] entries here.
const datas = [];

/**
 * Queues the 33 course-listing page URLs and starts a crawl for each.
 *
 * Every URL is appended to the module-level `urls` array; afterwards each
 * entry of `urls` is handed to `parseHtml` together with its zero-based
 * position in that array.
 */
function startGetHtml() {
    let pageNo = 1;
    while (pageNo <= 33) {
        urls.push(`http://www.imooc.com/course/list?page=${pageNo}`);
        pageNo += 1;
    }
    for (const [position, listUrl] of urls.entries()) {
        parseHtml(listUrl, position);
    }
}

/**
 * Downloads one course-listing page and scrapes every `.course-card` on it.
 *
 * For each card the title, banner image URL and course link are extracted;
 * the image is passed to `saveImg`, the link to `saveText`, and one
 * `[label, title, videoUrl]` row per card is collected in the module-level
 * `datas` array and then handed to `saveDatabase`.
 *
 * @param {string} url  Address of the listing page to fetch.
 * @param {number} page Zero-based page index; `page + 1` is used in the row label.
 */
function parseHtml(url, page) {
    http.get(url, (res) => {
        // Decode at the stream level so a multi-byte character that is split
        // across two chunks is not corrupted by per-chunk string conversion.
        res.setEncoding('utf8');
        // Must start as '' - appending to an uninitialised variable would
        // prefix the markup with the text "undefined".
        let html = '';
        res.on('data', (chunk) => {
            html += chunk;
        });

        res.on('end', () => {
            const $ = cheerio.load(html);
            // Drop rows left over from a previously parsed page; otherwise a
            // page with fewer cards would re-insert the earlier page's tail.
            datas.length = 0;
            $('.course-card').each((index, ele) => {
                const card = $(ele);
                const title = card.find('.course-card-name').text();
                const imgUrl = card.find('.course-banner').attr('src');
                const videoUrl = 'http://www.imooc.com' + card.attr('href');
                datas.push([`${page + 1}-${index}`, title, videoUrl]);
                // A card without a banner `src` would otherwise request "http:undefined".
                if (imgUrl) {
                    saveImg('http:' + imgUrl, title);
                }
                saveText(title, videoUrl);
            });
            // An empty row list would produce an invalid "VALUES" clause.
            if (datas.length > 0) {
                // Pass a snapshot so later pages reusing `datas` cannot alter these rows.
                saveDatabase(datas.slice());
            }
        });
    }).on('error', (err) => {
        // Without a listener a failed request raises an unhandled 'error' event.
        console.log(err);
    });
}

/**
 * Downloads an image and stores it as `node_download/img/<title>.jpg`.
 *
 * Logs the title on success; logs the error when the request or the file
 * write fails. Responses with a status other than 200 are discarded.
 *
 * @param {string} url   Image address to fetch over HTTP.
 * @param {string} title Course title, used as the file name.
 */
function saveImg(url, title) {
    // The title comes from scraped markup: replace path separators and other
    // characters that are unsafe in file names so it cannot escape the
    // download directory or make the write fail.
    const fileName = String(title).replace(/[\\/:*?"<>|]/g, '_');
    http.get(url, (res) => {
        if (res.statusCode !== 200) {
            console.log('IMG DOWNLOAD ERROR - ' + url + ' responded with status ' + res.statusCode);
            // Drain the body so the socket is released.
            res.resume();
            return;
        }
        // Keep the raw Buffers instead of round-tripping through a "binary" string.
        const chunks = [];
        res.on('data', (chunk) => {
            chunks.push(chunk);
        });
        res.on('end', () => {
            fs.writeFile('node_download/img/' + fileName + '.jpg', Buffer.concat(chunks), (err) => {
                if (err) {
                    console.log(err);
                } else {
                    console.log(title);
                }
            });
        });
    }).on('error', (err) => {
        // Without a listener a failed request raises an unhandled 'error' event.
        console.log(err);
    });
}

/**
 * Writes a course link to `node_download/txt/<title>.txt` as UTF-8 text.
 *
 * Logs the title on success and the error on failure.
 *
 * @param {string} title    Course title, used as the file name.
 * @param {string} videoUrl Course link written as the file content.
 */
function saveText(title, videoUrl) {
    // The title comes from scraped markup: replace path separators and other
    // characters that are unsafe in file names so it cannot escape the
    // download directory or make the write fail.
    const fileName = String(title).replace(/[\\/:*?"<>|]/g, '_');
    fs.writeFile('node_download/txt/' + fileName + '.txt', videoUrl, 'utf8', (err) => {
        if (err) {
            console.log(err);
        } else {
            console.log(title);
        }
    });
}

/**
 * Bulk-inserts scraped rows into the `imooc` table of the `download` database.
 *
 * Opens a connection for this call, runs a single multi-row INSERT, logs the
 * outcome, and closes the connection once the query has been processed.
 *
 * @param {Array<Array>} datas Rows to insert, each `[page, title, url]`.
 */
function saveDatabase(datas) {
    // An empty list would expand to an invalid "VALUES" clause.
    if (!Array.isArray(datas) || datas.length === 0) {
        console.log('INSERT SKIPPED - no rows');
        return;
    }
    // NOTE(review): credentials are hard-coded; consider loading them from configuration.
    const connection = mysql.createConnection({
        host  : 'localhost',
        user  : 'root',
        password : '123456',
        database : 'download'
    });
    // A nested array bound to a single `?` expands to (a,b,c),(d,e,f),...
    const sql = "INSERT INTO imooc(`page`,`title`,`url`) VALUES ?";
    connection.query(sql, [datas], (err) => {
        if (err) {
            console.log('INSERT ERROR - ', err.message);
        } else {
            console.log("INSERT SUCCESS");
        }
    });
    // Queued behind the query above; without it every call leaks a connection
    // and the process can never exit.
    connection.end();
}

/**
 * Reads every row of the `imooc` table once and serves the result as JSON
 * from an HTTP server listening on port 3000.
 *
 * The query runs a single time; the server keeps answering every request
 * with that same result. On a query error the error is logged and no
 * server is started.
 */
function getDatabase() {
    // NOTE(review): credentials are hard-coded; consider loading them from configuration.
    const connection = mysql.createConnection({
        host     : 'localhost',
        user     : 'root',
        password : '123456',
        port: '3306',
        database: 'download'
    });
    const sql = 'SELECT * FROM imooc';
    connection.query(sql, (err, result) => {
        if (err) {
            console.log('[SELECT ERROR] - ',err.message);
            return;
        }
        http.createServer((req, res) => {
            // The body is JSON; labelling it text/html would let a browser
            // interpret scraped titles as markup.
            res.writeHead(200, {'Content-Type': 'application/json;charset=utf-8'});
            res.end(JSON.stringify(result));
        }).listen(3000);
    });
    // Queued behind the query above so the connection is released after the single read.
    connection.end();
}

//startGetHtml();
//getDatabase();

 

posted @ 2017-12-13 18:00  鱿鱼须须  阅读(161)  评论(0)  编辑  收藏  举报