Node、MySQL爬虫
const http = require('http');
const cheerio = require('cheerio');
const fs = require('fs');
const mysql = require('mysql');
const child_process = require('child_process');

// Every list-page URL requested so far (appended to by startGetHtml).
const urls = [];
// Every row scraped so far, across all pages: [id, title, videoUrl].
const datas = [];

const SITE_ORIGIN = 'http://www.imooc.com';
const IMG_DIR = 'node_download/img/';
const TXT_DIR = 'node_download/txt/';

// Single source of truth for the MySQL connection settings used by both
// saveDatabase and getDatabase.
// NOTE(review): credentials are hard-coded in source; consider moving them
// to environment variables or a config file that is not committed.
const DB_CONFIG = {
  host: 'localhost',
  user: 'root',
  password: '123456',
  port: 3306,
  database: 'download',
};

/**
 * Turn a scraped title into something safe to use as a file name.
 *
 * Path separators, characters that are reserved on common file systems and
 * control characters are replaced with "_" so a title can never escape the
 * download directory or make writeFile fail.
 *
 * @param {string} title - Raw title text.
 * @returns {string} A non-empty, file-system-safe name.
 */
function toSafeFileName(title) {
  const cleaned = String(title)
    .replace(/[\\/:*?"<>|\u0000-\u001f]/g, '_')
    .trim();
  return cleaned || 'untitled';
}

/**
 * Build the list-page URLs and start scraping each of them.
 *
 * Each URL is recorded in the module-level `urls` array and handed to
 * parseHtml together with its zero-based page index.
 *
 * @param {number} [pageCount=33] - Number of list pages to fetch.
 */
function startGetHtml(pageCount = 33) {
  for (let i = 1; i <= pageCount; i++) {
    const url = `${SITE_ORIGIN}/course/list?page=${i}`;
    urls.push(url);
    parseHtml(url, i - 1);
  }
}

/**
 * Download one list page, extract every course card on it, and persist the
 * results (cover image, text file and database rows).
 *
 * @param {string} url - Address of the list page.
 * @param {number} page - Zero-based page index, used to build the row id.
 */
function parseHtml(url, page) {
  http.get(url, (res) => {
    if (res.statusCode !== 200) {
      console.log(`GET ${url} failed with status ${res.statusCode}`);
      res.resume(); // drain the response so the socket is released
      return;
    }

    // Decode as a stream so multi-byte characters that straddle two chunks
    // are not corrupted.
    res.setEncoding('utf8');
    let html = ''; // must start as '' or the page is prefixed with "undefined"
    res.on('data', (chunk) => {
      html += chunk;
    });
    res.on('end', () => {
      const $ = cheerio.load(html);
      // Rows are collected per page: pages are fetched concurrently, so a
      // shared array would be overwritten by whichever page finishes next.
      const rows = [];
      $('.course-card').each((index, ele) => {
        const card = $(ele);
        const title = card.find('.course-card-name').text().trim();
        const imgUrl = card.find('.course-banner').attr('src');
        const videoUrl = SITE_ORIGIN + card.attr('href');
        rows.push([`${page + 1}-${index}`, title, videoUrl]);
        if (imgUrl) {
          // presumably the src is protocol-relative ("//host/path"), hence
          // the "http:" prefix — TODO confirm against the live markup
          saveImg('http:' + imgUrl, title);
        } else {
          console.log(`No cover image found for "${title}"`);
        }
        saveText(title, videoUrl);
      });
      datas.push(...rows);
      saveDatabase(rows);
    });
  }).on('error', (err) => {
    console.log(`GET ${url} failed: ${err.message}`);
  });
}

/**
 * Download an image and write it to the image download directory as
 * "<title>.jpg".
 *
 * @param {string} url - Absolute http URL of the image.
 * @param {string} title - Course title, used (sanitised) as the file name.
 */
function saveImg(url, title) {
  http.get(url, (res) => {
    if (res.statusCode !== 200) {
      console.log(`GET ${url} failed with status ${res.statusCode}`);
      res.resume();
      return;
    }

    // Keep the raw Buffers instead of concatenating "binary" strings.
    const chunks = [];
    res.on('data', (chunk) => {
      chunks.push(chunk);
    });
    res.on('end', () => {
      const file = `${IMG_DIR}${toSafeFileName(title)}.jpg`;
      fs.writeFile(file, Buffer.concat(chunks), (err) => {
        if (err) {
          console.log(err);
        } else {
          console.log(title);
        }
      });
    });
  }).on('error', (err) => {
    console.log(`GET ${url} failed: ${err.message}`);
  });
}

/**
 * Write a course's video URL to "<title>.txt" in the text download directory.
 *
 * @param {string} title - Course title, used (sanitised) as the file name.
 * @param {string} videoUrl - URL stored as the file's content.
 */
function saveText(title, videoUrl) {
  const file = `${TXT_DIR}${toSafeFileName(title)}.txt`;
  fs.writeFile(file, videoUrl, 'utf8', (err) => {
    if (err) {
      console.log(err);
    } else {
      console.log(title);
    }
  });
}

/**
 * Bulk-insert scraped rows into the `imooc` table.
 *
 * @param {Array<Array<string>>} datas - Rows of [page, title, url].
 */
function saveDatabase(datas) {
  // An empty list would expand "VALUES ?" to nothing and produce invalid SQL.
  if (!Array.isArray(datas) || datas.length === 0) {
    console.log('INSERT SKIPPED - no rows');
    return;
  }

  const connection = mysql.createConnection(DB_CONFIG);
  const sql = 'INSERT INTO imooc(`page`,`title`,`url`) VALUES ?';
  // Wrapping the rows in another array makes the driver expand the single
  // placeholder into a grouped list: (a,b,c),(d,e,f),...
  connection.query(sql, [datas], (err) => {
    if (err) {
      console.log('INSERT ERROR - ', err.message);
    } else {
      console.log("INSERT SUCCESS");
    }
  });
  // end() waits for the queued query to finish before closing, so the
  // connection is not leaked (one was opened per scraped page).
  connection.end();
}

/**
 * Read every row of the `imooc` table and serve the result as JSON on
 * port 3000. The rows are read once, before the server starts.
 */
function getDatabase() {
  const connection = mysql.createConnection(DB_CONFIG);
  const sql = 'SELECT * FROM imooc';
  connection.query(sql, (err, result) => {
    if (err) {
      console.log('[SELECT ERROR] - ', err.message);
      return;
    }

    const body = JSON.stringify(result);
    http.createServer((req, res) => {
      // The payload is JSON built from scraped text; serving it as
      // text/html would let a browser interpret markup inside a title.
      res.writeHead(200, {'Content-Type': 'application/json;charset=utf-8'});
      res.end(body);
    }).on('error', (serverErr) => {
      console.log('[SERVER ERROR] - ', serverErr.message);
    }).listen(3000);
  });
  connection.end();
}

// Entry points — uncomment the one you want to run.
//startGetHtml();
//getDatabase();