Node.js爬虫
// Crawler for a single imooc course page: downloads the page HTML over HTTP
// and uses cheerio to extract the chapter titles from it.
var http = require('http');
var cheerio = require('cheerio');

var url = 'http://www.imooc.com/learn/348';

/**
 * Extracts chapter information from the course page HTML.
 *
 * Every element matching `.chapter` yields one entry whose title is the text
 * of the `h3` elements inside it. Each title is also printed to stdout as a
 * side effect.
 *
 * @param {string} html - Raw HTML of the course page.
 * @returns {Array<{chapterTitle: string, videos: Array}>} One entry per
 *   chapter, in document order. `videos` is always an empty array for now.
 */
function filterChapters(html) {
  var $ = cheerio.load(html);
  var chapters = $('.chapter');
  var courseData = [];

  console.log('章节标题:' + '\n');

  // cheerio passes (index, element) to the callback.
  chapters.each(function (index, element) {
    var chapter = $(element);
    var chapterTitle = chapter.find('h3').text();

    console.log(chapterTitle + '\n');

    // NOTE(review): the original looked up `.video > li` here but never used
    // the result, so `videos` stays empty. TODO: populate it once the shape
    // of a video entry has been decided.
    courseData.push({
      chapterTitle: chapterTitle,
      videos: []
    });
  });

  return courseData;
}

http.get(url, function (res) {
  // Anything other than 200 (redirect, error page, ...) is not the course
  // page, so do not feed it to the parser.
  if (res.statusCode !== 200) {
    console.error('获取课程数据出错', 'HTTP ' + res.statusCode);
    // Drain the response so the socket is released.
    res.resume();
    return;
  }

  // Decode as UTF-8 inside the stream; appending raw Buffer chunks to a
  // string can split a multi-byte character across two chunks and corrupt it.
  res.setEncoding('utf8');

  var html = '';

  res.on('data', function (chunk) {
    html += chunk;
  });

  res.on('end', function () {
    // filterChapters prints the chapter titles itself; its return value is
    // currently unused.
    // NOTE(review): the original had a commented-out call to
    // printCourseInfo(courseData); that helper is not defined in this file.
    filterChapters(html);
  });
}).on('error', function (err) {
  console.error('获取课程数据出错', err.message);
});
效果