iron_sheet

导航

 

Node.js爬虫

 

// Crawler script: downloads an imooc course page and prints its chapter titles.
var http = require('http');
var cheerio = require('cheerio');

// Course page whose chapter list is scraped.
var url = 'http://www.imooc.com/learn/348';

/**
 * Extracts the chapter list from the course page markup and prints each
 * chapter title to stdout.
 *
 * @param {string} html - Markup of the course page.
 * @returns {Array<{chapterTitle: string, videos: Array}>} One entry per
 *   `.chapter` element, in document order. `videos` is always an empty array:
 *   this script does not collect the per-chapter video entries.
 */
function filterChapters(html) {
  var $ = cheerio.load(html);
  var courseData = [];

  console.log('章节标题:' + '\n');

  // A regular function (not an arrow) is required here: cheerio binds `this`
  // to the element currently being visited.
  $('.chapter').each(function () {
    var chapterTitle = $(this).find('h3').text();
    console.log(chapterTitle + '\n');

    courseData.push({
      chapterTitle: chapterTitle,
      videos: []
    });
  });

  return courseData;
}

http.get(url, function (res) {
  // Anything other than 200 (redirect, error page, ...) is not the course
  // page, so parsing it would silently print nothing useful.
  if (res.statusCode !== 200) {
    console.log('获取课程数据出错: HTTP ' + res.statusCode);
    // Drain the response so the underlying socket is released.
    res.resume();
    return;
  }

  // Decode as UTF-8 inside the stream; concatenating raw Buffers with `+=`
  // corrupts multi-byte characters that straddle a chunk boundary.
  res.setEncoding('utf8');

  var html = '';

  res.on('data', function (chunk) {
    html += chunk;
  });

  res.on('end', function () {
    // Called for its side effect: filterChapters prints the titles itself.
    filterChapters(html);
  });
}).on('error', function (err) {
  // Include the underlying reason instead of discarding the error object.
  console.log('获取课程数据出错: ' + err.message);
});

 

效果

 

posted on 2018-11-20 17:41  iron_sheet  阅读(97)  评论(0)  编辑  收藏  举报