用nodejs抓取豆瓣音乐并存入mongodb

之前用 Python 抓取过豆瓣的音乐,并保存到 sxl 中;最近在玩 MongoDB,于是有了如下代码,主要涉及两部分:用 Node.js 连接并操作 MongoDB,以及用 http 模块抓取网页内容。

没什么深奥的原理,不多说,代码如下:


// --- MongoDB connection settings -------------------------------------------
// NOTE: the credentials below are hard-coded sample values.
var mongo = require('mongodb');
var Server = mongo.Server;
var Db = mongo.Db;

var db_name = 'test';
var db_user = 'root';
var db_pass = '111111';
var table_name = 'douban';

var server = new Server('localhost', 27017, {auto_reconnect: true});
var db = new Db(db_name, server);

// --- HTTP request settings --------------------------------------------------
var http = require('http');

// Channel id sent in the playlist query string and stamped onto stored songs.
var channel = 1;

// Delay between two playlist requests, in milliseconds.
var interval = 3000;

// The request path is built once, from the channel value at load time.
var options = {
	host: 'douban.fm',
	port: 80,
	path: '/j/mine/playlist?type=n&channel=' + channel
};

// Running count of successfully inserted records across all polls.
var globalIndex = 0;

/**
 * Parses one playlist response body and inserts every song it contains.
 *
 * Each song is tagged with the current `channel` before insertion. The running
 * total (`globalIndex`) is logged only after every insert callback has fired,
 * so the reported number includes the songs of this batch.
 *
 * @param {string} body - Raw response text, expected to be JSON with a `song` array.
 * @param {Object} collection - Collection handle obtained from `db.collection`.
 */
function storePlaylist(body, collection) {
	var data;
	try {
		data = JSON.parse(body);
	} catch (e) {
		// A malformed or non-JSON response must not kill the polling loop.
		console.log("Got error: " + e.message);
		return;
	}

	var songs = data && data.song;
	if (!Array.isArray(songs)) {
		console.log("Got error: response contains no song list");
		return;
	}

	function report() {
		console.log("新增: " + globalIndex + "条记录");
	}

	var pending = songs.length;
	if (pending === 0) {
		report();
		return;
	}

	songs.forEach(function(song) {
		song['channel'] = channel;
		collection.insert(song, {safe: true}, function(err) {
			if (err) {
				console.log("Got error: " + err.message);
			} else {
				globalIndex++;
			}
			// Report once, after the last outstanding insert has completed.
			pending--;
			if (pending === 0) {
				report();
			}
		});
	});
}

/**
 * Requests the playlist once and hands the complete body to storePlaylist.
 *
 * @param {Object} collection - Collection handle obtained from `db.collection`.
 */
function fetchPlaylist(collection) {
	http.get(options, function(res) {
		var body = '';
		// Decode as UTF-8 in the stream so that a multi-byte character split
		// across two chunks is reassembled correctly.
		res.setEncoding('utf8');
		res.on('data', function(chunk) {
			body += chunk;
		});
		res.on('end', function() {
			storePlaylist(body, collection);
		});
	}).on('error', function(e) {
		console.log("Got error: " + e.message);
	});
}

// Open the database, authenticate, then poll the playlist every `interval` ms.
// The connection is deliberately left open while polling; it is closed only
// when setup fails, because closing it right after scheduling the timer would
// leave every later insert without a live connection.
db.open(function(err, db) {
	if (err) {
		console.log("Got error: " + err.message);
		return;
	}
	db.authenticate(db_user, db_pass, function(err, result) {
		if (err) {
			console.log("Got error: " + err.message);
			db.close();
			return;
		}
		db.collection(table_name, function(err, collection) {
			if (err) {
				console.log("Got error: " + err.message);
				db.close();
				return;
			}
			setInterval(function() {
				fetchPlaylist(collection);
			}, interval);
		});
	});
});
posted @ 2012-09-14 15:46  nodejs  阅读(523)  评论(0编辑  收藏  举报