推荐算法-协同过滤

最近看了一篇协同过滤的文章"A Guide to Singular Value Decomposition for Collaborative Filtering",该文主要为协同过滤设计了一种有效的SVD算法。

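V是偏好分数矩阵,I是V的指示矩阵:I{i,j}=1代表用户i对item j有偏好分数,否则I{i,j}=0,一般V是稀疏的。协同过滤的目的是预测稀疏矩阵中缺失的评分。协同过滤算法一种普遍的评价方法是Root Mean Square Error(RMSE):设预测矩阵为P,真实矩阵为A,J为A的指示矩阵(定义与I相同),则RMSE定义为:

$$\mathrm{RMSE}(P, A) = \sqrt{\frac{\sum_{i,j} J_{ij}\,(A_{ij} - P_{ij})^{2}}{\sum_{i,j} J_{ij}}}$$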


SVD算法的目的是找到两个特征矩阵U(用户×特征)和M(item×特征),用户i对item j的预测评分由U的第i行与M的第j行的内积给出(下面的代码在此基础上还加上了全局均值、用户偏置和item偏置,并把结果限制在1到5之间)。


 代码如下:

 

 

#include <math.h>

#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>

using namespace std;
// Capacity of the user dimension; user ids read from the data files are used
// directly as row indices, so every id must be smaller than this value.
const int USERMAX = 1000;
// Capacity of the item dimension; item ids are used directly as column indices.
const int ITEMMAX = 2000;
// Number of latent features stored per user and per item.
const int FEATURE = 50;
// Number of full passes train() makes over the rated cells.
const int ITER_MAX = 30;

// rating[u][i] holds the rating loaded from the training file (0 if never set).
double rating[USERMAX][ITEMMAX];
int I[USERMAX][ITEMMAX]; // indicator matrix: 1 when rating[u][i] was loaded, 0 otherwise
// Latent feature vector of each user (one row per user).
double UserF[USERMAX][FEATURE];
// Latent feature vector of each item (one row per item).
double ItemF[ITEMMAX][FEATURE];
// Per-user bias term added to every prediction for that user.
double BIASU[USERMAX];
// Per-item bias term added to every prediction for that item.
double BIASI[ITEMMAX];
// Regularization coefficient applied to biases and features in the update step.
double lambda = 0.15;
// Step size (learning rate) of the update step.
// NOTE(review): some C libraries' <math.h> (e.g. glibc) also declare a function
// named gamma; a "redeclared as different kind of entity" compile error on such
// a toolchain would come from this name - confirm on the target compiler.
double gamma = 0.05;
// Mean of all loaded ratings; assigned by initBias().
double mean;
/**
 * Predicts the rating user i would give item j.
 *
 * The estimate is the global mean plus the user bias, the item bias and the
 * dot product of the user's and the item's feature vectors, limited to the
 * range [1, 5]. No bounds checking is performed on i or j.
 */
double predict(int i, int j) {
	const double *userVec = UserF[i];
	const double *itemVec = ItemF[j];
	double score = mean + BIASU[i] + BIASI[j];
	// Accumulate into the running score in feature order so the rounding
	// behaviour of the sum does not depend on how the loop is written.
	for(int k = 0; k != FEATURE; ++k) {
		score += userVec[k] * itemVec[k];
	}
	if(score < 1) {
		return 1;
	}
	if(score > 5) {
		return 5;
	}
	return score;
}

/**
 * Root-mean-square error of predict() over every rated cell of the training
 * matrix.
 *
 * Only cells whose indicator I[i][j] is set are evaluated; unrated cells are
 * skipped entirely, so no prediction is computed for them and a non-finite
 * prediction on an unrated cell cannot leak into the sum.
 *
 * @return the RMSE, or 0 when no cell is rated (avoids a 0/0 division).
 */
double calRMSE() 
{
	double squaredError = 0; 
	int ratedCnt = 0;
	for(int i = 0; i < USERMAX; i++) {
		for(int j = 0; j < ITEMMAX; j++) {
			if(!I[i][j]) {
				continue;
			}
			const double diff = rating[i][j] - predict(i, j);
			squaredError += diff * diff;
			ratedCnt++;
		}
	}
	if(ratedCnt == 0) {
		return 0;
	}
	return sqrt(squaredError / ratedCnt);
}
/**
 * Mean of all ratings whose indicator I[i][j] is set.
 *
 * @return the mean rating, or 0 when no rating is present. Without this guard
 *         the result would be 0/0 (NaN), which would then propagate into
 *         every bias and every prediction.
 */
double calMean() 
{
	double sum = 0;
	int ratedCnt = 0;
	for (int i = 0; i < USERMAX; ++i) {
		for(int j = 0; j < ITEMMAX; ++j) {
			if(I[i][j]) {
				sum += rating[i][j];
				++ratedCnt;
			}
		}
	}
	if(ratedCnt == 0) {
		return 0;
	}
	return sum / ratedCnt;
}
/**
 * Sets the global mean and the starting value of every user and item bias.
 *
 * mean is taken from calMean(). Each user's bias becomes the average of
 * (rating - mean) over the items that user rated; each item's bias becomes
 * the same average over the users who rated it. A user or item with no
 * ratings gets a bias of 0. Every entry of BIASU and BIASI is assigned here,
 * so no separate zero-fill is needed.
 */
void initBias() 
{
	mean = calMean();

	// Row-wise pass: one bias per user.
	for(int u = 0; u < USERMAX; ++u) {
		double devSum = 0;
		int seen = 0;
		for(int it = 0; it < ITEMMAX; ++it) {
			if(!I[u][it]) {
				continue;
			}
			devSum += rating[u][it] - mean;
			++seen;
		}
		BIASU[u] = (seen > 0) ? devSum / seen : 0;
	}

	// Column-wise pass: one bias per item.
	for(int it = 0; it < ITEMMAX; ++it) {
		double devSum = 0;
		int seen = 0;
		for(int u = 0; u < USERMAX; ++u) {
			if(!I[u][it]) {
				continue;
			}
			devSum += rating[u][it] - mean;
			++seen;
		}
		BIASI[it] = (seen > 0) ? devSum / seen : 0;
	}
}

/**
 * Loads the training ratings and fits biases and latent features by
 * stochastic gradient descent.
 *
 * Steps:
 *   1. Clear rating / I and read "userId itemId rate timeStamp" rows from the
 *      training file. Rows whose ids fall outside the array bounds are
 *      skipped instead of being written out of bounds.
 *   2. initBias() sets the global mean and the initial biases.
 *   3. Every feature is initialised to a pseudo-random value in
 *      [0.001, 0.991] (rand() is not seeded here, so runs are repeatable).
 *   4. ITER_MAX passes over the rated cells; after each pass the training
 *      RMSE is printed.
 *
 * Prints "file not exist" and terminates the process with status 1 when the
 * training file cannot be opened.
 */
void train() 
{
	memset(rating, 0, sizeof(rating));
	memset(I, 0, sizeof(I));
	ifstream in("D:\\dataset\\ml-100k\\ub.base",ios::in);
	if(!in) {
		cout << "file not exist" << endl;
		exit(1);
	}
	int userId, itemId, rate;
	string timeStamp;
	while(in >> userId >> itemId >> rate >> timeStamp) {
		// Ids index the global arrays directly; ignore rows that would not fit.
		if(userId < 0 || userId >= USERMAX || itemId < 0 || itemId >= ITEMMAX) {
			continue;
		}
		rating[userId][itemId] = rate;
		I[userId][itemId] = 1;
	}
	initBias();
	// Initialise the latent features (feature-major order).
	for(int f = 0; f < FEATURE; f++) {
		for(int i = 0; i < USERMAX; i++) {
			UserF[i][f] = (rand()%100)/100.0 + 0.001;
		}
		for(int j = 0; j < ITEMMAX; j++) {
			ItemF[j][f] = (rand()%100)/100.0 + 0.001;
		}
	}
	for(int iterCnt = 0; iterCnt < ITER_MAX; iterCnt++) {
		for(int i = 0; i < USERMAX; i++) {
			for(int j = 0; j < ITEMMAX;j++) {
				if(!I[i][j]) {
					continue;
				}
				const double eui = rating[i][j] - predict(i, j);
				BIASU[i] += gamma*(eui - lambda*BIASU[i]);
				BIASI[j] += gamma*(eui - lambda*BIASI[j]);
				for(int f = 0; f < FEATURE; f++) {
					// Both gradients must be evaluated at the pre-update
					// values, so remember the user feature before changing it.
					const double oldUserF = UserF[i][f];
					UserF[i][f] += gamma*(eui*ItemF[j][f] - lambda*oldUserF);
					ItemF[j][f] += gamma*(eui*oldUserF - lambda*ItemF[j][f]);
				}
			}
		}
		double rmse = calRMSE();
		cout << "LOOP" << iterCnt << ": rmse is " << rmse << endl;
	}
}
void test() 
{
	ifstream in("D:\\dataset\\ml-100k\\ub.test");
	int userId, itemId, rate;
	string timeStamp;
	double total = 0;
	int cnt = 0;
	while(in >> userId >> itemId >> rate >> timeStamp) {
		double predictRate = predict(userId, itemId);
		total += (rate - predictRate) * (rate - predictRate);
		cnt++;
	}
	double rmse = pow(total / cnt, 0.5);
	cout << "test: rmse is " << rmse << endl;
	
}
// Program entry point: fit the model on the training split, then report the
// error on the test split.
int main() {
	train();
	test();
	return 0;
}

 

  

 

posted @ 2015-02-25 17:43  bitgirl_coder  阅读(273)  评论(0)  编辑  收藏  举报