CF Code

user-knn

  1 import numpy
  2 import csv
  3 from numpy import *
  4 
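'''
Ratings are stored starting at index (1, 1): uimatrix[user, item] is the
rating that `user` gave to `item` (0 means "not rated").
Column 0 holds per-user means: uimatrix[user, 0] is the mean rating of that user.
Row 0 holds per-item means: uimatrix[0, item] is the mean rating of that item.
'''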
 10 
 11 def toInt(arr):
 12     print('toInt() startting...')
 13     arr = mat(arr)
 14     m, n = shape(arr)
 15     nArr = zeros((m, n))
 16     for i in range(m):
 17         for j in range(n):
 18             nArr[i, j] = int(arr[i, j])
 19     print('toInt() ending...')
 20     return nArr
 21 
 22 def loadTrainData(path):
 23     print('loadTrainData startting...')
 24     l = []
 25     with open(path, 'r') as file:
 26         lines = csv.reader(file)
 27         for line in lines:
 28             l.append(line)
 29     l = array(l)
 30     print('loadTrainData ending...')
 31     return toInt(l)
 32 
 33 def loadTestData(path):
 34     print('loadTestData startting...')
 35     l = []
 36     with open(path) as file:
 37         lines = csv.reader(file)
 38         for line in lines:
 39             l.append(line)
 40     l = array(l)
 41     print('loadTestData ending...')
 42     return toInt(l)
 43 
 44 def fillUIMatrix(uimatrix, train_data):
 45     print('fillUIMatrix startting...')
 46     train_data = mat(train_data)
 47     m, n = shape(train_data)
 48     for i in range(m):
 49         uimatrix[train_data[i, 0], train_data[i, 1]] = train_data[i, 2]
 50     print('fillUIMatrix ending...')
 51 
 52 def calAverageRating(uimatrix):
 53     print('calAverageRating starting...')
 54     uimatrix = mat(uimatrix)
 55     m, n = shape(uimatrix)
 56     for i in range(1, m):
 57         rating = 0
 58         cnt = 0
 59         for j in range(1, n):
 60             rating += uimatrix[i, j]
 61             if uimatrix[i, j] != 0:
 62                 cnt += 1
 63         uimatrix[i, 0] = rating / cnt
 64     
 65     for i in range(1, n):
 66         rating = 0
 67         cnt = 0
 68         for j in range(1, m):
 69             rating += uimatrix[j, i]
 70             if uimatrix[j, i] != 0:
 71                 cnt += 1
 72         if cnt == 0: uimatrix[0, i] = 0
 73         else: uimatrix[0, i] = rating / cnt
 74     print('calAverageRating ending...')
 75 
 76 def calPerson(l1, l2, rating1, rating2):
 77     print('calPerson startting...')
 78     r1 = 0.0; r2 = 0.0; r3 = 0.0;
 79     for i in range(len(l1)):
 80         r1 += (l1[i]-rating1)*(l2[i]-rating2)
 81         r2 += (l1[i]-rating1)*(l1[i]-rating1)
 82         r3 += (l2[i]-rating2)*(l2[i]-rating2)
 83     r = r1 / (sqrt(r2)*sqrt(r3))
 84     print('calPerson ending...')
 85     return abs(r)
 86     
 87 def rSort(r_list, index_list):
 88     print('rSort startting...')
 89     for i in range(len(r_list)-1):
 90         for j in range(len(r_list)-1-i):
 91             if r_list[j] < r_list[j+1]:
 92                 tmp = r_list[j]
 93                 r_list[j] = r_list[j+1]
 94                 r_list[j+1] = tmp
 95                 tmp = index_list[j]
 96                 index_list[j] = index_list[j+1]
 97                 index_list[j+1] = tmp
 98     for i in range(len(r_list)):
 99         print(i, ':', r_list[i])
100     print('rSort ending...')
101     
def calSim(uimatrix, index):
    """Rank all other users by similarity to user *index*.

    For every other user the items rated by both users are collected, the
    absolute Pearson correlation over those items is computed with
    ``calPerson`` (NaN is replaced by 0.0), and the results are sorted in
    descending order with ``rSort``.  Users sharing no item are left out.

    Returns:
        (r_list, index_list): similarities and the matching user ids.
    """
    # `math` was never imported by this file; it used to leak in through
    # `from numpy import *` on old NumPy releases only.
    import math

    print('calSim startting...')
    ratings = asarray(uimatrix)  # replaces numpy.mat, removed in NumPy 2.0
    m, n = ratings.shape
    target = ratings[index, 1:]
    r_list = []          # sim list
    index_list = []      # mapping sim and index
    for i in range(1, m):
        if i == index:
            continue
        other = ratings[i, 1:]
        shared = (other != 0) & (target != 0)
        if not shared.any():
            continue
        l1 = list(target[shared])
        l2 = list(other[shared])
        # Means are taken over the co-rated items only.
        rating1 = 0
        rating2 = 0
        for value in l1:
            rating1 += value
        for value in l2:
            rating2 += value
        rating1 /= len(l1)
        rating2 /= len(l2)
        r = calPerson(l1, l2, rating1, rating2)
        if math.isnan(r):
            r = 0.0
        r_list.append(r)
        index_list.append(i)
    rSort(r_list, index_list)
    print('calSim ending...')
    return r_list, index_list
129 
def calRMSE(uimatrix, test_data, users):
    """Return the RMSE of the predictions in *uimatrix* over *test_data*.

    Args:
        uimatrix: ``[user, item]`` array of predictions; column 0 holds each
            user's mean rating.  It is modified in place: a missing (0.0)
            prediction is replaced by the user's mean and every evaluated
            prediction is rounded.
        test_data: table of (user, item, rating, ...) records.
        users: highest user id to evaluate; records whose user id is not an
            integer in ``1..users`` are ignored.

    Returns:
        The root mean squared error, or NaN when no record was evaluated.

    Records are processed grouped by ascending user id (file order within a
    user), which is the order the former nested scan produced, but in one
    pass.  Ids are cast to ``int`` because NumPy rejects float indices.
    """
    print('calRMSE startting...')
    records = atleast_2d(asarray(test_data))
    eligible = []
    if records.shape[1] >= 3:
        eligible = [row for row in records
                    if 1 <= row[0] <= users and row[0] == int(row[0])]
    tmp1 = 0
    tmp2 = 0
    # sorted() is stable, so per-user file order is preserved.
    for row in sorted(eligible, key=lambda rec: rec[0]):
        user = int(row[0])
        item = int(row[1])
        if uimatrix[user, item] == 0.0:
            uimatrix[user, item] = uimatrix[user, 0]
        uimatrix[user, item] = round(uimatrix[user, item])
        tmp1 += (row[2] - uimatrix[user, item]) ** 2
        tmp2 += 1
        print(row[1], ' real rating:', row[2], ' predict:', uimatrix[user, item])
    print('calRMSE ending...')
    if tmp2 == 0:
        return float('nan')
    return sqrt(tmp1 / tmp2)
147 
# ---- user-based kNN driver ------------------------------------------------
select_top = 30      # maximum number of most-similar users used per prediction
users = 943
items = 1682
# Row 0 / column 0 are reserved for the item / user mean ratings.
user_item_matrix = zeros((users+1, items+1))
train_path = 'C:\\Users\\think\\Desktop\\data\\u2.base'
test_path = 'C:\\Users\\think\\Desktop\\data\\u2.test'

train_data = loadTrainData(train_path)
test_data = loadTestData(test_path)

fillUIMatrix(user_item_matrix, train_data)
calAverageRating(user_item_matrix)
uimatrix = user_item_matrix
# Predictions go into a copy: writing them into `uimatrix` would make later
# users' similarities depend on earlier users' *predicted* ratings.
predictions = uimatrix.copy()

# range(1, users+1) / range(1, items+1): the ids are 1-based and inclusive,
# so the last user and the last item must not be skipped.
for i in range(1, users+1):
    r_list, index_list = calSim(uimatrix, i)
    # A user may have fewer than `select_top` comparable neighbours.
    neighbours = min(select_top, len(r_list))
    for j in range(1, items+1):
        if uimatrix[i, j] == 0:
            tmp1 = 0.0
            tmp2 = 0.0
            for k in range(neighbours):
                if not isnan(r_list[k]) and uimatrix[index_list[k], j] != 0:
                    # similarity-weighted deviation from the neighbour's mean
                    tmp1 += r_list[k]*(uimatrix[index_list[k], j]-uimatrix[index_list[k], 0])
                    tmp2 += r_list[k]
            print(j, tmp1, tmp2)
            if tmp2 == 0:
                predictions[i, j] = uimatrix[i, 0]
            else:
                predictions[i, j] = uimatrix[i, 0] + tmp1/tmp2

RMSE = calRMSE(predictions, test_data, users)
print(RMSE)
View Code

lfm1

  1 from numpy import *
  2 import csv
  3 import time
  4 
  5 def RMSE(estimation, truth):
  6     num = len(estimation)
  7     
  8     sse = sum(square(truth - estimation))
  9     return sqrt(divide(sse, num-1.0))
 10 
 11 class matrixFactorization():
 12     def __init__(self, num_user, num_item, num_feature, train_data, test_data, **params):
 13         self._num_user = num_user
 14         self._num_item = num_item
 15         self._num_featrue = num_feature
 16         self._train_data = train_data
 17         self._test_data = test_data
 18         
 19         self.batch_size = int(params.get('batch_size', 1000000))
 20         
 21         self.epsilon = float(params.get('epsilon', 100.0))
 22         self.lam = float(params.get('lam', 0.00001))
 23         
 24         self.max_rating = params.get('max_rating')
 25         self.min_rating = params.get('min_rating')
 26         
 27         if self.max_rating:
 28             self.max_rating = float(self.max_rating)
 29         if self.min_rating:
 30             self.min_rating = float(self.min_rating)
 31         
 32         self._mean_rating = mean(self._train_data[:, 2])
 33         
 34         self._user_feature = 0.3 * random.rand(num_user, num_feature)
 35         self._item_feature = 0.3 * random.rand(num_item, num_feature)
 36         
 37         self.train_errors = []
 38         self.test_errors = []
 39         
 40     def estimate(self, iterations = 50, converge = 1e-4):
 41         last_rmse = None
 42         for iteration in range(iterations):
 43             data = self._train_data
 44             #compute gradient
 45             u_features = (self._user_feature)[data[:, 0], :]
 46             i_features = (self._item_feature)[data[:, 1], :]
 47             ratings = data[:, 2] - self._mean_rating
 48             preds = sum(u_features*i_features, 1)
 49             errs = preds - ratings
 50             err_mat = tile(errs, (self._num_featrue, 1)).T
 51             
 52             u_grads = u_features * err_mat + self.lam * i_features
 53             i_grads = i_features * err_mat + self.lam * u_features
 54             
 55             u_feature_grads = zeros((self._num_user, self._num_featrue))
 56             i_feature_grads = zeros((self._num_item, self._num_featrue))
 57             
 58             for i in range(shape(data)[0]):
 59                 user = data[i, 0]
 60                 item = data[i, 1]
 61                 u_feature_grads[user, :] += u_grads[i, :]
 62                 i_feature_grads[item, :] += i_grads[i, :]
 63             
 64             self._user_feature = self._user_feature - (self.epsilon / self.batch_size) * u_feature_grads
 65             self._item_feature = self._item_feature - (self.epsilon / self.batch_size) * i_feature_grads
 66         
 67             train_preds = self.predict(self._train_data)
 68             train_rmse = RMSE(train_preds, float16(self._train_data[:, 2]))
 69             
 70             test_preds = self.predict(self._test_data)
 71             test_rmse = RMSE(test_preds, float16(self._test_data[:, 2]))
 72             
 73             self.train_errors.append(train_rmse)
 74             self.test_errors.append(test_rmse)
 75             
 76             print('iterations: %3d, train RMSE: %.6f, test RMSE: %.6f') % (iteration+1, train_rmse, test_rmse)
 77             
 78             if last_rmse:
 79                 if abs(train_rmse - last_rmse) < converge:
 80                     break
 81             last_rmse = train_rmse
 82     
 83     def predict(self, data):
 84         u_features = self._user_feature[data[:, 0], :]
 85         i_features = self._item_feature[data[:, 1], :]
 86         preds = sum(u_features*i_features, 1) + self._mean_rating
 87         
 88         if self.max_rating:
 89             preds[preds > self.max_rating] = self.max_rating
 90         if self.min_rating:
 91             preds[preds < self.min_rating] = self.min_rating
 92         return preds
 93 
 94 def toInt(arr):
 95     print('toInt() startting...')
 96     arr = mat(arr)
 97     m, n = shape(arr)
 98     nArr = zeros((m, n), dtype='int8')
 99     for i in range(m):
100         for j in range(n):
101             nArr[i, j] = int(arr[i, j])
102     print('toInt() ending...')
103     return nArr
104 
def loadTrainData(path):
    """Read the CSV file at *path* and return its cells via ``toInt``."""
    print('loadTrainData startting...')
    with open(path, 'r') as handle:
        # Materialise every CSV record as a list of string cells.
        records = [record for record in csv.reader(handle)]
    table = array(records)
    print('loadTrainData ending...')
    return toInt(table)
115 
def loadTestData(path):
    """Read the CSV file at *path* and return its cells via ``toInt``."""
    print('loadTestData startting...')
    with open(path) as handle:
        # Materialise every CSV record as a list of string cells.
        records = list(csv.reader(handle))
    table = array(records)
    print('loadTestData ending...')
    return toInt(table)
126 
# ---- lfm1 driver: fit the latent-factor model ------------------------------
# Model / data dimensions and the iteration cap.
num_user = 943
num_item = 1682
num_feature = 15
max_iter = 20000

train_path = 'C:\\Users\\think\\Desktop\\data\\u1.base'
test_path = 'C:\\Users\\think\\Desktop\\data\\u1.test'

train_data = loadTrainData(train_path)
test_data = loadTestData(test_path)

# Predictions are clipped to the 1..5 rating scale.
rec = matrixFactorization(
    num_user, num_item, num_feature, train_data, test_data,
    max_rating=5, min_rating=1)
rec.estimate(max_iter)
View Code

lfm2

  1 from __future__ import division
  2 import numpy as np
  3 import scipy as sp
  4 from numpy import *
  5 from numpy.random import random
  6 import csv
  7 
  8 class  SVD_C:
  9     def __init__(self,X,k=20):
 10         '''
 11             k  is the length of vector
 12         '''
 13         self.X=np.array(X)
 14         self.k=k
 15         self.ave=np.mean(self.X[:,2])
 16         print "the input data size is ",self.X.shape
 17         self.bi={}
 18         self.bu={}
 19         self.qi={}
 20         self.pu={}
 21         self.movie_user={}
 22         self.user_movie={}
 23         for i in range(self.X.shape[0]):
 24             uid=self.X[i][0]
 25             mid=self.X[i][1]
 26             rat=self.X[i][2]
 27             self.movie_user.setdefault(mid,{})
 28             self.user_movie.setdefault(uid,{})
 29             self.movie_user[mid][uid]=rat
 30             self.user_movie[uid][mid]=rat
 31             self.bi.setdefault(mid,0)
 32             self.bu.setdefault(uid,0)
 33             self.qi.setdefault(mid,random((self.k,1))/10*(np.sqrt(self.k)))
 34             self.pu.setdefault(uid,random((self.k,1))/10*(np.sqrt(self.k)))
 35     def pred(self,uid,mid):
 36         self.bi.setdefault(mid,0)
 37         self.bu.setdefault(uid,0)
 38         self.qi.setdefault(mid,np.zeros((self.k,1)))
 39         self.pu.setdefault(uid,np.zeros((self.k,1)))
 40         if (self.qi[mid]==None):
 41             self.qi[mid]=np.zeros((self.k,1))
 42         if (self.pu[uid]==None):
 43             self.pu[uid]=np.zeros((self.k,1))
 44         ans=self.ave+self.bi[mid]+self.bu[uid]+np.sum(self.qi[mid]*self.pu[uid])
 45         if ans>5:
 46             return 5
 47         elif ans<1:
 48             return 1
 49         return ans
 50     def train(self,steps=50,gamma=0.04,Lambda=0.15):
 51         for step in range(steps):
 52             print 'the ',step,'-th  step is running'
 53             rmse_sum=0.0
 54             kk=np.random.permutation(self.X.shape[0])
 55             for j in range(self.X.shape[0]):
 56                 i=kk[j]
 57                 uid=self.X[i][0]
 58                 mid=self.X[i][1]
 59                 rat=self.X[i][2]
 60                 eui=rat-self.pred(uid,mid)
 61                 rmse_sum+=eui**2
 62                 self.bu[uid]+=gamma*(eui-Lambda*self.bu[uid])
 63                 self.bi[mid]+=gamma*(eui-Lambda*self.bi[mid])
 64                 temp=self.qi[mid]
 65                 self.qi[mid]+=gamma*(eui*self.pu[uid]-Lambda*self.qi[mid])
 66                 self.pu[uid]+=gamma*(eui*temp-Lambda*self.pu[uid])
 67             gamma=gamma*0.93
 68             print "the rmse of this step on train data is ",np.sqrt(rmse_sum/self.X.shape[0])
 69             #self.test(test_data)
 70     def test(self,test_X):
 71         output=[]
 72         sums=0
 73         test_X=np.array(test_X)
 74         #print "the test data size is ",test_X.shape
 75         for i in range(test_X.shape[0]):
 76             pre=self.pred(test_X[i][0],test_X[i][1])
 77             output.append(pre)
 78             #print pre,test_X[i][2]
 79             sums+=(pre-test_X[i][2])**2
 80         rmse=np.sqrt(sums/test_X.shape[0])
 81         print "the rmse on test data is ",rmse
 82         return output
 83 
 84     
 85 def toInt(arr):
 86     print('toInt() startting...')
 87     arr = mat(arr)
 88     m, n = shape(arr)
 89     nArr = zeros((m, n), dtype='int8')
 90     for i in range(m):
 91         for j in range(n):
 92             nArr[i, j] = int(arr[i, j])
 93     print('toInt() ending...')
 94     return nArr
 95 
 96 def loadTrainData(path):
 97     print('loadTrainData startting...')
 98     l = []
 99     with open(path, 'r') as file:
100         lines = csv.reader(file)
101         for line in lines:
102             l.append(line)
103     l = array(l)
104     print('loadTrainData ending...')
105     return toInt(l)
106 
def loadTestData(path):
    """Read the CSV file at *path* and return its cells via ``toInt``."""
    print('loadTestData startting...')
    with open(path) as handle:
        # Materialise every CSV record as a list of string cells.
        records = list(csv.reader(handle))
    table = array(records)
    print('loadTestData ending...')
    return toInt(table)
117 
# ---- lfm2 driver: train the biased model, then score the test split --------
train_path = 'C:\\Users\\think\\Desktop\\data\\u1.base'
test_path = 'C:\\Users\\think\\Desktop\\data\\u1.test'

train_data = loadTrainData(train_path)
test_data = loadTestData(test_path)

# 30 latent features; train() runs with its default settings.
a = SVD_C(train_data, k=30)
a.train()
a.test(test_data)
View Code

 rbm //rmse有点高啊,1.1多,不知道哪儿的问题Orz

  1 #include <iostream>
  2 #include <string>
  3 #include <cstdlib>
  4 #include <cmath>
  5 #include <algorithm>
  6 #include <vector>
  7 #include <utility>
  8 #include <cstdio>
  9 #include <cstring>
 10 
 11 using namespace std;
 12 
 13 const int num_of_user = 943;
 14 const int num_of_movies = 1682;
 15 const int num_of_rating = 5;
 16 const int num_of_hidden = 200;
 17 const int num_of_visible = 1682;
 18 
 19 double uniform(double, double);
 20 int binomial(double);
 21 
 22 class RBM
 23 {
 24 public:
 25     int N;
 26     int n_visible;
 27     int n_hidden;
 28     int rating;
 29     double W[num_of_hidden][num_of_visible][num_of_rating];
 30     double hbias[num_of_hidden];
 31     double vbias[num_of_rating][num_of_visible];
 32 
 33     RBM(int, int, int, int);
 34     void contrastiveDivergence(int[][1682], double, int);
 35     void sample_h_given_v(int[][1682], double*, int*);
 36     double sigmoid(double);
 37     double Vtoh_sigm(int [][1682], double [][5], double);
 38     void gibbs_hvh(int*, double[][1682], int[][1682], double*, int*);
 39     double HtoV_sigm(int*, int, int, int);
 40     void sample_v_given_h(int* , double [][1682], int [][1682]);
 41     void reconstruct(int[][1682], double[][1682]);
 42 };
 43 
 44 void RBM::contrastiveDivergence(int train_data[][1682], double learning_rate, int k)
 45 {
 46     //train_data 5 * 1682
 47     double ph_sigm_out[num_of_hidden]; // 10
 48     int ph_sample[num_of_hidden]; // 10
 49     double nv_sigm_outs[num_of_rating][num_of_visible]; // 5 * 1682
 50     int nv_samples[num_of_rating][num_of_visible]; // 5 * 1692
 51     double nh_sigm_outs[num_of_hidden]; // 10
 52     int nh_samples[num_of_hidden]; // 10
 53 
 54     sample_h_given_v(train_data, ph_sigm_out, ph_sample);
 55 
 56     for (int i = 0; i < k; ++i)
 57     {
 58         if (i == 0)
 59             gibbs_hvh(ph_sample, nv_sigm_outs, nv_samples, nh_sigm_outs, nh_samples);
 60         else
 61             gibbs_hvh(nh_samples, nv_sigm_outs, nv_samples, nh_sigm_outs, nh_samples);
 62     }
 63 
 64     for (int i = 0; i < n_hidden; ++i)
 65     {
 66         for (int j = 0; j < n_visible; ++j)
 67         {
 68             for (int kk = 0; kk < rating; ++kk)
 69             {
 70                 W[i][j][kk] += learning_rate * (ph_sigm_out[i] * train_data[kk][j] - nh_sigm_outs[i] * nv_samples[kk][j]);
 71             }
 72         }
 73         hbias[i] += learning_rate * (ph_sigm_out[i] - nh_sigm_outs[i]) ;
 74     }
 75 
 76     for (int i = 0; i < rating; ++i)
 77     {
 78         for (int j = 0; j < n_visible; ++j)
 79         {
 80             vbias[i][j] += learning_rate * (train_data[i][j] - nv_samples[i][j]) ;
 81         }
 82     }
 83 }
 84 
 85 void RBM::gibbs_hvh(int* ph_sample, double nv_sigm_outs[][1682], int nv_samples[][1682], double* nh_sigm_outs, int* nh_samples)
 86 {
 87     sample_v_given_h(ph_sample, nv_sigm_outs, nv_samples);
 88     sample_h_given_v(nv_samples, nh_sigm_outs, nh_samples);
 89 }
 90 
 91 void RBM::sample_h_given_v(int train_data[][1682], double* ph_sigm_out, int* ph_sample)
 92 {
 93     for (int i = 0; i < n_hidden; ++i)
 94     {
 95         ph_sigm_out[i] = Vtoh_sigm(train_data, W[i], hbias[i]);
 96         ph_sample[i] = binomial(ph_sigm_out[i]);
 97     }
 98 }
 99 
100 void RBM::sample_v_given_h(int* h0_sample, double nv_sigm_outs[][1682], int nv_samples[][1682])
101 {
102     for (int i = 0; i < rating; ++i)
103     {
104         for (int j = 0; j < n_visible; ++j)
105         {
106             nv_sigm_outs[i][j] = HtoV_sigm(h0_sample, j, vbias[i][j], i);
107             nv_samples[i][j] = binomial(nv_sigm_outs[i][j]);
108         }
109     }
110 }
111 
112 double RBM::HtoV_sigm(int* h0_sample, int i, int vbias, int kk)
113 {
114     double temp = 0;
115     for (int j = 0; j < n_hidden; ++j)
116     {
117         temp += W[j][i][kk] * h0_sample[j];
118     }
119     temp += vbias;
120     return sigmoid(temp);
121 }
122 
123 double RBM::Vtoh_sigm(int train_data[][1682], double W[][5], double hbias)
124 {
125     double temp = 0.0;
126     for (int i = 0; i < rating; ++i)
127     {
128         for (int j = 0; j < n_visible; ++j)
129             temp += W[j][i] * train_data[i][j];
130     }
131     temp += hbias;
132     return sigmoid(temp);
133 }
134 
135 double RBM::sigmoid(double x)
136 {
137     return 1.0 / (1.0 + exp(-x));
138 }
139 
140 RBM::RBM(int train_N, int n_v, int n_h, int rt)
141 {
142     N = train_N;
143     n_visible = num_of_visible;
144     n_hidden = num_of_hidden;
145     rating = num_of_rating;
146 
147     double a = 1.0 / n_visible;
148     for (int i = 0; i < n_hidden; ++i)
149         for (int j = 0; j < n_visible; ++j)
150             for (int k = 0; k < rating; ++k)
151                 W[i][j][k] = uniform(-a, a);
152 
153 
154     for (int i = 0; i < n_hidden; ++i)
155         hbias[i] = 0.0;
156 
157     for (int i = 0; i < rating; ++i)
158         for (int j = 0; j < n_visible; ++j)
159             vbias[i][j] = 0.0;
160 }
161 
162 void RBM::reconstruct(int test_data[][1682], double reconstruct_data[][1682])
163 {
164     double h[num_of_hidden];
165     double temp = 0;
166 
167     for (int i = 0; i < n_hidden; ++i)
168     {
169         h[i] = Vtoh_sigm(test_data, W[i], hbias[i]);
170     }
171 
172     for (int i = 0; i < rating; ++i)
173     {
174         for (int j = 0; j < n_visible; ++j)
175         {
176             temp = 0;
177             for (int kk = 0; kk < n_hidden; ++kk)
178             {
179                 temp += W[kk][j][i] * h[kk];
180             }
181             temp += vbias[i][j];
182             reconstruct_data[i][j] = sigmoid(temp);
183         }
184     }
185 }
186 
// Pseudo-random double in [min, max), drawn with rand().
double uniform(double min, double max)
{
    const double unit = rand() / (RAND_MAX + 1.0);  // in [0, 1)
    const double width = max - min;
    return unit * width + min;
}
191 
// Single Bernoulli draw: returns 1 with probability p, otherwise 0.
// A p outside [0, 1] yields 0 without consuming a random number.
int binomial(double p)
{
    const bool out_of_range = (p < 0 || p > 1);
    if (out_of_range)
        return 0;

    const double draw = rand() / (RAND_MAX + 1.0);  // in [0, 1)
    return (draw < p) ? 1 : 0;
}
199 
200 double make_predict(RBM rbm, int train_data[][1682], int u, vector<pair<int, int> >& v)
201 {
202     double hidden[num_of_hidden];
203     for (int i = 0; i < num_of_hidden; ++i)
204     {
205         double temp = 0.0;
206         for (int j = 0; j < num_of_rating; ++j)
207         {
208             for (int kk = 0; kk < num_of_movies; ++kk)
209             {
210                 temp += train_data[j][kk] * rbm.W[i][kk][j];
211             }
212         }
213         temp += rbm.hbias[i];
214         hidden[i] = rbm.sigmoid(temp);
215     }
216     int size = v.size();
217     double ret = 0;
218     for (int i = 0; i < size; ++i)
219     {
220         double vp[num_of_rating];
221         int item = v[i].first;
222         int real_rating = v[i].second;
223 
224         for (int j = 0; j < num_of_rating; ++j)
225         {
226             double temp = 0;
227             for (int kk = 0; kk < num_of_hidden; ++kk)
228             {
229                 temp += hidden[kk]*rbm.W[kk][item][j];
230             }
231             temp += rbm.vbias[j][item];
232             temp = exp(temp);
233             vp[j] = temp;
234         }
235         double mx = 0, mxi = 0;
236         for (int j = 0; j < num_of_rating; ++j)
237         {
238             if (vp[j] > mx) mx = vp[j], mxi = j;
239         }
240         ret += (mxi - real_rating) * (mxi - real_rating);
241     }
242     return ret;
243 }
244 
245 void get_train_data(int train_data[][5][1682])
246 {
247     FILE *fp;
248     freopen("E:\\DL\\MovieLens\\ml-100k\\u1.base", "r", stdin);
249     int u, m, r;
250     long long t;
251     printf("a\n");
252     long long int cnt = 0;
253     while (~scanf("%d %d %d %lld", &u, &m, &r, &t))
254     {
255         u--, m--, r--;
256         train_data[u][r][m] = 1;
257     }
258     fclose(stdin);
259 }
260 
261 void get_test_data(vector<pair<int, int> > td[])
262 {
263     FILE* fp;
264     freopen("E:\\DL\\MovieLens\\ml-100k\\u1.test", "r", stdin);
265     int u, m, r;
266     long long t;
267     while (~scanf("%d %d %d %lld", &u, &m, &r, &t))
268     {
269         u--, m--, r--;
270         td[u].push_back(make_pair(m, r));
271     }
272     fclose(stdin);
273 }
274 
275 void train()
276 {
277     srand(0);
278     int train_N = 100;
279     int n_visible = num_of_visible;
280     int n_hidden = num_of_hidden;
281     int rating = num_of_rating;
282     int train_iter = 1000;
283     double learning_rate = 0.0001;
284     int training_num = 1000;
285     int k = 1;
286     int train_data[943][5][1682];
287     memset(train_data, 0, sizeof(train_data));
288     get_train_data(train_data);
289 
290     double hbias[num_of_user][num_of_hidden];
291     memset(hbias, 0, sizeof(hbias));
292 
293     vector<pair<int, int> > test_data[num_of_user];
294     get_test_data(test_data);
295 
296 
297     RBM rbm = RBM(train_N, n_visible, n_hidden, rating);
298 
299     for (int iter = 0; iter < train_iter; ++iter)
300     {
301         for (int i = 0; i < num_of_user; ++i)
302         {
303             rbm.contrastiveDivergence(train_data[i], learning_rate, 1);
304         }
305         int cnt = 0;
306         double error = 0;
307         for (int i = 0; i < num_of_user; ++i)
308         {
309             error += make_predict(rbm, train_data[i], i, test_data[i]);
310             cnt += test_data[i].size();
311         }
312         double rmse = sqrt(error / cnt);
313         printf("epoch: %d, rmse: %f\n",iter, rmse);
314         learning_rate *= 0.9;
315     }
316 
317     for (int i = 0; i < num_of_hidden; ++i)
318         printf("%lf ", rbm.hbias[i]);
319     printf("-----------------------------");
320 
321     int cnt = 0;
322     double error = 0;
323     for (int i = 0; i < num_of_user; ++i)
324     {
325         error += make_predict(rbm, train_data[i], i, test_data[i]);
326         cnt += test_data[i].size();
327     }
328     double rmse = sqrt(error / cnt);
329     printf("rmse: %f\n", rmse);
330 
331 }
332 
333 // 943 users
334 // 1682 items
335 // 100000 ratings
336 
337 int main()
338 {
339     train();
340 
341     return 0;
342 }
View Code

 

posted on 2016-05-12 19:09  JustForCS  阅读(318)  评论(0)  编辑  收藏  举报

导航