Reducing the Dimensionality of Data with Neural Networks / A Fast Learning Algorithm for Deep Belief Nets

Deeplearning原文作者Hinton代码注解

  1 Matlab示例代码为两部分,分别对应不同的论文:
  2 
  3 1. Reducing the Dimensionality of data with neural networks 
  4 
  5   mnistdeepauto.m   backprop.m   rbmhidlinear.m
  6 
  7 2. A fast learning algorithm for deep belief nets
  8 
  9   mnistclassify.m   backpropclassify.m  
 10 
 11  其余部分代码通用。
 12 
 13 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 14 mnistclassify.m
 15 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 16 
 17 clear all
 18 close all
 19 
 20 maxepoch=50; % number of training epochs (used by rbm.m as its loop bound)
 21 numhid=500; numpen=500; numpen2=2000; % hidden-unit counts for RBM layers 1, 2 and 3
 22 
 23 fprintf(1,'Converting Raw files into Matlab format \n');
 24 converter;
 25 
 26 fprintf(1,'Pretraining a deep autoencoder. \n');
 27 fprintf(1,'The Science paper used 50 epochs. This uses %3i \n', maxepoch);
 28 
 29 makebatches;% split the data into batches (expected to define batchdata, which is read below)
 30 [numcases numdims numbatches]=size(batchdata); % get the dimensions of batchdata
 31 %%numcases   number of cases in each batch
 32 %%numdims    dimensionality of each data vector
 33 %%numbatches number of batches
 34 
 35 fprintf(1,'Pretraining Layer 1 with RBM: %d-%d \n',numdims,numhid);% image input layer to the first hidden layer
 36 restart=1; % ask rbm.m to initialise its parameters
 37 rbm; % run the RBM training script on batchdata
 38 hidrecbiases=hidbiases; % keep the hidden-unit biases of this layer
 39 save mnistvhclassify vishid hidrecbiases visbiases; % store the layer-1 weights and biases
 40 
 41 fprintf(1,'\nPretraining Layer 2 with RBM: %d-%d \n',numhid,numpen);% first hidden layer to second hidden layer
 42 batchdata=batchposhidprobs; % hidden-layer output of the previous RBM becomes the input of this RBM
 43 numhid=numpen;% number of hidden units for this RBM; the number of inputs is given by the data just assigned
 44 restart=1;
 45 rbm;
 46 hidpen=vishid; penrecbiases=hidbiases; hidgenbiases=visbiases; % as above: keep the weights and biases
 47 save mnisthpclassify hidpen penrecbiases hidgenbiases;
 48 
 49 fprintf(1,'\nPretraining Layer 3 with RBM: %d-%d \n',numpen,numpen2);% second hidden layer to third hidden layer; otherwise as above
 50 batchdata=batchposhidprobs;
 51 numhid=numpen2;
 52 restart=1;
 53 rbm;
 54 hidpen2=vishid; penrecbiases2=hidbiases; hidgenbiases2=visbiases;
 55 save mnisthp2classify hidpen2 penrecbiases2 hidgenbiases2;
 56 
 57 backpropclassify;
 58 
 59  
 60 
 61 
 62 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 63 backpropclassify.m
 64 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 65 maxepoch=200; % number of fine-tuning epochs (bound of the epoch loop below)
 66 fprintf(1,'\nTraining discriminative model on MNIST by minimizing cross entropy error. \n');% minimise the cross-entropy
 67 fprintf(1,'60 batches of 1000 cases each. \n');
 68 
 69 load mnistvhclassify% load the pretrained weights and biases between the layers
 70 load mnisthpclassify
 71 load mnisthp2classify
 72 
 73 makebatches;% split the data into batches
 74 [numcases numdims numbatches]=size(batchdata);
 75 N=numcases; % number of data vectors per batch
 76 
 77 %%%% PREINITIALIZE WEIGHTS OF THE DISCRIMINATIVE MODEL%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 78 
 79 w1=[vishid; hidrecbiases];% layer-1 weights with the hidden biases appended as the last row
 80 w2=[hidpen; penrecbiases];% same for layer 2
 81 w3=[hidpen2; penrecbiases2];% same for layer 3
 82 w_class = 0.1*randn(size(w3,2)+1,10);% random classifier weights: (columns of w3 + 1) rows, 10 columns
 83 %%%%%%%%%% END OF PREINITIALIZATION OF WEIGHTS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 84 
 85 l1=size(w1,1)-1;% number of units feeding each weight matrix (bias row excluded)
 86 l2=size(w2,1)-1;
 87 l3=size(w3,1)-1;
 88 l4=size(w_class,1)-1;% number of units in the top hidden layer
 89 l5=10; % number of units in the label layer
 90 test_err=[];% misclassification count on the test set, one entry per epoch
 91 train_err=[];% misclassification count on the training set, one entry per epoch
 92 
 93 
 94 for epoch = 1:maxepoch
 95 
 96 %%%%%%%%%%%%%%%%%%%% COMPUTE TRAINING MISCLASSIFICATION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 97 err=0; 
 98 err_cr=0;
 99 counter=0;
100 [numcases numdims numbatches]=size(batchdata);
101 %%numcases   number of cases in each batch
102 %%numdims    dimensionality of each data vector
103 %%numbatches number of batches
104 N=numcases;%% number of data vectors per batch
105 for batch = 1:numbatches
106 data = [batchdata(:,:,batch)];% read one batch of data
107 target = [batchtargets(:,:,batch)];% read the targets of the current batch
108 data = [data ones(N,1)];% append a column of N ones (input for the bias row)
109 w1probs = 1./(1 + exp(-data*w1)); w1probs = [w1probs ones(N,1)];% logistic (sigmoid) activations of each layer, ones column appended
110 w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)];
111 w3probs = 1./(1 + exp(-w2probs*w3)); w3probs = [w3probs ones(N,1)];
112 
113 targetout = exp(w3probs*w_class);% unnormalised outputs, N rows by 10 columns
114 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
115 % Processing of the label-layer output (the original note cites "formula 6.1"); w3probs*w_class is the input to the label units
116 % Only one label unit is taken as active; it is chosen using the probabilities computed below
117 % The 10 units form a "softmax" group
118 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
119 targetout = targetout./repmat(sum(targetout,2),1,10);% divide each of the 10 label outputs by the row sum
120 
121 [I J]=max(targetout,[],2);% maximum of each output row and its column index
122 [I1 J1]=max(target,[],2);% maximum of each target row and its column index
123 counter=counter+length(find(J==J1));% count the correctly classified cases
124 err_cr = err_cr- sum(sum( target(:,1:end).*log(targetout))) ; % accumulate the cross-entropy -sum(target.*log(targetout))
125 end
126 train_err(epoch)=(numcases*numbatches-counter);% total number of misclassified training cases
127 train_crerr(epoch)=err_cr/numbatches;% cross-entropy averaged over the batches (not an error rate)
128 
129 %%%%%%%%%%%%%% END OF COMPUTING TRAINING MISCLASSIFICATION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
130 
131 %%%%%%%%%%%%%%%%%%%% COMPUTE TEST MISCLASSIFICATION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
132 err=0;
133 err_cr=0;
134 counter=0;
135 [testnumcases testnumdims testnumbatches]=size(testbatchdata);
136 
137 N=testnumcases;
138 for batch = 1:testnumbatches
139 data = [testbatchdata(:,:,batch)];
140 target = [testbatchtargets(:,:,batch)];
141 data = [data ones(N,1)];
142 w1probs = 1./(1 + exp(-data*w1)); w1probs = [w1probs ones(N,1)];
143 w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)];
144 w3probs = 1./(1 + exp(-w2probs*w3)); w3probs = [w3probs ones(N,1)];
145 targetout = exp(w3probs*w_class);
146 targetout = targetout./repmat(sum(targetout,2),1,10);
147 
148 [I J]=max(targetout,[],2);
149 [I1 J1]=max(target,[],2);
150 counter=counter+length(find(J==J1));
151 err_cr = err_cr- sum(sum( target(:,1:end).*log(targetout))) ;
152 end
153 test_err(epoch)=(testnumcases*testnumbatches-counter);
154 test_crerr(epoch)=err_cr/testnumbatches;
155 fprintf(1,'Before epoch %d Train # misclassified: %d (from %d). Test # misclassified: %d (from %d) \t \t \n',...
156 epoch,train_err(epoch),numcases*numbatches,test_err(epoch),testnumcases*testnumbatches);
157 
158 %%%%%%%%%%%%%% END OF COMPUTING TEST MISCLASSIFICATION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
159 
160 tt=0; 
161 for batch = 1:numbatches/10
162 fprintf(1,'epoch %d batch %d\r',epoch,batch);
163 
164 %%%%%%%%%%% COMBINE 10 MINIBATCHES INTO 1 LARGER MINIBATCH %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
165 % Stack 10 small batches into one larger batch (1000 cases according to the message printed above), then fine-tune with conjugate gradient
166 tt=tt+1; 
167 data=[];
168 targets=[]; 
169 for kk=1:10
170 data=[data 
171 batchdata(:,:,(tt-1)*10+kk)]; % stack the 10 small batches vertically
172 targets=[targets
173 batchtargets(:,:,(tt-1)*10+kk)];
174 end
175 
176 %%%%%%%%%%%%%%% PERFORM CONJUGATE GRADIENT WITH 3 LINESEARCHES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
177 max_iter=3; % number of line searches
178 
179 if epoch<6 % First update top-level weights holding other weights fixed. 
180 N = size(data,1); % number of rows (cases) in the combined batch 
181 XX = [data ones(N,1)]; % append a 1 to every row to feed the bias
182 w1probs = 1./(1 + exp(-XX*w1)); w1probs = [w1probs ones(N,1)];
183 w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)];
184 w3probs = 1./(1 + exp(-w2probs*w3)); %w3probs = [w3probs ones(N,1)];
185 
186 VV = [w_class(:)']'; % unroll w_class into a single column vector, the form in which it is handed to minimize
187 %
188 Dim = [l4; l5]; % unit counts of the last two layers: top hidden layer and label layer
189 [X, fX] = minimize(VV,'CG_CLASSIFY_INIT',max_iter,Dim,w3probs,targets);% train only the top-level weights; see the function definition
190 %minimize is Carl Rasmussen's "minimize" code
191 %%------------------ meaning of the arguments ------------------%%
192 %VV unrolled weight vector passed as the starting point; it must be a single column (D by 1) 
193 %X parameters returned by minimize for the function f="CG_CLASSIFY_INIT"
194 %fX second output of minimize; it is not used in this script (NOTE(review): per minimize.m it should hold function values, not derivatives - confirm)
195 %max_iter if positive, the number of line searches; if negative, presumably the maximum number of function evaluations (confirm against minimize.m)
196 %%-------------------------------------------------%
197 w_class = reshape(X,l4+1,l5);% restore the weight-matrix shape
198 
199 else % from epoch 6 on: fine-tune all weights together
200 VV = [w1(:)' w2(:)' w3(:)' w_class(:)']'; % unroll all weight matrices, column by column, into one column vector
201 Dim = [l1; l2; l3; l4; l5]; % unit counts of every layer, passed to the objective
202 [X, fX] = minimize(VV,'CG_CLASSIFY',max_iter,Dim,data,targets);
203 
204 w1 = reshape(X(1:(l1+1)*l2),l1+1,l2); % restore w1
205 xxx = (l1+1)*l2; % running offset into X used while restoring the matrices
206 w2 = reshape(X(xxx+1:xxx+(l2+1)*l3),l2+1,l3);
207 xxx = xxx+(l2+1)*l3;
208 w3 = reshape(X(xxx+1:xxx+(l3+1)*l4),l3+1,l4);
209 xxx = xxx+(l3+1)*l4;
210 w_class = reshape(X(xxx+1:xxx+(l4+1)*l5),l4+1,l5);
211 
212 end
213 %%%%%%%%%%%%%%% END OF CONJUGATE GRADIENT WITH 3 LINESEARCHES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
214 
215 end
216 
217 save mnistclassify_weights w1 w2 w3 w_class
218 save mnistclassify_error test_err test_crerr train_err train_crerr;
219 
220 end
221 
222  
223 
224 
225 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
226 rbm.m
227 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
228 epsilonw = 0.1; % Learning rate for weights 
229 epsilonvb = 0.1; % Learning rate for biases of visible units 
230 epsilonhb = 0.1; % Learning rate for biases of hidden units 
231 weightcost = 0.0002; % weight-decay coefficient (multiplies vishid in the weight update below)
232 initialmomentum = 0.5;
233 finalmomentum = 0.9;
234 
235 [numcases numdims numbatches]=size(batchdata);
236 %%numcases   number of cases in each batch
237 %%numdims    dimensionality of each data vector
238 %%numbatches number of batches
239 
240 if restart ==1, % fresh start: reset the epoch counter and all parameters
241 restart=0;
242 epoch=1;
243 
244 % Initializing symmetric weights and biases.
245 vishid = 0.1*randn(numdims, numhid); % visible-to-hidden weights, small random values
246 hidbiases = zeros(1,numhid);% biases of the hidden units
247 visbiases = zeros(1,numdims);% biases of the visible units
248 
249 poshidprobs = zeros(numcases,numhid); % hidden-unit probabilities, positive phase
250 neghidprobs = zeros(numcases,numhid);% hidden-unit probabilities, negative phase
251 posprods = zeros(numdims,numhid);% visible-hidden products (data'*poshidprobs), positive phase
252 negprods = zeros(numdims,numhid);% visible-hidden products (negdata'*neghidprobs), negative phase
253 vishidinc = zeros(numdims,numhid);% increment of the visible-to-hidden weights
254 hidbiasinc = zeros(1,numhid);% increment of the hidden biases
255 visbiasinc = zeros(1,numdims);% increment of the visible biases
256 batchposhidprobs=zeros(numcases,numhid,numbatches);% positive-phase hidden probabilities of every batch (the calling script uses them as input to the next RBM)
257 end
258 
259 %%%%%%%%%%%%%%%% print the epoch number and the batch being processed %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
260 for epoch = epoch:maxepoch, % loop over epochs, starting from the current value of epoch
261 fprintf(1,'epoch %d\r',epoch); 
262 errsum=0; % reset the accumulated reconstruction error
263 for batch = 1:numbatches, % process one batch at a time
264 fprintf(1,'epoch %d batch %d\r',epoch,batch);
265 
266 %%%%%%%%% START POSITIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
267 data = batchdata(:,:,batch); % all data of the current batch (v_i)
268 poshidprobs = 1./(1 + exp(-data*vishid - repmat(hidbiases,numcases,1))); % hidden probabilities given the data (h_i)
269 batchposhidprobs(:,:,batch)=poshidprobs;% store them for this batch; the values written in the last epoch are what remain afterwards
270 posprods = data' * poshidprobs;% contrastive-divergence statistic <v_i,h_i>
271 
272 poshidact = sum(poshidprobs);% column sums of the hidden probabilities (divided by numcases in the update below)
273 posvisact = sum(data);% column sums of the visible data
274 
275 %%%%%%%%% END OF POSITIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
276 poshidstates = poshidprobs > rand(numcases,numhid);% sample binary hidden states (Gibbs sampling step)
277 
278 %%%%%%%%% START NEGATIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
279 negdata = 1./(1 + exp(-poshidstates*vishid' - repmat(visbiases,numcases,1)));% reconstruct the visible units from the sampled hidden states (v_{i+1})
280 neghidprobs = 1./(1 + exp(-negdata*vishid - repmat(hidbiases,numcases,1))); % hidden probabilities given the reconstruction (h_{i+1})
281 negprods = negdata'*neghidprobs;% contrastive-divergence statistic <v_{i+1},h_{i+1}>
282 
283 neghidact = sum(neghidprobs);
284 negvisact = sum(negdata);
285 
286 %%%%%%%%% END OF NEGATIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
287 err= sum(sum( (data-negdata).^2 )); % squared reconstruction error of this batch
288 errsum = err + errsum;% accumulated over the epoch
289 
290 if epoch>5, % use the final momentum after the first 5 epochs
291 momentum=finalmomentum;
292 else
293 momentum=initialmomentum;
294 end;
295 
296 %%%%%%%%% UPDATE WEIGHTS AND BIASES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
297 vishidinc = momentum*vishidinc + ...
298 epsilonw*( (posprods-negprods)/numcases - weightcost*vishid);% weight increment
299 visbiasinc = momentum*visbiasinc + (epsilonvb/numcases)*(posvisact-negvisact);% visible-bias increment
300 hidbiasinc = momentum*hidbiasinc + (epsilonhb/numcases)*(poshidact-neghidact);% hidden-bias increment
301 
302 vishid = vishid + vishidinc;
303 visbiases = visbiases + visbiasinc;
304 hidbiases = hidbiases + hidbiasinc;
305 
306 %%%%%%%%%%%%%%%% END OF UPDATES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
307 
308 end
309 fprintf(1, 'epoch %4i error %6.1f \n', epoch, errsum); 
310 end;
311 
312  
313 
314 
315 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
316 CG_CLASSIFY_INIT.M
317 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
318 function [f, df] = CG_CLASSIFY_INIT(VV,Dim,w3probs,target);
319 % CG_CLASSIFY_INIT  Softmax cross-entropy and its gradient w.r.t. the top-level classifier weights only.
320 %   VV is the (l1+1)*l2-by-1 unrolled weight matrix; Dim = [l1; l2] holds the unit counts of the top
321 %   hidden layer and of the label layer; w3probs is N-by-l1; target must match the N-by-l2 output.
322 %   Returns f, the cross-entropy summed over the N cases, and df, the gradient as a column shaped like VV.
323 l1 = Dim(1); l2 = Dim(2); N = size(w3probs,1);
324 w_class = reshape(VV,l1+1,l2);                     % restore the weight matrix
325 w3probs = [w3probs ones(N,1)];                     % ones column multiplies the bias (last) row of w_class
326 act = w3probs*w_class;                             % total input to the label units, N-by-l2
327 act = act - repmat(max(act,[],2),1,l2);            % shift each row by its maximum so exp() cannot overflow
328 logp = act - repmat(log(sum(exp(act),2)),1,l2);    % log-softmax; avoids 0*log(0) = NaN in the loss
329 targetout = exp(logp);                             % softmax probabilities; width comes from Dim(2), not a hard-coded 10
330 f = -sum(sum( target(:,1:end).*logp ));            % cross-entropy between targets and predictions
331 
332 dw_class = w3probs'*(targetout-target(:,1:end));   % gradient of f w.r.t. w_class (softmax output minus target)
333 
334 df = dw_class(:);
335 
336  
337 
338  
339 
340 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
341 CG_CLASSIFY.M
342 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
343 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
344 % 该段代码对所有权重进行整体微调
345 % 各部分过程见 CG_CLASSIFY_INIT.m注解
346 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
347 function [f, df] = CG_CLASSIFY(VV,Dim,XX,target);
348 
349 
350 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
351 rbmhidlinear.m
352 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
353 %除了最后计算单元值采用的是线性单元其余过程全部一样
354 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
355 
356 复制代码

 

posted @ 2015-07-03 17:45  菜鸡一枚  阅读(704)  评论(0编辑  收藏  举报