Deep Learning: DBN source code ----- C++

The code is adapted from yusugomori's implementation. A DBN is essentially a stack of RBMs, so the implementation has to keep track of which RBM it is currently working on: the hidden layer of each RBM serves as the input (visible) layer of the next one. Fine-tuning then adjusts the output layer built on top of the last hidden layer against the available labeled data. (Written as a beginner's notes.)
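As a quick reference, the conditionals each RBM uses (implemented below as propup and propdown) are the standard sigmoid forms, and the hidden activations they produce become the visible units of the next RBM in the stack:

    P(h_i = 1 \mid v) = \sigma\Big(\sum_j W_{ij} v_j + c_i\Big), \qquad
    P(v_j = 1 \mid h) = \sigma\Big(\sum_i W_{ij} h_i + b_j\Big), \qquad
    \sigma(x) = \frac{1}{1 + e^{-x}}

Here c corresponds to hbias and b to vbias in the code below.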

DBN.h

class DBN {

public:
    int N;
    int n_ins;
    int *hidden_layer_sizes;
    int n_outs;
    int n_layers;
    HiddenLayer **sigmoid_layers;
    RBM **rbm_layers;
    LogisticRegression *log_layer;
    DBN(int, int, int*, int, int);
    ~DBN();
    void pretrain(int*, double, int, int);
    void finetune(int*, int*, double, int);
    void predict(int*, double*);
};

HiddenLayer.h

class HiddenLayer {

public:
    int N;
    int n_in;
    int n_out;
    double **W;
    double *b;
    HiddenLayer(int, int, int, double**, double*);
    ~HiddenLayer();
    double output(int*, double*, double);
    void sample_h_given_v(int*, int*);
};

LogisticRegression.h

class LogisticRegression {

public:
    int N;
    int n_in;
    int n_out;
    double **W;
    double *b;
    LogisticRegression(int, int, int);
    ~LogisticRegression();
    void train(int*, int*, double);
    void softmax(double*);
    void predict(int*, double*);
};

RBM.h

class RBM {

public:
    int N;
    int n_visible;
    int n_hidden;
    double **W;
    double *hbias;
    double *vbias;
    RBM(int, int, int, double**, double*, double*);
    ~RBM();
    void contrastive_divergence(int*, double, int);
    void sample_h_given_v(int*, double*, int*);
    void sample_v_given_h(int*, double*, int*);
    double propup(int*, double*, double);
    double propdown(int*, int, double);
    void gibbs_hvh(int*, double*, int*, double*, int*);
    void reconstruct(int*, double*);
};

DBN.cpp

#include <iostream>
#include <cmath>
#include <cstdlib>
#include "HiddenLayer.h"
#include "RBM.h"
#include "LogisticRegression.h"
#include "DBN.h"
using namespace std;


// uniform random number in [min, max)
double uniform(double min, double max)
{
    return rand() / (RAND_MAX + 1.0) * (max - min) + min;
}

// draw from a binomial distribution: n trials with success probability p
int binomial(int n, double p)
{
    if(p < 0 || p > 1) return 0;

    int c = 0;
    double r;

    for(int i=0; i<n; i++) {
        r = rand() / (RAND_MAX + 1.0);
        if (r < p) c++;
    }

    return c;
}

double sigmoid(double x)
{
    return 1.0 / (1.0 + exp(-x));
}


// DBN
// Initialize every component of the network
DBN::DBN(int size, int n_i, int *hls, int n_o, int n_l)
{
    int input_size;

    N = size;
    n_ins = n_i;
    hidden_layer_sizes = hls;
    n_outs = n_o;
    n_layers = n_l;

    sigmoid_layers = new HiddenLayer*[n_layers];
    rbm_layers = new RBM*[n_layers];

    // one hidden layer / RBM pair per layer; in this example there are 2 RBMs
    for(int i=0; i<n_layers; i++)
    {
        if(i == 0)
        {
            input_size = n_ins;                    // the first layer reads the raw input
        }
        else
        {
            input_size = hidden_layer_sizes[i-1];  // each later layer reads the previous hidden layer
        }

        // construct the hidden layer
        sigmoid_layers[i] = new HiddenLayer(N, input_size, hidden_layer_sizes[i], NULL, NULL);

        // construct the RBM, sharing W and the hidden bias with this hidden layer
        rbm_layers[i] = new RBM(N, input_size, hidden_layer_sizes[i], sigmoid_layers[i]->W, sigmoid_layers[i]->b, NULL);
    }
    // The loop above builds the stack of RBMs (2 of them here).
    // The logistic regression layer takes the last hidden layer as its input
    // and produces the network's output.
    log_layer = new LogisticRegression(N, hidden_layer_sizes[n_layers-1], n_outs);
}

DBN::~DBN()
{
    delete log_layer;

    for(int i=0; i<n_layers; i++)
    {
        delete sigmoid_layers[i];
        delete rbm_layers[i];
    }
    delete[] sigmoid_layers;
    delete[] rbm_layers;
}

// Greedy layer-wise pre-training. Each RBM is trained in turn; the input for
// RBM i is obtained by propagating the training sample up through the layers below it.
void DBN::pretrain(int *input, double lr, int k, int epochs)
{
    int *layer_input = NULL;
    int prev_layer_input_size;
    int *prev_layer_input;

    int *train_X = new int[n_ins];

    for(int i=0; i<n_layers; i++)
    {
        for(int epoch=0; epoch<epochs; epoch++)
        {
            for(int n=0; n<N; n++)
            {
                // copy the n-th training sample
                for(int m=0; m<n_ins; m++) train_X[m] = input[n * n_ins + m];

                // propagate the sample up to the input of layer i
                for(int l=0; l<=i; l++)
                {
                    if(l == 0)
                    {
                        layer_input = new int[n_ins];
                        for(int j=0; j<n_ins; j++) layer_input[j] = train_X[j];
                    }
                    else
                    {
                        if(l == 1) prev_layer_input_size = n_ins;
                        else prev_layer_input_size = hidden_layer_sizes[l-2];

                        prev_layer_input = new int[prev_layer_input_size];
                        for(int j=0; j<prev_layer_input_size; j++) prev_layer_input[j] = layer_input[j];
                        delete[] layer_input;

                        layer_input = new int[hidden_layer_sizes[l-1]];
                        sigmoid_layers[l-1]->sample_h_given_v(prev_layer_input, layer_input);
                        delete[] prev_layer_input;
                    }
                }

                // one CD-k step for the current RBM on this sample
                rbm_layers[i]->contrastive_divergence(layer_input, lr, k);
                delete[] layer_input;   // free the per-sample input before the next iteration
                layer_input = NULL;
            }
        }
    }
    delete[] train_X;
}
// Fine-tuning: propagate each sample up to the last hidden layer, then train the
// logistic regression layer on that representation with the sample's label.
void DBN::finetune(int *input, int *label, double lr, int epochs)
{
    int *layer_input = NULL;
    int *prev_layer_input;

    int *train_X = new int[n_ins];
    int *train_Y = new int[n_outs];

    for(int epoch=0; epoch<epochs; epoch++)
    {
        for(int n=0; n<N; n++)
        {
            // copy the n-th sample and its label
            for(int m=0; m<n_ins; m++)  train_X[m] = input[n * n_ins + m];
            for(int m=0; m<n_outs; m++) train_Y[m] = label[n * n_outs + m];

            // forward pass through all hidden layers
            for(int i=0; i<n_layers; i++)
            {
                if(i == 0)
                {
                    prev_layer_input = new int[n_ins];
                    for(int j=0; j<n_ins; j++) prev_layer_input[j] = train_X[j];
                }
                else
                {
                    prev_layer_input = new int[hidden_layer_sizes[i-1]];
                    for(int j=0; j<hidden_layer_sizes[i-1]; j++) prev_layer_input[j] = layer_input[j];
                    delete[] layer_input;
                }
                layer_input = new int[hidden_layer_sizes[i]];
                sigmoid_layers[i]->sample_h_given_v(prev_layer_input, layer_input);
                delete[] prev_layer_input;
            }

            // update the output layer against the label
            log_layer->train(layer_input, train_Y, lr);
            delete[] layer_input;   // free this sample's last-layer representation
            layer_input = NULL;
        }
    }
    delete[] train_X;
    delete[] train_Y;
}

// Forward pass: propagate x through all hidden layers, then through the logistic regression layer.
void DBN::predict(int *x, double *y)
{
    double *layer_input;
    double *prev_layer_input;

    double linear_output;

    prev_layer_input = new double[n_ins];
    for(int j=0; j<n_ins; j++) prev_layer_input[j] = x[j];

    for(int i=0; i<n_layers; i++)
    {
        layer_input = new double[sigmoid_layers[i]->n_out];

        for(int k=0; k<sigmoid_layers[i]->n_out; k++)
        {
            linear_output = 0.0;
            for(int j=0; j<sigmoid_layers[i]->n_in; j++)
            {
                linear_output += sigmoid_layers[i]->W[k][j] * prev_layer_input[j];
            }
            linear_output += sigmoid_layers[i]->b[k];
            layer_input[k] = sigmoid(linear_output);
        }
        delete[] prev_layer_input;

        if(i < n_layers-1)
        {
            prev_layer_input = new double[sigmoid_layers[i]->n_out];
            for(int j=0; j<sigmoid_layers[i]->n_out; j++) prev_layer_input[j] = layer_input[j];
            delete[] layer_input;
        }
    }

    for(int i=0; i<log_layer->n_out; i++)
    {
        y[i] = 0;
        for(int j=0; j<log_layer->n_in; j++)
        {
            y[i] += log_layer->W[i][j] * layer_input[j];
        }
        y[i] += log_layer->b[i];
    }
    log_layer->softmax(y);

    delete[] layer_input;
}


// HiddenLayer
HiddenLayer::HiddenLayer(int size, int in, int out, double **w, double *bp)
{
    N = size;
    n_in = in;
    n_out = out;

    if(w == NULL)
    {
        W = new double*[n_out];
        for(int i=0; i<n_out; i++) W[i] = new double[n_in];
        double a = 1.0 / n_in;

        for(int i=0; i<n_out; i++)
        {
            for(int j=0; j<n_in; j++)
            {
                W[i][j] = uniform(-a, a);
            }
        }
    }
    else
    {
        W = w;
    }

    if(bp == NULL)
    {
        b = new double[n_out];
        for(int i=0; i<n_out; i++) b[i] = 0;   // start the biases at zero
    }
    else
    {
        b = bp;
    }
}

HiddenLayer::~HiddenLayer()
{
    for(int i=0; i<n_out; i++) delete[] W[i];
    delete[] W;
    delete[] b;
}

double HiddenLayer::output(int *input, double *w, double b)
{
    double linear_output = 0.0;
    for(int j=0; j<n_in; j++)
    {
        linear_output += w[j] * input[j];
    }
    linear_output += b;
    return sigmoid(linear_output);
}

void HiddenLayer::sample_h_given_v(int *input, int *sample)
{
    for(int i=0; i<n_out; i++)
    {
        sample[i] = binomial(1, output(input, W[i], b[i]));
    }
}


// RBM
RBM::RBM(int size, int n_v, int n_h, double **w, double *hb, double *vb)
{
    N = size;
    n_visible = n_v;
    n_hidden = n_h;

    if(w == NULL)
    {
        W = new double*[n_hidden];
        for(int i=0; i<n_hidden; i++) W[i] = new double[n_visible];
        double a = 1.0 / n_visible;

        for(int i=0; i<n_hidden; i++)
        {
            for(int j=0; j<n_visible; j++)
            {
                W[i][j] = uniform(-a, a);
            }
        }
    }
    else
    {
        W = w;
    }

    if(hb == NULL)
    {
        hbias = new double[n_hidden];
        for(int i=0; i<n_hidden; i++) hbias[i] = 0;
    }
    else
    {
        hbias = hb;
    }

    if(vb == NULL)
    {
        vbias = new double[n_visible];
        for(int i=0; i<n_visible; i++) vbias[i] = 0;
    }
    else
    {
        vbias = vb;
    }
}

RBM::~RBM()
{
    // In this DBN, W and hbias are shared with the corresponding HiddenLayer,
    // which frees them; only vbias is owned by the RBM.
    delete[] vbias;
}


void RBM::contrastive_divergence(int *input, double lr, int k)
{
    double *ph_mean = new double[n_hidden];
    int *ph_sample = new int[n_hidden];
    double *nv_means = new double[n_visible];
    int *nv_samples = new int[n_visible];
    double *nh_means = new double[n_hidden];
    int *nh_samples = new int[n_hidden];

    /* CD-k */
    sample_h_given_v(input, ph_mean, ph_sample);   // obtain h0 from the data v0

    for(int step=0; step<k; step++)
    {
        if(step == 0)
        {
            gibbs_hvh(ph_sample, nv_means, nv_samples, nh_means, nh_samples);   // obtain v1 and h1
        }
        else
        {
            gibbs_hvh(nh_samples, nv_means, nv_samples, nh_means, nh_samples);
        }
    }

    // Update the weights and both bias vectors. In Hinton's CD-k, v0 is the original input x;
    // h0 (ph_mean / ph_sample here) is approximately P(h | v0); v1 is the reconstruction of x,
    // approximately P(v | h0); and h1 is obtained from v1 in the same way. These quantities are
    // all CD-k needs to approximate the gradient (see the note after this function).
    for(int i=0; i<n_hidden; i++)
    {
        for(int j=0; j<n_visible; j++)
        {
            // In the update rule, P(h_i=1|v0) is ph_mean (h0); nh_means and nv_samples play the role of h1 and v1.
            W[i][j] += lr * (ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
        }
        hbias[i] += lr * (ph_sample[i] - nh_means[i]) / N;
    }

    for(int i=0; i<n_visible; i++)
    {
        vbias[i] += lr * (input[i] - nv_samples[i]) / N;
    }

    delete[] ph_mean;
    delete[] ph_sample;
    delete[] nv_means;
    delete[] nv_samples;
    delete[] nh_means;
    delete[] nh_samples;
}
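A brief note on why this update works: CD-k approximates the log-likelihood gradient of the RBM, the difference of expectations ⟨v_j h_i⟩_data − ⟨v_j h_i⟩_model, by replacing the intractable model expectation with the statistics of the k-step reconstruction. In the variables used above this reads:

    \Delta W_{ij} \approx \frac{lr}{N}\Big( P(h_i{=}1 \mid v^{(0)})\, v^{(0)}_j - P(h_i{=}1 \mid v^{(k)})\, v^{(k)}_j \Big), \quad
    \Delta hbias_i \approx \frac{lr}{N}\Big( h^{(0)}_i - P(h_i{=}1 \mid v^{(k)}) \Big), \quad
    \Delta vbias_j \approx \frac{lr}{N}\Big( v^{(0)}_j - v^{(k)}_j \Big)

with v^(0) = input, P(h|v^(0)) = ph_mean, h^(0) = ph_sample, v^(k) = nv_samples, and P(h|v^(k)) = nh_means.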

void RBM::sample_h_given_v(int *v0_sample, double *mean, int *sample)
{
    for(int i=0; i<n_hidden; i++)
    {
        mean[i] = propup(v0_sample, W[i], hbias[i]);
        sample[i] = binomial(1, mean[i]);
    }
}

void RBM::sample_v_given_h(int *h0_sample, double *mean, int *sample)
{
    for(int i=0; i<n_visible; i++)
    {
        mean[i] = propdown(h0_sample, i, vbias[i]);
        sample[i] = binomial(1, mean[i]);
    }
}

// P(h_i = 1 | v) for one hidden unit, given its weight row w and bias b
double RBM::propup(int *v, double *w, double b)
{
    double pre_sigmoid_activation = 0.0;
    for(int j=0; j<n_visible; j++)
    {
        pre_sigmoid_activation += w[j] * v[j];
    }
    pre_sigmoid_activation += b;
    return sigmoid(pre_sigmoid_activation);
}

// P(v_i = 1 | h) for one visible unit i, given the hidden sample h and bias b
double RBM::propdown(int *h, int i, double b)
{
    double pre_sigmoid_activation = 0.0;
    for(int j=0; j<n_hidden; j++)
    {
        pre_sigmoid_activation += W[j][i] * h[j];
    }
    pre_sigmoid_activation += b;
    return sigmoid(pre_sigmoid_activation);
}

// One step of Gibbs sampling: h -> v -> h
void RBM::gibbs_hvh(int *h0_sample, double *nv_means, int *nv_samples, double *nh_means, int *nh_samples)
{
    sample_v_given_h(h0_sample, nv_means, nv_samples);
    sample_h_given_v(nv_samples, nh_means, nh_samples);
}

void RBM::reconstruct(int *v, double *reconstructed_v)
{
    double *h = new double[n_hidden];
    double pre_sigmoid_activation;

    for(int i=0; i<n_hidden; i++)
    {
        h[i] = propup(v, W[i], hbias[i]);
    }

    for(int i=0; i<n_visible; i++)
    {
        pre_sigmoid_activation = 0.0;
        for(int j=0; j<n_hidden; j++)
        {
            pre_sigmoid_activation += W[j][i] * h[j];
        }
        pre_sigmoid_activation += vbias[i];
        reconstructed_v[i] = sigmoid(pre_sigmoid_activation);
    }
    delete[] h;
}


// LogisticRegression
LogisticRegression::LogisticRegression(int size, int in, int out)
{
    N = size;
    n_in = in;
    n_out = out;

    W = new double*[n_out];
    for(int i=0; i<n_out; i++) W[i] = new double[n_in];
    b = new double[n_out];

    for(int i=0; i<n_out; i++)
    {
        for(int j=0; j<n_in; j++)
        {
            W[i][j] = 0;
        }
        b[i] = 0;
    }
}

LogisticRegression::~LogisticRegression()
{
    for(int i=0; i<n_out; i++) delete[] W[i];
    delete[] W;
    delete[] b;
}


void LogisticRegression::train(int *x, int *y, double lr)
{
    double *p_y_given_x = new double[n_out];
    double *dy = new double[n_out];

    for(int i=0; i<n_out; i++)
    {
        p_y_given_x[i] = 0;
        for(int j=0; j<n_in; j++)
        {
            p_y_given_x[i] += W[i][j] * x[j];
        }
        p_y_given_x[i] += b[i];
    }
    softmax(p_y_given_x);

    for(int i=0; i<n_out; i++)
    {
        dy[i] = y[i] - p_y_given_x[i];
        for(int j=0; j<n_in; j++)
        {
            W[i][j] += lr * dy[i] * x[j] / N;
        }
        b[i] += lr * dy[i] / N;
    }

    delete[] p_y_given_x;
    delete[] dy;
}
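The update in train above is the usual single-example gradient step for softmax regression with cross-entropy loss; dy holds the gradient signal y − p:

    p_i = \frac{\exp(W_i \cdot x + b_i)}{\sum_k \exp(W_k \cdot x + b_k)}, \qquad
    W_{ij} \mathrel{+}= \frac{lr}{N}\,(y_i - p_i)\, x_j, \qquad
    b_i \mathrel{+}= \frac{lr}{N}\,(y_i - p_i)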

void LogisticRegression::softmax(double *x)
{
    double max = 0.0;
    double sum = 0.0;

    for(int i=0; i<n_out; i++) if(max < x[i]) max = x[i];
    for(int i=0; i<n_out; i++)
    {
        x[i] = exp(x[i] - max);
        sum += x[i];
    }

    for(int i=0; i<n_out; i++) x[i] /= sum;
}

void LogisticRegression::predict(int *x, double *y)
{
    for(int i=0; i<n_out; i++)
    {
        y[i] = 0;
        for(int j=0; j<n_in; j++)
        {
            y[i] += W[i][j] * x[j];
        }
        y[i] += b[i];
    }
    softmax(y);
}

void test_dbn()
{
    srand(0);

    double pretrain_lr = 0.1;
    int pretraining_epochs = 1000;
    int k = 1;
    double finetune_lr = 0.1;
    int finetune_epochs = 500;

    int train_N = 6;
    int test_N = 3;
    int n_ins = 6;
    int n_outs = 2;
    int hidden_layer_sizes[] = {3, 3};
    int n_layers = sizeof(hidden_layer_sizes) / sizeof(hidden_layer_sizes[0]);

    // training data
    int train_X[6][6] = {
        {1, 1, 1, 0, 0, 0},
        {1, 0, 1, 0, 0, 0},
        {1, 1, 1, 0, 0, 0},
        {0, 0, 1, 1, 1, 0},
        {0, 0, 1, 1, 0, 0},
        {0, 0, 1, 1, 1, 0}
    };

    int train_Y[6][2] = {
        {1, 0},
        {1, 0},
        {1, 0},
        {0, 1},
        {0, 1},
        {0, 1}
    };

    // construct the DBN: hidden layers, RBMs, and the logistic regression layer
    DBN dbn(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers);

    // pre-training
    dbn.pretrain(*train_X, pretrain_lr, k, pretraining_epochs);

    // fine-tuning
    dbn.finetune(*train_X, *train_Y, finetune_lr, finetune_epochs);

    // test data
    int test_X[3][6] = {
        {1, 1, 0, 0, 0, 0},
        {0, 0, 0, 1, 1, 0},
        {1, 1, 1, 1, 1, 0}
    };
    double test_Y[3][2];

    // test
    for(int i=0; i<test_N; i++)
    {
        dbn.predict(test_X[i], test_Y[i]);
        for(int j=0; j<n_outs; j++)
        {
            cout << test_Y[i][j] << " ";
        }
        cout << endl;
    }
}

int main()
{
    test_dbn();
    return 0;
}
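To try it out, everything can be built as a single translation unit, assuming the three headers and DBN.cpp are saved under the names shown above (the exact command line is my own, not from the original code):

    g++ -O2 -o dbn DBN.cpp
    ./dbn

Each output line is the softmax probability of the two classes for one test sample.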

 
