Implementing an MLP neural network in C++
I had been training models with Theano, but recently I needed to port the code to C or C++. I looked at other implementations online for reference and still prefer C++, but after reading a few .cpp versions I found that each had some bugs, which was annoying. My coding skills are limited, so fixing them took quite a while. I also added functions for writing and reading the weights, so the training results can be saved. Below is a description of the basic functionality of the implementation.
Problem description:
Rewrite an MLP, i.e. an ordinary multi-layer neural network, in C++. It needs to support chaining several hidden layers into the output layer, with a softmax classification layer on top.
Test case:
The test case is hand-made: a 3-bit binary number has 8 possible decimal values, and I map them to labels 0 through 7. For example, 001 maps to label 1 and 111 maps to label 7, and so on.
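As an illustration only (this snippet is not part of the project; the project builds its labels in LogisticRegression::makeLabels), the mapping from a 3-bit input to its decimal label and a one-hot target looks roughly like this:

// Sketch: how a 3-bit input maps to its decimal label and a one-hot training target.
#include <iostream>

int main()
{
    const int n_in = 3, n_out = 8;
    double x[n_in] = {1, 1, 1};                  // binary 111
    int label = (int)(x[0]*4 + x[1]*2 + x[2]);   // decimal value -> label 7

    double y[n_out] = {0};
    y[label] = 1.0;                              // one-hot target used for training

    std::cout << "label = " << label << std::endl;
    return 0;
}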
Now for the code:
main.cpp
#include <iostream> #include "NeuralNetwork.h" #include "util.h" using namespace std; /*main函数中调用的两个函数功能一样 *将3位二进制分类成十进制 *test_lr用的是单层的softmax回归 *mlp是含有多个隐藏层的神经网络 */ int main() { cout << "****softmax****" << endl; test_lr(); cout << "****mlp****" << endl; mlp(); return 0; }
test_lr() is the softmax test; let's look at its files first.
LogisticRegression.h
#ifndef LOGISTICREGRESSIONLAYER
#define LOGISTICREGRESSIONLAYER

class LogisticRegression
{
public:
    LogisticRegression(int n_i, int i_o, int);
    ~LogisticRegression();

    void forward_propagation(double* input_data);
    void back_propagation(double* input_data, double* label, double lr);
    void softmax(double* x);

    void printwb();
    void writewb(const char *pcname);
    long readwb(const char *pcname, long);
    void setwb(double ppdw[][3], double [8]);

    void train(double *x, double *y, double lr);
    int predict(double *);
    double cal_error(double **ppdtest, double* pdlabel, int ibatch);
    void makeLabels(int* pimax, double (*pplabels)[8]);

    // Output of this layer's forward pass; also the final prediction
    double* output_data;
    // Delta values needed during back-propagation
    double* delta;

public:
    int n_in;
    int n_out;
    int n_train;
    double** w;
    double* b;
};

void test_lr();
void testwb();

#endif

The testwb declared in the header only exists to test reading and writing the weights; you can ignore it for now.
LogisticRegression.cpp
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include "LogisticRegression.h"
#include "util.h"
using namespace std;

LogisticRegression::LogisticRegression(int n_i, int n_o, int n_t)
{
    n_in = n_i;
    n_out = n_o;
    n_train = n_t;

    w = new double* [n_out];
    for(int i = 0; i < n_out; ++i)
    {
        w[i] = new double [n_in];
    }
    b = new double [n_out];

    double a = 1.0 / n_in;
    srand((unsigned)time(NULL));
    for(int i = 0; i < n_out; ++i)
    {
        for(int j = 0; j < n_in; ++j)
            w[i][j] = uniform(-a, a);
        b[i] = uniform(-a, a);
    }

    delta = new double [n_out];
    output_data = new double [n_out];
}

LogisticRegression::~LogisticRegression()
{
    for(int i = 0; i < n_out; i++)
        delete []w[i];
    delete[] w;
    delete[] b;
    delete[] output_data;
    delete[] delta;
}

void LogisticRegression::printwb()
{
    cout << "****w****\n";
    for(int i = 0; i < n_out; ++i)
    {
        for(int j = 0; j < n_in; ++j)
            cout << w[i][j] << ' ';
        cout << endl;
    }
    cout << "****b****\n";
    for(int i = 0; i < n_out; ++i)
    {
        cout << b[i] << ' ';
    }
    cout << endl;
    cout << "****output****\n";
    for(int i = 0; i < n_out; ++i)
    {
        cout << output_data[i] << ' ';
    }
    cout << endl;
}

void LogisticRegression::softmax(double* x)
{
    // Subtract the maximum before exponentiating for numerical stability
    double _max = 0.0;
    double _sum = 0.0;
    for(int i = 0; i < n_out; ++i)
    {
        if(_max < x[i])
            _max = x[i];
    }
    for(int i = 0; i < n_out; ++i)
    {
        x[i] = exp(x[i] - _max);
        _sum += x[i];
    }
    for(int i = 0; i < n_out; ++i)
    {
        x[i] /= _sum;
    }
}

void LogisticRegression::forward_propagation(double* input_data)
{
    for(int i = 0; i < n_out; ++i)
    {
        output_data[i] = 0.0;
        for(int j = 0; j < n_in; ++j)
        {
            output_data[i] += w[i][j] * input_data[j];
        }
        output_data[i] += b[i];
    }
    softmax(output_data);
}

void LogisticRegression::back_propagation(double* input_data, double* label, double lr)
{
    for(int i = 0; i < n_out; ++i)
    {
        delta[i] = label[i] - output_data[i];
        for(int j = 0; j < n_in; ++j)
        {
            w[i][j] += lr * delta[i] * input_data[j] / n_train;
        }
        b[i] += lr * delta[i] / n_train;
    }
}

int LogisticRegression::predict(double *x)
{
    forward_propagation(x);
    cout << "***result is ***" << endl;
    int iresult = getMaxIndex(output_data, n_out);
    cout << iresult << endl;
    if (iresult == 1)
        printArr(output_data, n_out);
    return iresult;
}

void LogisticRegression::train(double *x, double *y, double lr)
{
    forward_propagation(x);
    back_propagation(x, y, lr);
}

// This function is not used yet; you can skip it
double LogisticRegression::cal_error(double **ppdtest, double* pdlabel, int ibatch)
{
    double error = 0.0;
    int imax = -1, ierrNum = 0;
    for (int i = 0; i < ibatch; ++i)
    {
        imax = predict(ppdtest[i]);
        if (imax != pdlabel[i])
            ++ierrNum;
    }
    error = (double)ierrNum / ibatch;
    return error;
}

void LogisticRegression::writewb(const char *pcname)
{
    savewb(pcname, w, b, n_out, n_in);
}

long LogisticRegression::readwb(const char *pcname, long dstartpos)
{
    return loadwb(pcname, w, b, n_out, n_in, dstartpos);
}

void LogisticRegression::setwb(double ppdw[][3], double szib[8])
{
    for (int i = 0; i < n_out; ++i)
    {
        for (int j = 0; j < n_in; ++j)
            w[i][j] = ppdw[i][j];
        b[i] = szib[i];
    }
    cout << "setwb----------" << endl;
    printArrDouble(w, n_out, n_in);
    printArr(b, n_out);
}

void LogisticRegression::makeLabels(int* pimax, double (*pplabels)[8])
{
    for (int i = 0; i < n_train; ++i)
    {
        for (int j = 0; j < n_out; ++j)
            pplabels[i][j] = 0;
        int k = pimax[i];
        pplabels[i][k] = 1.0;
    }
}

void test_lr()
{
    srand(0);
    double learning_rate = 0.1;
    int n_epochs = 200;
    int test_N = 2;
    const int trainNum = 8, n_in = 3, n_out = 8;

    double train_X[trainNum][n_in] = {
        {1, 1, 1},
        {1, 1, 0},
        {1, 0, 1},
        {1, 0, 0},
        {0, 1, 1},
        {0, 1, 0},
        {0, 0, 1},
        {0, 0, 0}
    };
    // szimax stores the index of the maximum, i.e. the correct label of each sample
    int szimax[trainNum];
    for (int i = 0; i < trainNum; ++i)
        szimax[i] = trainNum - i - 1;
    double train_Y[trainNum][n_out];

    // construct LogisticRegression
    LogisticRegression classifier(n_in, n_out, trainNum);
    classifier.makeLabels(szimax, train_Y);

    // train online
    for(int epoch = 0; epoch < n_epochs; epoch++)
    {
        for(int i = 0; i < trainNum; i++)
        {
            classifier.train(train_X[i], train_Y[i], learning_rate);
        }
    }

    const char *pcfile = "test.wb";
    remove(pcfile);   // savewb appends, so clear any weights left over from a previous run
    classifier.writewb(pcfile);

    LogisticRegression logistic(n_in, n_out, trainNum);
    logistic.readwb(pcfile, 0);

    // test data (each sample has n_in elements)
    double test_X[2][n_in] = {
        {1, 0, 1},
        {0, 0, 1}
    };

    // test
    cout << "before readwb ---------" << endl;
    for(int i = 0; i < test_N; i++)
    {
        classifier.predict(test_X[i]);
        cout << endl;
    }
    cout << "after readwb ---------" << endl;
    for(int i = 0; i < trainNum; i++)
    {
        logistic.predict(train_X[i]);
        cout << endl;
    }
    cout << "*********\n";
}

void testwb()
{
    const int trainNum = 8, n_in = 3, n_out = 8;

    double train_X[trainNum][n_in] = {
        {1, 1, 1},
        {1, 1, 0},
        {1, 0, 1},
        {1, 0, 0},
        {0, 1, 1},
        {0, 1, 0},
        {0, 0, 1},
        {0, 0, 0}
    };
    double szib[n_out] = {1, 2, 3, 3, 3, 3, 2, 1};

    // construct LogisticRegression with known weights
    LogisticRegression classifier(n_in, n_out, trainNum);
    classifier.setwb(train_X, szib);

    const char *pcfile = "test.wb";
    remove(pcfile);   // savewb appends, so clear any weights left over from a previous run
    classifier.writewb(pcfile);

    LogisticRegression logistic(n_in, n_out, trainNum);
    logistic.readwb(pcfile, 0);
}
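For reference (my summary, not something stated in the original post): the update in back_propagation is the standard gradient step for softmax regression with a cross-entropy loss. With logits and outputs

$z_i = \sum_j w_{ij} x_j + b_i, \qquad p_i = \frac{e^{z_i}}{\sum_k e^{z_k}}, \qquad L = -\sum_i y_i \log p_i$

the gradient is $\partial L / \partial z_i = p_i - y_i$, so the code stores delta[i] = label[i] - output_data[i] and moves w[i][j] by lr * delta[i] * input_data[j] (divided by n_train), i.e. a step in the negative gradient direction. Subtracting the maximum before exponentiating in softmax only guards against overflow; it does not change the result.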
Below is the code that assembles the MLP, i.e. wires the hidden layers to the softmax layer. Debugging this part took a very, very long time...
NeuralNetwork.h
#ifndef NEURALNETWORK_H
#define NEURALNETWORK_H

#include "HiddenLayer.h"
#include "LogisticRegression.h"

class NeuralNetwork
{
public:
    NeuralNetwork(int n, int n_i, int n_o, int nhl, int *hls);
    ~NeuralNetwork();

    void train(double** in_data, double** in_label, double lr, int epochs);
    void predict(double** in_data, int n);
    void writewb(const char *pcname);
    void readwb(const char *pcname);

private:
    int N;                   // number of training samples
    int n_in;                // input dimension
    int n_out;               // output dimension
    int n_hidden_layer;      // number of hidden layers
    int* hidden_layer_size;  // sizes of the hidden layers, e.g. {3,4} means two hidden layers,
                             // the first with 3 units and the second with 4

    HiddenLayer **sigmoid_layers;
    LogisticRegression *log_layer;
};

void mlp();

#endif

mlp() exercises the basic functionality of the whole network; the code is a bit messy. Essentially it trains one MLP and saves its weights, then a second MLP reads those weights back and makes predictions.
NeuralNetwork.cpp
#include <iostream> #include "NeuralNetwork.h" #include "util.h" //#include "HiddenLayer.h" //#include "LogisticRegression.h" using namespace std; const int n_train = 8, innode = 3, outnode = 8; NeuralNetwork::NeuralNetwork(int n, int n_i, int n_o, int nhl, int *hls) { N = n; n_in = n_i; n_out = n_o; n_hidden_layer = nhl; hidden_layer_size = hls; //构造网络结构 sigmoid_layers = new HiddenLayer* [n_hidden_layer]; for(int i = 0; i < n_hidden_layer; ++i) { if(i == 0) { sigmoid_layers[i] = new HiddenLayer(n_in, hidden_layer_size[i]);//第一个隐层 } else { sigmoid_layers[i] = new HiddenLayer(hidden_layer_size[i-1], hidden_layer_size[i]);//其他隐层 } } log_layer = new LogisticRegression(hidden_layer_size[n_hidden_layer-1], n_out, N);//最后的softmax层 } NeuralNetwork::~NeuralNetwork() { //二维指针分配的对象不一定是二维数组 for(int i = 0; i < n_hidden_layer; ++i) delete sigmoid_layers[i]; //删除的时候不能加[] delete[] sigmoid_layers; //log_layer只是一个普通的对象指针,不能作为数组delete delete log_layer;//删除的时候不能加[] } void NeuralNetwork::train(double** ppdinData, double** ppdinLabel, double dlr, int iepochs) { printArrDouble(ppdinData, N, n_in); cout << "******label****" << endl; printArrDouble(ppdinLabel, N, n_out); //反复迭代样本iepochs次训练 for(int epoch = 0; epoch < iepochs; ++epoch) { double e = 0.0; for(int i = 0; i < N; ++i) { //前向传播阶段 for(int n = 0; n < n_hidden_layer; ++ n) { if(n == 0) //第一个隐层直接输入数据 { sigmoid_layers[n]->forward_propagation(ppdinData[i]); } else //其他隐层用前一层的输出作为输入数据 { sigmoid_layers[n]->forward_propagation(sigmoid_layers[n-1]->output_data); } } //softmax层使用最后一个隐层的输出作为输入数据 log_layer->forward_propagation(sigmoid_layers[n_hidden_layer-1]->output_data); //e += log_layer->cal_error(ppdinLabel[i]); //反向传播阶段 log_layer->back_propagation(sigmoid_layers[n_hidden_layer-1]->output_data, ppdinLabel[i], dlr); for(int n = n_hidden_layer-1; n >= 1; --n) { if(n == n_hidden_layer-1) { sigmoid_layers[n]->back_propagation(sigmoid_layers[n-1]->output_data, log_layer->delta, log_layer->w, log_layer->n_out, dlr, N); } else { double *pdinputData; pdinputData = sigmoid_layers[n-1]->output_data; sigmoid_layers[n]->back_propagation(pdinputData, sigmoid_layers[n+1]->delta, sigmoid_layers[n+1]->w, sigmoid_layers[n+1]->n_out, dlr, N); } } //这里该怎么写? 
if (n_hidden_layer > 1) sigmoid_layers[0]->back_propagation(ppdinData[i], sigmoid_layers[1]->delta, sigmoid_layers[1]->w, sigmoid_layers[1]->n_out, dlr, N); else sigmoid_layers[0]->back_propagation(ppdinData[i], log_layer->delta, log_layer->w, log_layer->n_out, dlr, N); } //if (epoch % 100 == 1) //cout << "iepochs number is " << epoch << " cost function is " << e / (double)N << endl; } } void NeuralNetwork::predict(double** ppdata, int n) { for(int i = 0; i < n; ++i) { for(int n = 0; n < n_hidden_layer; ++ n) { if(n == 0) //第一个隐层直接输入数据 { sigmoid_layers[n]->forward_propagation(ppdata[i]); } else //其他隐层用前一层的输出作为输入数据 { sigmoid_layers[n]->forward_propagation(sigmoid_layers[n-1]->output_data); } } //softmax层使用最后一个隐层的输出作为输入数据 log_layer->predict(sigmoid_layers[n_hidden_layer-1]->output_data); //log_layer->forward_propagation(sigmoid_layers[n_hidden_layer-1]->output_data); } } void NeuralNetwork::writewb(const char *pcname) { for(int i = 0; i < n_hidden_layer; ++i) { sigmoid_layers[i]->writewb(pcname); } log_layer->writewb(pcname); } void NeuralNetwork::readwb(const char *pcname) { long dcurpos = 0, dreadsize = 0; for(int i = 0; i < n_hidden_layer; ++i) { dreadsize = sigmoid_layers[i]->readwb(pcname, dcurpos); cout << "hiddenlayer " << i + 1 << " read bytes: " << dreadsize << endl; if (-1 != dreadsize) dcurpos += dreadsize; else { cout << "read wb error from HiddenLayer" << endl; return; } } dreadsize = log_layer->readwb(pcname, dcurpos); if (-1 != dreadsize) dcurpos += dreadsize; else { cout << "read wb error from sofmaxLayer" << endl; return; } } //double **makeLabelSample(double **label_x) double **makeLabelSample(double label_x[][outnode]) { double **pplabelSample; pplabelSample = new double*[n_train]; for (int i = 0; i < n_train; ++i) { pplabelSample[i] = new double[outnode]; } for (int i = 0; i < n_train; ++i) { for (int j = 0; j < outnode; ++j) pplabelSample[i][j] = label_x[i][j]; } return pplabelSample; } double **maken_train(double train_x[][innode]) { double **ppn_train; ppn_train = new double*[n_train]; for (int i = 0; i < n_train; ++i) { ppn_train[i] = new double[innode]; } for (int i = 0; i < n_train; ++i) { for (int j = 0; j < innode; ++j) ppn_train[i][j] = train_x[i][j]; } return ppn_train; } void mlp() { //输入样本 double X[n_train][innode]= { {0,0,0},{0,0,1},{0,1,0},{0,1,1},{1,0,0},{1,0,1},{1,1,0},{1,1,1} }; double Y[n_train][outnode]={ {1, 0, 0, 0, 0, 0, 0, 0}, {0, 1, 0, 0, 0, 0, 0, 0}, {0, 0, 1, 0, 0, 0, 0, 0}, {0, 0, 0, 1, 0, 0, 0, 0}, {0, 0, 0, 0, 1, 0, 0, 0}, {0, 0, 0, 0, 0, 1, 0, 0}, {0, 0, 0, 0, 0, 0, 1, 0}, {0, 0, 0, 0, 0, 0, 0, 1}, }; const int ihiddenSize = 2; int phidden[ihiddenSize] = {5, 5}; //printArr(phidden, 1); NeuralNetwork neural(n_train, innode, outnode, ihiddenSize, phidden); double **train_x, **ppdlabel; train_x = maken_train(X); //printArrDouble(train_x, n_train, innode); ppdlabel = makeLabelSample(Y); neural.train(train_x, ppdlabel, 0.1, 3500); cout<<"trainning complete..."<<endl; //pcname存放权值 const char *pcname = "mlp55new.wb"; neural.writewb(pcname); NeuralNetwork neural2(n_train, innode, outnode, ihiddenSize, phidden); cout<<"readwb start..."<<endl; neural2.readwb(pcname); cout<<"readwb end..."<<endl; neural.predict(train_x, n_train); cout << "----------after readwb________" << endl; neural2.predict(train_x, n_train); for (int i = 0; i != n_train; ++i) { delete []train_x[i]; delete []ppdlabel[i]; } delete []train_x; delete []ppdlabel; cout<<endl; }
HiddenLayer.h
#ifndef HIDDENLAYER_H
#define HIDDENLAYER_H

class HiddenLayer
{
public:
    HiddenLayer(int n_i, int n_o);
    ~HiddenLayer();

    void forward_propagation(double* input_data);
    void back_propagation(double *pdinputData, double *pdnextLayerDelta,
                          double** ppdnextLayerW, int iNextLayerOutNum, double dlr, int N);

    void writewb(const char *pcname);
    long readwb(const char *pcname, long);

    // Output of this layer's forward pass, used as the next layer's input
    double* output_data;
    // Delta values needed during back-propagation
    double* delta;

public:
    int n_in;
    int n_out;
    double** w;
    double* b;
};

#endif
HiddenLayer.cpp
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include "HiddenLayer.h"
#include "util.h"
using namespace std;

HiddenLayer::HiddenLayer(int n_i, int n_o)
{
    n_in = n_i;
    n_out = n_o;

    w = new double* [n_out];
    for(int i = 0; i < n_out; ++i)
    {
        w[i] = new double [n_in];
    }
    b = new double [n_out];

    double a = 1.0 / n_in;
    srand((unsigned)time(NULL));
    for(int i = 0; i < n_out; ++i)
    {
        for(int j = 0; j < n_in; ++j)
            w[i][j] = uniform(-a, a);
        b[i] = uniform(-a, a);
    }

    delta = new double [n_out];
    output_data = new double [n_out];
}

HiddenLayer::~HiddenLayer()
{
    for(int i = 0; i < n_out; i++)
        delete []w[i];
    delete[] w;
    delete[] b;
    delete[] output_data;
    delete[] delta;
}

void HiddenLayer::forward_propagation(double* pdinputData)
{
    for(int i = 0; i < n_out; ++i)
    {
        output_data[i] = 0.0;
        for(int j = 0; j < n_in; ++j)
        {
            output_data[i] += w[i][j] * pdinputData[j];
        }
        output_data[i] += b[i];
        output_data[i] = sigmoid(output_data[i]);
    }
}

void HiddenLayer::back_propagation(double *pdinputData, double *pdnextLayerDelta,
                                   double** ppdnextLayerW, int iNextLayerOutNum, double dlr, int N)
{
    /*
     pdinputData       input data of this layer
     pdnextLayerDelta  the next layer's delta (residual), an array of size iNextLayerOutNum
     ppdnextLayerW     the weights from this layer to the next layer
     iNextLayerOutNum  the next layer's n_out
     dlr               learning rate
     N                 total number of training samples
    */

    // sigma must have as many elements as this layer has units;
    // the code I found online got this wrong (the author clearly never tested it)
    double* sigma = new double[n_out];
    for(int i = 0; i < n_out; ++i)
        sigma[i] = 0.0;

    for(int i = 0; i < iNextLayerOutNum; ++i)
    {
        for(int j = 0; j < n_out; ++j)
        {
            sigma[j] += ppdnextLayerW[i][j] * pdnextLayerDelta[i];
        }
    }
    // Compute this layer's delta
    for(int i = 0; i < n_out; ++i)
    {
        delta[i] = sigma[i] * output_data[i] * (1 - output_data[i]);
    }
    // Update this layer's weights and biases
    for(int i = 0; i < n_out; ++i)
    {
        for(int j = 0; j < n_in; ++j)
        {
            w[i][j] += dlr * delta[i] * pdinputData[j];
        }
        b[i] += dlr * delta[i];
    }
    delete[] sigma;
}

void HiddenLayer::writewb(const char *pcname)
{
    savewb(pcname, w, b, n_out, n_in);
}

long HiddenLayer::readwb(const char *pcname, long dstartpos)
{
    return loadwb(pcname, w, b, n_out, n_in, dstartpos);
}
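To spell out what back_propagation computes (this is just a summary of the code above): for a sigmoid layer with outputs $o_j$, next-layer weights $w^{(next)}_{ij}$ and next-layer deltas $\delta^{(next)}_i$, the layer's own delta is

$\delta_j = \Big(\sum_i w^{(next)}_{ij}\, \delta^{(next)}_i\Big)\, o_j\, (1 - o_j)$

which is exactly the sigma accumulation followed by the $o_j(1-o_j)$ factor; the weights are then adjusted by dlr * delta[i] * pdinputData[j]. This is also why sigma needs n_out elements, one per unit of this layer, rather than iNextLayerOutNum.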
Below is a utility file.
util.h
#ifndef UTIL_H
#define UTIL_H

#include <iostream>

typedef unsigned char BYTE;

double sigmoid(double x);
double uniform(double _min, double _max);
void initArr(double *parr, int num);
int getMaxIndex(double *pdarr, int num);
void savewb(const char *pcname, double **ppw, double *pb, int irow, int icol);
long loadwb(const char *pcname, double **ppw, double *pb, int irow, int icol, long dstartpos);
void readonefile(const char *pcname);
void writeonefile(const char *pcname);

// The templates qualify std:: explicitly so the header does not depend on a
// using-directive in the file that includes it.
template <typename T>
void printArr(T *parr, int num)
{
    std::cout << "****printArr****" << std::endl;
    for (int i = 0; i < num; ++i)
        std::cout << parr[i] << ' ';
    std::cout << std::endl;
}

template <typename T>
void printArrDouble(T **pparr, int row, int col)
{
    std::cout << "****printArrDouble****" << std::endl;
    for (int i = 0; i < row; ++i)
    {
        for (int j = 0; j < col; ++j)
        {
            std::cout << pparr[i][j] << ' ';
        }
        std::cout << std::endl;
    }
}

#endif
util.cpp
#include "util.h" #include <iostream> #include <ctime> #include <cmath> using namespace std; int getMaxIndex(double *pdarr, int num) { double dmax = -1; int imax = -1; for(int i = 0; i < num; ++i) { if (pdarr[i] > dmax) { dmax = pdarr[i]; imax = i; } } return imax; } double sigmoid(double x) { return 1.0/(1.0+exp(-x)); } double uniform(double _min, double _max) { return rand()/(RAND_MAX + 1.0) * (_max - _min) + _min; } void initArr(double *parr, int num) { for (int i = 0; i < num; ++i) parr[i] = 0.0; } void savewb(const char *pcname, double **ppw, double *pb, int irow, int icol) { FILE *pf; if( (pf = fopen(pcname, "ab" )) == NULL ) { printf( "File coulkd not be opened " ); return; } int isizeofelem = sizeof(double); for (int i = 0; i < irow; ++i) { if (fwrite((const void*)ppw[i], isizeofelem, icol, pf) != icol) { fputs ("Writing ppw error",stderr); return; } } if (fwrite((const void*)pb, isizeofelem, irow, pf) != irow) { fputs ("Writing ppw error",stderr); return; } fclose(pf); } long loadwb(const char *pcname, double **ppw, double *pb, int irow, int icol, long dstartpos) { FILE *pf; long dtotalbyte = 0, dreadsize; if( (pf = fopen(pcname, "rb" )) == NULL ) { printf( "File coulkd not be opened " ); return -1; } //让文件指针偏移到正确位置 fseek(pf, dstartpos , SEEK_SET); int isizeofelem = sizeof(double); for (int i = 0; i < irow; ++i) { dreadsize = fread((void*)ppw[i], isizeofelem, icol, pf); if (dreadsize != icol) { fputs ("Reading ppw error",stderr); return -1; } //每次成功读取,都要加到dtotalbyte中,最后返回 dtotalbyte += dreadsize; } dreadsize = fread(pb, isizeofelem, irow, pf); if (dreadsize != irow) { fputs ("Reading pb error",stderr); return -1; } dtotalbyte += dreadsize; dtotalbyte *= isizeofelem; fclose(pf); return dtotalbyte; } void readonefile(const char *pcname) { FILE *pf; if( (pf = fopen(pcname, "rb" )) == NULL ) { printf( "File could not be opened " ); return; } /*int isizeofelem = sizeof(BYTE); BYTE ielem;*/ int isizeofelem = sizeof(double); double ielem; while(1 == fread((void*)(&ielem), isizeofelem, 1, pf)) cout << ielem << endl; fclose(pf); } void writeonefile(const char *pcname) { FILE *pf; if( (pf = fopen(pcname, "wb" )) == NULL ) { printf( "File could not be opened " ); return; } //int isizeofelem = sizeof(BYTE); //BYTE ielem = (BYTE)16; int isizeofelem = sizeof(int); int ielem = 16; if(1 == fwrite((void*)(&ielem), isizeofelem, 1, pf)) cout << ielem << endl; fclose(pf); }
That is all of the code; it runs in my tests. I don't write much code, so please forgive any issues.
There is also a VS2008 project with this code; if you don't want to set up a project yourself, you can download it and run it directly. The only difference is that the project omits the weight-saving functions.
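If you would rather build it from the command line than use an IDE project, something like the following should work with g++ (assuming the file names used in this post):

g++ -o mlp main.cpp NeuralNetwork.cpp HiddenLayer.cpp LogisticRegression.cpp util.cpp
./mlp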