基于HOG特征的Adaboost行人检测
原地址:http://blog.csdn.net/van_ruin/article/details/9166591
1.方向梯度直方图(Histogram of Oriented Gradient, HOG)特征是一种在计算机视觉和图像处理中用来进行物体检测的特征描述子。它通过计算和统计图像局部区域的梯度方向直方图来构成特征。基本知识可以参考博客:http://blog.csdn.net/zouxy09/article/details/7929348 2.Adaboost的基础知识可以参考书籍:统计学习方法,第八章-提升方法adaboost。 这里利用HOG来训练Adaboost行人检测。在Haar-Adaboost算法中,弱分类器仅对一维分类。但是在Hog特征中,特征是每个block的串联。如果仅对一维分类(一个cell的其中一个方向的权值),就不能有效利用block的归一化效果。所以我们使用logistic弱分类器对每个block进行分类(实验中,每个block包含4个cell,每个cell有9个bin,即36维特征)。 本实验需要注意的地方: 1. adaboost误差率需要计算权重 2. logistic回归需要使用带权重的logistic分类器 3. logistic分类可能与数据分布相反。需要计算两次。(相反的情况下,拟合没有意义,需要将数据反转(1->0,0->1)) 发现总结与问题 1. 公理1. 对于任何数据的二值分类,能够得到正确率大于等于0.5的线性分类器。 2. 推论. 对于任何带权重数据的二值分类,能够得到(加权)正确率大于等于0.5的线性分类器。 3. 推论? 对于任何带权重数据的n值分类,能够得到(加权)正确率大于等于1/n的线性分类器。 4. 对于与logistic函数分布相反的数据,应该如何处理?(本实验的处理方式如前面所述)。 实验结果后的猜想 猜想1:Adaboost弱分类器所选取的特征仍然要保持一定的颗粒度。像素级的特征是无效的。 实验结果与分析 训练集: 500/500;测试集: 19/22(200个弱分类器) 测试数据较少,但是训练集的高正确率至少证明其能够由弱分类器(错误率普遍在0.25左右)提高样本数据集的精度。 17张图片中,有部分图片较模糊,行人影像较小,可能导致难以分辨。 下面给出代码,希望各位能够指正错误。说明:本代码全部由自己编写,所用函数未调用OpenCV实用库函数,及机器学习库函数(基本数据除外)。 /***********************************************************/ /** Copyright by Weidi Xu, S.C.U.T in Guangzhou, Guangdong**/ /***********************************************************/ #include <opencv2\opencv.hpp> #include <cstring> #include <cstdio> #include <cmath> #include <ctime> using std::clock_t; using std::clock; using namespace cv; //const parameters for image const int NUM_NEGIMAGE = 1000; const int NUM_POSIMAGE = 500; const int NUM_IMAGE = 1500; const int NUM_TESTIMAGE = 22; const int MAX_DIMENSION = 3781; const int IMAGE_ROWS = 128; const int IMAGE_COLS = 64; const int CELLSIZE = 8; const int BLOCKSIZE = 16; const int MOVELENGTH = 8; const int BINSIZE = 9; const double PI = 2*acos(0.0); const double eps = 1e-8; //mediate parameter const int NUM_BLOCK_ROWS = (IMAGE_ROWS-BLOCKSIZE)/MOVELENGTH+1; const int NUM_BLOCK_COLS = (IMAGE_COLS-BLOCKSIZE)/MOVELENGTH+1; const int NUM_BLOCK_FEATURES = (BLOCKSIZE/CELLSIZE)*(BLOCKSIZE/CELLSIZE)*BINSIZE+1;//zero for theta[0] //data from image //since the features
in the adaboost should contain a single block, it's better to define the feature of 3-dimension; double features[NUM_IMAGE][NUM_BLOCK_ROWS][NUM_BLOCK_COLS][NUM_BLOCK_FEATURES]; double type[NUM_IMAGE]; //1 - pos, 0 - neg double y[NUM_IMAGE]; //1 - pos, -1 - neg //number of weak classifier(changing in experiment) const int NUM_WEAKCLASSIFIER = 100; //data for adaboost double weight[NUM_IMAGE]; //logistic function(dimension is given by NUM_BLOCK_FEATURES(37 in this setting)) double logistic(double theta[], double x[]) { double ans = 0; for(int i = 0 ; i < NUM_BLOCK_FEATURES; i++) { ans += theta[i]*x[i]; } return 1/(1+std::exp(-ans)); } struct WeakClassifier { double _theta[NUM_BLOCK_FEATURES]; //threshold classifier int _index_row; //classify by the features in block[_block_row][_block_y] int _index_col; int _isreverse; //1 for (> := pos, < := neg); 0 for (< := pos, >:= neg) double _alpha; double _error; void clear() { memset(_theta, 0.0, NUM_BLOCK_FEATURES*sizeof(double)); _alpha = 0.0; _error = 1; _index_row = -1; _index_col = -1; _isreverse = true; } //return 1 or -1 int cal(double x[NUM_BLOCK_ROWS][NUM_BLOCK_COLS][NUM_BLOCK_FEATURES]) { int ans = logistic(_theta, x[_index_row][_index_col]); if(ans > 0.5) { if(_isreverse) return -1; else return 1; } else { if(_isreverse) return 1; else return -1; } } void print() { //theta for(int i = 0 ; i < NUM_BLOCK_FEATURES; i++) printf("%lf ", _theta[i]); printf("\n"); //int _index_row; printf("%d ",_index_row); //int _index_col; printf("%d ",_index_col); //int _isreverse; //1 for (> := pos, < := neg); 0 for (< := pos, >:= neg) printf("%d ",_isreverse); //double _alpha; printf("%lf ",_alpha); //double _error; printf("%lf \n",_error); } }weakClassifier[NUM_WEAKCLASSIFIER]; //Util Function double arc2angle(double arc) { return arc/PI*180.0; } double angle2arc(double angle) { return angle/180.0*PI; } void posfilename(int i, char* filename) { sprintf(filename, "pos/pos (%d).png", i); return; } void negfilename(int i, char* 
filename) { sprintf(filename, "neg/neg (%d).png", i); return; } void testfilename(int i, char* filename) { sprintf(filename, "test_pos/test (%d).png", i); return ; } //I(x,y) = sqrt(I(x,y)) void normalizeImage(Mat& inputImage) { // accept only char type matrices CV_Assert(inputImage.depth() != sizeof(uchar)); int channels = inputImage.channels(); int nRows = inputImage.rows ; int nCols = inputImage.cols* channels; if (inputImage.isContinuous()) { nCols *= nRows; nRows = 1; } int i,j; uchar* p; for( i = 0; i < nRows; ++i) { p = inputImage.ptr<uchar>(i); for ( j = 0; j < nCols; ++j) { p[j] = int(sqrt(p[j]*1.0)); } } return; } //I(x,y) 第一维的梯度为xGradient void calGredient(const Mat& inputImage, double xGradient[IMAGE_ROWS][IMAGE_COLS], double yGradient[IMAGE_ROWS][IMAGE_COLS]) { uchar* dataptr = inputImage.data; int nrows = inputImage.rows; int ncols = inputImage.cols; //cal xgradient for(int i = 1 ; i < nrows - 1; i++) { for(int j = 0 ; j < ncols; j++) { xGradient[i][j] = inputImage.at<uchar>(i+1,j) - inputImage.at<uchar>(i-1,j); } } //cal margin for(int i = 0 ; i < ncols; i++) { xGradient[0][i] = (inputImage.at<uchar>(1,i) - inputImage.at<uchar>(0,i))*2; xGradient[nrows-1][i] = (inputImage.at<uchar>(nrows-1,i) - inputImage.at<uchar>(nrows-2,i))*2; } //cal ygradient for(int i = 0 ; i < nrows ; i++) { for(int j = 1 ; j < ncols - 1; j++) { yGradient[i][j] = inputImage.at<uchar>(i,j+1) - inputImage.at<uchar>(i,j-1); } } //cal margin for(int i = 0 ; i < nrows; i++) { xGradient[i][0] = (inputImage.at<uchar>(i,1) - inputImage.at<uchar>(i,0))*2; xGradient[i][ncols-1] = (inputImage.at<uchar>(i,ncols-1) - inputImage.at<uchar>(i,ncols-2))*2; } } //cal the HogFeatures by block void calHogFeatures(Mat& inputImage, double outputFeature[NUM_BLOCK_ROWS][NUM_BLOCK_COLS][NUM_BLOCK_FEATURES]) { int nrows = inputImage.rows; int ncols = inputImage.cols; int type = inputImage.type(); if(nrows != IMAGE_ROWS || ncols != IMAGE_COLS) abort(); //cal x,yGradient double 
xGradient[IMAGE_ROWS][IMAGE_COLS]; double yGradient[IMAGE_ROWS][IMAGE_COLS]; calGredient(inputImage, xGradient, yGradient); //computation median double gradient[IMAGE_ROWS][IMAGE_COLS]; double direction[IMAGE_ROWS][IMAGE_COLS]; for(int i = 0 ; i < nrows; i++) { for(int j = 0 ; j < ncols; j++) { double gx = xGradient[i][j]; double gy = yGradient[i][j]; gradient[i][j] = sqrt(gx*gx + gy*gy); direction[i][j] = arc2angle(atan2(gy, gx)); } } //compute cellinfo 8*8 double cellinfo[IMAGE_ROWS/CELLSIZE][IMAGE_COLS/CELLSIZE][BINSIZE]; memset(cellinfo, 0, sizeof(cellinfo)); for(int i = 0; i < IMAGE_ROWS/CELLSIZE; i++) { for(int j = 0 ; j < IMAGE_COLS/CELLSIZE; j++) { double* cell = cellinfo[i][j]; //cal single cellinfo of 8*8 for(int ci = 0 ; ci < CELLSIZE; ci++) { for(int cj = 0; cj < CELLSIZE; cj++) { //find org pix; int px = i*CELLSIZE + ci; int py = j*CELLSIZE + cj; int binindex = int((direction[px][py]+180.0)/(360.0/BINSIZE)); //handle bound if(fabs(direction[px][py]-180) < eps) { binindex = BINSIZE-1; } if(fabs(direction[px][py]+180) < eps) { binindex = 0; } if(binindex < 0 || binindex >= BINSIZE) { printf("Wrong binindex: %d %lf %lf %lf", binindex, xGradient[px][py], yGradient[px][py], direction[px][py]); abort(); } cell[binindex] += gradient[px][py]; } } } } /*double blockinfo[(IMAGE_ROWS-BLOCKSIZE)/MOVELENGTH+1][(IMAGE_COLS-BLOCKSIZE)/MOVELENGTH+1][(BLOCKSIZE/CELLSIZE)*(BLOCKSIZE/CELLSIZE)*BINSIZE];*/ if(MOVELENGTH%CELLSIZE != 0) { printf("MOVELENGTH%CELLSIZE != 0"); abort(); } //cal blockinfo for(int i = 0 ; i < (IMAGE_ROWS-BLOCKSIZE)/MOVELENGTH + 1; i++) { for(int j = 0 ; j < (IMAGE_COLS-BLOCKSIZE)/MOVELENGTH + 1; j++) { int bfindex = 0; outputFeature[i][j][bfindex++] = 1; //cal the position of this block for(int c1 = 0; c1 < BLOCKSIZE/CELLSIZE; c1++) { for(int c2 = 0 ; c2 < BLOCKSIZE/CELLSIZE; c2++) { //cal the index of cell int cx = i*MOVELENGTH/CELLSIZE+c1; int cy = j*MOVELENGTH/CELLSIZE+c2; for(int binindex = 0 ; binindex < BINSIZE; binindex++) { 
outputFeature[i][j][bfindex++] = cellinfo[cx][cy][binindex]; } } } } } return; } //use global variables void trainLogisticRegression(int block_row,int block_col, double theta[], double& errorrate, int& isreverse) { double theta1[NUM_BLOCK_FEATURES], theta2[NUM_BLOCK_FEATURES]; memset(theta1, 0, NUM_BLOCK_FEATURES*sizeof(double)); memset(theta2, 0, NUM_BLOCK_FEATURES*sizeof(double)); double errorrate1 = 0; double errorrate2 = 0; double rightnum1 = 0; double rightnum2 = 0; isreverse = 0; //cal parameter thetas for(int k = 0 ; k < 100000; k++) { int i = rand()%NUM_IMAGE; int j = rand()%NUM_BLOCK_FEATURES; theta1[j] = theta1[j] + weight[i]*0.01*(type[i] - logistic(theta1, features[i][block_row][block_col]))*features[i][block_row][block_col][j]; } for(int i = 0 ; i < NUM_IMAGE; i++) { double tmp = logistic(theta1, features[i][block_row][block_col]); if(tmp > 0.5 && fabs(type[i] - 1) < eps) rightnum1 += 1.0*weight[i]; if(tmp < 0.5 && fabs(type[i] - 0) < eps) rightnum1 += 1.0*weight[i]; } errorrate1 = 1 - rightnum1; //calreverse for(int k = 0 ; k < 100000; k++) { int i = rand()%NUM_IMAGE; int j = rand()%NUM_BLOCK_FEATURES; theta2[j] = theta2[j] + weight[i]*0.01*(1- type[i] - logistic(theta2, features[i][block_row][block_col]))*features[i][block_row][block_col][j]; } for(int i = 0 ; i < NUM_IMAGE; i++) { double tmp = logistic(theta2, features[i][block_row][block_col]); if(tmp > 0.5 && fabs(type[i] - 0) < eps) rightnum2 += 1.0*weight[i]; if(tmp < 0.5 && fabs(type[i] - 1) < eps) rightnum2 += 1.0*weight[i]; } errorrate2 = 1 - rightnum2; if(errorrate1 < errorrate2) { for(int i = 0 ; i < NUM_BLOCK_FEATURES; i++) { theta[i] = theta1[i]; } isreverse = 0; errorrate = errorrate1 + eps; } else { for(int i = 0 ; i < NUM_BLOCK_FEATURES; i++) { theta[i] = theta2[i]; } isreverse = 1; errorrate = errorrate2 + eps; } return; } WeakClassifier trainClassifier() { WeakClassifier ansclassifier; double theta[NUM_BLOCK_FEATURES]; double errorrate = 1; int isreverse = 0; double 
best_theta[NUM_BLOCK_FEATURES]; double best_errorrate = 1; int best_row = -1; int best_col = -1; int best_isreverse = 0; //select best weak classifier for(int i = 0 ; i < NUM_BLOCK_ROWS; i++) { for(int j = 0 ; j < NUM_BLOCK_COLS; j++) { trainLogisticRegression(i,j,theta,errorrate, isreverse); if(errorrate < 0) { printf("Wrong errorrate < 0 : %lf", errorrate); abort(); } if(errorrate < best_errorrate) { for(int tempi = 0 ; tempi < NUM_BLOCK_FEATURES; tempi++) { best_theta[tempi] = theta[tempi]; } best_errorrate = errorrate; best_row = i; best_col = j; best_isreverse = isreverse; } } } if(best_errorrate > 0.5) { printf("The best_errorrate is greater than 0.5.\n"); abort(); } //set parameters; ansclassifier._alpha = 1.0/2*std::log((1-best_errorrate)/best_errorrate); ansclassifier._error = best_errorrate; ansclassifier._index_col = best_col; ansclassifier._index_row = best_row; ansclassifier._isreverse = best_isreverse; for(int i = 0 ; i < NUM_BLOCK_FEATURES; i++) ansclassifier._theta[i] = best_theta[i]; return ansclassifier; } int calByStrongClassifier(double x[NUM_BLOCK_ROWS][NUM_BLOCK_COLS][NUM_BLOCK_FEATURES]) { double ans = 0; for(int i = 0 ; i < NUM_WEAKCLASSIFIER; i++) { ans += weakClassifier[i]._alpha * weakClassifier[i].cal(x); } if(ans > 0) return 1; else return -1; } /* size: 128*64; type: CV_8UC1; Block大小为18*18; Cell大小为6*6; Block在检测窗口中上下移动尺寸为6*6; 1个cell的梯度直方图化成9个bin; 滑动窗口在检测图片中滑动的尺寸为6*6; */ int main() { char filename[100]; IplImage* inputImage = NULL; clock_t timecount = clock(); //load posimage for(int i = 0 ; i < NUM_POSIMAGE; i++) { posfilename(i+1 ,filename); //load grey image: set the parameter to 0; inputImage = cvLoadImage(filename, 0); //cal features; Mat inputMat(inputImage); calHogFeatures(inputMat, features[i]); type[i] = 1; y[i] = 1; //printf("%d \n", inputMat.cols); //release memory inputMat.release(); cvReleaseImage(&inputImage); inputImage = NULL; } printf("The feature process of pos-image have done in %d second.\n", 
(clock()-timecount)/1000); timecount = clock(); //load neg images for(int i = 0; i < NUM_NEGIMAGE; i++) { negfilename(i+1, filename); //load grey image: set the parameter to 0; inputImage = cvLoadImage(filename, 0); type[NUM_POSIMAGE+i] = 0; y[NUM_POSIMAGE+i] = -1; Mat inputMat(inputImage); calHogFeatures(inputMat, features[NUM_POSIMAGE+i]); //release memory inputMat.release(); cvReleaseImage(&inputImage); inputImage = NULL; } printf("The feature process of neg-image have done in %d second.\n", (clock()-timecount)/1000); timecount = clock(); //init weight array for(int i = 0 ; i < NUM_IMAGE; i++) { weight[i] = 1.0/NUM_IMAGE; } //freopen freopen("HOG_CLASSIFIER.txt", "w", stdout); //print number of weakclassifiers; printf("%d\n", NUM_WEAKCLASSIFIER); //adaboost framework for(int classifierindex = 0 ; classifierindex < NUM_WEAKCLASSIFIER; classifierindex++) { weakClassifier[classifierindex] = trainClassifier(); double error = weakClassifier[classifierindex]._error; double alpha = weakClassifier[classifierindex]._alpha; //printf("%d classifier: %lf ====\n",classifierindex, error); //printf("_index_row %d _index_col %d\n", weakClassifier[classifierindex]._index_row, weakClassifier[classifierindex]._index_col); double identitysum = 0; for(int sampleindex = 0 ; sampleindex < NUM_IMAGE; sampleindex++) { weight[sampleindex] *= std::exp(-alpha*y[sampleindex]*weakClassifier[classifierindex].cal(features[sampleindex])); identitysum += weight[sampleindex]; } //reweight for(int sampleindex = 0 ; sampleindex < NUM_IMAGE; sampleindex++) { weight[sampleindex] /= identitysum; } weakClassifier[classifierindex].print(); } freopen("CON", "w", stdout); int rightnum = 0; for(int testindex = 0 ;testindex < NUM_TESTIMAGE; testindex ++) { //posfilename(testindex+1, filename); testfilename(testindex+1, filename); inputImage = cvLoadImage(filename, 0); double testfeatures[NUM_BLOCK_ROWS][NUM_BLOCK_COLS][NUM_BLOCK_FEATURES]; memset(testfeatures, 0, sizeof(testfeatures)); Mat 
inputMat(inputImage); calHogFeatures(inputMat, testfeatures); if(calByStrongClassifier(testfeatures) == 1) { rightnum++; //printf("Yes\n"); } else //printf("No\n"); inputMat.release(); } printf("Accuracy: %d\n", rightnum); } //测试数据是网上流行的128*64灰度行人图像数据。