OpenCV:OpenCV目标检测Hog+SWindow源代码分析
参考文章:OpenCV中的HOG+SVM物体分类
此文主要描述出HOG分类的调用堆栈。
使用OpenCV作图像检测, 使用HOG检测过程,其中一部分源代码如下:
1.HOG 检测底层栈的检测计算代码:
貌似在计算过程中仅使用滑窗方法?
void HOGDescriptor::detect(const Mat& img, vector<Point>& hits, vector<double>& weights, double hitThreshold, Size winStride, Size padding, const vector<Point>& locations) const { hits.clear(); if( svmDetector.empty() ) return; if( winStride == Size() ) winStride = cellSize; Size cacheStride(gcd(winStride.width, blockStride.width), gcd(winStride.height, blockStride.height)); size_t nwindows = locations.size(); padding.width = (int)alignSize(std::max(padding.width, 0), cacheStride.width); padding.height = (int)alignSize(std::max(padding.height, 0), cacheStride.height); Size paddedImgSize(img.cols + padding.width*2, img.rows + padding.height*2); HOGCache cache(this, img, padding, padding, nwindows == 0, cacheStride); if( !nwindows ) nwindows = cache.windowsInImage(paddedImgSize, winStride).area(); const HOGCache::BlockData* blockData = &cache.blockData[0]; int nblocks = cache.nblocks.area(); int blockHistogramSize = cache.blockHistogramSize; size_t dsize = getDescriptorSize(); double rho = svmDetector.size() > dsize ? svmDetector[dsize] : 0; vector<float> blockHist(blockHistogramSize); for( size_t i = 0; i < nwindows; i++ ) { Point pt0; if( !locations.empty() ) { pt0 = locations[i]; if( pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width || pt0.y < -padding.height || pt0.y > img.rows + padding.height - winSize.height ) continue; } else { pt0 = cache.getWindow(paddedImgSize, winStride, (int)i).tl() - Point(padding); CV_Assert(pt0.x % cacheStride.width == 0 && pt0.y % cacheStride.height == 0); } double s = rho; const float* svmVec = &svmDetector[0]; #ifdef HAVE_IPP int j; #else int j, k; #endif for( j = 0; j < nblocks; j++, svmVec += blockHistogramSize ) { const HOGCache::BlockData& bj = blockData[j]; Point pt = pt0 + bj.imgOffset; const float* vec = cache.getBlock(pt, &blockHist[0]); #ifdef HAVE_IPP Ipp32f partSum; ippsDotProd_32f(vec,svmVec,blockHistogramSize,&partSum); s += (double)partSum; #else for( k = 0; k <= blockHistogramSize - 4; k += 4 ) s += vec[k]*svmVec[k] + vec[k+1]*svmVec[k+1] + vec[k+2]*svmVec[k+2] + vec[k+3]*svmVec[k+3]; for( ; k < blockHistogramSize; k++ ) s += vec[k]*svmVec[k]; #endif } if( s >= hitThreshold ) { hits.push_back(pt0); weights.push_back(s); } } }
2. HOG invoker的对象重载:
void operator()( const Range& range ) const { int i, i1 = range.start, i2 = range.end; double minScale = i1 > 0 ? levelScale[i1] : i2 > 1 ? levelScale[i1+1] : std::max(img.cols, img.rows); Size maxSz(cvCeil(img.cols/minScale), cvCeil(img.rows/minScale)); Mat smallerImgBuf(maxSz, img.type()); vector<Point> locations; vector<double> hitsWeights; for( i = i1; i < i2; i++ ) { double scale = levelScale[i]; Size sz(cvRound(img.cols/scale), cvRound(img.rows/scale)); Mat smallerImg(sz, img.type(), smallerImgBuf.data); if( sz == img.size() ) smallerImg = Mat(sz, img.type(), img.data, img.step); else resize(img, smallerImg, sz); //使用HOG 进行检测 hog->detect(smallerImg, locations, hitsWeights, hitThreshold, winStride, padding); Size scaledWinSize = Size(cvRound(hog->winSize.width*scale), cvRound(hog->winSize.height*scale)); mtx->lock(); for( size_t j = 0; j < locations.size(); j++ ) { vec->push_back(Rect(cvRound(locations[j].x*scale), cvRound(locations[j].y*scale), scaledWinSize.width, scaledWinSize.height)); if (scales) { scales->push_back(scale); } } mtx->unlock(); if (weights && (!hitsWeights.empty())) { mtx->lock(); for (size_t j = 0; j < locations.size(); j++) { weights->push_back(hitsWeights[j]); } mtx->unlock(); } } }
3.使用HOG特征进行多尺度检测
void HOGDescriptor::detectMultiScale( const Mat& img, vector<Rect>& foundLocations, vector<double>& foundWeights, double hitThreshold, Size winStride, Size padding, double scale0, double finalThreshold, bool useMeanshiftGrouping) const { double scale = 1.; int levels = 0; vector<double> levelScale; for( levels = 0; levels < nlevels; levels++ ) { levelScale.push_back(scale); if( cvRound(img.cols/scale) < winSize.width || cvRound(img.rows/scale) < winSize.height || scale0 <= 1 ) break; scale *= scale0; } levels = std::max(levels, 1); levelScale.resize(levels); std::vector<Rect> allCandidates; std::vector<double> tempScales; std::vector<double> tempWeights; std::vector<double> foundScales; Mutex mtx; parallel_for_(Range(0, (int)levelScale.size()), HOGInvoker(this, img, hitThreshold, winStride, padding, &levelScale[0], &allCandidates, &mtx, &tempWeights, &tempScales)); std::copy(tempScales.begin(), tempScales.end(), back_inserter(foundScales)); foundLocations.clear(); std::copy(allCandidates.begin(), allCandidates.end(), back_inserter(foundLocations)); foundWeights.clear(); std::copy(tempWeights.begin(), tempWeights.end(), back_inserter(foundWeights)); if ( useMeanshiftGrouping ) { groupRectangles_meanshift(foundLocations, foundWeights, foundScales, finalThreshold, winSize); } else { groupRectangles(foundLocations, foundWeights, (int)finalThreshold, 0.2); } }
其中得到HogCache也是重要的一环:
独立为init函数:
HOGCache::HOGCache(const HOGDescriptor* _descriptor, const Mat& _img, Size _paddingTL, Size _paddingBR, bool _useCache, Size _cacheStride) { init(_descriptor, _img, _paddingTL, _paddingBR, _useCache, _cacheStride); } void HOGCache::init(const HOGDescriptor* _descriptor, const Mat& _img, Size _paddingTL, Size _paddingBR, bool _useCache, Size _cacheStride) { descriptor = _descriptor; cacheStride = _cacheStride; useCache = _useCache; descriptor->computeGradient(_img, grad, qangle, _paddingTL, _paddingBR); imgoffset = _paddingTL; winSize = descriptor->winSize; Size blockSize = descriptor->blockSize; Size blockStride = descriptor->blockStride; Size cellSize = descriptor->cellSize; int i, j, nbins = descriptor->nbins; int rawBlockSize = blockSize.width*blockSize.height; nblocks = Size((winSize.width - blockSize.width)/blockStride.width + 1, (winSize.height - blockSize.height)/blockStride.height + 1); ncells = Size(blockSize.width/cellSize.width, blockSize.height/cellSize.height); blockHistogramSize = ncells.width*ncells.height*nbins; if( useCache ) { Size cacheSize((grad.cols - blockSize.width)/cacheStride.width+1, (winSize.height/cacheStride.height)+1); blockCache.create(cacheSize.height, cacheSize.width*blockHistogramSize); blockCacheFlags.create(cacheSize); size_t cacheRows = blockCache.rows; ymaxCached.resize(cacheRows); for(size_t ii = 0; ii < cacheRows; ii++ ) ymaxCached[ii] = -1; } Mat_<float> weights(blockSize); float sigma = (float)descriptor->getWinSigma(); float scale = 1.f/(sigma*sigma*2); for(i = 0; i < blockSize.height; i++) for(j = 0; j < blockSize.width; j++) { float di = i - blockSize.height*0.5f; float dj = j - blockSize.width*0.5f; weights(i,j) = std::exp(-(di*di + dj*dj)*scale); } blockData.resize(nblocks.width*nblocks.height); pixData.resize(rawBlockSize*3); // Initialize 2 lookup tables, pixData & blockData. // Here is why: // // The detection algorithm runs in 4 nested loops (at each pyramid layer): // loop over the windows within the input image // loop over the blocks within each window // loop over the cells within each block // loop over the pixels in each cell // // As each of the loops runs over a 2-dimensional array, // we could get 8(!) nested loops in total, which is very-very slow. // // To speed the things up, we do the following: // 1. loop over windows is unrolled in the HOGDescriptor::{compute|detect} methods; // inside we compute the current search window using getWindow() method. // Yes, it involves some overhead (function call + couple of divisions), // but it's tiny in fact. // 2. loop over the blocks is also unrolled. Inside we use pre-computed blockData[j] // to set up gradient and histogram pointers. // 3. loops over cells and pixels in each cell are merged // (since there is no overlap between cells, each pixel in the block is processed once) // and also unrolled. Inside we use PixData[k] to access the gradient values and // update the histogram // count1 = count2 = count4 = 0; for( j = 0; j < blockSize.width; j++ ) for( i = 0; i < blockSize.height; i++ ) { PixData* data = 0; float cellX = (j+0.5f)/cellSize.width - 0.5f; float cellY = (i+0.5f)/cellSize.height - 0.5f; int icellX0 = cvFloor(cellX); int icellY0 = cvFloor(cellY); int icellX1 = icellX0 + 1, icellY1 = icellY0 + 1; cellX -= icellX0; cellY -= icellY0; if( (unsigned)icellX0 < (unsigned)ncells.width && (unsigned)icellX1 < (unsigned)ncells.width ) { if( (unsigned)icellY0 < (unsigned)ncells.height && (unsigned)icellY1 < (unsigned)ncells.height ) { data = &pixData[rawBlockSize*2 + (count4++)]; data->histOfs[0] = (icellX0*ncells.height + icellY0)*nbins; data->histWeights[0] = (1.f - cellX)*(1.f - cellY); data->histOfs[1] = (icellX1*ncells.height + icellY0)*nbins; data->histWeights[1] = cellX*(1.f - cellY); data->histOfs[2] = (icellX0*ncells.height + icellY1)*nbins; data->histWeights[2] = (1.f - cellX)*cellY; data->histOfs[3] = (icellX1*ncells.height + icellY1)*nbins; data->histWeights[3] = cellX*cellY; } else { data = &pixData[rawBlockSize + (count2++)]; if( (unsigned)icellY0 < (unsigned)ncells.height ) { icellY1 = icellY0; cellY = 1.f - cellY; } data->histOfs[0] = (icellX0*ncells.height + icellY1)*nbins; data->histWeights[0] = (1.f - cellX)*cellY; data->histOfs[1] = (icellX1*ncells.height + icellY1)*nbins; data->histWeights[1] = cellX*cellY; data->histOfs[2] = data->histOfs[3] = 0; data->histWeights[2] = data->histWeights[3] = 0; } } else { if( (unsigned)icellX0 < (unsigned)ncells.width ) { icellX1 = icellX0; cellX = 1.f - cellX; } if( (unsigned)icellY0 < (unsigned)ncells.height && (unsigned)icellY1 < (unsigned)ncells.height ) { data = &pixData[rawBlockSize + (count2++)]; data->histOfs[0] = (icellX1*ncells.height + icellY0)*nbins; data->histWeights[0] = cellX*(1.f - cellY); data->histOfs[1] = (icellX1*ncells.height + icellY1)*nbins; data->histWeights[1] = cellX*cellY; data->histOfs[2] = data->histOfs[3] = 0; data->histWeights[2] = data->histWeights[3] = 0; } else { data = &pixData[count1++]; if( (unsigned)icellY0 < (unsigned)ncells.height ) { icellY1 = icellY0; cellY = 1.f - cellY; } data->histOfs[0] = (icellX1*ncells.height + icellY1)*nbins; data->histWeights[0] = cellX*cellY; data->histOfs[1] = data->histOfs[2] = data->histOfs[3] = 0; data->histWeights[1] = data->histWeights[2] = data->histWeights[3] = 0; } } data->gradOfs = (grad.cols*i + j)*2; data->qangleOfs = (qangle.cols*i + j)*2; data->gradWeight = weights(i,j); } assert( count1 + count2 + count4 == rawBlockSize ); // defragment pixData for( j = 0; j < count2; j++ ) pixData[j + count1] = pixData[j + rawBlockSize]; for( j = 0; j < count4; j++ ) pixData[j + count1 + count2] = pixData[j + rawBlockSize*2]; count2 += count1; count4 += count2; // initialize blockData for( j = 0; j < nblocks.width; j++ ) for( i = 0; i < nblocks.height; i++ ) { BlockData& data = blockData[j*nblocks.height + i]; data.histOfs = (j*nblocks.height + i)*blockHistogramSize; data.imgOffset = Point(j*blockStride.width,i*blockStride.height); } }
总结:
以上大致为HOG检测计算大致的函数调用堆栈。