BoW算法及DBoW2库简介(二)
一、BoW算法
用OpenCV实现了最简单的BoW算法进行了一次小规模的图像检索任务,使用UKbench数据库,算法原理和网上的描述差不多,使用K-means算法进行聚类,这里使用KDTree算法进行特征量化,按照自己的理解计算了TF-IDF权重,使用余弦距离计算图像之间的相似性。下面给出关键函数依赖于OpenCV的实现:
如TF-IDF权重的计算,这里只是按照自己的理解实现了算法,有的地方传参不是很合适,不过不影响效果:
std::vector<double> compute_TF(cv::Mat& descriptors, cv::Mat& labels) { std::vector<double> tf(Num_clu, 0.0); for (int i = 0; i < descriptors.rows; i++) { tf[labels.at<int>(i)] ++; } for (unsigned int i = 0; i < tf.size(); i++) { tf[i] /= (float)descriptors.rows; } return tf; } std::vector<double> comput_IDF(std::vector<cv::Mat>& descriptors, std::vector<cv::Mat> &labels) { std::vector<double> idf(Num_clu, 1.0); for (unsigned int i = 0; i < descriptors.size(); i++) { std::vector<int> idf_tmp(Num_clu, 0); for (int j = 0; j < descriptors[i].rows; j++) { idf_tmp[labels[i].at<int>(j)] ++; } for (unsigned int j = 0; j < idf_tmp.size(); j++) { if (idf_tmp[j] != 0) idf[j] ++; } } for (unsigned int i = 0; i < idf.size(); i++) { idf[i] = log(Num_img / idf[i]); } return idf; }
有一点需要注意,这里的IDF应该是只计算一次,而TF则是对每一幅图像计算一次。
有了TF-IDF函数的实现就可以计算BoW向量了,首先是计算训练图像的BoW向量:
/**
 * Builds the codebook and the TF-IDF weighted BoW vectors of the image set.
 *
 * Steps: extract SURF descriptors from images 1..Num_img, cluster the
 * descriptors of the first Num_tra images with k-means into Num_clu centres,
 * quantise every descriptor to its nearest centre through a KD-tree, then
 * compute one L2-normalised TF-IDF row per image.
 *
 * @param centers Output: cluster centres (the codebook) written by cv::kmeans.
 * @param IDF     Output: inverse document frequency of every visual word.
 * @return Matrix with one row per image (row i-1 belongs to image i) and one
 *         column per visual word. Images that could not be read or that have
 *         no descriptors yield an all-zero row.
 */
cv::Mat TrainingBowVector(cv::Mat& centers, std::vector<double>& IDF)
{
    cv::SurfFeatureDetector detector;
    cv::SurfDescriptorExtractor extractor;
    char image_name[50];

    std::vector<cv::Mat> descriptor_all;
    descriptor_all.reserve(Num_img);

    // Find the keypoints and compute the descriptors of every image.
    for (int i = 1; i <= Num_img; i++)
    {
        std::cout << "I:" << i << std::endl;
        sprintf_s(image_name, "D:\\DataBase\\UKbench\\TestImage\\%d.jpg", i);
        cv::Mat image = cv::imread(image_name, 0);
        if (image.empty())
        {
            // Make the failure visible; an (empty) entry is still stored
            // below so that image indices stay aligned.
            std::cerr << "Warning: cannot read image " << image_name << std::endl;
        }

        std::vector<cv::KeyPoint> keypoints;
        cv::Mat descriptors;
        detector.detect(image, keypoints);
        std::cout << "Keypoints:" << keypoints.size() << std::endl;
        extractor.compute(image, keypoints, descriptors);
        descriptor_all.push_back(descriptors);
    }

    // Stack the descriptors of the training subset and cluster them.
    std::cout << "Get the training descriptors." << std::endl;
    cv::Mat descriptor_train;
    for (int j = 0; j < Num_tra; j++)
        descriptor_train.push_back(descriptor_all[j]);

    cv::Mat labels_k;
    cv::kmeans(descriptor_train, Num_clu, labels_k,
               cv::TermCriteria(CV_TERMCRIT_EPS + CV_TERMCRIT_ITER, 100, 0.01),
               3, cv::KMEANS_PP_CENTERS, centers);

    // Quantise every descriptor of every image to its nearest cluster centre.
    const int tk = 1, Emax = INT_MAX;
    cv::KDTree T(centers, false);
    std::vector<cv::Mat> labels(Num_img);
    for (int i = 0; i < Num_img; i++)
    {
        cv::Mat descriptor_img = descriptor_all[i];
        for (int j = 0; j < descriptor_img.rows; j++)
        {
            std::vector<float> desc_vec(descriptor_img.row(j));
            std::vector<int> idx_tmp(tk);
            T.findNearest(desc_vec, tk, Emax, idx_tmp, cv::noArray(), cv::noArray());
            labels[i].push_back(idx_tmp[0]);
        }
    }

    // Compute the TF-IDF vector of each image; IDF is computed only once.
    std::cout << "Compute the TF-IDF." << std::endl;
    cv::Mat BowVec;
    IDF = comput_IDF(descriptor_all, labels);
    const int word_count = static_cast<int>(IDF.size());
    for (int i = 0; i < Num_img; i++)
    {
        cv::Mat bow_row = cv::Mat::zeros(1, word_count, CV_64F);

        // An image without descriptors keeps its all-zero row; computing TF
        // for it would divide by zero and fill the row with NaN.
        if (descriptor_all[i].rows > 0)
        {
            std::vector<double> TF = compute_TF(descriptor_all[i], labels[i]);
            for (int j = 0; j < word_count; j++)
                bow_row.at<double>(0, j) = TF[j] * IDF[j];
            cv::normalize(bow_row, bow_row);
        }

        BowVec.push_back(bow_row);
    }

    return BowVec;
}
计算测试图片的BoW向量和上面类似。有了训练图像和测试图像的BoW向量就可以根据余弦距离计算相似度了,最后使用堆排序获得最相似的图像ID。
而Vocabulary Tree算法的代码实现和上面的不同点在于码书的训练方式。
二、DBoW2库的使用
使用DBoW2库训练码书,并根据BoW打分完成图像检索,根据正向索引完成特征匹配,在ORB-SLAM里面没注意到倒排索引加速图像检索的部分。
首先是码书的训练(“盗用”代码:http://www.cnblogs.com/jian-li/p/5666556.html):
#include <iostream> #include <vector> #include "Thirdparty/DBoW2/DBoW2/FORB.h" #include "Thirdparty/DBoW2/DBoW2/TemplatedVocabulary.h" // OpenCV #include <opencv2/opencv.hpp> #include "opencv2/core/core.hpp" #include <opencv/cv.h> #include <opencv/highgui.h> #include <opencv2/nonfree/features2d.hpp> // ROS #include <rosbag/bag.h> #include <rosbag/view.h> #include <ros/ros.h> #include <sensor_msgs/Image.h> #include <boost/foreach.hpp> #include <cv_bridge/cv_bridge.h> #include "ORBextractor.h" #include <dirent.h> #include <string.h> using namespace DBoW2; using namespace DUtils; using namespace std; using namespace ORB_SLAM; // - - - - - --- - - - -- - - - - - /// ORB Vocabulary typedef DBoW2::TemplatedVocabulary<DBoW2::FORB::TDescriptor, DBoW2::FORB> ORBVocabulary; // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - void extractORBFeatures(cv::Mat &image, vector<vector<cv::Mat> > &features, ORBextractor* extractor); void changeStructureORB( const cv::Mat &descriptor,vector<bool> &mask, vector<cv::Mat> &out); void isInImage(vector<cv::KeyPoint> &keys, float &cx, float &cy, float &rMin, float &rMax, vector<bool> &mask); void createVocabularyFile(ORBVocabulary &voc, std::string &fileName, const vector<vector<cv::Mat> > &features); // ---------------------------------------------------------------------------- int main() { //Extracting ORB features from image folder vector<std::string> filenames; std::string folder = "/home/saodiseng/FRONTAL/"; cv::glob(folder, filenames); // initialze ORBextractor int nLevels = 5;//6; ORBextractor* extractor = new ORBextractor(1000,1.2,nLevels,1,20); int nImages = filenames.size(); vector<vector<cv::Mat > > features; features.clear(); features.reserve(nImages); cv::Mat image; cout << "> Extracting Features from " << nImages << " images..." 
<< endl; for(int i = 0; i < nImages; ++i) { std::cout << "Processing the " << i <<" image " << std::endl; cv::Mat src = cv::imread(filenames[i]); imshow("View", src); cv::waitKey(1); if (!src.empty()) { cv::cvtColor(src, image, CV_RGB2GRAY); extractORBFeatures(image, features, extractor); } } cout << "... Extraction done!" << endl; // Creating the Vocabulary // define vocabulary const int k = 10; // branching factor const WeightingType weight = TF_IDF; const ScoringType score = L1_NORM; ORBVocabulary voc(k, nLevels, weight, score); std::string vociName = "vociOmni.txt"; createVocabularyFile(voc, vociName, features); cout << "--- THE END ---" << endl; return 0; } // ---------------------------------------------------------------------------- void extractORBFeatures(cv::Mat &image, vector<vector<cv::Mat> > &features, ORBextractor* extractor) { vector<cv::KeyPoint> keypoints; cv::Mat descriptorORB; (*extractor)(image, cv::Mat(), keypoints, descriptorORB); // reject features outside region of interest vector<bool> mask; float cx = 0; float cy = 0; float rMin = 0; float rMax = 0; isInImage(keypoints, cx, cy, rMin, rMax, mask); // create descriptor vector for the vocabulary features.push_back(vector<cv::Mat>()); changeStructureORB(descriptorORB, mask, features.back()); imshow("ORBFeature", features.back().back()); } // ---------------------------------------------------------------------------- void changeStructureORB( const cv::Mat &descriptor,vector<bool> &mask, vector<cv::Mat> &out) { for (int i = 0; i < descriptor.rows; i++) { if(mask[i]) { out.push_back(descriptor.row(i)); } } } // ---------------------------------------------------------------------------- void isInImage(vector<cv::KeyPoint> &keys, float &cx, float &cy, float &rMin, float &rMax, vector<bool> &mask) { int N = keys.size(); mask = vector<bool>(N, false); int num = 0; for(int i=0; i<N; i++) { cv::KeyPoint kp = keys[i]; float u = kp.pt.x; float v = kp.pt.y; if(u>20 && u<320-20 && v>20 && v<240-20) { 
mask[i] = true; num ++; } } std::cout << "In image number " << num << std::endl; } // ---------------------------------------------------------------------------- void createVocabularyFile(ORBVocabulary &voc, std::string &fileName, const vector<vector<cv::Mat> > &features) { cout << "> Creating vocabulary. May take some time ..." << endl; voc.create(features); cout << "... done!" << endl; cout << "> Vocabulary information: " << endl << voc << endl << endl; // save the vocabulary to disk cout << endl << "> Saving vocabulary..." << endl; voc.saveToTextFile(fileName); cout << "... saved to file: " << fileName << endl; }
也可以直接使用ORB-SLAM给定的码书。
再下面就是训练BoW向量并计算打分:
void FrameRecog::ComputeBoW() { //数据类型转换; vector<cv::Mat>vFrDesc = Converter::toDescriptorVector(Descriptors); //BowVec为BoW特征向量,FeatVec为正向索引; pORBVocabulary->transform(vFrDesc, BowVec, FeatVec, 4); } float score = pORBVocabulary->score(BowVec, vBowVec[i]);
ComputeBoW()函数计算了当前帧的BowVec向量,以及正向索引FeatVec(transform的最后一个参数4表示从叶子节点向上数4层,正向索引即按该层的节点对特征进行分组)。下面一句即计算了两个BoW向量的相似性打分。当打分满足某个阈值之后,还需要通过正向索引值进行特征匹配:
int FrameRecog::FeatMatchByBoW( const int idx ) { int nmatches = 0; const int TH_LOW = 50; const int HISTO_LENGTH = 30; const int factor = 1.0f/HISTO_LENGTH; const DBoW2::FeatureVector &vFeatVecTD = vFeatVec[idx]; const DBoW2::FeatureVector &vFeatVecCD = FeatVec; DBoW2::FeatureVector::const_iterator TDit = vFeatVecTD.begin(); DBoW2::FeatureVector::const_iterator CDit = vFeatVecCD.begin(); DBoW2::FeatureVector::const_iterator TDend= vFeatVecTD.end(); DBoW2::FeatureVector::const_iterator CDend= vFeatVecCD.end(); while( TDit != TDend && CDit != CDend ) { //first为单词的索引,second则对应为该单词索引下的ORB特征集合; if( TDit->first == CDit->first) { //second是要循环的对象 const vector<unsigned int> vIndicesTD = TDit->second; const vector<unsigned int> vIndicesCD = CDit->second; //循环关键帧和当前帧对应单词下的特征集合,计算相似性; for ( size_t iTD = 0; iTD < vIndicesTD.size(); iTD ++ ) { const unsigned int realIdxTD = vIndicesTD[iTD]; const cv::Mat &dTD = vDescriptors[idx].row(realIdxTD); int bestDist1 = 256; int bestIdxF = -1; int bestDist2 = 256; for ( size_t iCD = 0; iCD < vIndicesCD.size(); iCD ++ ) { const unsigned int realIdxCD = vIndicesCD[iCD]; const cv::Mat &dCD = Descriptors.row(realIdxCD); const int dist = DescriptorDistance(dTD, dCD); //这里注意是双阈值; if( dist < bestDist1 ) { bestDist2 = bestDist1; bestDist1 = dist; bestIdxF = realIdxCD; } else if( dist < bestDist2 ) { bestDist2 = dist; } } //这里有两个输入参数,一个是TH_LOW,是指两个特征的最小距离阈值; //第二个是0.95,它是指相似特征的最小距离小于第二小距离的百分之九十五; //第二个参数的含义是,当该参数越接近于1时,该式越接近于成立,而越小时说明要求越高, //即最小距离远大于第二小距离,所以两特征是相似特征的概率非常大 if(bestDist1 <= TH_LOW) { if( static_cast<float>(bestDist1)<0.95 * static_cast<float>(bestDist2)) nmatches ++; } } TDit ++; CDit ++; } else if( TDit->first < CDit->first ) { TDit = vFeatVecTD.lower_bound(CDit->first); } else { CDit = vFeatVecCD.lower_bound(TDit->first); } } //原函数中还有特征对应的3D地图点的输出,以及根据ORB特征的主方向进一步判断特征是否相似的代码,这里略去; return nmatches; }
/**
 * Bit-level (Hamming) distance between two descriptors.
 *
 * Reads eight 32-bit words (256 bits) from the start of each matrix, XORs
 * them word by word and sums the number of set bits.
 *
 * @param a First descriptor.
 * @param b Second descriptor.
 * @return Number of differing bits among the 256 bits read.
 *
 * NOTE(review): assumes each argument holds at least 32 contiguous bytes
 * (e.g. one ORB descriptor row) - confirm against callers.
 */
int FrameRecog::DescriptorDistance(const cv::Mat &a, const cv::Mat &b)
{
    const int *wordsA = a.ptr<int32_t>();
    const int *wordsB = b.ptr<int32_t>();

    int dist = 0;
    for ( int w = 0; w < 8; ++w )
    {
        // Bits that differ between the two words.
        unsigned int bits = wordsA[w] ^ wordsB[w];

        // Parallel population count: 2-bit sums, then 4-bit sums, then the
        // per-byte sums are added up by the multiplication and read from the
        // top byte.
        bits = bits - ((bits >> 1) & 0x55555555);
        bits = (bits & 0x33333333) + ((bits >> 2) & 0x33333333);
        dist += (((bits + (bits >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
    }

    return dist;
}
上面的代码改写自ORB-SLAM的ORBmatcher.cc中的
int ORBmatcher::SearchByBoW(KeyFrame* pKF,Frame &F, vector<MapPoint*> &vpMapPointMatches) 函数中。即通过正向索引给出特征匹配数或匹配的特征以及对应的3D点。