OCR stands for Optical Character Recognition; in this exercise we recognize English letters. Given an OCR image, we extract the ROI containing the character and normalize its size, after which the image's pixel-value sequence can be used directly as the feature vector. However, taking the whole image as the feature makes the dimensionality very high and the computation expensive, so some dimensionality reduction is usually applied to shrink the amount of input data.
The processing usually goes like this: first crop the original image to obtain the character ROI and binarize it; then divide the image into blocks and count the non-zero pixels in each block. This yields a much smaller matrix, and that matrix becomes the new feature. OpenCV ships ready-made feature data of this kind in the file \opencv\sources\samples\data\letter-recognition.data.
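For reference, the file is plain comma-separated text. Its first line (this data comes from the UCI Letter Recognition dataset; check your local copy) looks like this, with the label followed by the 16 feature values:

T,2,8,3,5,1,8,13,0,6,6,10,8,0,8,0,8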
Each row represents one sample: the first column, an uppercase letter, is the label, and the following 16 columns are that letter's feature vector. The file contains 20000 samples in total, divided into 26 classes (the 26 letters).
After reading the data in, we split it into two parts: the first 16000 samples serve as the training set. Once a classifier is trained, we evaluate it on those 16000 training samples and on the remaining 4000 samples separately, which gives the training accuracy and the test accuracy. AdaBoost is the one exception: it uses 10000 samples each for training and testing.
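As an aside, the block-counting feature extraction described above can be sketched as follows. This is only an illustration: block_count_features is a hypothetical helper, the 16x16 normalized size and 4x4 grid are assumed values, and the 16 attributes in letter-recognition.data were in fact derived differently (statistical moments and edge counts in the original UCI dataset).

#include <opencv2/opencv.hpp>

// Hypothetical helper: turn an 8-bit grayscale character ROI into a
// 1 x (grid*grid) feature row by counting non-zero pixels per block.
// Assumes grid evenly divides size (e.g. 4x4 blocks of 4 pixels at 16x16).
cv::Mat block_count_features(const cv::Mat& gray_roi, int grid = 4, int size = 16)
{
    cv::Mat norm, bin;
    cv::resize(gray_roi, norm, cv::Size(size, size));    // size normalization
    cv::threshold(norm, bin, 0, 255,
                  cv::THRESH_BINARY | cv::THRESH_OTSU);  // binarization

    int cell = size / grid;                              // block edge in pixels
    cv::Mat feat(1, grid * grid, CV_32F);
    for (int by = 0; by < grid; by++)
        for (int bx = 0; bx < grid; bx++)
        {
            cv::Rect r(bx * cell, by * cell, cell, cell);
            // the feature for this block is its count of non-zero pixels
            feat.at<float>(by * grid + bx) = (float)cv::countNonZero(bin(r));
        }
    return feat;
}

The complete program for training and testing the classifiers on letter-recognition.data follows.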
1 #include "stdafx.h" 2 #include "opencv2\opencv.hpp" 3 #include <iostream> 4 using namespace std; 5 using namespace cv; 6 using namespace cv::ml; 7 8 // 读取文件数据 9 bool read_num_class_data(const string& filename, int var_count,Mat* _data, Mat* _responses) 10 { 11 const int M = 1024; 12 char buf[M + 2]; 13 14 Mat el_ptr(1, var_count, CV_32F); 15 int i; 16 vector<int> responses; 17 18 _data->release(); 19 _responses->release(); 20 FILE *f; 21 fopen_s(&f, filename.c_str(), "rt"); 22 if (!f) 23 { 24 cout << "Could not read the database " << filename << endl; 25 return false; 26 } 27 28 for (;;) 29 { 30 char* ptr; 31 if (!fgets(buf, M, f) || !strchr(buf, ',')) 32 break; 33 responses.push_back((int)buf[0]); 34 ptr = buf + 2; 35 for (i = 0; i < var_count; i++) 36 { 37 int n = 0; 38 sscanf_s(ptr, "%f%n", &el_ptr.at<float>(i), &n); 39 ptr += n + 1; 40 } 41 if (i < var_count) 42 break; 43 _data->push_back(el_ptr); 44 } 45 fclose(f); 46 Mat(responses).copyTo(*_responses); 47 return true; 48 } 49 50 51 //准备训练数据 52 Ptr<TrainData> prepare_train_data(const Mat& data, const Mat& responses, int ntrain_samples) 53 { 54 Mat sample_idx = Mat::zeros(1, data.rows, CV_8U); 55 Mat train_samples = sample_idx.colRange(0, ntrain_samples); 56 train_samples.setTo(Scalar::all(1)); 57 58 int nvars = data.cols; 59 Mat var_type(nvars + 1, 1, CV_8U); 60 var_type.setTo(Scalar::all(VAR_ORDERED)); 61 var_type.at<uchar>(nvars) = VAR_CATEGORICAL; 62 63 return TrainData::create(data, ROW_SAMPLE, responses, 64 noArray(), sample_idx, noArray(), var_type); 65 } 66 67 //设置迭代条件 68 inline TermCriteria TC(int iters, double eps) 69 { 70 return TermCriteria(TermCriteria::MAX_ITER + (eps > 0 ? TermCriteria::EPS : 0), iters, eps); 71 } 72 73 //分类预测 74 void test_and_save_classifier(const Ptr<StatModel>& model, const Mat& data, const Mat& responses, 75 int ntrain_samples, int rdelta) 76 { 77 int i, nsamples_all = data.rows; 78 double train_hr = 0, test_hr = 0; 79 80 // compute prediction error on train and test data 81 for (i = 0; i < nsamples_all; i++) 82 { 83 Mat sample = data.row(i); 84 85 float r = model->predict(sample); 86 r = std::abs(r + rdelta - responses.at<int>(i)) <= FLT_EPSILON ? 1.f : 0.f; 87 88 if (i < ntrain_samples) 89 train_hr += r; 90 else 91 test_hr += r; 92 } 93 94 test_hr /= nsamples_all - ntrain_samples; 95 train_hr = ntrain_samples > 0 ? 
train_hr / ntrain_samples : 1.; 96 97 printf("Recognition rate: train = %.1f%%, test = %.1f%%\n", 98 train_hr*100., test_hr*100.); 99 } 100 101 //随机树分类 102 bool build_rtrees_classifier(const string& data_filename) 103 { 104 Mat data; 105 Mat responses; 106 read_num_class_data(data_filename, 16, &data, &responses); 107 108 int nsamples_all = data.rows; 109 int ntrain_samples = (int)(nsamples_all*0.8); 110 111 Ptr<RTrees> model; 112 Ptr<TrainData> tdata = prepare_train_data(data, responses, ntrain_samples); 113 model = RTrees::create(); 114 model->setMaxDepth(10); 115 model->setMinSampleCount(10); 116 model->setRegressionAccuracy(0); 117 model->setUseSurrogates(false); 118 model->setMaxCategories(15); 119 model->setPriors(Mat()); 120 model->setCalculateVarImportance(true); 121 model->setActiveVarCount(4); 122 model->setTermCriteria(TC(100, 0.01f)); 123 model->train(tdata); 124 test_and_save_classifier(model, data, responses, ntrain_samples, 0); 125 cout << "Number of trees: " << model->getRoots().size() << endl; 126 127 // Print variable importance 128 Mat var_importance = model->getVarImportance(); 129 if (!var_importance.empty()) 130 { 131 double rt_imp_sum = sum(var_importance)[0]; 132 printf("var#\timportance (in %%):\n"); 133 int i, n = (int)var_importance.total(); 134 for (i = 0; i < n; i++) 135 printf("%-2d\t%-4.1f\n", i, 100.f*var_importance.at<float>(i) / rt_imp_sum); 136 } 137 138 return true; 139 } 140 141 //adaboost分类 142 bool build_boost_classifier(const string& data_filename) 143 { 144 const int class_count = 26; 145 Mat data; 146 Mat responses; 147 Mat weak_responses; 148 149 read_num_class_data(data_filename, 16, &data, &responses); 150 int i, j, k; 151 Ptr<Boost> model; 152 153 int nsamples_all = data.rows; 154 int ntrain_samples = (int)(nsamples_all*0.5); 155 int var_count = data.cols; 156 157 Mat new_data(ntrain_samples*class_count, var_count + 1, CV_32F); 158 Mat new_responses(ntrain_samples*class_count, 1, CV_32S); 159 160 for (i = 0; i < ntrain_samples; i++) 161 { 162 const float* data_row = data.ptr<float>(i); 163 for (j = 0; j < class_count; j++) 164 { 165 float* new_data_row = (float*)new_data.ptr<float>(i*class_count + j); 166 memcpy(new_data_row, data_row, var_count*sizeof(data_row[0])); 167 new_data_row[var_count] = (float)j; 168 new_responses.at<int>(i*class_count + j) = responses.at<int>(i) == j + 'A'; 169 } 170 } 171 172 Mat var_type(1, var_count + 2, CV_8U); 173 var_type.setTo(Scalar::all(VAR_ORDERED)); 174 var_type.at<uchar>(var_count) = var_type.at<uchar>(var_count + 1) = VAR_CATEGORICAL; 175 176 Ptr<TrainData> tdata = TrainData::create(new_data, ROW_SAMPLE, new_responses, 177 noArray(), noArray(), noArray(), var_type); 178 vector<double> priors(2); 179 priors[0] = 1; 180 priors[1] = 26; 181 182 model = Boost::create(); 183 model->setBoostType(Boost::GENTLE); 184 model->setWeakCount(100); 185 model->setWeightTrimRate(0.95); 186 model->setMaxDepth(5); 187 model->setUseSurrogates(false); 188 model->setPriors(Mat(priors)); 189 model->train(tdata); 190 Mat temp_sample(1, var_count + 1, CV_32F); 191 float* tptr = temp_sample.ptr<float>(); 192 193 // compute prediction error on train and test data 194 double train_hr = 0, test_hr = 0; 195 for (i = 0; i < nsamples_all; i++) 196 { 197 int best_class = 0; 198 double max_sum = -DBL_MAX; 199 const float* ptr = data.ptr<float>(i); 200 for (k = 0; k < var_count; k++) 201 tptr[k] = ptr[k]; 202 203 for (j = 0; j < class_count; j++) 204 { 205 tptr[var_count] = (float)j; 206 float s = model->predict(temp_sample, noArray(), 
StatModel::RAW_OUTPUT); 207 if (max_sum < s) 208 { 209 max_sum = s; 210 best_class = j + 'A'; 211 } 212 } 213 214 double r = std::abs(best_class - responses.at<int>(i)) < FLT_EPSILON ? 1 : 0; 215 if (i < ntrain_samples) 216 train_hr += r; 217 else 218 test_hr += r; 219 } 220 221 test_hr /= nsamples_all - ntrain_samples; 222 train_hr = ntrain_samples > 0 ? train_hr / ntrain_samples : 1.; 223 printf("Recognition rate: train = %.1f%%, test = %.1f%%\n", 224 train_hr*100., test_hr*100.); 225 226 cout << "Number of trees: " << model->getRoots().size() << endl; 227 return true; 228 } 229 230 //多层感知机分类(ANN) 231 bool build_mlp_classifier(const string& data_filename) 232 { 233 const int class_count = 26; 234 Mat data; 235 Mat responses; 236 237 read_num_class_data(data_filename, 16, &data, &responses); 238 Ptr<ANN_MLP> model; 239 240 int nsamples_all = data.rows; 241 int ntrain_samples = (int)(nsamples_all*0.8); 242 Mat train_data = data.rowRange(0, ntrain_samples); 243 Mat train_responses = Mat::zeros(ntrain_samples, class_count, CV_32F); 244 245 // 1. unroll the responses 246 cout << "Unrolling the responses...\n"; 247 for (int i = 0; i < ntrain_samples; i++) 248 { 249 int cls_label = responses.at<int>(i) -'A'; 250 train_responses.at<float>(i, cls_label) = 1.f; 251 } 252 253 // 2. train classifier 254 int layer_sz[] = { data.cols, 100, 100, class_count }; 255 int nlayers = (int)(sizeof(layer_sz) / sizeof(layer_sz[0])); 256 Mat layer_sizes(1, nlayers, CV_32S, layer_sz); 257 258 #if 1 259 int method = ANN_MLP::BACKPROP; 260 double method_param = 0.001; 261 int max_iter = 300; 262 #else 263 int method = ANN_MLP::RPROP; 264 double method_param = 0.1; 265 int max_iter = 1000; 266 #endif 267 268 Ptr<TrainData> tdata = TrainData::create(train_data, ROW_SAMPLE, train_responses); 269 model = ANN_MLP::create(); 270 model->setLayerSizes(layer_sizes); 271 model->setActivationFunction(ANN_MLP::SIGMOID_SYM, 0, 0); 272 model->setTermCriteria(TC(max_iter, 0)); 273 model->setTrainMethod(method, method_param); 274 model->train(tdata); 275 return true; 276 } 277 278 //K最近邻分类 279 bool build_knearest_classifier(const string& data_filename, int K) 280 { 281 Mat data; 282 Mat responses; 283 read_num_class_data(data_filename, 16, &data, &responses); 284 int nsamples_all = data.rows; 285 int ntrain_samples = (int)(nsamples_all*0.8); 286 287 Ptr<TrainData> tdata = prepare_train_data(data, responses, ntrain_samples); 288 Ptr<KNearest> model = KNearest::create(); 289 model->setDefaultK(K); 290 model->setIsClassifier(true); 291 model->train(tdata); 292 293 test_and_save_classifier(model, data, responses, ntrain_samples, 0); 294 return true; 295 } 296 297 //贝叶斯分类 298 bool build_nbayes_classifier(const string& data_filename) 299 { 300 Mat data; 301 Mat responses; 302 read_num_class_data(data_filename, 16, &data, &responses); 303 304 int nsamples_all = data.rows; 305 int ntrain_samples = (int)(nsamples_all*0.8); 306 307 Ptr<NormalBayesClassifier> model; 308 Ptr<TrainData> tdata = prepare_train_data(data, responses, ntrain_samples); 309 model = NormalBayesClassifier::create(); 310 model->train(tdata); 311 312 test_and_save_classifier(model, data, responses, ntrain_samples, 0); 313 return true; 314 } 315 316 317 //svm分类 318 bool build_svm_classifier(const string& data_filename) 319 { 320 Mat data; 321 Mat responses; 322 read_num_class_data(data_filename, 16, &data, &responses); 323 324 int nsamples_all = data.rows; 325 int ntrain_samples = (int)(nsamples_all*0.8); 326 327 Ptr<SVM> model; 328 Ptr<TrainData> tdata = 
prepare_train_data(data, responses, ntrain_samples); 329 model = SVM::create(); 330 model->setType(SVM::C_SVC); 331 model->setKernel(SVM::LINEAR); 332 model->setC(1); 333 model->train(tdata); 334 335 test_and_save_classifier(model, data, responses, ntrain_samples, 0); 336 return true; 337 } 338 339 int main() 340 { 341 string data_filename = "E:/opencv/opencv/sources/samples/data/letter-recognition.data"; //字母数据 342 343 cout << "svm分类:" << endl; 344 build_svm_classifier(data_filename); 345 346 cout << "贝叶斯分类:" << endl; 347 build_nbayes_classifier(data_filename); 348 349 cout << "K最近邻分类:" << endl; 350 build_knearest_classifier(data_filename,10); 351 352 cout << "随机树分类:" << endl; 353 build_rtrees_classifier(data_filename); 354 355 //cout << "adaboost分类:" << endl; 356 //build_boost_classifier(data_filename); 357 358 //cout << "ANN(多层感知机)分类:" << endl; 359 //build_mlp_classifier(data_filename); 360 }
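A couple of build notes: the listing targets Visual Studio ("stdafx.h" is the VS precompiled header, and fopen_s/sscanf_s are MSVC-flavored functions), so on GCC or Clang you would drop the stdafx.h include and substitute plain fopen/sscanf. Each enabled classifier prints one line of the form "Recognition rate: train = xx.x%, test = xx.x%", which makes it easy to compare the models' training and test accuracy side by side. The AdaBoost and MLP branches are commented out in main(); uncomment them to run those two as well, keeping in mind that both are markedly slower to train.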