OpenMP 与 std::thread 性能实测
#include <stdio.h> #include <iostream> #include <thread> #include<omp.h> //#include <opencv2/core.hpp> //#include <cv/cv_tools.h> #include <opencv2/highgui/highgui.hpp> #include <opencv2/imgproc/imgproc.hpp> #include <opencv2/core/version.hpp> using namespace std; using namespace cv; #define TEST_BY_OPENMP void childFunc(Mat& pic, Mat &dst) { int w = pic.cols; int h = pic.rows; //do{ int sz = 3 * w*h; for (int i = 0; i != sz; ++i) { //int src_data = pic.data[i]; //float ss= 1.234556; //ss*=1.2; //src_data+=ss; //dst.data[i] = ((src_data >>5)<<5); dst.data[i] = ((pic.data[i] >> 5) << 5); } //}while(0); } vector<Mat> split(Mat& _src, int n) { int sz = _src.cols*_src.rows*3; vector<Mat> pics(n); for (int i = 0; i != n; ++i) { pics[i] = Mat(_src.rows/n,_src.cols,CV_8UC3, _src.data + i*sz/n ); } return pics; } int main() { const int LOOPS = 1e10; Mat src = imread("bg.jpg"); resize(src,src,Size(3840,2160)); Mat dst(src.size(),src.type()); vector<Mat> child_pics = split(src, 4); vector<Mat> child_pics_d = split(dst, 4); long long t0 = getTickCount(); for(int k=0;k!=LOOPS;++k){ #ifdef TEST_BY_OPENMP #pragma omp parallel num_threads(4) { int index = omp_get_thread_num(); childFunc(child_pics[index], child_pics_d[index]); }//7.3 ms cpu=60% for (int i = 0; i != 4; ++i) { childFunc(child_pics[i], child_pics_d[i]); }//6.97ms cpu=45% #else thread t[4]; for (int i = 0; i != 4; ++i) { t[i] = thread(childFunc, child_pics[i], child_pics_d[i]); } for (int i = 0; i != 4; ++i) { t[i].join(); }//21.32ms cpu=55% #endif }; long long t1 = getTickCount(); double time_waste = double(t1 - t0) / getTickFrequency(); time_waste /= LOOPS; printf("time waste=%.2f ms\n", time_waste * 1000); imshow("test", dst); waitKey(0); }
实测发现，使用 std::thread 对图像数据进行分片处理时，处理时间明显偏长，似乎确实不适合这样用。具体原因目前还不清楚，可能与数据竞争有关。
相比之下，OpenMP 的速度更快一些。