OpenCV 并行操作 parallel_for_ 的问题
第一次运行时，并行计算比普通计算快很多，如图1所示；可是第二次运行时，两种方式所用的时间基本差不多。有高手能指点一下吗？
这段程序我是参考了下面这篇文章:
OpenCV并行加速Parallel_for_与ParallelLoopBody教程_void operator()(const range& range)-CSDN博客
#include <iostream> #include<opencv2/opencv.hpp> #include<opencv2/core/utility.hpp> using namespace cv; using namespace std; class PllMul:public ParallelLoopBody//并行计算类 { public: PllMul(const Mat& _src1,const Mat&_src2,Mat& _result) { int cn=_src1.channels(); _src1.convertTo(src1,CV_32FC(cn)); _src2.convertTo(src2,CV_32FC(cn)); _result.convertTo(result,CV_32FC(cn)); CV_Assert(src1.channels()==src2.channels() &&src1.rows==src2.rows &&src1.cols==src2.cols); rows=src1.rows; cols=src1.cols; } void operator()(const Range&range)const //重载括号()运算符 { int totalPixs=range.end-range.start; float *presult=(float*)(result.data); float *ps1 =(float*)(src1.data); float *ps2 =(float*)(src2.data); int cn=src1.channels(); for(int i=0;i<totalPixs;i++) { for(int k=0;k<cn;k++) presult[k]=ps1[k]*ps2[k]; presult+=cn; ps1 +=cn; ps2 +=cn; } } Mat result;//为了方便测试,设为了共有成员 private: Mat src1; Mat src2; int rows; int cols; }; class MyMul //普通计算类 { public: MyMul(const Mat& _src1,const Mat&_src2,Mat& _result) { int cn=_src1.channels(); _src1.convertTo(src1,CV_32FC(cn)); _src2.convertTo(src2,CV_32FC(cn)); _result.convertTo(result,CV_32FC(cn)); CV_Assert(src1.channels()==src2.channels() &&src1.rows==src2.rows &&src1.cols==src2.cols); rows=src1.rows; cols=src1.cols; } void operator()(const Range&range)const //重载括号()运算符 { int totalPixs=range.end-range.start; float *presult=(float*)(result.data); float *ps1 =(float*)(src1.data); float *ps2 =(float*)(src2.data); int cn=src1.channels(); for(int i=0;i<totalPixs;i++) { for(int k=0;k<cn;k++) presult[k]=ps1[k]*ps2[k]; presult+=cn; ps1 +=cn; ps2 +=cn; } } Mat result;//为了方便测试,设为了共有成员 private: Mat src1; Mat src2; int rows; int cols; }; void testMllWithoutParallel(Mat &s1,Mat&s2,Mat&result1)//普通计算模式测试 { result1=Mat(s1.size(),s1.type(),Scalar::all(0)); PllMul mul(s1,s2,result1); mul(Range(0,s1.rows*s1.cols));//调用括号()运算符函数 cout<<"OK!"<<endl; } void testPllMllWithParallel(Mat&s1,Mat&s2,Mat&result)//并行计算模式测试 { result=Mat(s1.size(),s1.type(),Scalar::all(0)); // 
PllMul mul(); cv::parallel_for_(Range(0,s1.rows*s1.cols),PllMul(s1,s2,result));//隐式调用括号()运算符函数 cout<<endl<<"OK!"<<endl; } int main() { Mat s1(1000,1000,CV_8UC3),s2(1000,1000,CV_8UC3); Mat result1,result2; randu(s1,0,10);randu(s2,0,10); clock_t start,stop; start=clock(); testMllWithoutParallel(s1,s2,result1); stop=clock(); cout<<"Running time using \'parallel for \':" << (double)(stop - start) / CLOCKS_PER_SEC * 1000 << "ms" << endl; start=clock(); testPllMllWithParallel(s1,s2,result2); stop=clock(); cout << "Running time using \'common call function \':" << (double)(stop - start) / CLOCKS_PER_SEC * 1000 << "ms" << endl; return 0; }
图1. 第一次运行结果
图2. 第二次运行结果