OpenCV 并行操作 parallel_for_ 的问题

第一次运行时,并行计算会比普通计算要快很多,如图1所示。可是第二次运行,两种方式所用时间基本差不多,有高手能指点下吗?

这段程序我是参考了下面这篇文章:

OpenCV并行加速Parallel_for_与ParallelLoopBody教程_void operator()(const range& range)-CSDN博客

 

#include <iostream>
#include<opencv2/opencv.hpp>
#include<opencv2/core/utility.hpp>

using namespace cv;
using namespace std;

class PllMul:public ParallelLoopBody//并行计算类
{
public:
    PllMul(const Mat& _src1,const Mat&_src2,Mat& _result)
    {
        int cn=_src1.channels();
        _src1.convertTo(src1,CV_32FC(cn));
        _src2.convertTo(src2,CV_32FC(cn));
        _result.convertTo(result,CV_32FC(cn));

        CV_Assert(src1.channels()==src2.channels()
                  &&src1.rows==src2.rows
                  &&src1.cols==src2.cols);
        rows=src1.rows;
        cols=src1.cols;
    }
    void operator()(const Range&range)const //重载括号()运算符
    {
        int totalPixs=range.end-range.start;
        float *presult=(float*)(result.data);
        float *ps1    =(float*)(src1.data);
        float *ps2    =(float*)(src2.data);
        int cn=src1.channels();
        for(int i=0;i<totalPixs;i++)
        {
            for(int k=0;k<cn;k++)
                presult[k]=ps1[k]*ps2[k];

            presult+=cn;
            ps1    +=cn;
            ps2    +=cn;
        }
    }
    Mat result;//为了方便测试,设为了共有成员
private:
    Mat src1;
    Mat src2;
    int rows;
    int cols;
};
class MyMul             //普通计算类
{
public:
    MyMul(const Mat& _src1,const Mat&_src2,Mat& _result)
    {
        int cn=_src1.channels();
        _src1.convertTo(src1,CV_32FC(cn));
        _src2.convertTo(src2,CV_32FC(cn));
        _result.convertTo(result,CV_32FC(cn));

        CV_Assert(src1.channels()==src2.channels()
                  &&src1.rows==src2.rows
                  &&src1.cols==src2.cols);
        rows=src1.rows;
        cols=src1.cols;

    }
    void operator()(const Range&range)const //重载括号()运算符
    {
        int totalPixs=range.end-range.start;
        float *presult=(float*)(result.data);
        float *ps1    =(float*)(src1.data);
        float *ps2    =(float*)(src2.data);
        int cn=src1.channels();
        for(int i=0;i<totalPixs;i++)
        {
            for(int k=0;k<cn;k++)
                presult[k]=ps1[k]*ps2[k];

            presult+=cn;
            ps1    +=cn;
            ps2    +=cn;
        }
    }
    
    Mat result;//为了方便测试,设为了共有成员
private:
    Mat src1;
    Mat src2;
    int rows;
    int cols;
};
// Serial baseline: multiplies s1 and s2 element-wise on the calling thread.
//
// s1, s2:  inputs of identical size and channel count.
// result1: output; re-created with the size and type of s1 and filled with
//          the products (converted back from float to the type of s1).
void testMllWithoutParallel(Mat &s1,Mat&s2,Mat&result1)
{
    result1=Mat(s1.size(),s1.type(),Scalar::all(0));
    // Use the plain class here so the baseline does not go through the
    // parallel loop body type.
    MyMul mul(s1,s2,result1);
    mul(Range(0,s1.rows*s1.cols));// direct call of operator() over every pixel
    // The multiplier works on its own float copy; copy the product out,
    // otherwise result1 would stay all zeros.
    mul.result.convertTo(result1,s1.type());
    cout<<"OK!"<<endl;
}
// Parallel version: multiplies s1 and s2 element-wise via cv::parallel_for_.
//
// s1, s2: inputs of identical size and channel count.
// result: output; re-created with the size and type of s1 and filled with
//         the products (converted back from float to the type of s1).
void testPllMllWithParallel(Mat&s1,Mat&s2,Mat&result)
{
    result=Mat(s1.size(),s1.type(),Scalar::all(0));
    // Keep the body as a named object: passing a temporary would destroy the
    // computed product together with the temporary.
    PllMul mul(s1,s2,result);
    cv::parallel_for_(Range(0,s1.rows*s1.cols),mul);// operator() runs once per stripe
    // The multiplier works on its own float copy; copy the product out,
    // otherwise result would stay all zeros.
    mul.result.convertTo(result,s1.type());
    cout<<endl<<"OK!"<<endl;
}
int main()
{
    Mat s1(1000,1000,CV_8UC3),s2(1000,1000,CV_8UC3);
    Mat result1,result2;
    randu(s1,0,10);randu(s2,0,10);
    clock_t start,stop;

    start=clock();
    testMllWithoutParallel(s1,s2,result1);
    stop=clock();
    cout<<"Running time using \'parallel for \':" << (double)(stop - start) / CLOCKS_PER_SEC * 1000 << "ms" << endl;

    start=clock();
    testPllMllWithParallel(s1,s2,result2);
    stop=clock();
    cout << "Running time using \'common call function \':" << (double)(stop - start) / CLOCKS_PER_SEC * 1000 << "ms" << endl;

    return 0;
}

       图1. 第一次运行结果

 图2. 第二次运行结果

posted @ 2024-06-19 06:20  凤凰_1  阅读(42)  评论(0)  编辑  收藏  举报