// OpenMP性能优化方案 (OpenMP performance optimization approach)
#include <iostream>
#include <windows.h>
using namespace std;
/**
 * Two independent counters, each updated by a different worker routine.
 *
 * The 64-byte `space` member carries no data. It is padding intended to place
 * count1 and count2 on different CPU cache lines, so that two threads updating
 * them concurrently do not keep invalidating each other's cache line (the
 * cache "ping-pong" effect, also known as false sharing). Removing the padding
 * is a useful experiment: the parallel timing is expected to change
 * noticeably. Search for "false sharing CPU cache" for background.
 */
struct Count
{
    long count1;     // counter handed to the first worker
    char space[64];  // padding only; never read or written for its value
    long count2;     // counter handed to the second worker
};
/**
 * Thread entry point: scans the grid points (x, y) with x in [0, 0.5] and
 * y in [0, 1], spaced 0.0001 apart, and increments the caller's counter once
 * for every point that satisfies x*x + y*y <= 1.
 *
 * @param p  Pointer to a `long` counter. It is incremented in place and is
 *           NOT reset here, so the caller must initialize it beforehand.
 * @return   Always 0.
 */
DWORD WINAPI Calculate1(void *p)
{
    const double step = 0.0001;
    long* c1 = static_cast<long*>(p);

    // Integer loop indices are used instead of accumulating `x += 0.0001`:
    // repeated floating-point addition drifts, which makes the number of
    // iterations (and whether the end points 0.5 and 1 are visited) depend on
    // rounding error. With indices the grid is exactly 5001 x 10001 points.
    for (int ix = 0; ix <= 5000; ++ix)
    {
        const double x = ix * step;
        for (int iy = 0; iy <= 10000; ++iy)
        {
            const double y = iy * step;
            // The counter is deliberately updated through the pointer on each
            // hit rather than accumulated in a local variable.
            if (x * x + y * y <= 1) (*c1)++;
        }
    }
    return 0;
}
/**
 * Thread entry point: scans the grid points (x, y) with x in [0.5001, 1] and
 * y in [0, 1], spaced 0.0001 apart, and increments the caller's counter once
 * for every point that satisfies x*x + y*y <= 1.
 *
 * @param p  Pointer to a `long` counter. It is incremented in place and is
 *           NOT reset here, so the caller must initialize it beforehand.
 * @return   Always 0.
 */
DWORD WINAPI Calculate2(void *p)
{
    const double step = 0.0001;
    long* c2 = static_cast<long*>(p);

    // Integer loop indices are used instead of accumulating `i += 0.0001`:
    // repeated floating-point addition drifts, which makes the number of
    // iterations (and whether the end point 1 is visited) depend on rounding
    // error. With indices the grid is exactly 5000 x 10001 points.
    for (int ix = 5001; ix <= 10000; ++ix)
    {
        const double x = ix * step;
        for (int iy = 0; iy <= 10000; ++iy)
        {
            const double y = iy * step;
            // The counter is deliberately updated through the pointer on each
            // hit rather than accumulated in a local variable.
            if (x * x + y * y <= 1) (*c2)++;
        }
    }
    return 0;
}
int main()
{
DWORD pThreadID;
Count cnt;
DWORD startTime = 0,endTime = 0;
startTime= GetTickCount();
Calculate1(&cnt.count1);
Calculate2(&cnt.count2);
cout << "Serial calculate cost " << GetTickCount() - startTime << "ms." << endl;
cnt.count1 = 0;
cnt.count2 = 0;
startTime= GetTickCount();
HANDLE hThread1 = CreateThread (NULL,
0,
Calculate1,
&cnt.count1,
0,
&pThreadID);
HANDLE hThread2 = CreateThread (NULL,
0,
Calculate2,
&cnt.count2,
0,
&pThreadID);
WaitForSingleObject(hThread1, INFINITE);
WaitForSingleObject(hThread2, INFINITE);
endTime=GetTickCount();
cout << "Parallel calculate cost " << endTime-startTime << "ms." << endl;
}