OpenMP的效果
今天简单地把#pragma omp parallel for private(j)语句加在一个练习时做的求解Laplace方程的有限差分程序上,4万个点的网格上,在P4超线程的CPU上,使得365秒减少到314秒,50秒的差距呀。
ps:然而在使用SOR方法时,4万个点的网格,单线程只用了14秒,,以后有时间再研究一下高斯赛德尔的SOR多线程版本,看有什么改进。
附带程序:
1#include <omp.h>
2#include <iostream>
3#include <fstream>
4#include <cmath>
5#include <iomanip>
6using namespace std;
7//迭代终止误差
8const double eps=1e-7;
9//网格划分
10const int ndivx=200;
11const int ndivy=200;
12const int nNx=ndivx+1;
13const int nNy=ndivy+1;
14const double deltax=1.0/ndivx;
15const double deltay=1.0/ndivy;
16//常数
17const double PI=3.14159265;
18const double omiga=1.902;//松弛因子
19
20
21
22
23int main(void)
24{
25// omp_set_num_threads(2);
26 Timer tm;
27 tm.start();
28
29 int j,i;
30 double error=0;
31 double** uold=new double*[nNy];
32 double** unew=new double*[nNy];
33 for ( i=0;i<nNx;i++)
34 {
35 uold[i]=new double [nNx];
36 unew[i]=new double [nNx];
37 for ( j=0;j<nNx;j++)
38 {
39 uold[i][j]=unew[i][j]=0;
40 }
41 }
42
43 //边界条件
44 for ( i=0;i<nNx;i++)
45 {
46 uold[0][i]=unew[0][i]=10+cos(i*PI*deltax);
47 uold[nNy-1][i]=unew[nNy-1][i]=10+cos(i*PI*deltax);
48 }
49
50 for ( i=0;i<nNy;i++)
51 {
52 uold[i][0]=unew[i][0]=10+cos(i*PI*deltay);
53 uold[i][nNx-1]=unew[i][nNx-1]=10+cos(i*PI*deltay);
54 }
55
56 int time=0;
57 double ukn=-1+2*(1.0/(deltax*deltax)+1.0/(deltay*deltay));
58 do{
59
60#pragma omp parallel for private(j)
61 for ( i=1;i<nNy-1;i++)
62 {
63 for ( j=1;j<nNx-1;j++)
64 {
65//jacobi
66 unew[i][j]=1.0/ukn*( (1.0/(deltay*deltay)+0.5/deltay*sin(2*PI*i*deltay))*uold[i+1][j]
67 +(1.0/(deltay*deltay)-0.5/deltay*sin(2*PI*i*deltay))*uold[i-1][j]
68 +(1.0/(deltax*deltax)+0.5/deltax*sin(2*PI*j*deltay))*uold[i][j+1]
69 +(1.0/(deltax*deltax)-0.5/deltax*sin(2*PI*j*deltay))*uold[i][j-1] );
70//sor
71/* unew[i][j]=(1-omiga)*uold[i][j]
72 +omiga/ukn*( (1.0/(deltay*deltay)+0.5/deltay*sin(2*PI*i*deltay))*uold[i+1][j]
73 +(1.0/(deltay*deltay)-0.5/deltay*sin(2*PI*i*deltay))*unew[i-1][j]
74 +(1.0/(deltax*deltax)+0.5/deltax*sin(2*PI*j*deltay))*uold[i][j+1]
75 +(1.0/(deltax*deltax)-0.5/deltax*sin(2*PI*j*deltay))*unew[i][j-1] );*/
76 }
77 }
78 double sum=0;
79 for ( i=1;i<nNy-1;i++)
80 {
81 for ( j=1;j<nNx-1;j++)
82 {
83 sum+=(uold[i][j]-unew[i][j])*(uold[i][j]-unew[i][j]);
84 }
85 }
86 error=sqrt(sum);
87
88 for ( i=0;i<nNy;i++)
89 {
90 for ( j=0;j<nNx;j++)
91 {
92 uold[i][j]=unew[i][j];
93 }
94 }
95 // if(time%100==0)
96// cout<<++time<<"\t\t"<<error<<endl;
97 if (time>=100000)
98 {
99 break;
100 }
101 }while(error>eps);
102
103 ofstream tecplot("tec.plt");
104 tecplot<<"zone i="<<nNy<<",j="<<nNx<<",f=point\n";
105 for ( i=0;i<nNy;i++)
106 {
107 for ( j=0;j<nNx;j++)
108 {
109 tecplot << setw(15) << i*deltay
110 << setw(15) << j*deltax
111 << setw(15) << uold[i][j]<<"\n";
112 }
113 }
114 tecplot.close();
115 for ( i=0;i<nNy;i++)
116 {
117 delete[] uold[i];
118 delete[] unew[i];
119 }
120 delete[] uold;
121 delete[] unew;
122 tm.stop();
123 tm.diplayelapsedtime();
124 return 0;
125}
2#include <iostream>
3#include <fstream>
4#include <cmath>
5#include <iomanip>
6using namespace std;
7//迭代终止误差
8const double eps=1e-7;
9//网格划分
10const int ndivx=200;
11const int ndivy=200;
12const int nNx=ndivx+1;
13const int nNy=ndivy+1;
14const double deltax=1.0/ndivx;
15const double deltay=1.0/ndivy;
16//常数
17const double PI=3.14159265;
18const double omiga=1.902;//松弛因子
19
20
21
22
23int main(void)
24{
25// omp_set_num_threads(2);
26 Timer tm;
27 tm.start();
28
29 int j,i;
30 double error=0;
31 double** uold=new double*[nNy];
32 double** unew=new double*[nNy];
33 for ( i=0;i<nNx;i++)
34 {
35 uold[i]=new double [nNx];
36 unew[i]=new double [nNx];
37 for ( j=0;j<nNx;j++)
38 {
39 uold[i][j]=unew[i][j]=0;
40 }
41 }
42
43 //边界条件
44 for ( i=0;i<nNx;i++)
45 {
46 uold[0][i]=unew[0][i]=10+cos(i*PI*deltax);
47 uold[nNy-1][i]=unew[nNy-1][i]=10+cos(i*PI*deltax);
48 }
49
50 for ( i=0;i<nNy;i++)
51 {
52 uold[i][0]=unew[i][0]=10+cos(i*PI*deltay);
53 uold[i][nNx-1]=unew[i][nNx-1]=10+cos(i*PI*deltay);
54 }
55
56 int time=0;
57 double ukn=-1+2*(1.0/(deltax*deltax)+1.0/(deltay*deltay));
58 do{
59
60#pragma omp parallel for private(j)
61 for ( i=1;i<nNy-1;i++)
62 {
63 for ( j=1;j<nNx-1;j++)
64 {
65//jacobi
66 unew[i][j]=1.0/ukn*( (1.0/(deltay*deltay)+0.5/deltay*sin(2*PI*i*deltay))*uold[i+1][j]
67 +(1.0/(deltay*deltay)-0.5/deltay*sin(2*PI*i*deltay))*uold[i-1][j]
68 +(1.0/(deltax*deltax)+0.5/deltax*sin(2*PI*j*deltay))*uold[i][j+1]
69 +(1.0/(deltax*deltax)-0.5/deltax*sin(2*PI*j*deltay))*uold[i][j-1] );
70//sor
71/* unew[i][j]=(1-omiga)*uold[i][j]
72 +omiga/ukn*( (1.0/(deltay*deltay)+0.5/deltay*sin(2*PI*i*deltay))*uold[i+1][j]
73 +(1.0/(deltay*deltay)-0.5/deltay*sin(2*PI*i*deltay))*unew[i-1][j]
74 +(1.0/(deltax*deltax)+0.5/deltax*sin(2*PI*j*deltay))*uold[i][j+1]
75 +(1.0/(deltax*deltax)-0.5/deltax*sin(2*PI*j*deltay))*unew[i][j-1] );*/
76 }
77 }
78 double sum=0;
79 for ( i=1;i<nNy-1;i++)
80 {
81 for ( j=1;j<nNx-1;j++)
82 {
83 sum+=(uold[i][j]-unew[i][j])*(uold[i][j]-unew[i][j]);
84 }
85 }
86 error=sqrt(sum);
87
88 for ( i=0;i<nNy;i++)
89 {
90 for ( j=0;j<nNx;j++)
91 {
92 uold[i][j]=unew[i][j];
93 }
94 }
95 // if(time%100==0)
96// cout<<++time<<"\t\t"<<error<<endl;
97 if (time>=100000)
98 {
99 break;
100 }
101 }while(error>eps);
102
103 ofstream tecplot("tec.plt");
104 tecplot<<"zone i="<<nNy<<",j="<<nNx<<",f=point\n";
105 for ( i=0;i<nNy;i++)
106 {
107 for ( j=0;j<nNx;j++)
108 {
109 tecplot << setw(15) << i*deltay
110 << setw(15) << j*deltax
111 << setw(15) << uold[i][j]<<"\n";
112 }
113 }
114 tecplot.close();
115 for ( i=0;i<nNy;i++)
116 {
117 delete[] uold[i];
118 delete[] unew[i];
119 }
120 delete[] uold;
121 delete[] unew;
122 tm.stop();
123 tm.diplayelapsedtime();
124 return 0;
125}