| #include <stdlib.h> |
| #include <iostream> |
| #include <math.h> |
| #include "../headers/global.h" |
| #include "../headers/sphkernel.h" |
| |
| using namespace std; |
| |
| |
| void getPairwiseSeparations(double** &pos) |
| { |
| |
| |
| |
| |
| #if defined(OPT_BASE) && (defined(OPT_SIMD)||defined(OPT_OMP)) |
| #ifdef OPT_OMP |
| #pragma omp parallel for schedule(guided) proc_bind(close) |
| #endif |
| for (int i = 0; i < N; i++) |
| { |
| #ifndef OPT_SIMD |
| double temp1 = pos[0][i]; |
| double temp2 = pos[1][i]; |
| double temp3 = pos[2][i]; |
| for (int j = 0; j < N; j++) |
| { |
| |
| dx[i][j] = pos[0][j] - temp1; |
| dy[i][j] = pos[1][j] - temp2; |
| dz[i][j] = pos[2][j] - temp3; |
| } |
| #else |
| float64x2_t v0 = vld1q_dup_f64(&pos[0][i]); |
| float64x2_t v1 = vld1q_dup_f64(&pos[1][i]); |
| float64x2_t v2 = vld1q_dup_f64(&pos[2][i]); |
| for(int j=0;j<N/2*2;j+=2) |
| { |
| float64x2_t v0_0 = vld1q_f64(&pos[0][j]); |
| float64x2_t v1_0 = vld1q_f64(&pos[1][j]); |
| float64x2_t v2_0 = vld1q_f64(&pos[2][j]); |
| vst1q_f64(&dx[i][j],vsubq_f64(v0_0,v0)); |
| vst1q_f64(&dy[i][j],vsubq_f64(v1_0,v1)); |
| vst1q_f64(&dz[i][j],vsubq_f64(v2_0,v2)); |
| } |
| for (int j = N/2*2; j < N; j++) |
| { |
| dx[i][j] = pos[0][j] - pos[0][i]; |
| dy[i][j] = pos[1][j] - pos[1][i]; |
| dz[i][j] = pos[2][j] - pos[2][i]; |
| } |
| #endif |
| } |
| #else |
| #ifdef OPT_OMP |
| #pragma omp parallel for schedule(guided) proc_bind(close) |
| #endif |
| for (int i = 0; i < N; i++) |
| { |
| for (int j = 0; j < N; j++) |
| { |
| |
| dx[i][j] = pos[0][j] - pos[0][i]; |
| dy[i][j] = pos[1][j] - pos[1][i]; |
| dz[i][j] = pos[2][j] - pos[2][i]; |
| |
| |
| } |
| |
| } |
| #endif |
| } |
| |
| void getW(double** &dx, double** &dy, double** &dz, const double h) |
| { |
| |
| |
| #if defined(OPT_OMP) || defined(OPT_BASE) |
| double value1 = pow((1.0 / (h*sqrt(pi))), 3.0); |
| double value2 = pow(h,2); |
| #ifdef OPT_OMP |
| #pragma omp parallel for schedule(guided) proc_bind(close) |
| #endif |
| for (int i = 0; i < N; i++) |
| { |
| for (int j = 0; j < N; j++) |
| { |
| r[i][j] = sqrt(pow(dx[i][j],2.0) + pow(dy[i][j],2.0) + pow(dz[i][j],2.0)); |
| W[i][j] = value1 * exp((-pow(r[i][j],2) / value2)); |
| } |
| } |
| #else |
| #ifdef OPT_OMP |
| #pragma omp parallel for schedule(guided) proc_bind(close) |
| #endif |
| for (int i = 0; i < N; i++) |
| { |
| for (int j = 0; j < N; j++) |
| { |
| r[i][j] = sqrt(pow(dx[i][j],2.0) + pow(dy[i][j],2.0) + pow(dz[i][j],2.0)); |
| W[i][j] = pow((1.0 / (h*sqrt(pi))), 3.0) * exp((-pow(r[i][j],2) / pow(h,2))); |
| |
| |
| |
| } |
| } |
| #endif |
| |
| } |
| |
| void getGradW(double** &dx, double** &dy, double** &dz, const double h) |
| { |
| |
| |
| #if defined(OPT_OMP) || defined(OPT_BASE) |
| double value1 = pow(h,2); |
| double value2 = -2/pow(h,5)/pow(pi,(3/2)); |
| #ifdef OPT_OMP |
| #pragma omp parallel for schedule(guided) proc_bind(close) |
| #endif |
| for (int i = 0; i < N; i++) |
| { |
| for (int j = 0; j < N; j++) |
| { |
| r[i][j] = sqrt(pow(dx[i][j],2.0) + pow(dy[i][j],2.0) + pow(dz[i][j],2.0)); |
| gradPara[i][j] = exp(-pow(r[i][j],2) / value1) * value2; |
| wx[i][j] = gradPara[i][j]*dx[i][j]; |
| wy[i][j] = gradPara[i][j]*dy[i][j]; |
| wz[i][j] = gradPara[i][j]*dz[i][j]; |
| } |
| } |
| #else |
| #ifdef OPT_OMP |
| #pragma omp parallel for schedule(guided) proc_bind(close) |
| #endif |
| for (int i = 0; i < N; i++) { |
| for (int j = 0; j < N; j++) { |
| |
| r[i][j] = sqrt(pow(dx[i][j],2.0) + pow(dy[i][j],2.0) + pow(dz[i][j],2.0)); |
| |
| gradPara[i][j] = -2 * exp(-pow(r[i][j],2) / pow(h,2)) / pow(h,5) / pow(pi,(3/2)); |
| |
| wx[i][j] = gradPara[i][j]*dx[i][j]; |
| wy[i][j] = gradPara[i][j]*dy[i][j]; |
| wz[i][j] = gradPara[i][j]*dz[i][j]; |
| |
| |
| } |
| |
| } |
| #endif |
| } |
| |
| void getDensity(double** &pos, double &m, const double h) |
| { |
| getPairwiseSeparations(pos); |
| getW(dx, dy, dz, h); |
| |
| |
| |
| |
| #ifdef OPT_BASE |
| for(int i = 0; i < N; i++) |
| { |
| for(int j = 0; j < N; j++) |
| rho[j] += m * W[i][j]; |
| } |
| #else |
| for (int j = 0; j < N; j++) |
| { |
| for (int i = 0; i < N; i++) |
| rho[j] += m * W[i][j]; |
| |
| |
| |
| } |
| #endif |
| } |
| |
| void getPressure(double* &rho, const double k, double &n) |
| { |
| |
| |
| |
| #ifdef OPT_BASE |
| double value = 1+1/n; |
| for (int j = 0; j < N; j++) |
| P[j] = k * pow(rho[j], value); |
| #else |
| for (int j = 0; j < N; j++) |
| { |
| P[j] = k * pow(rho[j], (1+1/n)); |
| |
| } |
| #endif |
| |
| } |
| |
| void getAcc(double** &pos, double** &vel, double &m, const double h, const double k, double &n, double lmbda, const double nu) |
| { |
| getDensity(pos, m, h); |
| getPressure(rho, k, n); |
| getPairwiseSeparations(pos); |
| getGradW(dx, dy, dz, h); |
| #if defined(OPT_BASE) |
| #ifdef OPT_OMP |
| #pragma omp parallel for schedule(guided) proc_bind(close) |
| #endif |
| for (int j = 0; j < N; j++) |
| { |
| |
| |
| |
| |
| |
| double temp1 = P[j]/pow(rho[j],2); |
| double temp3 =0.0,temp4=0.0,temp5=0.0; |
| for (int i = 0; i < N; i++) |
| { |
| double temp2 = pow(P[i]/rho[i],2); |
| temp3 += (temp1 + temp2) * wx[j][i]; |
| temp4 += (temp1 + temp2) * wy[j][i]; |
| temp5 += (temp1 + temp2) * wz[j][i]; |
| } |
| acc[0][j] += (temp3 *=m); |
| acc[1][j] += (temp4 *=m); |
| acc[2][j] += (temp5 *=m); |
| } |
| #else |
| #ifdef OPT_OMP |
| #pragma omp parallel for schedule(guided) proc_bind(close) |
| #endif |
| for (int j = 0; j < N; j++) |
| { |
| for (int i = 0; i < N; i++) |
| { |
| acc[0][j] -= m * ( P[j]/pow(rho[j],2) + pow(P[i]/rho[i],2) ) * wx[i][j]; |
| acc[1][j] -= m * ( P[j]/pow(rho[j],2) + pow(P[i]/rho[i],2) ) * wy[i][j]; |
| acc[2][j] -= m * ( P[j]/pow(rho[j],2) + pow(P[i]/rho[i],2) ) * wz[i][j]; |
| } |
| } |
| #endif |
| |
| |
| #ifdef OPT_BASE |
| for (int j = 0; j < N; j++) |
| { |
| acc[0][j] -= (lmbda * pos[0][j] + nu * vel[0][j]); |
| acc[1][j] -= (lmbda * pos[1][j] + nu * vel[1][j]); |
| acc[2][j] -= (lmbda * pos[2][j] + nu * vel[2][j]); |
| } |
| #else |
| for (int j = 0; j < N; j++) |
| { |
| acc[0][j] -= lmbda * pos[0][j]; |
| acc[1][j] -= lmbda * pos[1][j]; |
| acc[2][j] -= lmbda * pos[2][j]; |
| } |
| for (int j = 0; j < N; j++) |
| { |
| acc[0][j] -= nu * vel[0][j]; |
| acc[1][j] -= nu * vel[1][j]; |
| acc[2][j] -= nu * vel[2][j]; |
| } |
| #endif |
| } |
| |
| #ifdef OPT_SIMD |
| |
| float64_t exp_(float64_t x) |
| { |
| |
| int n = 0; |
| double prior = 1.0; |
| double sum = prior; |
| while(1) |
| { |
| double cur = prior * x /++n; |
| sum += cur; |
| prior = cur; |
| if(cur<=EPSILON) |
| break; |
| } |
| |
| return sum; |
| } |
| |
| |
| |
| |
| |
| |
| |
| #endif |
| |
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 微软正式发布.NET 10 Preview 1:开启下一代开发框架新篇章
· 没有源码,如何修改代码逻辑?
· DeepSeek R1 简明指南:架构、训练、本地部署及硬件要求
· NetPad:一个.NET开源、跨平台的C#编辑器
· PowerShell开发游戏 · 打蜜蜂