DirectX11:Position Based Fluid
DirectX11:Position Based Fluid
前言
这是我本科毕业设计项目,使用DirectX11实现一个基于PBD的流体模拟仿真,同时也算是补了Games101的大作业了。
阅读本文假设你对以下内容比较熟悉:(由于内容较多,我也分为了几篇博客)
DirectX11 With Windows SDK--00 目录 |
---|
流体模拟:Smoothed Particle Hydrodynamics |
流体模拟:NeighborHood Search |
DirectX11:GPU基数排序 |
流体模拟:Position Based Fluid |
算法过程
具体过程
领域搜索(Neighbor Search)
我们采用空间哈希的方法对粒子所处的空间网格进行划分,通过计算其空间哈希值并将其进行排序,得到当前网格的起始与结束地址。(具体实现可参考:流体模拟:NeighborHood Search)
我们可以通过遍历当前粒子附近的27个网格得到出其的邻居粒子(即粒子之间距离少于一定距离,本项目设为粒子半径),最大邻居粒子数量设为96。
HLSL核心代码
#include "PBFSolverCommon.hlsli"
//Find Neighbor Paticle
[numthreads(THREAD_NUM_X, 1, 1)]
void CS( uint3 DTid : SV_DispatchThreadID )
{
if (DTid.x >= g_ParticleNums)
{
return;
}
//curr particle pos
float3 currPos = g_sortedNewPosition[DTid.x];
float3 sceneMin = g_Bounds[0];
int3 currCellPos = floor((currPos - sceneMin) / g_CellSize);
//curr particle index
//uint currParitcleIndex = g_ParticleIndex[DTid.x];
int neighborCount = 0;
int x = 0, y = 0, z = 0;
[unroll(3)]
for (z = -1; z <= 1; ++z)
{
[unroll(3)]
for (y = -1; y <= 1; ++y)
{
[unroll(3)]
for (x = -1; x <= 1; ++x)
{
//find 27 cell neighbor particle
int3 neighCellPos = currCellPos + int3(x, y, z);
if (neighCellPos.x < 0.0f || neighCellPos.y < 0.0f || neighCellPos.z < 0.0f)
{
continue;
}
uint cellHash = GetCellHash(neighCellPos);
uint neighborCellStart = g_CellStart[cellHash];
uint neighborCellEnd = g_CellEnd[cellHash];
if (neighborCellStart >= neighborCellEnd)
{
continue;
}
for (uint index = neighborCellStart; index < neighborCellEnd; ++index)
{
//get the cell particle pos
float3 neighborPartclePos = g_sortedNewPosition[index];
float3 distance = currPos - neighborPartclePos;
float distancesq = dot(distance, distance);
if (distancesq < g_ParticleRadiusSq)
{
//contact
int contactsIndex = DTid.x * g_MaxNeighborPerParticle + neighborCount;
g_Contacts[contactsIndex] = index;
neighborCount++;
}
if (neighborCount == g_MaxNeighborPerParticle)
{
g_ContactCounts[DTid.x] = neighborCount;
return;
}
}
}
}
}
g_ContactCounts[DTid.x] = neighborCount;
}
不可压缩约束和拉格朗日乘子
流体粒子\(i\)的密度根据SPH方法计算可得:\(\rho_{i}=\sum_{j} m_{j} W\left(p_{i}-p_{j}, h\right)\)
拉格朗日乘子的计算公式为:
其中密度约束\(C_i\)要保证不为负:\(C_i=max(C_i,0)\),即\(C_i\ge 0\)为不等式约束;
HLSL核心代码:
#include "PBFSolverCommon.hlsli"
[numthreads(THREAD_NUM_X, 1, 1)]
void CS( uint3 DTid : SV_DispatchThreadID )
{
if (DTid.x >= g_ParticleNums)
{
return;
}
//curr particle pos
float3 currPos = g_sortedNewPosition[DTid.x];
//curr neighbor count
uint neightborCount = g_ContactCounts[DTid.x];
//clac density
float density = 0;
//clac Lagrange multiplier
float3 gradSum_i = float3(0.0f, 0.0f, 0.0f);
float gradSum_j = 0;
uint i = 0;
for (i = 0; i < neightborCount; ++i)
{
//get the cell particle pos
uint neightborParticleIndex = g_Contacts[DTid.x * g_MaxNeighborPerParticle + i];
float3 neighborPartclePos = g_sortedNewPosition[neightborParticleIndex];
//r=p_i-p_j
float3 r = currPos - neighborPartclePos;
density += WPoly6(r, g_sphSmoothLength);
float3 currGrad = WSpikyGrad(r, g_sphSmoothLength);
currGrad *= g_InverseDensity_0;
gradSum_i += currGrad;
if (neightborParticleIndex != DTid.x)
{
gradSum_j += dot(currGrad, currGrad);
}
}
//debug show
g_Density[DTid.x] = density;
float gradSumTotal = gradSum_j + dot(gradSum_i, gradSum_i);
// evaluate density constraint
float constraint = max(density * g_InverseDensity_0 - 1.0f, 0.0f);
float lambda = -constraint / (gradSumTotal + g_LambdaEps);
g_LambdaMultiplier[DTid.x] = lambda;
}
约束投影与拉伸不稳定性
粒子\(i\)经过上述的约束投影后可以计算出对应的位移向量(包括自身的密度约束以及邻居粒子的密度约束的共同作用),可由下面公式计算得出:
以及通过添加排斥项\(S_{corr}\)来避免产生粒子聚集的现象:
其中,\(\Delta q\)表示到粒子\(i\)的一个固定距离,通常取\(|\Delta q|=0.1h,...,0.3h\),\(h\)即前面提到的光滑核半径。此外,公式中的\(k\)可以看作表面张力参数,取值\(k=0.1\),而\(n=4\)。最后公式变成了:
HLSL核心代码:
#include "PBFSolverCommon.hlsli"
[numthreads(THREAD_NUM_X, 1, 1)]
void CS( uint3 DTid : SV_DispatchThreadID )
{
if (DTid.x >= g_ParticleNums)
{
return;
}
//curr particle pos
float3 currPos = g_sortedNewPosition[DTid.x];
//curr neighbor count
uint neightborCount = g_ContactCounts[DTid.x];
float currLambda = g_LambdaMultiplier[DTid.x];
float poly6Q = WSpiky(g_DeltaQ, g_sphSmoothLength);
float3 deltaPos = float3(0.0f, 0.0f, 0.0f);
uint i = 0;
for (i = 0; i < neightborCount; ++i)
{
//get the cell particle pos
uint neightborParticleIndex = g_Contacts[DTid.x * g_MaxNeighborPerParticle + i];
float neighborLambda = g_LambdaMultiplier[neightborParticleIndex];
//get the cell particle pos
float3 neighborParticlePos = g_sortedNewPosition[neightborParticleIndex];
//r=p_i-p_j
float3 r = currPos - neighborParticlePos;
float poly6 = WSpiky(r, g_sphSmoothLength);
float diffPoly = poly6 / poly6Q;
float scorr = -g_ScorrK * pow(abs(diffPoly), g_ScorrN);
float coff_j = currLambda + neighborLambda + scorr;
float3 currGrad = WSpikyGrad(r, g_sphSmoothLength);
deltaPos += coff_j * currGrad;
}
deltaPos = deltaPos * g_InverseDensity_0;
g_DeltaPosition[DTid.x] = deltaPos;
}
上述描述的平均约束力确保了收敛性,但是在某些情况下,这种平均会过于激进,并且达到解所需的迭代次数增加。因此,我们需要一个全局用户参数\(\omega\)来控制逐次超松驰法(SOR)的速度。
HLSL核心代码:
#include "PBFSolverCommon.hlsli"
[numthreads(THREAD_NUM_X, 1, 1)]
void CS( uint3 DTid : SV_DispatchThreadID)
{
if (DTid.x >= g_ParticleNums)
{
return;
}
uint totalNeighborNum = g_ContactCounts[DTid.x];
float factor = max(g_SOR * totalNeighborNum, 1.0f);
float3 resPos = g_sortedNewPosition[DTid.x] + g_DeltaPosition[DTid.x] * (1 / factor);
g_sortedNewPosition[DTid.x] = resPos;
}
处理碰撞
粒子与边界碰撞的处理也一直是一个非常关键的问题,一般来说,在SPH方法中采用将边界粒子也视为粒子(即边界粒子)进行表达,我不想采用这种方法。
另一种处理方法是采用SDF(Signed Distance Field)的方式变大空间中容器的位置,然后判断粒子是否出去了这个空间并将其推回去,从而保证粒子在容器之中。因为时间有限以及场景都是简单的几何体,所以本项目直接使用了解析SDF函数来解决。若场景复杂,存在多面体模型的话,可以考虑先bake出当前场景的中3D SDF Texture,在Shader中进行采样进行计算。
因为本场景采用平面对粒子的容器范围进行限制,所以采用了平面的SDF距离场公式:
我们由高中数学知识可得平面的一般式为:\(A(x-x_0)+B(y-y_0)+C(z-z_0)+h=0\),其中法线为\((A,B,C)\),\((x_0,y_0,z_0)\)为平面上一点,可得到如下的平面的SDF函数。(更多公式可参考大神inigo quilez的博客)
//sdf plane function
float sdfPlane(float3 p, float3 n, float h)
{
// n must be normalized
return dot(p, n) + h;
}
所以我们处理粒子的碰撞时,首先在约束求解前得到每个粒子接触的平面(最大的接触平面数为6)。在每次迭代求解后,对其进行判断是否出去了这个空间并将其推回去。
处理碰撞过程中,我还增加了一个friction model,具体公式如下:
HLSL核心代码:
//CollisionPlane_CS.hlsl
#include "PBFSolverCommon.hlsli"
[numthreads(THREAD_NUM_X, 1, 1)]
void CS( uint3 DTid : SV_DispatchThreadID )
{
if (DTid.x >= g_ParticleNums)
{
return;
}
float3 currPos = g_sortedNewPosition[DTid.x];
int count = 0;
int i = 0;
[unroll]
for (i = 0; i < g_PlaneNums; ++i)
{
float distance = sdfPlane(currPos, g_Plane[i].xyz, g_Plane[i].w) - g_CollisionDistance;
if (distance < g_CollisionThreshold && count < g_MaxCollisionPlanes)
{
int index = DTid.x * g_MaxCollisionPlanes + count;
g_CollisionPlanes[index] = g_Plane[i];
count++;
}
}
g_CollisionCounts[DTid.x] = count;
}
//SolveContact_CS.hlsl
#include "PBFSolverCommon.hlsli"
[numthreads(THREAD_NUM_X, 1, 1)]
void CS( uint3 DTid : SV_DispatchThreadID )
{
if (DTid.x >= g_ParticleNums)
{
return;
}
float3 currPos = g_sortedNewPosition[DTid.x];
float3 oldPos = g_sortedOldPosition[DTid.x];
int collisionCount = g_CollisionCounts[DTid.x];
int i = 0;
for (i = 0; i < collisionCount; ++i)
{
int index = DTid.x * g_MaxCollisionPlanes + i;
float4 currPlane = g_CollisionPlanes[index];
float distance = sdfPlane(currPos, currPlane.xyz, currPlane.w) - g_CollisionDistance; //d
if (distance < 0.0f)
{
float3 sdfPos = (-distance) * currPlane.xyz;
//friction model
float3 deltaPos = currPos - oldPos;
float deltaX = dot(deltaPos, currPlane.xyz);
float3 deltaDistane = (-deltaX) * currPlane.xyz + deltaPos; //DeltaX
float deltaLength = dot(deltaDistane, deltaDistane);
[flatten]
if (deltaLength <= (g_StaticFriction * distance)) //|deltaX|< u_s*disctance
{
sdfPos -= deltaDistane;
}
else
{
float dynamicFriction = min((-distance) * 0.01f * rsqrt(deltaLength), 1.0f); //
sdfPos -= dynamicFriction * (deltaDistane);
}
currPos += sdfPos;
}
}
g_UpdatedPosition[DTid.x] = currPos;
}
更新速度
这里的公式非常简单:
HLSL核心代码:
#include "PBFSolverCommon.hlsli"
[numthreads(THREAD_NUM_X, 1, 1)]
void CS( uint3 DTid : SV_DispatchThreadID )
{
if (DTid.x >= g_ParticleNums)
{
return;
}
float3 oldPos = g_sortedOldPosition[DTid.x];
float3 updatePos = g_sortedNewPosition[DTid.x];
float3 newVec = g_InverseDeltaTime * (updatePos - oldPos);
g_UpdatedVelocity[DTid.x] = newVec;
}
涡轮控制和人工粘性
由于数值耗散,PBD方法通常会引入额外的阻尼,导致整个系统的能来损耗,由此会导致本来该有的一些涡流快速消失。PBF通过vorticity confinement由系统重新注入能量:
上述公式中,\(N=\frac{\eta}{|\eta|}, \eta=\nabla|\omega|_{i}\),而流体粒子的旋度\(w_i\)计算公式如下:
Vorticity Confinement的基本思路是:通过添加体积力(body force)\(f_{i}^{\text {vorticity }}\)的方式,在旋度粒子(可理解为比周围粒子旋转快的粒子,\(\omega_i\)指向粒子\(i\)的旋转轴)处加速粒子的旋转运动,通过这种方式来保持系统的旋度。\(\epsilon\)用来控制Vorticity Confinement的强度。
PBF方法采用XSPH的粘度方法直接更新速度,从而产生粘性阻尼。添加人工粘性(Artificial Viscosity)除了可以增加模拟的数值稳定性,还可以消除非物理振荡(nonphysical oscillations)
HLSL核心代码:
#include "PBFSolverCommon.hlsli"
//Clac curl
[numthreads(THREAD_NUM_X, 1, 1)]
void CS( uint3 DTid : SV_DispatchThreadID )
{
if (DTid.x >= g_ParticleNums)
{
return;
}
//curr neighbor count
uint neightborCount = g_ContactCounts[DTid.x];
//curr particle pos
float3 currPos = g_sortedNewPosition[DTid.x];
float3 currVec = g_UpdatedVelocity[DTid.x];
float3 currOmega = float3(0.0f, 0.0f, 0.0f);
uint i = 0;
for (i = 0; i < neightborCount; ++i)
{
//get the cell particle pos
uint neightborParticleIndex = g_Contacts[DTid.x * g_MaxNeighborPerParticle + i];
//get the cell particle pos
float3 neighborParticlePos = g_sortedNewPosition[neightborParticleIndex];
//r=p_i-p_j
float3 r = currPos - neighborParticlePos;
//v_j-v_i
float3 deltaVelocity = g_UpdatedVelocity[neightborParticleIndex] - currVec;
float3 currGrad = WSpikyGrad(r, g_sphSmoothLength);
//calc omega
float3 omega_j = cross(deltaVelocity, currGrad);
currOmega += omega_j;
}
float curlLength = length(currOmega);
g_Curl[DTid.x] = float4(currOmega.xyz, curlLength);
}
#include "PBFSolverCommon.hlsli"
[numthreads(THREAD_NUM_X, 1, 1)]
void CS( uint3 DTid : SV_DispatchThreadID )
{
if (DTid.x >= g_ParticleNums)
{
return;
}
float3 currPos = g_sortedNewPosition[DTid.x];
float3 currVec = g_UpdatedVelocity[DTid.x];
float3 oldVec = g_sortedVelocity[DTid.x];
uint counter = g_ContactCounts[DTid.x];
float3 deltaTotalVec = float3(0.0f, 0.0f, 0.0f);
float3 etaTotal = float3(0.0f, 0.0f, 0.0f);
float density;
for (uint i = 0; i < counter; ++i)
{
//get the cell particle pos
uint neightborParticleIndex = g_Contacts[DTid.x * g_MaxNeighborPerParticle + i];
//get the cell particle pos
float3 neighborParticlePos = g_sortedNewPosition[neightborParticleIndex];
//r=p_i-p_j
float3 r = currPos - neighborParticlePos;
//v_j-v_i
float3 deltaVelocity = g_UpdatedVelocity[neightborParticleIndex] - currVec;
float3 currGrad = WSpikyGrad(r, g_sphSmoothLength);
//vorsitory confinement
float neighCurlLength = g_Curl[neightborParticleIndex].w;
etaTotal += currGrad * neighCurlLength;
//XSPH
float3 deltaVec_j = deltaVelocity * WSpiky(r, g_sphSmoothLength);
deltaTotalVec += deltaVec_j;
//density debug
density += WPoly6(r, g_sphSmoothLength);
}
float3 impulse = float3(0.0f, 0.0f, 0.0f);
//vorticity Confinement
if (length(etaTotal) > 0.0f && g_VorticityConfinement > 0.0f && density>0.0f)
{
float epsilon = g_DeltaTime * g_DeltaTime * g_InverseDensity_0 * g_VorticityConfinement;
float3 currCurl = g_Curl[DTid.x].xyz; //r2
float3 N = normalize(etaTotal);
float3 force = cross(N, currCurl);
impulse += epsilon * force;
}
//XSPH
impulse += g_VorticityC * deltaTotalVec;
// solve plane
uint planeCounts = g_CollisionCounts[DTid.x];
uint resCounts = 0;
float3 restitutionVec = float3(0.0f, 0.0f, 0.0f);
for (uint j = 0; j < planeCounts; ++j)
{
float4 plane = g_CollisionPlanes[DTid.x * g_MaxCollisionPlanes + j];
float distance = sdfPlane(currPos, plane.xyz, plane.w) - 1.001f * g_CollisionDistance;
float oldVecD = dot(oldVec, plane.xyz);
if (distance < 0.0f && oldVecD < 0.0f)
{
float currVecD = dot(currVec, plane.xyz);
float restitutionD = oldVecD * g_Restituion + currVecD;
restitutionVec += plane.xyz * (-restitutionD);
resCounts++;
}
}
resCounts = max(resCounts, 1);
restitutionVec /= resCounts;
impulse += restitutionVec;
g_Impulses[DTid.x] = impulse;
}
最终处理
最终我们只需将求解后的最终结果(位置与速度信息)输出即可,本项目还对粒子的最大速度进行一定的限制。
HLSL核心代码:
#include "PBFSolverCommon.hlsli"
[numthreads(THREAD_NUM_X, 1, 1)]
void CS( uint3 DTid : SV_DispatchThreadID )
{
if (DTid.x >= g_ParticleNums)
{
return;
}
uint prevIndex = g_Particleindex[DTid.x];
g_SolveredPosition[prevIndex] = g_sortedNewPosition[DTid.x];
float3 currVec = g_UpdatedVelocity[DTid.x];
float3 impulse = g_Impulses[DTid.x];
float3 oldVec = g_oldVelocity[prevIndex];
float3 deltaVec = currVec + impulse - oldVec;
float deltaVecLengthsq = dot(deltaVec, deltaVec);
if (deltaVecLengthsq > (g_MaxVeclocityDelta * g_MaxVeclocityDelta))
{
deltaVec = deltaVec * rsqrt(deltaVecLengthsq) * g_MaxVeclocityDelta;
}
float3 finVec = oldVec + deltaVec;
g_SolveredVelocity[prevIndex] = finVec;
}
C++核心部分
因为代码太多,这里这粗略展示算法核心过程代码,具体代码可去下面的仓库地址查看:
void FluidSystem::TickLogic(ID3D11DeviceContext* deviceContext, PBFSolver::PBFParams params)
{
m_pPBFSolver->SetPBFParams(params);
for (int i = 0; i < params.subStep; ++i)
{
m_pPBFSolver->PredParticlePosition(deviceContext, *m_pPBFSolverEffect,
m_pParticlePosBuffer->GetBuffer(), m_pParticleVecBuffer->GetBuffer());
//NeighborSearch
m_GpuTimer_NeighBorSearch.Start();
m_pNeighborSearch->BeginNeighborSearch(deviceContext, m_pPBFSolver->GetPredPosition(), m_pParticleIndexBuffer->GetBuffer(), params.cellSize);
m_pNeighborSearch->CalcBounds(deviceContext, *m_pNeighborSearchEffect, m_pPBFSolver->GetPredPosition(), m_pParticleIndexBuffer->GetBuffer(), params.cellSize);
m_pNeighborSearch->RadixSort(deviceContext, *m_pNeighborSearchEffect);
m_pNeighborSearch->FindCellStartAndEnd(deviceContext, *m_pNeighborSearchEffect);
m_pNeighborSearch->EndNeighborSearch();
// Constraint iter solver
m_pPBFSolver->BeginConstraint(deviceContext, *m_pPBFSolverEffect, m_pNeighborSearch->GetSortedParticleIndex(),
m_pNeighborSearch->GetSortedCellStart(), m_pNeighborSearch->GetSortedCellEnd(), m_pNeighborSearch->GetBounds());
m_pPBFSolver->SolverConstraint(deviceContext, *m_pPBFSolverEffect);
m_pPBFSolver->EndConstraint(deviceContext, *m_pPBFSolverEffect);
//update data
m_pParticlePosBuffer->UpdataBufferGPU(deviceContext, m_pPBFSolver->GetSolveredPosition());
m_pParticleVecBuffer->UpdataBufferGPU(deviceContext, m_pPBFSolver->GetSolveredVelocity());
}
m_pPBFSolver->CalcAnisotropy(deviceContext, *m_pPBFSolverEffect);
}
void PBFSolver::SolverConstraint(ID3D11DeviceContext* deviceContext, PBFSolverEffect& effect)
{
effect.SetOutPutUAVByName("g_LambdaMultiplier", m_pLambdaMultiplierBuffer->GetUnorderedAccess());
effect.SetOutPutUAVByName("g_Density", m_pDensityBuffer->GetUnorderedAccess());
effect.SetOutPutUAVByName("g_DeltaPosition", m_pDeltaPositionBuffer->GetUnorderedAccess());
effect.SetOutPutUAVByName("g_UpdatedPosition", m_pUpdatedPositionBuffer->GetUnorderedAccess());
for (int i = 0; i < m_PBFParams.maxSolverIterations; ++i)
{
//calc lagrange multiplier
effect.SetCalcLagrangeMultiplierState();
effect.Apply(deviceContext);
deviceContext->Dispatch(m_BlockNums, 1, 1);
//calc Displacement
effect.SetCalcDisplacementState();
effect.Apply(deviceContext);
deviceContext->Dispatch(m_BlockNums, 1, 1);
//add deltapos
effect.SetADDDeltaPositionState();
effect.Apply(deviceContext);
deviceContext->Dispatch(m_BlockNums, 1, 1);
//solver contacts
effect.SetSolverContactState();
effect.Apply(deviceContext);
deviceContext->Dispatch(m_BlockNums, 1, 1);
m_pSortedNewPostionBuffer->UpdataBufferGPU(deviceContext, m_pUpdatedPositionBuffer->GetBuffer(),m_ParticleNums);
}
}