Unity Forward+ FPTL Light Culling Explained (Part 1)

Preface

FPTL stands for Fine Pruned Tiled Light Lists. Ever since I started working with HDRP, I have wanted to fully understand the FPTL implementation. However, there is almost no analysis of HDRP's algorithms to be found, in Chinese or English; most coverage amounts to a few hasty sentences. Hence this (admittedly rough) write-up.

An Overview of the FPTL Pipeline

The CPU side prepares the light data:
Use the HDProcessedVisibleLightsBuilder jobs to convert each VisibleLight into LightRenderData (m_LightData in HDLightRenderDatabase); the main goal is to sort the light data into a more compact layout that is friendlier to GPU traversal.
Use the HDGpuLightsBuilder jobs to convert LightRenderData into LightData (the data actually consumed during rendering) plus the culling data (lightBounds, lightVolumes).
Finally, push the light data to the GPU.

The GPU rendering flow:
Generate the depth pre-pass texture.
Clear the LightList (generally triggered by a resolution change).
Compute each light's screen-space bounds (GenerateLightsScreenSpaceAABBs).
Build the big-tile light list (the optional BigTilePrepass).
Build the light lists for the finer tiles (BuildPerTileLightList).
Build the voxel light list (transparent objects receive lighting through the VoxelLightList).

    //HDRenderPipeline.LightLoop.cs

    BuildGPULightListOutput BuildGPULightList(
        RenderGraph renderGraph,
        HDCamera hdCamera,
        TileAndClusterData tileAndClusterData,
        int totalLightCount,
        ref ShaderVariablesLightList constantBuffer,
        TextureHandle depthStencilBuffer,
        TextureHandle stencilBufferCopy,
        GBufferOutput gBuffer)
    {
        using (var builder = renderGraph.AddRenderPass<BuildGPULightListPassData>("Build Light List", out var passData, ProfilingSampler.Get(HDProfileId.BuildLightList)))
        {
            builder.EnableAsyncCompute(hdCamera.frameSettings.BuildLightListRunsAsync());

            PrepareBuildGPULightListPassData(renderGraph, builder, hdCamera, tileAndClusterData, ref constantBuffer, totalLightCount, depthStencilBuffer, stencilBufferCopy, gBuffer, passData);

            builder.SetRenderFunc(
                (BuildGPULightListPassData data, RenderGraphContext context) =>
                {
                    bool tileFlagsWritten = false;

                    ClearLightLists(data, context.cmd);
                    GenerateLightsScreenSpaceAABBs(data, context.cmd);
                    BigTilePrepass(data, context.cmd);
                    BuildPerTileLightList(data, ref tileFlagsWritten, context.cmd);
                    VoxelLightListGeneration(data, context.cmd);

                    BuildDispatchIndirectArguments(data, tileFlagsWritten, context.cmd);
                });

            return passData.output;
        }
    }

From HDAdditionalLightData (front end) to HDLightRenderDatabase (back end)

When a Light is created, a MonoBehaviour named HDAdditionalLightData is automatically attached to it; this is implemented inside the Light's Editor.
By inheriting from LightEditor and adding the CustomEditorForRenderPipeline attribute, the built-in light editor is switched to the pipeline-specific light editor when a different render pipeline is active.

    //HDLightEditor.cs

    [CanEditMultipleObjects]
    [CustomEditorForRenderPipeline(typeof(Light), typeof(HDRenderPipelineAsset))]
    sealed partial class HDLightEditor : LightEditor
    {
        ...
        protected override void OnEnable()
        {
            base.OnEnable();

            // Auto-attach logic
            m_AdditionalLightDatas = CoreEditorUtils.GetAdditionalData<HDAdditionalLightData>(targets, HDAdditionalLightData.InitDefaultHDAdditionalLightData);
            m_SerializedHDLight = new SerializedHDLight(m_AdditionalLightDatas, settings);

            // Update emissive mesh and light intensity when undo/redo
            Undo.undoRedoPerformed += OnUndoRedo;

            HDLightUI.RegisterEditor(this);
        }
        ...
    }

When a light is created, HDAdditionalLightData (the MonoBehaviour auto-attached at Light creation) needs to create an HDLightRenderEntity (a handle used for addressing) inside HDLightRenderDatabase.
As you can see, HDLightRenderDatabase is the light database used to index light data conveniently (and, just as easily, confusingly); it follows the singleton pattern.
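The two-level indirection is easy to get lost in, so here is a minimal sketch of the pattern (my own simplified illustration, not the actual HDRP code; MyLightData and MiniDatabase are made-up names): a stable handle index maps through an EntityInfo to a dataIndex into densely packed arrays, so the packed data can be compacted with swap-back deletes without invalidating outstanding handles.

    using System.Collections.Generic;

    // Hypothetical stand-ins for illustration only:
    struct MyLightData { public float intensity; }
    struct Entity { public int entityIndex; }       // stable handle; its value never changes
    struct EntityInfo { public int dataIndex; }     // redirects into the packed array

    class MiniDatabase
    {
        List<EntityInfo> m_Entities = new List<EntityInfo>(); // indexed by entityIndex
        List<MyLightData> m_Data = new List<MyLightData>();   // densely packed, GPU-friendly
        List<int> m_OwnerEntity = new List<int>();            // dataIndex -> entityIndex

        public Entity Create(MyLightData d)
        {
            m_Entities.Add(new EntityInfo { dataIndex = m_Data.Count });
            m_Data.Add(d);
            m_OwnerEntity.Add(m_Entities.Count - 1);
            return new Entity { entityIndex = m_Entities.Count - 1 };
        }

        // Mirrors the swap-back in DestroyEntity/RemoveAtSwapBackArrays: fill the hole
        // with the last packed element and patch the moved element's handle.
        // (Free-list reuse of entityIndex slots is omitted for brevity.)
        public void Destroy(Entity e)
        {
            int hole = m_Entities[e.entityIndex].dataIndex;
            int last = m_Data.Count - 1;
            m_Data[hole] = m_Data[last];
            int movedEntity = m_OwnerEntity[last];
            m_OwnerEntity[hole] = movedEntity;
            m_Entities[movedEntity] = new EntityInfo { dataIndex = hole };
            m_Data.RemoveAt(last);
            m_OwnerEntity.RemoveAt(last);
        }
    }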

        //HDAdditionalLightData.cs
        internal void CreateHDLightRenderEntity(bool autoDestroy = false)
        {
            if (!this.lightEntity.valid)
            {
                HDLightRenderDatabase lightEntities = HDLightRenderDatabase.instance;
                this.lightEntity = lightEntities.CreateEntity(autoDestroy);
                lightEntities.AttachGameObjectData(this.lightEntity, legacyLight.GetInstanceID(), this, legacyLight.gameObject);
            }

            UpdateRenderEntity();
        }

        void OnEnable()
        {
            ...
            CreateHDLightRenderEntity();
        }
    //HDLightRenderDatabase.cs
    //Light rendering entity. This struct acts as a handle to set / get light render information into the database.
    internal struct HDLightRenderEntity
    {
        public int entityIndex;
        public static readonly HDLightRenderEntity Invalid = new HDLightRenderEntity() { entityIndex = HDLightRenderDatabase.InvalidDataIndex };
        public bool valid { get { return entityIndex != HDLightRenderDatabase.InvalidDataIndex; } }
    }
    //HDLightRenderDatabase.cs
    internal partial class HDLightRenderDatabase
    {
        ....
        static public HDLightRenderDatabase instance
        {
            get
            {
                if (s_Instance == null)
                    s_Instance = new HDLightRenderDatabase();
                return s_Instance;
            }
        }
        ...
    }

When light-related data is modified, the lightEntity handle is used for addressing, and the HDLightRenderData stored in HDLightRenderDatabase is modified through it.

    //HDAdditionalLightData.cs
    //UpdateRenderEntity follows the same pattern
    public void SetAreaLightSize(Vector2 size)
    {
        ...
        if (lightEntity.valid)
        {
            ref HDLightRenderData lightRenderData = ref HDLightRenderDatabase.instance.EditLightDataAsRef(lightEntity);
            lightRenderData.shapeWidth = m_ShapeWidth;
            lightRenderData.shapeHeight = m_ShapeHeight;
        }
        ...
    }
        //HDLightRenderDatabase.cs
        ....

        //Gets and edits a reference. Must be not called during rendering pipeline, only during game object modification.
        public ref HDLightRenderData EditLightDataAsRef(in HDLightRenderEntity entity) => ref EditLightDataAsRef(m_LightEntities[entity.entityIndex].dataIndex);

        //Gets and edits a reference. Must be not called during rendering pipeline, only during game object modification.
        public ref HDLightRenderData EditLightDataAsRef(int dataIndex)
        {
            if (dataIndex >= m_LightCount)
                throw new Exception("Entity passed in is out of bounds. Index requested " + dataIndex + " and maximum length is " + m_LightCount);

            unsafe
            {
                HDLightRenderData* data = (HDLightRenderData*)m_LightData.GetUnsafePtr<HDLightRenderData>() + dataIndex;
                return ref UnsafeUtility.AsRef<HDLightRenderData>(data);
            }
        }
        ...

Likewise, deleting a light must also destroy the corresponding lightEntity.

    //HDAdditionalLightData.cs
    void OnDestroy()
    {
        ...
        DestroyHDLightRenderEntity();
    }
    internal void DestroyHDLightRenderEntity()
    {
        if (!lightEntity.valid)
            return;

        HDLightRenderDatabase.instance.DestroyEntity(lightEntity);
        lightEntity = HDLightRenderEntity.Invalid;
    }
    //HDLightRenderDatabase.cs
    public void DestroyEntity(HDLightRenderEntity lightEntity)
    {
        Assert.IsTrue(IsValid(lightEntity));

        m_FreeIndices.Enqueue(lightEntity.entityIndex);
        LightEntityInfo entityData = m_LightEntities[lightEntity.entityIndex];
        m_LightsToEntityItem.Remove(entityData.lightInstanceID);

        if (m_HDAdditionalLightData[entityData.dataIndex] != null)
            --m_AttachedGameObjects;

        RemoveAtSwapBackArrays(entityData.dataIndex);

        if (m_LightCount == 0)
        {
            DeleteArrays();
        }
        else
        {
            HDLightRenderEntity entityToUpdate = m_OwnerEntity[entityData.dataIndex];
            LightEntityInfo dataToUpdate = m_LightEntities[entityToUpdate.entityIndex];
            dataToUpdate.dataIndex = entityData.dataIndex;
            m_LightEntities[entityToUpdate.entityIndex] = dataToUpdate;
            if (dataToUpdate.lightInstanceID != entityData.lightInstanceID)
                m_LightsToEntityItem[dataToUpdate.lightInstanceID] = dataToUpdate;
        }
    }

HDLightRenderData is the extended light data type built on top of the built-in Light; it is also the most fundamental data stored in HDLightRenderDatabase.

At render time, however, we usually only get the VisibleLights array of CullingResults, which means we can only obtain the built-in Light objects.
So we have to call TryGetComponent<HDAdditionalLightData> on each Light object (which is pretty outrageous).

        //HDProcessedVisibleLightsBuilder.LightLoop.cs

        private void BuildVisibleLightEntities(in CullingResults cullResults)
        {
            ...
            //They know it's bullshit and still don't change it; this TODO has been around forever
            //TODO: this should be accelerated by a c++ API
            var defaultEntity = HDLightRenderDatabase.instance.GetDefaultLightEntity();
            for (int i = 0; i < cullResults.visibleLights.Length; ++i)
            {
                Light light = cullResults.visibleLights[i].light;
                int dataIndex = HDLightRenderDatabase.instance.FindEntityDataIndex(light);
                if (dataIndex == HDLightRenderDatabase.InvalidDataIndex)
                {
                    
                    //Shuriken lights bullshit: this happens because shuriken lights dont have the HDAdditionalLightData OnEnabled.
                    //Because of this, we have to forcefully create a light render entity on the rendering side. Horrible!!!
                    if (light.TryGetComponent<HDAdditionalLightData>(out var hdAdditionalLightData))
                    {
                        if (!hdAdditionalLightData.lightEntity.valid)
                            hdAdditionalLightData.CreateHDLightRenderEntity(autoDestroy: true);
                    }
                    else
                        dataIndex = HDLightRenderDatabase.instance.GetEntityDataIndex(defaultEntity);
                }

                m_VisibleLightEntityDataIndices[i] = dataIndex;
                m_VisibleLightBakingOutput[i] = light.bakingOutput;
                m_VisibleLightShadowCasterMode[i] = light.lightShadowCasterMode;
                m_VisibleLightShadows[i] = light.shadows;
            }
        }

HDProcessedVisibleLightsBuilder and HDGpuLightsBuilder

HDProcessedVisibleLightsBuilder

HDProcessedVisibleLightsBuilder is, as the name suggests, the builder that processes VisibleLight entries (it follows the builder pattern).
Since, as mentioned above, the pipeline's CullingResults only gives us Light objects,
HDProcessedVisibleLightsBuilder has to preprocess the lights (fetch their data and sort them) in order to obtain the HDLightRenderData that corresponds to each visible light.

        //LightingLoop.cs

        // Compute data that will be used during the light loop for a particular light.
        void PreprocessVisibleLights(CommandBuffer cmd, HDCamera hdCamera, in CullingResults cullResults, DebugDisplaySettings debugDisplaySettings, in AOVRequestData aovRequest)
        {
            using (new ProfilingScope(cmd, ProfilingSampler.Get(HDProfileId.ProcessVisibleLights)))
            {
                m_ProcessedLightsBuilder.Build(
                    hdCamera,
                    cullResults,
                    m_ShadowManager,
                    m_ShadowInitParameters,
                    aovRequest,
                    lightLoopSettings,
                    m_CurrentDebugDisplaySettings);
                
                ...
            }
        }

BuildVisibleLightEntities is the function shown earlier that calls TryGetComponent on each Light object to obtain its lightEntity, so the code is not repeated here.

    //HDProcessedVisibleLightsBuilder.cs

    //Builds sorted HDProcessedVisibleLight structures.
    public void Build(
        HDCamera hdCamera,
        in CullingResults cullingResult,
        HDShadowManager shadowManager,
        in HDShadowInitParameters inShadowInitParameters,
        in AOVRequestData aovRequestData,
        in GlobalLightLoopSettings lightLoopSettings,
        DebugDisplaySettings debugDisplaySettings)
    {
        BuildVisibleLightEntities(cullingResult);

        if (m_Size == 0)
            return;

        FilterVisibleLightsByAOV(aovRequestData);
        StartProcessVisibleLightJob(hdCamera, cullingResult.visibleLights, lightLoopSettings, debugDisplaySettings);
        CompleteProcessVisibleLightJob();
        SortLightKeys();
        ProcessShadows(hdCamera, shadowManager, inShadowInitParameters, cullingResult);
    }

The Build function schedules a job, StartProcessVisibleLightJob, which classifies and counts the different light types and sorts them by importance.
m_ProcessVisibleLightCounts is the array of per-type counters (Directional, Punctual, Area, Shadow, BakedShadow).
m_ProcessedLightVolumeType holds each VisibleLight's LightVolumeType.
m_ProcessedEntities holds each VisibleLight's HDProcessedVisibleLight.
m_SortKeys is the array later used for the importance sort.

    //HDProcessedVisibleLightsBuilder.Jobs.cs
    ...
    #region output processed lights
    [WriteOnly]
    public NativeArray<int> processedVisibleLightCountsPtr;
    [WriteOnly]
    public NativeArray<LightVolumeType> processedLightVolumeType;
    [WriteOnly]
    public NativeArray<HDProcessedVisibleLight> processedEntities;
    [WriteOnly]
    [NativeDisableContainerSafetyRestriction]
    public NativeArray<uint> sortKeys;
    [WriteOnly]
    [NativeDisableContainerSafetyRestriction]
    public NativeArray<int> shadowLightsDataIndices;
    #endregion

    ...
    public void StartProcessVisibleLightJob(
        HDCamera hdCamera,
        NativeArray<VisibleLight> visibleLights,
        in GlobalLightLoopSettings lightLoopSettings,
        DebugDisplaySettings debugDisplaySettings)
    {
        if (m_Size == 0)
            return;

        var lightEntityCollection = HDLightRenderDatabase.instance;
        var processVisibleLightJob = new ProcessVisibleLightJob()
        {
            //Parameters.
            ....
            //render light entities.
            lightData = lightEntityCollection.lightData,

            //data of all visible light entities.
            visibleLights = visibleLights,
            ....

            //Output processed lights.
            processedVisibleLightCountsPtr = m_ProcessVisibleLightCounts,
            processedLightVolumeType = m_ProcessedLightVolumeType,
            processedEntities = m_ProcessedEntities,
            sortKeys = m_SortKeys,
            shadowLightsDataIndices = m_ShadowLightsDataIndices
        };

        m_ProcessVisibleLightJobHandle = processVisibleLightJob.Schedule(m_Size, 32);
    }

HDProcessedVisibleLight is the intermediate carrier between VisibleLight and LightData.
It records the index of the lightEntity within HDLightRenderDatabase,
the GPULightType used at render time, and the corresponding HDLightType.
Why keep HDLightType at all? Mainly because HDRP registers area lights under the Point light type so they survive culling (and stay in cullingResult.visibleLights): in the built-in pipeline an area light can only be baked and cannot be set to realtime.
The remaining fields are not needed yet, so we will not discuss them.

    public enum HDLightType
    {
        /// <summary>Spot Light. Complete this type by setting the SpotLightShape too.</summary>
        Spot = LightType.Spot,
        /// <summary>Directional Light.</summary>
        Directional = LightType.Directional,
        /// <summary>Point Light.</summary>
        Point = LightType.Point,
        /// <summary>Area Light. Complete this type by setting the AreaLightShape too.</summary>
        Area = LightType.Area,
    }

    enum GPULightType
    {
        Directional,
        Point,
        Spot,
        ProjectorPyramid,
        ProjectorBox,

        // AreaLight
        Tube, // Keep Line lights before Rectangle. This is needed because of a compiler bug (see LightLoop.hlsl)
        Rectangle,
        // Currently not supported in real time (just use for reference)
        Disc,
        // Sphere,
    };

    internal struct HDProcessedVisibleLight
    {
        public int dataIndex;
        public GPULightType gpuLightType;
        public HDLightType lightType;
        public float lightDistanceFade;
        public float lightVolumetricDistanceFade;
        public float distanceToCamera;
        public HDProcessedVisibleLightsBuilder.ShadowMapFlags shadowMapFlags;
        public bool isBakedShadowMask;
    }

LightVolumeType describes the shape of a light's volume, so that the compute shader can apply a different culling computation per shape.

    internal enum LightVolumeType
    {
        Cone,
        Sphere,
        Box,
        Count
    }

The naming of LightCategory is questionable in my opinion. It is called a light category, yet decals and volumetric fog end up in it too; that is because HDRP's decals and local volumetric fog also want to participate in the culling computation (both can generally describe their volume shape with a LightVolumeType).
After culling, the LightList buffer layout is also partitioned by LightCategory.

    internal enum LightCategory
    {
        Punctual,
        Area,
        Env,
        Decal,
        LocalVolumetricFog, // WARNING: Currently lightlistbuild.compute assumes Local Volumetric Fog is the last element in the LightCategory enum. Do not append new LightCategory types after LocalVolumetricFog. TODO: Fix .compute code.
        Count
    }

Back to ProcessVisibleLightJob: it processes cullingResult.visibleLights in parallel, deriving each light's lightCategory, gpuLightType, and lightVolumeType,
and packs lightCategory, gpuLightType, lightVolumeType, and the light index into a sort key. From PackLightSortKey you can see that directional lights get the highest importance.

        //HDProcessedVisibleLightsBuilder.Jobs.cs

        //Atomically increment the counter
        private int IncrementCounter(HDProcessedVisibleLightsBuilder.ProcessLightsCountSlots counterSlot)
        {
            int outputIndex = 0;
            unsafe
            {
                int* ptr = (int*)processedVisibleLightCountsPtr.GetUnsafePtr<int>() + (int)counterSlot;
                outputIndex = Interlocked.Increment(ref UnsafeUtility.AsRef<int>(ptr));
            }
            return outputIndex;
        }

        private int NextOutputIndex() => IncrementCounter(HDProcessedVisibleLightsBuilder.ProcessLightsCountSlots.ProcessedLights) - 1;

        //Defined in HDGpuLightsBuilder.cs
        public static uint PackLightSortKey(LightCategory lightCategory, GPULightType gpuLightType, LightVolumeType lightVolumeType, int lightIndex)
        {
            //We sort directional lights to be in the beginning of the list.
            //This ensures that we can access directional lights very easily after we sort them.
            uint isDirectionalMSB = gpuLightType == GPULightType.Directional ? 0u : 1u;
            uint sortKey = (uint)isDirectionalMSB << 31 | (uint)lightCategory << 27 | (uint)gpuLightType << 22 | (uint)lightVolumeType << 17 | (uint)lightIndex;
            return sortKey;
        }
        //Unpacks a sort key for a light
        public static void UnpackLightSortKey(uint sortKey, out LightCategory lightCategory, out GPULightType gpuLightType, out LightVolumeType lightVolumeType, out int lightIndex)
        {
            lightCategory = (LightCategory)((sortKey >> 27) & 0xF);
            gpuLightType = (GPULightType)((sortKey >> 22) & 0x1F);
            lightVolumeType = (LightVolumeType)((sortKey >> 17) & 0x1F);
            lightIndex = (int)(sortKey & 0xFFFF);
        }
        //End of HDGpuLightsBuilder.cs

        public void Execute(int index)
        {
        
            VisibleLight visibleLight = visibleLights[index];
            int dataIndex = visibleLightEntityDataIndices[index];
            LightBakingOutput bakingOutput = visibleLightBakingOutput[index];
            LightShadows shadows = visibleLightShadows[index];
            if (TrivialRejectLight(visibleLight, dataIndex))
                return;

            ref HDLightRenderData lightRenderData = ref GetLightData(dataIndex);

            ...
            //Guard against exceeding the on-screen light limits (Area, Punctual, Directional)
            if (!IncrementLightCounterAndTestLimit(lightCategory, gpuLightType))
                return;
            //Atomic increment
            int outputIndex = NextOutputIndex();

            sortKeys[outputIndex] = HDGpuLightsBuilder.PackLightSortKey(lightCategory, gpuLightType, lightVolumeType, index);

            processedLightVolumeType[index] = lightVolumeType;
            processedEntities[index] = new HDProcessedVisibleLight()
            {
                dataIndex = dataIndex,
                gpuLightType = gpuLightType,
                lightType = lightType,
                lightDistanceFade = lightDistanceFade,
                lightVolumetricDistanceFade = volumetricDistanceFade,
                distanceToCamera = distanceToCamera,
                shadowMapFlags = shadowMapFlags,
                isBakedShadowMask = isBakedShadowMaskLight
            };

            ...
        }

So after HDProcessedVisibleLightsBuilder schedules ProcessVisibleLightJob over visibleLights, we end up with the preprocessed HDProcessedVisibleLight and LightVolumeType per light, plus a sort-key array ordered by importance.
That sort-key array is the main addressing mechanism for light data in the subsequent CreateGpuLightDataJob
(via UnpackLightSortKey we recover each light's index into m_ProcessedLightVolumeType / m_ProcessedEntities).
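As a concrete illustration of the bit layout (my own worked example, using the enum values quoted above: LightCategory.Punctual = 0, GPULightType.Spot = 2, LightVolumeType.Cone = 0):

    //Worked example: a spot light (Punctual category, Cone volume) at visible-light index 5
    uint key = 1u << 31   // isDirectionalMSB: 1 because this is NOT a directional light
             | 0u << 27   // lightCategory   = Punctual (0), 4 bits
             | 2u << 22   // gpuLightType    = Spot (2),     5 bits
             | 0u << 17   // lightVolumeType = Cone (0),     5 bits
             | 5u;        // lightIndex      = 5
    // key == 0x80800005. Sorting the keys ascending therefore puts every
    // directional light (MSB == 0) at the front of the list; UnpackLightSortKey
    // then recovers lightIndex, which addresses m_ProcessedLightVolumeType / m_ProcessedEntities.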

HDGpuLightsBuilder

The preprocessing in HDProcessedVisibleLightsBuilder produced the sort keys and, for each VisibleLight, its HDProcessedVisibleLight and LightVolumeType.
The next step is a job that converts HDProcessedVisibleLight and LightVolumeType into the final render-time LightData / DirectionalLightData, plus the SFiniteLightBound and LightVolumeData used by the culling computation.

//LightLoop.cs
    void PrepareGPULightdata(CommandBuffer cmd, HDCamera hdCamera, CullingResults cullResults)
    {
        using (new ProfilingScope(cmd, ProfilingSampler.Get(HDProfileId.PrepareGPULightdata)))
        {
            // 2. Go through all lights, convert them to GPU format.
            // Simultaneously create data for culling (LightVolumeData and SFiniteLightBound)
            m_GpuLightsBuilder.Build(cmd, hdCamera, cullResults,
            m_ProcessedLightsBuilder,
            HDLightRenderDatabase.instance, m_ShadowInitParameters, m_CurrentDebugDisplaySettings);
            ...
        }
    }

//HDGpuLightsBuilder.LightLoop.cs
    public void Build(
        CommandBuffer cmd,
        HDCamera hdCamera,
        in CullingResults cullingResult,
        HDProcessedVisibleLightsBuilder visibleLights,
        HDLightRenderDatabase lightEntities,
        in HDShadowInitParameters shadowInitParams,
        DebugDisplaySettings debugDisplaySettings)
    {
        ...
        if (totalLightsCount > 0)
        {

            ...            
            StartCreateGpuLightDataJob(hdCamera, cullingResult, hdShadowSettings, visibleLights, lightEntities);
            CompleteGpuLightDataJob();



            CalculateAllLightDataTextureInfo(cmd, hdCamera, cullingResult, visibleLights, lightEntities, hdShadowSettings, shadowInitParams, debugDisplaySettings);
        }
    }

Execute shows that directional lights are converted directly to the GPU format (DirectionalLightData), while every other light goes through StoreAndConvertLightToGPUFormat.

    //HDGpuLightsBuilder.Jobs.cs

        #region output processed lights
        [WriteOnly]
        [NativeDisableContainerSafetyRestriction]
        public NativeArray<LightData> lights;
        [WriteOnly]
        [NativeDisableContainerSafetyRestriction]
        public NativeArray<DirectionalLightData> directionalLights;
        [WriteOnly]
        [NativeDisableContainerSafetyRestriction]
        public NativeArray<LightsPerView> lightsPerView;
        [WriteOnly]
        [NativeDisableContainerSafetyRestriction]
        public NativeArray<SFiniteLightBound> lightBounds;
        [WriteOnly]
        [NativeDisableContainerSafetyRestriction]
        public NativeArray<LightVolumeData> lightVolumes;
        [WriteOnly]
        [NativeDisableContainerSafetyRestriction]
        public NativeArray<int> gpuLightCounters;
        #endregion

        public void Execute(int index)
        {
            var sortKey = sortKeys[index];
            HDGpuLightsBuilder.UnpackLightSortKey(sortKey, out var lightCategory, out var gpuLightType, out var lightVolumeType, out var lightIndex);

            if (gpuLightType == GPULightType.Directional)
            {
                int outputIndex = index;
                ConvertDirectionalLightToGPUFormat(outputIndex, lightIndex, lightCategory, gpuLightType, lightVolumeType);
            }
            else
            {
                int outputIndex = index - directionalSortedLightCounts;
                StoreAndConvertLightToGPUFormat(outputIndex, lightIndex, lightCategory, gpuLightType, lightVolumeType);
            }
        }

        public void StartCreateGpuLightDataJob(
            HDCamera hdCamera,
            in CullingResults cullingResult,
            HDShadowSettings hdShadowSettings,
            HDProcessedVisibleLightsBuilder visibleLights,
            HDLightRenderDatabase lightEntities)
        {
            ...

            var createGpuLightDataJob = new CreateGpuLightDataJob()
            {
                //Parameters
                ....

                //outputs
                gpuLightCounters = m_LightTypeCounters,
                lights = m_Lights,
                directionalLights = m_DirectionalLights,
                lightsPerView = m_LightsPerView,
                lightBounds = m_LightBounds,
                lightVolumes = m_LightVolumes
            };

            m_CreateGpuLightDataJobHandle = createGpuLightDataJob.Schedule(visibleLights.sortedLightCounts, 32);
        }


StoreAndConvertLightToGPUFormat works much like the directional path, except that besides converting to the GPU format (LightData) it must also produce the SFiniteLightBound and LightVolumeData used for culling.
Note that LightCategory.Punctual and LightCategory.Area lights are counted here as well (used for iterating LightData when neither FPTL nor the cluster list is enabled).

            //HDGpuLightsBuilder.Jobs.cs
            private void ComputeLightVolumeDataAndBound(
                LightCategory lightCategory, GPULightType gpuLightType, LightVolumeType lightVolumeType,
                in VisibleLight light, in LightData lightData, in Vector3 lightDimensions, in Matrix4x4 worldToView, int outputIndex)
            {
                // Then Culling side
                var range = lightDimensions.z;
                var lightToWorld = light.localToWorldMatrix;
                Vector3 positionWS = lightData.positionRWS;
                Vector3 positionVS = worldToView.MultiplyPoint(positionWS);

                Vector3 xAxisVS = worldToView.MultiplyVector(lightToWorld.GetColumn(0));
                Vector3 yAxisVS = worldToView.MultiplyVector(lightToWorld.GetColumn(1));
                Vector3 zAxisVS = worldToView.MultiplyVector(lightToWorld.GetColumn(2));

                // Fill bounds
                var bound = new SFiniteLightBound();
                var lightVolumeData = new LightVolumeData();

                lightVolumeData.lightCategory = (uint)lightCategory;
                lightVolumeData.lightVolume = (uint)lightVolumeType;

                if (gpuLightType == GPULightType.Spot || gpuLightType == GPULightType.ProjectorPyramid)
                {
                    ...
                }
                else if (gpuLightType == GPULightType.Point)
                {
                    ...
                }
                else if (gpuLightType == GPULightType.Tube)
                {
                    ...
                }
                else if (gpuLightType == GPULightType.Rectangle)
                {
                    ...
                }
                else if (gpuLightType == GPULightType.ProjectorBox)
                {
                    ...
                }
                else if (gpuLightType == GPULightType.Disc)
                {
                    //not supported at real time at the moment
                }
                else
                {
                    Debug.Assert(false, "TODO: encountered an unknown GPULightType.");
                }


                lightBounds[outputIndex] = bound;
                lightVolumes[outputIndex] = lightVolumeData;
            }



            private void StoreAndConvertLightToGPUFormat(
                int outputIndex, int lightIndex,
                LightCategory lightCategory, GPULightType gpuLightType, LightVolumeType lightVolumeType)
            {
                var light = visibleLights[lightIndex];
                var processedEntity = processedEntities[lightIndex];
                var lightData = new LightData();
                ref HDLightRenderData lightRenderData = ref GetLightData(processedEntity.dataIndex);

                ConvertLightToGPUFormat(
                    lightCategory, gpuLightType, globalConfig,
                    visibleLightShadowCasterMode[lightIndex],
                    visibleLightBakingOutput[lightIndex],
                    light,
                    processedEntity,
                    lightRenderData,
                    out var lightDimensions,
                    ref lightData);

                for (int viewId = 0; viewId < viewCounts; ++viewId)
                {
                    var lightsPerViewContainer = lightsPerView[viewId];
                    ComputeLightVolumeDataAndBound(
                        lightCategory, gpuLightType, lightVolumeType,
                        light, lightData, lightDimensions, lightsPerViewContainer.worldToView, outputIndex + lightsPerViewContainer.boundsOffset);
                }

                if (useCameraRelativePosition)
                    lightData.positionRWS -= cameraPos;

                switch (lightCategory)
                {
                    case LightCategory.Punctual:
                        IncrementCounter(HDGpuLightsBuilder.GPULightTypeCountSlots.Punctual);
                        break;
                    case LightCategory.Area:
                        IncrementCounter(HDGpuLightsBuilder.GPULightTypeCountSlots.Area);
                        break;
                    default:
                        Debug.Assert(false, "TODO: encountered an unknown LightCategory.");
                        break;
                }

#if DEBUG
                if (outputIndex < 0 || outputIndex >= outputLightCounts)
                    throw new Exception("Trying to access an output index out of bounds. Output index is " + outputIndex + "and max length is " + outputLightCounts);
#endif
                lights[outputIndex] = lightData;
            }

SFiniteLightBound and LightVolumeData

SFiniteLightBound stores the light's bounding-box data (the axis orientations scaled by the extents, the center in view space, and the radius of the circumscribed sphere).
LightVolumeData mainly stores the culling data used when LightVolumeType is Box (boxInnerDist / boxInvRange), plus the culling data for Spot and ProjectorPyramid lights (cotan).

    [GenerateHLSL]
    struct SFiniteLightBound
    {
        public Vector3 boxAxisX; // Scaled by the extents (half-size)
        public Vector3 boxAxisY; // Scaled by the extents (half-size)
        public Vector3 boxAxisZ; // Scaled by the extents (half-size)
        public Vector3 center; // Center of the bounds (box) in camera space
        public float scaleXY; // Scale applied to the top of the box to turn it into a truncated pyramid (X = Y)
        public float radius; // Circumscribed sphere for the bounds (box)
    };

    [GenerateHLSL]
    struct LightVolumeData
    {
        public Vector3 lightPos; // Of light's "origin"
        public uint lightVolume; // Type index

        public Vector3 lightAxisX; // Normalized
        public uint lightCategory; // Category index

        public Vector3 lightAxisY; // Normalized
        public float radiusSq; // Cone and sphere: light range squared

        public Vector3 lightAxisZ; // Normalized
        public float cotan; // Cone: cotan of the aperture (half-angle)

        public Vector3 boxInnerDist; // Box: extents (half-size) of the inner box
        public uint featureFlags;

        public Vector3 boxInvRange; // Box: 1 / (OuterBoxExtents - InnerBoxExtents)
        public float unused2;
    };

Let's start with the simplest case, a point light, and walk through the whole culling flow; the flows for the other light types then differ only in the details of the math.

SFiniteLightBound / LightVolumeData for a Point Light

    private void ComputeLightVolumeDataAndBound(
        LightCategory lightCategory, GPULightType gpuLightType, LightVolumeType lightVolumeType,
        in VisibleLight light, in LightData lightData, in Vector3 lightDimensions, in Matrix4x4 worldToView, int outputIndex)
    {
        ...
        else if (gpuLightType == GPULightType.Point)
        {
            // Construct a view-space axis-aligned bounding cube around the bounding sphere.
            // This allows us to utilize the same polygon clipping technique for all lights.
            // Non-axis-aligned vectors may result in a larger screen-space AABB.
            Vector3 vx = new Vector3(1, 0, 0);
            Vector3 vy = new Vector3(0, 1, 0);
            Vector3 vz = new Vector3(0, 0, 1);

            bound.center = positionVS;
            bound.boxAxisX = vx * range;
            bound.boxAxisY = vy * range;
            bound.boxAxisZ = vz * range;
            bound.scaleXY = 1.0f;
            bound.radius = range;

            // fill up ldata
            lightVolumeData.lightAxisX = vx;
            lightVolumeData.lightAxisY = vy;
            lightVolumeData.lightAxisZ = vz;
            lightVolumeData.lightPos = bound.center;
            lightVolumeData.radiusSq = range * range;
            lightVolumeData.featureFlags = (uint)LightFeatureFlags.Punctual;
        }
        ...
    }

GenerateLightsScreenSpaceAABBs

In the FPTL overview earlier:
Step one, the pre-pass produces the depth texture; any deferred or forward pipeline provides this, so it is not covered here.
Step two, clearing the LightList, is generally only triggered by a resolution change, so we skip it as well.
Step three is the focus of this article: using the SFiniteLightBound data produced by the jobs above, Scrbound.compute computes each light's screen-space AABB.

Scrbound Dispatch

Scrbound is dispatched with 64 threads per group, 4 threads per light (so 16 lights per group).

        //HDRenderPipeline.LightLoop.cs

        static void GenerateLightsScreenSpaceAABBs(BuildGPULightListPassData data, CommandBuffer cmd)
        {
            if (data.totalLightCount != 0)
            {
                using (new ProfilingScope(cmd, ProfilingSampler.Get(HDProfileId.GenerateLightAABBs)))
                {
                    // With XR single-pass, we have one set of light bounds per view to iterate over (bounds are in view space for each view)
                    cmd.SetComputeBufferParam(data.screenSpaceAABBShader, data.screenSpaceAABBKernel, HDShaderIDs.g_data, data.convexBoundsBuffer);
                    cmd.SetComputeBufferParam(data.screenSpaceAABBShader, data.screenSpaceAABBKernel, HDShaderIDs.outputData, data.debugDataReadBackBuffer);
                    cmd.SetComputeBufferParam(data.screenSpaceAABBShader, data.screenSpaceAABBKernel, HDShaderIDs.g_vBoundsBuffer, data.AABBBoundsBuffer);

                    ConstantBuffer.Push(cmd, data.lightListCB, data.screenSpaceAABBShader, HDShaderIDs._ShaderVariablesLightList);

                    const int threadsPerLight = 4;  // Shader: THREADS_PER_LIGHT (4)
                    const int threadsPerGroup = 64; // Shader: THREADS_PER_GROUP (64)

                    int groupCount = HDUtils.DivRoundUp(data.totalLightCount * threadsPerLight, threadsPerGroup);

                    cmd.DispatchCompute(data.screenSpaceAABBShader, data.screenSpaceAABBKernel, groupCount, data.viewCount, 1);
                }
            }
        }
//Scrbound.compute
#define MAX_CLIP_VERTS    (10)
#define NUM_VERTS         (8)
#define NUM_FACES         (6)
#define NUM_PLANES        (6)
#define THREADS_PER_GROUP (64)
#define THREADS_PER_LIGHT (4) // Set to 1 for debugging
#define LIGHTS_PER_GROUP  (THREADS_PER_GROUP / THREADS_PER_LIGHT)
#define VERTS_PER_GROUP   (NUM_VERTS * LIGHTS_PER_GROUP)
#define VERTS_PER_THREAD  (NUM_VERTS / THREADS_PER_LIGHT)
#define FACES_PER_THREAD  DIV_ROUND_UP(NUM_FACES, THREADS_PER_LIGHT)

Classifying each vertex of the light's bounding box as inside or outside the view frustum

1. First fetch the SFiniteLightBound data (the light's bounding box) computed just before.

[numthreads(THREADS_PER_GROUP, 1, 1)]
void main(uint threadID : SV_GroupIndex, uint3 groupID : SV_GroupID)
{
    const uint t        = threadID;
    const uint g        = groupID.x;
    const uint eyeIndex = groupID.y; // Currently, can only be 0 or 1

    const uint intraGroupLightIndex = t / THREADS_PER_LIGHT;
    const uint globalLightIndex     = g * LIGHTS_PER_GROUP + intraGroupLightIndex;
    const uint baseVertexOffset     = intraGroupLightIndex * NUM_VERTS;

    const uint eyeAdjustedInputOffset = GenerateLightCullDataIndex(globalLightIndex, g_iNrVisibLights, eyeIndex);
    const SFiniteLightBound  cullData = g_data[eyeAdjustedInputOffset];

    const float4x4 projMat    = g_mProjectionArr[eyeIndex];
    const float4x4 invProjMat = g_mInvProjectionArr[eyeIndex];

    //gs_CullClipFaceMasks is initialized to 0, i.e. no face is flagged for processing yet
    if (t % THREADS_PER_LIGHT == 0)
    {
        gs_CullClipFaceMasks[intraGroupLightIndex] = 0;
    }

    // Bounding frustum.
    const float3 rbpC  = cullData.center.xyz;   // View-space
    const float3 rbpX  = cullData.boxAxisX.xyz; // Pre-scaled
    const float3 rbpY  = cullData.boxAxisY.xyz; // Pre-scaled
    const float3 rbpZ  = cullData.boxAxisZ.xyz; // Pre-scaled
    const float scale  = cullData.scaleXY;      // scale.x = scale.y
    // Bounding sphere.
    const float radius = cullData.radius;
    ...

    
}

2. Compute the vertices of the bounding box, transform them from view space into homogeneous clip coordinates,
and test whether each vertex lies outside the view volume (inside means 0 <= x <= w, 0 <= y <= w, 0 <= z <= w).
If behindMask is 0, the vertex is inside the view volume, and the light's ndcAaBbMinPt / ndcAaBbMaxPt are updated directly.
If behindMask is non-zero, the vertex is outside the view volume; the faces adjacent to that vertex are recorded so that the intersections of those faces with the view volume can be computed later, and ndcAaBbMinPt / ndcAaBbMaxPt updated from them.
After the vertex loop, an atomic InterlockedOr into the LDS array gs_CullClipFaceMasks merges the masks, yielding every face adjacent to an outside vertex of this light (i.e. all the faces that need further processing).
Note intraGroupLightIndex: with 4 threads per light, it identifies which light a thread is working on, and the InterlockedOr here merges the masks of the 4 threads sharing the same intraGroupLightIndex, as sketched below.
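As a small self-contained sketch of the classification (C#, my own illustration; the bit layout matches the loop below: bits 0/1 for x against the 0/w planes, bits 2/3 for y, bits 4/5 for z):

    static uint ComputeBehindMask(float x, float y, float z, float w)
    {
        float[] c = { x, y, z };
        uint behindMask = 0;
        for (int j = 0; j < 3; j++)
        {
            behindMask |= (c[j] < 0 ? 1u : 0u) << (2 * j + 0); // behind the '0' plane
            behindMask |= (c[j] > w ? 1u : 0u) << (2 * j + 1); // behind the 'w' plane
        }
        return behindMask;
    }
    // e.g. ComputeBehindMask(-0.2f, 0.3f, 1.5f, 1.0f) == 0b100001:
    // the vertex is outside the left plane (bit 0) and beyond the far plane (bit 5),
    // so GetFaceMaskOfVertex(v) flags its three adjacent faces for further processing.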

//VERTS_PER_THREAD = 8/4 = 2: each thread handles two vertices, so the 4 threads of one light cover all 8 vertices
    for (i = 0; i < VERTS_PER_THREAD; i++)
    {
        uint v = i * THREADS_PER_LIGHT + t % THREADS_PER_LIGHT;

        // rbpVerts[0] = rbpC - rbpX * scale - rbpY * scale - rbpZ; (-s, -s, -1)
        // rbpVerts[1] = rbpC + rbpX * scale - rbpY * scale - rbpZ; (+s, -s, -1)
        // rbpVerts[2] = rbpC - rbpX * scale + rbpY * scale - rbpZ; (-s, +s, -1)
        // rbpVerts[3] = rbpC + rbpX * scale + rbpY * scale - rbpZ; (+s, +s, -1)
        // rbpVerts[4] = rbpC - rbpX         - rbpY         + rbpZ; (-1, -1, +1)
        // rbpVerts[5] = rbpC + rbpX         - rbpY         + rbpZ; (+1, -1, +1)
        // rbpVerts[6] = rbpC - rbpX         + rbpY         + rbpZ; (-1, +1, +1)
        // rbpVerts[7] = rbpC + rbpX         + rbpY         + rbpZ; (+1, +1, +1)

        float3 m = GenerateVertexOfStandardCube(v);
        m.xy *= ((v & 4) == 0) ? scale : 1; // X, Y in [-scale, scale]

        float3 rbpVertVS = rbpC + m.x * rbpX + m.y * rbpY + m.z * rbpZ;
        // Avoid generating (w = 0).
        rbpVertVS.z = (abs(rbpVertVS.z) > FLT_MIN) ? rbpVertVS.z : FLT_MIN;

        float4 hapVert = mul(projMat, float4(rbpVertVS, 1));

        // Warning: the W component may be negative.
        // Flipping the -W pyramid by negating all coordinates is incorrect
        // and will break both classification and clipping.
        // For the orthographic projection, (w = 1).

        // Transform the X and Y components: [-w, w] -> [0, w].
        hapVert.xy = 0.5 * hapVert.xy + (0.5 * hapVert.w);

        // For each vertex, we must determine whether it is within the bounds.
        // For culling and clipping, we must know, per culling plane, whether the vertex
        // is in the positive or the negative half-space.
        uint behindMask = 0; // Initially in front

        // Consider the vertex to be inside the view volume if:
        // 0 <= x <= w
        // 0 <= y <= w   <-- include boundary points to avoid clipping them later
        // 0 <= z <= w
        // w is always valid
        // TODO: epsilon for numerical robustness?
        
        //NUM_PLANES (6): loop over the 3 coordinate axes; each axis is tested against its pair of planes ('0' and 'w')
        for (uint j = 0; j < (NUM_PLANES / 2); j++)
        {
            float w = hapVert.w;

            behindMask |= (hapVert[j] < 0 ? 1 : 0) << (2 * j + 0); // Planes crossing '0'
            behindMask |= (hapVert[j] > w ? 1 : 0) << (2 * j + 1); // Planes crossing 'w'
        }

        if (behindMask == 0) // Inside?
        {
            // Clamp to the bounds in case of numerical errors (may still generate -0).
            float3 rapVertNDC = saturate(hapVert.xyz * rcp(hapVert.w));

            ndcAaBbMinPt = min(ndcAaBbMinPt, float4(rapVertNDC, rbpVertVS.z));
            ndcAaBbMaxPt = max(ndcAaBbMaxPt, float4(rapVertNDC, rbpVertVS.z));
        }
        else // Outside
        {
            // Mark all the faces of the bounding frustum associated with this vertex.
            cullClipFaceMask |= GetFaceMaskOfVertex(v);
        }

        gs_HapVertsX[baseVertexOffset + v]          = hapVert.x;
        gs_HapVertsY[baseVertexOffset + v]          = hapVert.y;
        gs_HapVertsZ[baseVertexOffset + v]          = hapVert.z;
        gs_HapVertsW[baseVertexOffset + v]          = hapVert.w;
        gs_BehindMasksOfVerts[baseVertexOffset + v] = behindMask;
    }

    InterlockedOr(gs_CullClipFaceMasks[intraGroupLightIndex], cullClipFaceMask);
    GroupMemoryBarrierWithGroupSync();

Testing whether the view volume's eight corners lie inside the light volume

If the thread-synchronized cullClipFaceMask above is non-zero, i.e. some of the light volume's vertices lie outside the view volume, we then test whether the view frustum's eight corners lie inside the light volume's bounds.
Any corner that is inside is treated as a vertex of the frustum/light-volume intersection as well, and is used to update ndcAaBbMinPt / ndcAaBbMaxPt.

    // (2) Test the corners of the view volume.
    if (cullClipFaceMask != 0)
    {
        //1. Rebuild the light-space matrix from the view-space light center (rbpC) and the axes rbpX, rbpY, rbpZ (cullData.boxAxisX/Y/Z) computed earlier
        //2. GenerateVertexOfStandardCube produces coordinates in [-1,1]; z must be remapped via *0.5+0.5 to [0,1] before the vertices count as the frustum's eight corners in projection space
        //3. To move the frustum corners into light space, first transform them back to view space, then apply the light-space matrix

        // The light is partially outside the view volume.
        // Therefore, some of the corners of the view volume may be inside the light volume.
        // We perform aggressive culling, so we must make sure they are accounted for.
        // The light volume is a special type of cuboid - a right frustum.
        // We can exploit this fact by building a light-space projection matrix.
        // P_v = T * (R * S) * P_l
        // P_l = (R * S)^{-1} * T^{-1} * P_v
        float4x4 invTranslateToLightSpace      = Translation4x4(-rbpC);
        float4x4 invRotateAndScaleInLightSpace = Homogenize3x3(Invert3x3(ScaledRotation3x3(rbpX, rbpY, rbpZ)));
        // TODO: avoid full inversion by using unit vectors and passing magnitudes explicitly.

        // This (orthographic) projection matrix maps a view-space point to a light-space [-1, 1]^3 cube.
        float4x4 lightSpaceMatrix = mul(invRotateAndScaleInLightSpace, invTranslateToLightSpace);

        
        //Only Spot and ProjectorPyramid lights have a perspective light space;
        //all other lights (Point, Rectangle, Tube, ProjectorBox) have scale == 1.
        //If you only care about the point-light path you can skip this block.

        if (scale != 1) // Perspective light space?
        {
            //(bound.scaleXY = squeeze ? 0.01f : 1.0f;) here s = 0.01
            //e = -1.020202: the eye of the constructed perspective projection sits at z = e in light space,
            //so translating by -e maps the cube's z range [-1, 1] to [n, f]
            //n = 0.020202, f = g = 2.020202, a (aspect) = 1
            //PerspectiveProjection4x4 builds the matrix that turns the orthographic light-space mapping into a perspective one


            // Compute the parameters of the perspective projection.
            float s = scale;
            float e = -1 - 2 * (s * rcp(1 - s)); // Signed distance from the origin to the eye
            float n = -e - 1;                    // Distance from the eye to the near plane
            float f = -e + 1;                    // Distance from the eye to the far plane
            float g = f;                         // Distance from the eye to the projection plane

            float4x4 invTranslateEye = Translation4x4(float3(0, 0, -e));
            float4x4 perspProjMatrix = PerspectiveProjection4x4(1, g, n, f);

            lightSpaceMatrix = mul(mul(perspProjMatrix, invTranslateEye), lightSpaceMatrix);
        }

        for (i = 0; i < VERTS_PER_THREAD; i++)
        {
            uint v = i * THREADS_PER_LIGHT + t % THREADS_PER_LIGHT;

            float3 rapVertCS = GenerateVertexOfStandardCube(v);
            rapVertCS.z = rapVertCS.z * 0.5 + 0.5; // View's projection matrix MUST map Z to [0, 1]

            float4 hbpVertVS = mul(invProjMat, float4(rapVertCS, 1)); // Clip to view space
            float4 hapVertLS = mul(lightSpaceMatrix, hbpVertVS);      // View to light space

            // Consider the vertex to be inside the light volume if:
            // -w < x < w
            // -w < y < w   <-- exclude boundary points, as we will not clip using these vertices
            // -w < z < w   <-- assume that Z-precision is not very important here
            // 0  < w
            // TODO: epsilon for numerical robustness?

            bool inside = Max3(abs(hapVertLS.x), abs(hapVertLS.y), abs(hapVertLS.z)) < hapVertLS.w;

            //If a view-frustum corner lies inside the light volume, treat it as a vertex of the
            //frustum/light-volume intersection too, and use it to update ndcAaBbMinPt / ndcAaBbMaxPt.
            if (inside)
            {
                float3 rapVertNDC = float3(rapVertCS.xy * 0.5 + 0.5, rapVertCS.z);
                float  rbpVertVSz = hbpVertVS.z * rcp(hbpVertVS.w);

                ndcAaBbMinPt = min(ndcAaBbMinPt, float4(rapVertNDC, rbpVertVSz));
                ndcAaBbMaxPt = max(ndcAaBbMaxPt, float4(rapVertNDC, rbpVertVSz));
            }
        }
    }
    
    InterlockedAnd(gs_CullClipFaceMasks[intraGroupLightIndex], cullClipFaceMask);

    GroupMemoryBarrierWithGroupSync();

    cullClipFaceMask = gs_CullClipFaceMasks[intraGroupLightIndex];

Finding the intersecting faces with cullClipFaceMask

In step one, the LDS array gs_BehindMasksOfVerts recorded, per vertex, whether it lies inside or outside the view volume,
while cullClipFaceMask merely recorded every face adjacent to an outside vertex.
Among those recorded faces there may be faces whose four vertices all lie behind one and the same frustum plane (detectable from gs_BehindMasksOfVerts); such faces are entirely outside and must be culled, as the worked example below shows.
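To see why an AND across the four vertex masks is the right cull test, here is a small worked example (C#, my own illustration with made-up masks):

    // A face can be culled only if ALL four of its vertices are behind one and
    // the same frustum plane; the AND keeps a bit only if every vertex has it set.
    uint[] faceVertexMasks = { 0b000011, 0b000001, 0b100001, 0b000101 };
    uint cullMaskOfFace = 0b111111; // FACE_MASK, "behind every plane" initially
    foreach (uint m in faceVertexMasks)
        cullMaskOfFace &= m;
    // cullMaskOfFace == 0b000001: every vertex is behind the 'x < 0' plane, so the
    // face lies entirely outside the view volume and its bit can be cleared.
    // Had any one mask been 0 (a vertex inside the view volume), the AND would be 0
    // and the face would instead be kept for the clipping step.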

//////////////////////////////////Tool Functions/////////////////////////////////////////////////////////
       // offset: the bit position to start extracting from
       // numBits: the number of bits to extract
       //ex: data = 1111 1111 1111 0101 0101 0101 , offset = 12 , numBits = 12
       // mask = 1111 1111 1111
       // data >> 12 = 0000 0000 0000 1111 1111 1111
       // result =  1111 1111 1111

       // Unsigned integer bit field extraction.
       // Note that the intrinsic itself generates a vector instruction.
       // Wrap this function with WaveReadLaneFirst() to get scalar output.
       uint BitFieldExtract(uint data, uint offset, uint numBits)
       {
           uint mask = (1u << numBits) - 1u;
           return (data >> offset) & mask;
       }

       #define VERT_LIST_LEFT   ((4) << 9 | (6) << 6 | (2) << 3 | (0) << 0)
       #define VERT_LIST_RIGHT  ((3) << 9 | (7) << 6 | (5) << 3 | (1) << 0)
       #define VERT_LIST_BOTTOM ((1) << 9 | (5) << 6 | (4) << 3 | (0) << 0)
       #define VERT_LIST_TOP    ((6) << 9 | (7) << 6 | (3) << 3 | (2) << 0)
       #define VERT_LIST_FRONT  ((2) << 9 | (3) << 6 | (1) << 3 | (0) << 0)
       #define VERT_LIST_BACK   ((5) << 9 | (7) << 6 | (6) << 3 | (4) << 0)
       //allVertLists
       //VERT_LIST_RIGHT is the vertex list of the right face: 4 vertices, clockwise 3 7 5 1
       //A vertex index is at most 7, so 3 bits suffice per index, and a face's 4-vertex list needs 3*4 = 12 bits
       
       //allVertLists[f >> 1]
       //f>>1, i.e. f/2, selects the x/y/z component of allVertLists
       //left,right  0/2,1/2 => 0 0
       //bottom,top  2/2,3/2 => 1 1
       //front,back  4/2,5/2 => 2 2

       //12 * (f & 1)
       //f&1 tests the parity of the face index and decides whether to offset by 12 bits
       //0&1=0 1&1=1
       //2&1=0 3&1=1
       //Odd faces read the high 12 bits (offset 12); even faces read the low 12 bits with no offset.
       //e.g. top => f=3, odd, reads the upper 12 bits of allVertLists[1] (offset 12);
       //bottom => f=2, even, no offset needed.
       uint GetVertexListOfFace(uint f)
       {
           // Warning: don't add 'static' here unless you want really bad code gen.
           const uint3 allVertLists = uint3((VERT_LIST_RIGHT << 12) | VERT_LIST_LEFT,
                                           (VERT_LIST_TOP   << 12) | VERT_LIST_BOTTOM,
                                           (VERT_LIST_BACK  << 12) | VERT_LIST_FRONT);

           return BitFieldExtract(allVertLists[f >> 1], 12 * (f & 1), 12);
       }

       bool TryCullFace(uint f, uint baseOffsetVertex)
       {
           //FACE_MASK => ((1 << NUM_FACES) - 1) => ((1 << 6) - 1) => 111111
           uint cullMaskOfFace = FACE_MASK; // Initially behind
           uint vertListOfFace = GetVertexListOfFace(f);

           for (uint j = 0; j < 4; j++)
           {
               uint v = BitFieldExtract(vertListOfFace, 3 * j, 3);

                //BehindMask records, per frustum plane, whether the vertex is behind it
                //11 11 11 would mean behind every plane; 00 00 00 means fully inside the view volume
                //If any of the 4 vertices has mask 00 00 00 (inside the volume), the face cannot be culled:
                //the AND below ends up as 0 (cullMaskOfFace == 0) and we return false
               //
               // Consider the vertex to be inside the view volume if:
               // 0 <= x <= w
               // 0 <= y <= w   <-- include boundary points to avoid clipping them later
               // 0 <= z <= w

               // Non-zero if ALL the vertices are behind any of the planes.
               cullMaskOfFace &= gs_BehindMasksOfVerts[baseOffsetVertex + v];
           }

           return (cullMaskOfFace != 0);
       }

       //Skip the first n set bits and return the index of the next set bit (n is zero-based)
       //ex: value = 111100, n = 3
       // result = 5
       //for comparison: firstbitlow(111100) = 2
       uint NthBitLow(uint value, uint n)
       {
           uint b = -1; // Consistent with the behavior of firstbitlow()
           uint c = countbits(value);

           if (n < c) // Validate inputs
           {
               uint r = n + 1; // Compute the number of remaining bits

               do
               {
                   uint f = firstbitlow(value >> (b + 1)); // Find the next set bit
                   b += f + r; // Make a guess (assume all [b+f+1,b+f+r] bits are set)
                   c = countbits(value << (32 - (b + 1))); // Count the number of bits actually set
                   r = (n + 1) - c; // Compute the number of remaining bits
               }
               while (r > 0);
           }

           return b;
       }
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////


       // (3) Cull the faces.
       {
           const uint cullFaceMask   = cullClipFaceMask;

           //countbits returns how many bits of cullFaceMask are set, i.e. how many faces step one flagged for cull processing
           const uint numFacesToCull = countbits(cullFaceMask); // [0, 6]

           //FACES_PER_THREAD = DIV_ROUND_UP(6, 4) = 2: each thread processes up to two faces
           for (i = 0; i < FACES_PER_THREAD; i++)
           {
               uint n = i * THREADS_PER_LIGHT + t % THREADS_PER_LIGHT;

               if (n < numFacesToCull)
               {

                   //Skip the first n set bits and return the index of the next set bit,
                   //i.e. f iterates over the index of every set bit in cullFaceMask
                   uint f = NthBitLow(cullFaceMask, n);

                   //If all 4 vertices of this face lie behind one and the same frustum plane (cullMaskOfFace != 0),
                   //the face is entirely outside the view volume and can be culled.
                   
                   //Otherwise (cullMaskOfFace == 0) no single plane has all 4 vertices behind it; since step one
                   //marked this face as touching at least one outside vertex, it may straddle the view volume,
                   //so its bit must be kept for the clipping step.
                   if (TryCullFace(f, baseVertexOffset))
                   {
                       cullClipFaceMask ^= 1 << f; // Clear the bit
                   }
               }
           }
       }

Clipping the intersecting faces and computing the intersection vertices

The previous step left us with cullClipFaceMask, the faces along which the light volume and the view volume intersect. We now need to clip those faces to find the intersection vertices and use them to update ndcAaBbMinPt / ndcAaBbMaxPt.
As in the previous step, each thread iterates over the faces flagged in cullClipFaceMask; a standalone sketch of the per-plane clipping follows.
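To make the per-plane step concrete before diving into the ring-buffer machinery, here is a minimal standalone sketch of the same Sutherland-Hodgman pass against a single plane x >= 0 (2D, plain lists instead of LDS; my own illustration, not the HDRP code):

    using System.Collections.Generic;
    using UnityEngine; // Vector2

    static class ClipDemo
    {
        // Clip a 2D polygon against the half-plane x >= 0; the structure mirrors
        // ClipPolygonAgainstPlane below (bc = signed distance to the plane = x here).
        public static List<Vector2> ClipAgainstPlaneXPositive(List<Vector2> poly)
        {
            var result = new List<Vector2>();
            Vector2 tail = poly[poly.Count - 1]; // previous vertex, like tailVert
            foreach (Vector2 lead in poly)
            {
                bool tailIn = tail.x >= 0, leadIn = lead.x >= 0;
                if (tailIn != leadIn) // edge crosses the plane: emit the intersection
                {
                    float alpha = tail.x / (tail.x - lead.x); // cf. IntersectEdgeAgainstPlane
                    result.Add(Vector2.Lerp(tail, lead, alpha));
                }
                if (leadIn)           // leading vertex inside: keep it
                    result.Add(lead);
                tail = lead;
            }
            return result;
        }
    }
    // A quad spanning x in [-1, 1] comes back clamped to x in [0, 1]: the two inside
    // vertices survive and two intersection vertices are inserted -- exactly Blinn's
    // four cases listed in the comments of ClipPolygonAgainstPlane further down.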


// (4) Clip the faces.
{
    const uint clipFaceMask   = cullClipFaceMask;
    const uint numFacesToClip = countbits(clipFaceMask); // [0, 6]

    for (i = 0; i < FACES_PER_THREAD; i++)
    {
        uint n = i * THREADS_PER_LIGHT + t % THREADS_PER_LIGHT;

        if (n < numFacesToClip)
        {
            uint f = NthBitLow(clipFaceMask, n);

            uint   srcBegin, srcSize;
            ClipFaceAgainstViewVolume(f, baseVertexOffset,
                                        srcBegin, srcSize, t);
            UpdateAaBb(srcBegin, srcSize, t, g_isOrthographic != 0, invProjMat,
                        ndcAaBbMinPt, ndcAaBbMaxPt);
        }
    }
}

ClipFaceAgainstViewVolume

RingBuffer

The ring buffer stores the vertices of a face after clipping. Clipping against a plane pushes new intersection vertices into the buffer in addition to the original ones; to keep each thread's clipping process independent, every thread needs its own buffer space, so the buffer length is MAX_CLIP_VERTS * THREADS_PER_GROUP = 10 * 64.


    // Clipping a plane by a cube may produce a hexagon (6-gon).
    // Clipping a hexagon by 4 planes may produce a decagon (10-gon).
    #define MAX_CLIP_VERTS    (10)
    #define THREADS_PER_GROUP (64)

    // ----------- Use LDS for the vertex ring buffer as otherwise on FXC we create register spilling

    groupshared float gs_VertexRingBufferX[MAX_CLIP_VERTS * THREADS_PER_GROUP];
    groupshared float gs_VertexRingBufferY[MAX_CLIP_VERTS * THREADS_PER_GROUP];
    groupshared float gs_VertexRingBufferZ[MAX_CLIP_VERTS * THREADS_PER_GROUP];
    groupshared float gs_VertexRingBufferW[MAX_CLIP_VERTS * THREADS_PER_GROUP];

    float4 GetFromRingBuffer(uint threadIdx, uint entry)
    {
        float4 outV;
        outV.x = gs_VertexRingBufferX[threadIdx * MAX_CLIP_VERTS + entry];
        outV.y = gs_VertexRingBufferY[threadIdx * MAX_CLIP_VERTS + entry];
        outV.z = gs_VertexRingBufferZ[threadIdx * MAX_CLIP_VERTS + entry];
        outV.w = gs_VertexRingBufferW[threadIdx * MAX_CLIP_VERTS + entry];
        return outV;
    }

    void WriteToRingBuffer(uint threadIdx, uint entry, float4 value)
    {
        gs_VertexRingBufferX[threadIdx * MAX_CLIP_VERTS + entry] = value.x;
        gs_VertexRingBufferY[threadIdx * MAX_CLIP_VERTS + entry] = value.y;
        gs_VertexRingBufferZ[threadIdx * MAX_CLIP_VERTS + entry] = value.z;
        gs_VertexRingBufferW[threadIdx * MAX_CLIP_VERTS + entry] = value.w;
    }

    ///////////////////////////////////////////////////////////////////////////////////////
Ring buffer initialization
    void ClipFaceAgainstViewVolume(uint f, uint baseVertexOffset,
                                out uint srcBegin, out uint srcSize,
                                uint threadIdx)
    {
        //The ring buffer stores all vertices of the current face (including vertices produced by clipping); tracking just the begin index and the vertex count lets the ring-buffer space be reused over and over
        srcBegin = 0;
        srcSize  = 4;

        uint clipMaskOfFace = 0; // Initially in front
        //Fetch the vertex list of this face
        uint vertListOfFace = GetVertexListOfFace(f);

        //First fetch the vertices stored in step one (gs_HapVertsX/Y/Z/W) and push them into the ring buffer.

        for (uint j = 0; j < 4; j++)
        {
            //Extract this face's vertex index
            uint v = BitFieldExtract(vertListOfFace, 3 * j, 3);

            //gs_BehindMasksOfVerts records each vertex's inside/outside state against the six frustum planes;
            //clipMaskOfFace accumulates which planes this face crosses.
            //ex: vertex a: 000010, b: 000110, c: 000000, d: 000100 => clipMaskOfFace: 000110
            //clipMaskOfFace == 0 means the face lies entirely inside the view volume

            // Non-zero if ANY of the vertices are behind any of the planes.
            clipMaskOfFace |= gs_BehindMasksOfVerts[baseVertexOffset + v];

            //Write into the ring buffer
            
            // Not all edges may require clipping. However, filtering the vertex list
            // is somewhat expensive, so we currently don't do it.
            WriteToRingBuffer(threadIdx, j, float4(gs_HapVertsX[baseVertexOffset + v], gs_HapVertsY[baseVertexOffset + v], gs_HapVertsZ[baseVertexOffset + v], gs_HapVertsW[baseVertexOffset + v]));
            //vertRingBuffer[j].x = gs_HapVertsX[baseVertexOffset + v];
            //vertRingBuffer[j].y = gs_HapVertsY[baseVertexOffset + v];
            //vertRingBuffer[j].z = gs_HapVertsZ[baseVertexOffset + v];
            //vertRingBuffer[j].w = gs_HapVertsW[baseVertexOffset + v];
        }
        
        
        // Sutherland-Hodgeman polygon clipping algorithm.
        // It works by clipping the entire polygon against one clipping plane at a time.
        while (clipMaskOfFace != 0)
        {
            //Iterate over the frustum planes that this face crosses
            uint p = firstbitlow(clipMaskOfFace);

            uint dstBegin, dstSize;
            ClipPolygonAgainstPlane(p, srcBegin, srcSize, threadIdx, dstBegin, dstSize);

            srcBegin = dstBegin;
            srcSize  = dstSize;

            clipMaskOfFace ^= 1 << p; // Clear the bit to continue using firstbitlow()
        }
    }

Intersecting a bounding-volume face with a frustum plane (ClipPolygonAgainstPlane)

This step clips a face of the light's bounding volume against one frustum plane and appends the resulting vertices to the ring buffer.

    struct ClipVertex
    {
        float4 pt; // Homogeneous coordinate after perspective
        float  bc; // Boundary coordinate with respect to the plane 'p'
    };

    ClipVertex CreateClipVertex(uint p, float4 v)
    {
        bool evenPlane = (p & 1) == 0;

        //Axis component used for each clip plane: 0>>1=0, 1>>1=0, 2>>1=1, 3>>1=1, ...
        //planes 0,1 => component 0 [left/right: x]
        //planes 2,3 => component 1 [bottom/top: y]
        //planes 4,5 => component 2 [front/back: z]

        float c = v[p >> 1];
        float w = v.w;

        ClipVertex cv;

        cv.pt = v;

        //In clip space a point is inside when each coordinate lies in [0, w], so the left/bottom/front
        //clip planes sit at the origin of their respective axes:
        //planes 0 2 4: bc = c      (left = 0, bottom = 0, front = 0)
        //planes 1 3 5: bc = w - c  (right = w, top = w, back = w)
        
        //left_plane:0-----right_plane:w-----v:c
        
        //bc is the signed distance from the vertex to the frustum plane along that axis

        cv.bc = evenPlane ? c : w - c; // dot(PlaneEquation, HapVertex);

        return cv;
    }

    float4 IntersectEdgeAgainstPlane(ClipVertex v0, ClipVertex v1)
    {
        //Interpolate with bc (the signed distances to the plane) to find the intersection vertex.
        float alpha = saturate(v0.bc * rcp(v0.bc - v1.bc)); // Guaranteed to lie between 0 and 1

        return lerp(v0.pt, v1.pt, alpha);
    }

    void ClipPolygonAgainstPlane(uint p, uint srcBegin, uint srcSize,
                                uint threadIdx,
                                out uint dstBegin, out uint dstSize)
    {
        //Sliding window:
        //dstBegin marks the start index for the next plane's clipping pass;
        //dstSize counts the vertices that pass will consume
        dstBegin = srcBegin + srcSize; // Start at the end; we don't use modular arithmetic here
        dstSize  = 0;

        ClipVertex tailVert = CreateClipVertex(p, GetFromRingBuffer(threadIdx, (srcBegin + srcSize - 1) % MAX_CLIP_VERTS));

        //Wrap the indices so they stay inside the ring buffer
        uint modSrcIdx = srcBegin % MAX_CLIP_VERTS;
        uint modDstIdx = dstBegin % MAX_CLIP_VERTS;


        //Walk the ring buffer, read back the stored vertices, and convert each to a ClipVertex
        for (uint j = srcBegin; j < (srcBegin + srcSize); j++)
        {
            float4 v = GetFromRingBuffer(threadIdx, modSrcIdx);
            ClipVertex leadVert = CreateClipVertex(p, v);

            // Execute Blinn's line clipping algorithm.
            // Classify the line segment. 4 cases:
            // 0. v0 out, v1 out -> add nothing
            // 1. v0 in,  v1 out -> add intersection
            // 2. v0 out, v1 in  -> add intersection, add v1
            // 3. v0 in,  v1 in  -> add v1
            // (bc >= 0) <-> in, (bc < 0) <-> out. Beware of -0.

            //bc >= 0 means the projected distance to the frustum plane is non-negative, i.e. the vertex is on the positive side.
            //One endpoint in, one out: the edge crosses the plane, so compute the intersection and push it into the ring buffer for the next plane's pass
            if ((tailVert.bc >= 0) != (leadVert.bc >= 0))
            {
                // The line segment is guaranteed to cross the plane.
                float4 clipVert = IntersectEdgeAgainstPlane(tailVert, leadVert);

                WriteToRingBuffer(threadIdx, modDstIdx, clipVert);

                dstSize++;
                modDstIdx++;
                modDstIdx = (modDstIdx == MAX_CLIP_VERTS) ? 0 : modDstIdx;
            }

            //Vertices on the inner side are also pushed into the ring buffer for the next plane's clipping pass
            if (leadVert.bc >= 0)
            {

                WriteToRingBuffer(threadIdx, modDstIdx, leadVert.pt);

                dstSize++;
                modDstIdx++;
                modDstIdx = (modDstIdx == MAX_CLIP_VERTS) ? 0 : modDstIdx;
            }


            modSrcIdx++;
            modSrcIdx = (modSrcIdx == MAX_CLIP_VERTS) ? 0 : modSrcIdx;

            tailVert = leadVert; // Avoid recomputation and overwriting the vertex in the ring buffer
        }
    }
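
To make the tail/lead bookkeeping concrete, here is a simplified CPU port of a single clipping pass (a sketch, not the shader itself: plain C#, a List replaces the per-thread ring buffer, and the clip plane is hard-coded to x >= 0):

    using System;
    using System.Collections.Generic;
    using System.Numerics; // Vector2

    class SutherlandHodgmanDemo
    {
        // One clipping pass: keep the part of the polygon with x >= 0.
        // Mirrors the tailVert/leadVert logic above, minus the ring buffer.
        static List<Vector2> ClipAgainstPlane(List<Vector2> poly)
        {
            var result = new List<Vector2>();
            Vector2 tail = poly[poly.Count - 1]; // The last vertex closes the polygon.

            foreach (var lead in poly)
            {
                bool tailIn = tail.X >= 0, leadIn = lead.X >= 0;

                if (tailIn != leadIn) // The edge crosses the plane: emit the intersection.
                {
                    float alpha = tail.X / (tail.X - lead.X);
                    result.Add(Vector2.Lerp(tail, lead, alpha));
                }
                if (leadIn) // Inside vertices are kept as-is.
                    result.Add(lead);

                tail = lead;
            }
            return result;
        }

        static void Main()
        {
            // A square straddling the plane x = 0.
            var square = new List<Vector2>
            {
                new Vector2(-1, -1), new Vector2(1, -1),
                new Vector2(1, 1), new Vector2(-1, 1)
            };

            foreach (var v in ClipAgainstPlane(square))
                Console.WriteLine(v); // Expect the right half: (0,-1) (1,-1) (1,1) (0,1)
        }
    }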

UpdateAaBb

Update ndcAaBbMinPt/ndcAaBbMaxPt from the ring buffer of clipped vertices produced in the previous step.

    void UpdateAaBb(uint srcBegin, uint srcSize, uint threadIdx,
                    bool isOrthoProj, float4x4 invProjMat,
                    inout float4 ndcAaBbMinPt, inout float4 ndcAaBbMaxPt)
    {
        //Walk the ring buffer with the same sliding window
        uint modSrcIdx = srcBegin % MAX_CLIP_VERTS;
        
        for (uint j = srcBegin; j < (srcBegin + srcSize); j++)
        {

            float4 hapVert = GetFromRingBuffer(threadIdx, modSrcIdx);

            //Perspective divide into NDC
            // Clamp to the bounds in case of numerical errors (may still generate -0).
            float3 rapVertNDC = saturate(hapVert.xyz * rcp(hapVert.w));
            float  rbpVertVSz = hapVert.w;

            //For an orthographic projection w = 1, so the view-space depth must be recovered via the inverse projection matrix
            if (isOrthoProj) // Must replace (w = 1)
            {
                rbpVertVSz = dot(invProjMat[2], hapVert);
            }

            //Update ndcAaBbMinPt/ndcAaBbMaxPt
            ndcAaBbMinPt = min(ndcAaBbMinPt, float4(rapVertNDC, rbpVertVSz));
            ndcAaBbMaxPt = max(ndcAaBbMaxPt, float4(rapVertNDC, rbpVertVSz));

            modSrcIdx++;
            modSrcIdx = (modSrcIdx == MAX_CLIP_VERTS) ? 0 : modSrcIdx;
        }
    }
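
The same accumulation as a CPU sketch (plain C#, perspective path only; the two homogeneous vertices are made-up values for illustration):

    using System;
    using System.Numerics; // Vector4

    class AabbDemo
    {
        static void Main()
        {
            // Two clipped vertices in homogeneous clip space ([0, w] convention).
            var verts = new[] { new Vector4(1, 2, 3, 4), new Vector4(3, 1, 2, 4) };

            // xyz accumulate the NDC bounds; w accumulates the view-space depth.
            var minPt = new Vector4(float.MaxValue);
            var maxPt = new Vector4(float.MinValue);

            foreach (var v in verts)
            {
                float rcpW = 1f / v.W;

                // Perspective divide, clamped to [0, 1] against rounding error (HLSL: saturate).
                var ndc = new Vector4(
                    Math.Clamp(v.X * rcpW, 0f, 1f),
                    Math.Clamp(v.Y * rcpW, 0f, 1f),
                    Math.Clamp(v.Z * rcpW, 0f, 1f),
                    v.W); // For a perspective projection, w is the view-space depth.

                minPt = Vector4.Min(minPt, ndc);
                maxPt = Vector4.Max(maxPt, ndc);
            }

            Console.WriteLine($"min={minPt} max={maxPt}");
            // min=<0.25, 0.25, 0.5, 4> max=<0.75, 0.5, 0.75, 4>
        }
    }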

Computing RectMin/RectMax in NDC from the BoundingSphere

This step finds, in the X-O-Z (and likewise Y-O-Z) projection plane, the two tangent lines OB and OD from the origin O (the camera position) to the bounding sphere.
Starting from
\(|OB' \times OC'| = |OB'| \cdot |OC'| \cdot \sin a'\)
\(OB' \cdot OC' = |OB'| \cdot |OC'| \cdot \cos a'\)

and writing out the components:
\(b_z c_x - b_x c_z = |OB'| \cdot |OC'| \cdot \sin a'\)
\(b_x c_x + b_z c_z = |OB'| \cdot |OC'| \cdot \cos a'\)

We never actually need B's coordinates, only the ratio x/z (resp. y/z), because the perspective projection matrix merely scales the x and y axes, and the homogeneous divide then divides by z.
//https://www.zhihu.com/question/289794588/answer/466643632

Since only the ratio matters, the unknowns can be rescaled: let \(x = t \cdot b_x,\ z = t \cdot b_z\) with \(t = |OC'|^2 / |OB'|\). Using \(\sin a' = r / |OC'|\) and \(\cos a' = |OB'| / |OC'|\), the system becomes
\(z \cdot c_x - x \cdot c_z = |OC'|^3 \cdot \sin a'\)
\(x \cdot c_x + z \cdot c_z = |OC'|^3 \cdot \cos a'\)

with solution
\(x = -c_z \cdot r + c_x \cdot |OB'|\)
\(z = c_x \cdot r + c_z \cdot |OB'|\)

For the other tangent point D the cross product points the opposite way:
\((OD' \times OC')_y = -|OD'| \cdot |OC'| \cdot \sin a'\)
\(OD' \cdot OC' = |OD'| \cdot |OC'| \cdot \cos a'\)

and since \(|OD'| = |OB'|\), the same steps give
\(x = c_z \cdot r + c_x \cdot |OB'|\)
\(z = -c_x \cdot r + c_z \cdot |OB'|\)

    float2 ComputeBoundsOfSphereOnProjectivePlane(float3 C, float r, float projScale, float projOffset)
    {
        float xMin, xMax;

        // See sec. 8.2.1 of https://foundationsofgameenginedev.com/#fged2 for an alternative derivation.
        // Goal: find the planes that pass through the origin O, bound the sphere, and form
        // an axis-aligned rectangle at the intersection with the projection plane.
        // Solution (for the X-coordinate):
        // The intersection of the bounding planes and the projection plane must be vertical lines,
        // which means that the bounding planes must be tangent to the Y-axis.
        // The bounding planes must be also tangent to the sphere.
        // Call the intersection points of the two vertical bounding planes and the bounding
        // sphere B and D. Assume that B is on the left of C; D is on the right of C.
        // Note that C may be behind the origin, so the same generally goes for B and D.
        // BC is normal w.r.t. the bounding plane, so it is normal w.r.t. the Y-axis; |BC| = r.
        // As a consequence, it lies in a plane parallel to the O-X-Z plane.
        // Consider B'C', which is an orthogonal projection of BC onto the actual O-X-Z plane.
        // (Imagine sliding the sphere up or down between the bounding planes).
        // We then consider a triangle OB'C' that lies entirely in the O-X-Z plane.
        // The coordinates are: OB' = (b.x, 0, b.z), OC' = (c.x, 0, c.z).
        float3 B, D;
        // OBC is a right triangle. So is OB'C'.
        // |BC| = |B'C'| = r.
        // |OB'|^2 = |OC'|^2 - |B'C'|^2.
        float lenSqOC_ = math.dot(C.xz, C.xz);
        float lenSqOB_ = lenSqOC_ - r * r;
        // If |OB'| = 0 or |OC'| = 0, the bounding planes tangent to the sphere do not exist.
        if (lenSqOB_ > 0)
        {
            float lenOB_ = math.sqrt(lenSqOB_);


            // |OB' x OC'| = |OB'| * |OC'| * Sin[a'].
            //  OB' . OC'  = |OB'| * |OC'| * Cos[a'].
            // We can determine Sin[a'] = |B'C'| / |OC'| = R / |OC'|.
            // Cos[a'] = Sqrt[1 - Sin[a']^2].
            // (OB' x OC') points along Y.
            // (OB' x OC').y = b.z * c.x - b.x * c.z.
            // Therefore,  b.z * c.x - b.x * c.z = |OB'| * |OC'| * Sin[a'].
            // OB' . OC' = b.x * c.x + b.z * c.z = |OB'| * |OC'| * Cos[a'].
            // Since we don't care about the scale, and |OB'| != 0 and |OC'| != 0,
            // we can equivalently solve
            // z * c.x - x * c.z = |OC'|^3 * Sin[a'].
            // x * c.x + z * c.z = |OC'|^3 * Cos[a'].
            // With 2 equations and 2 unknowns, we can easily solve this linear system.
            // The solution is
            // x = -c.z * r + c.x * |OB'|.
            // z =  c.x * r + c.z * |OB'|.
            B.x = C.x * lenOB_ - (C.z * r);
            B.z = C.z * lenOB_ + (C.x * r);
            // (OD' x OC') points along Y.
            // (OD' x OC').y = d.z * c.x - d.x * c.z.
            // We must solve
            // z * c.x - x * c.z = -|OC'|^3 * Sin[a'].
            // x * c.x + z * c.z =  |OC'|^3 * Cos[a'].
            // The solution is
            // x =  c.z * r + c.x * |OB'|.
            // z = -c.x * r + c.z * |OB'|.
            D.x = C.x * lenOB_ + (C.z * r);
            D.z = C.z * lenOB_ - (C.x * r);
            // We can transform OB and OD as direction vectors.
            // For the simplification below, see OptimizeProjectionMatrix.

            float rapBx = (B.x * math.rcp(B.z)) * projScale + projOffset;
            float rapDx = (D.x * math.rcp(D.z)) * projScale + projOffset;
            // One problem with the above is that this direction may, for certain spheres,
            // point behind the origin (B.z <= 0 or D.z <= 0).
            // At this point we know that the sphere is at least *partially* in front of the origin,
            // and that we are not inside the sphere, so there is at least one valid
            // plane (and one valid direction). We just need the second direction to go "in front"
            // of the first one to extend the bounding box.
            xMin = (B.z > 0) ? rapBx : -(float) 0x7F800000;
            xMax = (D.z > 0) ? rapDx : (float) 0x7F800000;
        }
        else
        {
            // Conservative estimate (we do not cull the bounding sphere using the view frustum).
            xMin = -1;
            xMax = 1;
        }

        return new float2(xMin, xMax);
    }
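
A quick numeric check of these formulas (a hypothetical harness: it assumes the method above is in scope and the Unity.Mathematics package is referenced). For a sphere at view-space (0, 0, 5) with radius 1, \(|OC'| = 5\) and \(|OB'| = \sqrt{24}\), so the tangent slope is \(x/z = \pm r / |OB'| \approx \pm 0.2041\):

    using Unity.Mathematics;
    using UnityEngine;

    static class SphereBoundsCheck
    {
        public static void Run()
        {
            // projScale = 1, projOffset = 0 leaves the raw x/z slope untouched.
            float2 b = ComputeBoundsOfSphereOnProjectivePlane(new float3(0, 0, 5), 1f, 1f, 0f);

            Debug.Log($"xMin = {b.x:F4}, xMax = {b.y:F4}"); // Expect roughly -0.2041, 0.2041
        }
    }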
    
    // (5) Compute the AABB of the bounding sphere.
    if (radius > 0)
    {
        // Occasionally, an intersection of AABBs of a bounding sphere and a bounding frustum
        // results in a tighter AABB when compared to using the AABB of the frustum alone.
        // That is the case (mostly) for sphere-capped spot lights with very wide angles.
        // Note that, unfortunately, it is not quite as tight as an AABB of a CSG intersection
        // of a sphere and frustum. Also note that the algorithm below doesn't clip the bounding
        // sphere against the view frustum before computing the bounding box, simply because it is
        // too hard/expensive. I will leave it as a TODO in case someone wants to tackle this problem.
        if ((rbpC.z + radius) > 0) // Is the sphere at least *partially* in front of the origin?
        {
            ndcAaBbMinPt.w = max(ndcAaBbMinPt.w, rbpC.z - radius);
            ndcAaBbMaxPt.w = min(ndcAaBbMaxPt.w, rbpC.z + radius);
            // Computing the 'z' component for an arbitrary projection matrix is hard, so we don't do it.
            // See sec. 8.2.2 of https://foundationsofgameenginedev.com/#fged2 for a solution.

            float2 rectMin, rectMax;

            // For the 'x' and 'y' components, the solution is given below.
            //Orthographic projection
            if (g_isOrthographic)
            {
                // Compute the center and the extents (half-diagonal) of the bounding box.
                float2 center  = mul(projMat, float4(rbpC.xyz,     1)).xy;
                float2 extents = mul(projMat, float4(radius.xx, 0, 0)).xy;

                rectMin = center - extents;
                rectMax = center + extents;
            }
            else // Perspective
            {
                //ComputeBoundsOfSphereOnProjectivePlane only reads the x and z components, hence the .xxz/.yyz swizzles
                float2 xBounds = ComputeBoundsOfSphereOnProjectivePlane(rbpC.xxz, radius, projMat._m00, projMat._m02); // X-Z plane
                float2 yBounds = ComputeBoundsOfSphereOnProjectivePlane(rbpC.yyz, radius, projMat._m11, projMat._m12); // Y-Z plane

                rectMin = float2(xBounds.r, yBounds.r);
                rectMax = float2(xBounds.g, yBounds.g);
            }


            // Transform to the NDC coordinates.
            rectMin = rectMin * 0.5 + 0.5;
            rectMax = rectMax * 0.5 + 0.5;

            // Note: separating the X- and Y-computations across 2 threads is not worth it.
            ndcAaBbMinPt.xy = max(ndcAaBbMinPt.xy, rectMin);
            ndcAaBbMaxPt.xy = min(ndcAaBbMaxPt.xy, rectMax);
        }
    }
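
The remap at the end is just the usual range change from the \([-1, 1]\) clip-space rectangle to the \([0, 1]\) NDC convention that UpdateAaBb uses:

\(rect_{[0,1]} = rect_{[-1,1]} \cdot 0.5 + 0.5\)

which lets these sphere-based bounds be min/max-combined with the frustum-based bounds accumulated earlier.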

Computing the final ScrBound (RectMin, RectMax)

Finally, the computed ndcAaBbMinPt and ndcAaBbMaxPt are written into g_vBoundsBuffer, which completes the ScrBound pass.
Normally eyeIndex is 0 when VR is disabled; with stereo rendering there is one set of bounds per eye, so eyeIndex is 0 or 1.
The layout of g_vBoundsBuffer is therefore [light0.min,light1.min.....][light0.max,light1.max.....], repeated per eye.

    // The returned values are used to index into our AABB screen space bounding box buffer
    // Usually named g_vBoundsBuffer.  The two values represent the min/max indices.
    ScreenSpaceBoundsIndices GenerateScreenSpaceBoundsIndices(uint lightIndex, uint numVisibleLights, uint eyeIndex)
    {
        // In the monoscopic mode, there is one set of bounds (min,max -> 2 * g_iNrVisibLights)
        // In stereo, there are two sets of bounds (leftMin, leftMax, rightMin, rightMax -> 4 * g_iNrVisibLights)
        const uint eyeRelativeBase = eyeIndex * 2 * numVisibleLights;

        ScreenSpaceBoundsIndices indices;
        indices.min = eyeRelativeBase + lightIndex;
        indices.max = indices.min + numVisibleLights;

        return indices;
    }
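
Plugging in numbers (hypothetical): with g_iNrVisibLights = 8 and eyeIndex = 1, light 1 gets eyeRelativeBase = 1 * 2 * 8 = 16, so indices.min = 16 + 1 = 17 and indices.max = 17 + 8 = 25, consistent with the per-eye [mins][maxs] layout described above.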

    if ((globalLightIndex < (uint)g_iNrVisibLights) && (t % THREADS_PER_LIGHT == 0)) // Avoid bank conflicts
    {
        // For stereo, we have two sets of lights. Therefore, each eye has a set of mins
        // followed by a set of maxs, and each set is equal to g_iNrVisibLights.
        const ScreenSpaceBoundsIndices eyeAdjustedOutputOffsets = GenerateScreenSpaceBoundsIndices(globalLightIndex, g_iNrVisibLights, eyeIndex);

        g_vBoundsBuffer[eyeAdjustedOutputOffsets.min] = ndcAaBbMinPt;
        g_vBoundsBuffer[eyeAdjustedOutputOffsets.max] = ndcAaBbMaxPt;
    }