PBRT笔记(3)——KD树

茎节点与叶子节点

茎节点与叶子节点皆适用KdAccelNode来表示
注意：这里使用了匿名union

union有个特性:内部类型共用一段内存，且大小为内部最大类型的大小。

struct KdAccelNode {
<KdAccelNode Methods>
    union {
        Float split; // Interior
        int onePrimitive; // Leaf
        int primitiveIndicesOffset; // Leaf
    };
    union {
        int flags; // Both
        int nPrims; // Leaf
        int aboveChild; // Interior
    };
};

使用8字节存储叶子节点（2个union各4byte），其中flags中的低二位2bit用于区分分割方向以及叶子节点，nPrims的高30位存储图元存储数。InitInterior函数也是差不多的操作。
primNums数组存储了图元容器的index，

void KdAccelNode::InitLeaf(int *primNums, int np,
                           std::vector<int> *primitiveIndices) {
    //标记为叶子
    flags = 3;
    //np为图元个数，这样做可以保证上一步flags=3时的存储的数据不会覆盖掉
    nPrims |= (np << 2);
    //存储图元index，如果是0~1个，使用onePrimitive来存储
    if (np == 0)
        onePrimitive = 0;
    else if (np == 1)
        onePrimitive = primNums[0];
    else {
    //多个图元的情况下，使用存储index数组的偏移值，之后存储每个图元index
        primitiveIndicesOffset = primitiveIndices->size();
        for (int i = 0; i < np; ++i) primitiveIndices->push_back(primNums[i]);
    }
}

构建树

KdTreeAccel::KdTreeAccel(std::vector<std::shared_ptr<Primitive>> p,
                         int isectCost, int traversalCost, Float emptyBonus,
                         int maxPrims, int maxDepth)
    : isectCost(isectCost),
      traversalCost(traversalCost),
      maxPrims(maxPrims),
      emptyBonus(emptyBonus),
      primitives(std::move(p)) {
    //nextFreeNode为下一个可用节点的index，nAllocedNodes为分配空间的节点总数
    //将两者设为0，可以让系统在初始化树中的第一个节点时分配内存而不是在启动就分配内存。
    //这两个变量用于判断在此次递归中是否需要申请内存块
    ProfilePhase _(Prof::AccelConstruction);
    nextFreeNode = nAllocedNodes = 0;
    //计算深度的经验公式:8 + 1.3 log(N)
    if (maxDepth <= 0)
        maxDepth = std::round(8 + 1.3f * Log2Int(int64_t(primitives.size())));

    //计算kd树结构的边界盒，以及创建各图元边界盒数组
    std::vector<Bounds3f> primBounds;
    primBounds.reserve(primitives.size());
    for (const std::shared_ptr<Primitive> &prim : primitives) {
        Bounds3f b = prim->WorldBound();
        bounds = Union(bounds, b);
        primBounds.push_back(b);
    }

    //分配内存的相应变量
    
    //edges用于存储x，y，z三个方向上的所有可能切割图元位置
    //因为边界盒有min、max值，所有这里的需要图元个数*2
    std::unique_ptr<BoundEdge[]> edges[3];
    for (int i = 0; i < 3; ++i)
        edges[i].reset(new BoundEdge[2 * primitives.size()]);
    //以最坏打算确定prim1的空间
    std::unique_ptr<int[]> prims0(new int[primitives.size()]);
    std::unique_ptr<int[]> prims1(new int[(maxDepth + 1) * primitives.size()]);

    //primNums 为与当前节点重叠的图元index数组
    std::unique_ptr<int[]> primNums(new int[primitives.size()]);
    for (size_t i = 0; i < primitives.size(); ++i) primNums[i] = i;

    //开始递归构建kd树
    buildTree(0, bounds, primBounds, primNums.get(), primitives.size(),
              maxDepth, edges, prims0.get(), prims1.get());
}

nodeNum为创建节点的偏移量（primNums中的index）

void KdTreeAccel::buildTree(int nodeNum, const Bounds3f &nodeBounds,
                            const std::vector<Bounds3f> &allPrimBounds,
                            int *primNums, int nPrimitives, int depth,
                            const std::unique_ptr<BoundEdge[]> edges[3],
                            int *prims0, int *prims1, int badRefines) {
    CHECK_EQ(nodeNum, nextFreeNode);
    //当分配的内存使用完时，分配下一块内存
    if (nextFreeNode == nAllocedNodes) {
        int nNewAllocNodes = std::max(2 * nAllocedNodes, 512);
        KdAccelNode *n = AllocAligned<KdAccelNode>(nNewAllocNodes);
        if (nAllocedNodes > 0) {
            memcpy(n, nodes, nAllocedNodes * sizeof(KdAccelNode));
            FreeAligned(nodes);
        }
        nodes = n;
        nAllocedNodes = nNewAllocNodes;
    }
    ++nextFreeNode;

    //当需要分割的图元数足够小或是树的深度达到一定次数则生成叶子节点停止递归
    if (nPrimitives <= maxPrims || depth == 0) {
        nodes[nodeNum].InitLeaf(primNums, nPrimitives, &primitiveIndices);
        return;
    }

    //茎节点需要通过分割来生成，这里采用了类似之前生成BVH的SAH算法

    int bestAxis = -1, bestOffset = -1;
    Float bestCost = Infinity;
    Float oldCost = isectCost * Float(nPrimitives);
    Float totalSA = nodeBounds.SurfaceArea();
    Float invTotalSA = 1 / totalSA;
    Vector3f d = nodeBounds.pMax - nodeBounds.pMin;

    //选择一个用于分割图元的轴向
    //优先选择具有最大空间性的轴向
    int axis = nodeBounds.MaximumExtent();
    int retries = 0;
    //goto语句跳转用
retrySplit:

    //初始化对应edges二维数组中对应轴的数组
    //通过图元数找到对应的index，在通过index获得边界盒
    for (int i = 0; i < nPrimitives; ++i) {
        int pn = primNums[i];
        const Bounds3f &bounds = allPrimBounds[pn];
        edges[axis][2 * i] = BoundEdge(bounds.pMin[axis], pn, true);
        edges[axis][2 * i + 1] = BoundEdge(bounds.pMax[axis], pn, false);
    }

    //以切割位置为准进行升序排序，如果位置相同，以类型start为先
    std::sort(&edges[axis][0], &edges[axis][2 * nPrimitives],
              [](const BoundEdge &e0, const BoundEdge &e1) -> bool {
                  if (e0.t == e1.t)
                      return (int)e0.type < (int)e1.type;
                  else
                      return e0.t < e1.t;
              });

    //遍历所有分割位置来计算最佳分割点
    int nBelow = 0, nAbove = nPrimitives;
    for (int i = 0; i < 2 * nPrimitives; ++i) {
        if (edges[axis][i].type == EdgeType::End) --nAbove;
        //取得分割位置
        Float edgeT = edges[axis][i].t;
        //判断当前分割点是否在节点的边界盒内
        if (edgeT > nodeBounds.pMin[axis] && edgeT < nodeBounds.pMax[axis]) {
            // 取得另两个轴，用于计算分割后2个边界盒的面积，之后再用公式计算消耗
            int otherAxis0 = (axis + 1) % 3, otherAxis1 = (axis + 2) % 3;
            Float belowSA = 2 * (d[otherAxis0] * d[otherAxis1] +
                                 (edgeT - nodeBounds.pMin[axis]) *
                                     (d[otherAxis0] + d[otherAxis1]));
            Float aboveSA = 2 * (d[otherAxis0] * d[otherAxis1] +
                                 (nodeBounds.pMax[axis] - edgeT) *
                                     (d[otherAxis0] + d[otherAxis1]));
            Float pBelow = belowSA * invTotalSA;
            Float pAbove = aboveSA * invTotalSA;
            Float eb = (nAbove == 0 || nBelow == 0) ? emptyBonus : 0;
            Float cost =
                traversalCost +
                isectCost * (1 - eb) * (pBelow * nBelow + pAbove * nAbove);

            //如果消耗比之前的变量小，则更新对应的变量
            if (cost < bestCost) {
                bestCost = cost;
                bestAxis = axis;
                bestOffset = i;
            }
        }
        if (edges[axis][i].type == EdgeType::Start) ++nBelow;
    }
    CHECK(nBelow == nPrimitives && nAbove == 0);

    //如果没有找到最优切割位置，则切换轴向继续寻找
    if (bestAxis == -1 && retries < 2) {
        ++retries;
        axis = (axis + 1) % 3;
        goto retrySplit;
    }
    if (bestCost > oldCost) ++badRefines;
    //消耗大于一定数量 && 图元数小于一定量 或者无法找到最优切割位置，则直接生成叶子节点，结束递归
    //无法找到最优切割位置参考书295页的图，即所有图元都重叠在一起
    //当然可能出现好几次循环也找不到最佳结果的情况，所以系统给予4次机会，让其继续寻找
    if ((bestCost > 4 * oldCost && nPrimitives < 16) || bestAxis == -1 ||
        badRefines == 3) {
        nodes[nodeNum].InitLeaf(primNums, nPrimitives, &primitiveIndices);
        return;
    }

    //将分割的图元分组，用于下一次递归
    int n0 = 0, n1 = 0;
    for (int i = 0; i < bestOffset; ++i)
        if (edges[bestAxis][i].type == EdgeType::Start)
            prims0[n0++] = edges[bestAxis][i].primNum;
    for (int i = bestOffset + 1; i < 2 * nPrimitives; ++i)
        if (edges[bestAxis][i].type == EdgeType::End)
            prims1[n1++] = edges[bestAxis][i].primNum;

    //进行下一次递归
    Float tSplit = edges[bestAxis][bestOffset].t;
    Bounds3f bounds0 = nodeBounds, bounds1 = nodeBounds;
    //以切割位置为准，重新计算两个孩子节点的边界盒
    bounds0.pMax[bestAxis] = bounds1.pMin[bestAxis] = tSplit;
    buildTree(nodeNum + 1, bounds0, allPrimBounds, prims0, n0, depth - 1, edges,
              prims0, prims1 + nPrimitives, badRefines);
    int aboveChild = nextFreeNode;
    nodes[nodeNum].InitInterior(bestAxis, aboveChild, tSplit);
    buildTree(aboveChild, bounds1, allPrimBounds, prims1, n1, depth - 1, edges,
              prims0, prims1 + nPrimitives, badRefines);
}


bool KdTreeAccel::Intersect(const Ray &ray, SurfaceInteraction *isect) const {
    ProfilePhase p(Prof::AccelIntersect);
    Float tMin, tMax;
    if (!bounds.IntersectP(ray, &tMin, &tMax)) {
        return false;
    }

    Vector3f invDir(1 / ray.d.x, 1 / ray.d.y, 1 / ray.d.z);
    PBRT_CONSTEXPR int maxTodo = 64;
    //KdToDo记录尚未被处理的节点
    KdToDo todo[maxTodo];
    int todoPos = 0;
    bool hit = false;
    const KdAccelNode *node = &nodes[0];
    while (node != nullptr) {
        if (ray.tMax < tMin) break;
        //如果不是叶子节点
        if (!node->IsLeaf()) {

            //首先与茎节点的分割平面做相交测试，以此来确定光线与节点的相交顺序
            int axis = node->SplitAxis();
            //射线的对应分量上的t值
            Float tPlane = (node->SplitPos() - ray.o[axis]) * invDir[axis];

            //判断先后顺序取得两个节点
            const KdAccelNode *firstChild, *secondChild;
            int belowFirst =
                (ray.o[axis] < node->SplitPos()) ||
                (ray.o[axis] == node->SplitPos() && ray.d[axis] <= 0);
            if (belowFirst) {
                firstChild = node + 1;
                secondChild = &nodes[node->AboveChild()];
            } else {
                firstChild = &nodes[node->AboveChild()];
                secondChild = node + 1;
            }

            //t大于max或者为负数代表了射线不会第二个节点相交
            if (tPlane > tMax || tPlane <= 0)
                node = firstChild;
            //小于tmin不会与第一个节点相交
            else if (tPlane < tMin)
                node = secondChild;
            //两个节点都相交
            else {
                //将第二个节点放入处理队列
                todo[todoPos].node = secondChild;
                todo[todoPos].tMin = tPlane;
                todo[todoPos].tMax = tMax;
                ++todoPos;
                node = firstChild;
                tMax = tPlane;
            }
        } else {
            //叶子节点则对内部图元进行相交测试
            int nPrimitives = node->nPrimitives();
            if (nPrimitives == 1) {
                const std::shared_ptr<Primitive> &p =
                    primitives[node->onePrimitive];
                // Check one primitive inside leaf node
                if (p->Intersect(ray, isect)) hit = true;
            } else {
                for (int i = 0; i < nPrimitives; ++i) {
                    int index =
                        primitiveIndices[node->primitiveIndicesOffset + i];
                    const std::shared_ptr<Primitive> &p = primitives[index];
                    // Check one primitive inside leaf node
                    if (p->Intersect(ray, isect)) hit = true;
                }
            }

            //更新下一次遍历区域的min与max值
            if (todoPos > 0) {
                --todoPos;
                node = todo[todoPos].node;
                tMin = todo[todoPos].tMin;
                tMax = todo[todoPos].tMax;
            } else
                break;
        }
    }
    return hit;
}

结尾

综上所述，KD树的分割（每个轴遍历所有分割方案）与BVH（SAH）分割（每个轴12次分割）更加精确，这样是的KD树的求交效率要比BVH好，也导致了渲染时需要花费比BVH更多的时间在构建KD树上。同时KD树在求交时会计算与光线与分割面的相交位置是否在[min,max]中来判断是否需要与茎节点中的两个节点一一求交，以此来减少求交计算量。而BVH通过深度优先遍历树进行求交的。

posted @ 2018-11-07 22:50 湛蓝玫瑰阅读(732) 评论(0) 编辑收藏举报

刷新页面返回顶部

湛蓝玫瑰

PBRT笔记(3)——KD树

茎节点与叶子节点

构建树

结尾

公告