void transformHierarchyCameraToWorld()

in libraries/hvvr/raycaster/render.cpp [282:365]


// Transforms the two-level frustum hierarchy (blocks and their tiles) by cameraToWorld,
// spreading the work over threadPool and returning only once every task has finished.
//
// For each block index in [0, blockCount):
//   - blocksDst[block] receives frustumTransform(blocksSrc[block], cameraToWorld);
//   - each of the block's TILES_PER_BLOCK tiles is transformed into tilesDst, and its point
//     origins/directions are copied into streamed->tileFrusta3D's host buffer.
//
// Culling is enabled unless cullPlanes[0].dist is infinite. When enabled, a tile whose simple
// frustum is rejected by any of the four planes (per planeCullsFrustum) is replaced, in both
// tilesDst and the streamed buffer, by a degenerate frustum (origins at +infinity, zero
// directions).
//
// tilesSrc/tilesDst and the streamed tile buffer are indexed up to blockCount * TILES_PER_BLOCK;
// blocksSrc/blocksDst up to blockCount. The caller must size them accordingly.
void transformHierarchyCameraToWorld(const Frustum* tilesSrc,
                                     const Frustum* blocksSrc,
                                     Frustum* tilesDst,
                                     Frustum* blocksDst,
                                     const matrix4x4 cameraToWorld,
                                     uint32_t blockCount,
                                     Camera_StreamedData* streamed,
                                     Plane cullPlanes[4],
                                     ThreadPool& threadPool) {
    SimpleRayFrustum* simpleTileFrusta = streamed->tileFrusta3D.dataHost();

    // Shared per-tile work: transform the tile frustum and mirror its rays into the streamed
    // simple-frustum buffer.
    auto transformTile = [&](uint32_t tileIndex) -> void {
        Frustum& tileDst = tilesDst[tileIndex];
        tileDst = frustumTransform(tilesSrc[tileIndex], cameraToWorld);
        SimpleRayFrustum& simpleFrustum = simpleTileFrusta[tileIndex];
        for (int n = 0; n < Frustum::pointCount; n++) {
            simpleFrustum.origins[n] = tileDst.pointOrigin[n];
            simpleFrustum.directions[n] = tileDst.pointDir[n];
        }
    };

    // Transform-only variant, used when culling is disabled.
    auto blockTransformTask = [&](uint32_t startBlock, uint32_t endBlock) -> void {
        // 0x8040 = FTZ (bit 15) | DAZ (bit 6) in MXCSR: denormals must be treated as zero
        // on whichever worker thread runs this task.
        assert((_mm_getcsr() & 0x8040) == 0x8040);
        for (uint32_t blockIndex = startBlock; blockIndex < endBlock; blockIndex++) {
            blocksDst[blockIndex] = frustumTransform(blocksSrc[blockIndex], cameraToWorld);

            uint32_t startTile = blockIndex * TILES_PER_BLOCK;
            uint32_t endTile = startTile + TILES_PER_BLOCK;
            for (uint32_t tileIndex = startTile; tileIndex < endTile; tileIndex++) {
                transformTile(tileIndex);
            }
        }
    };

    // Transform + cull variant: tiles rejected by any cull plane are made degenerate.
    auto blockTransformAndCullTask = [&](uint32_t startBlock, uint32_t endBlock) -> void {
        enum { cullPlaneCount = 4 }; // matches the cullPlanes[4] parameter

        // 0x8040 = FTZ (bit 15) | DAZ (bit 6) in MXCSR: denormals must be treated as zero
        // on whichever worker thread runs this task.
        assert((_mm_getcsr() & 0x8040) == 0x8040);
        for (uint32_t blockIndex = startBlock; blockIndex < endBlock; blockIndex++) {
            blocksDst[blockIndex] = frustumTransform(blocksSrc[blockIndex], cameraToWorld);

            uint32_t startTile = blockIndex * TILES_PER_BLOCK;
            uint32_t endTile = startTile + TILES_PER_BLOCK;
            for (uint32_t tileIndex = startTile; tileIndex < endTile; tileIndex++) {
                transformTile(tileIndex);

                SimpleRayFrustum& simpleFrustum = simpleTileFrusta[tileIndex];
                bool culled = false;
                for (int i = 0; i < cullPlaneCount; ++i) {
                    if (planeCullsFrustum(cullPlanes[i], simpleFrustum)) {
                        culled = true;
                        break; // one rejecting plane is enough
                    }
                }
                if (culled) {
                    for (int n = 0; n < Frustum::pointCount; n++) {
                        // Signal degenerate frustum
                        simpleFrustum.origins[n] = vector3(INFINITY, INFINITY, INFINITY);
                        simpleFrustum.directions[n] = vector3(0, 0, 0);
                    }
                    tilesDst[tileIndex] = Frustum(simpleFrustum.origins, simpleFrustum.directions);
                }
            }
        }
    };

    enum { maxTasks = 4096 };
    enum { blocksPerThread = 16 };

    // Overflow-safe ceiling division.
    auto ceilDiv = [](uint32_t a, uint32_t b) -> uint32_t { return a / b + ((a % b) != 0 ? 1u : 0u); };

    // Normally each task handles blocksPerThread blocks. If that would need more than maxTasks
    // tasks, grow the per-task range instead of dropping the excess blocks, so every block in
    // [0, blockCount) is always processed (also in builds where assert is compiled out).
    uint32_t blocksPerTask = blocksPerThread;
    uint32_t numTasks = ceilDiv(blockCount, blocksPerTask);
    if (numTasks > maxTasks) {
        blocksPerTask = ceilDiv(blockCount, maxTasks);
        numTasks = ceilDiv(blockCount, blocksPerTask);
    }
    assert(numTasks <= maxTasks);

    // An infinite distance on the first plane means "no culling requested".
    bool mustCull = !isinf(cullPlanes[0].dist);
    std::future<void> taskResults[maxTasks];
    for (uint32_t i = 0; i < numTasks; ++i) {
        // i < numTasks guarantees startBlock < blockCount, so the subtraction cannot wrap.
        uint32_t startBlock = i * blocksPerTask;
        uint32_t endBlock = startBlock + min<uint32_t>(blocksPerTask, blockCount - startBlock);
        if (mustCull) {
            taskResults[i] = threadPool.addTask(blockTransformAndCullTask, startBlock, endBlock);
        } else {
            taskResults[i] = threadPool.addTask(blockTransformTask, startBlock, endBlock);
        }
    }
    // Block until all tasks finish; the lambdas capture locals by reference, so nothing may
    // outlive this function.
    for (uint32_t i = 0; i < numTasks; ++i) {
        taskResults[i].get();
    }
}