in libraries/hvvr/raycaster/traversal.cpp [99:211]
// Culls the blocks in [startBlock, endBlock) of rayHierarchy against the BVH
// rooted at `nodes`, then culls every tile of each surviving block, and
// records the results in *perThread.
//
// For each block, traverseBlocks() fills a block-level stack. If that stack is
// empty, all TILES_PER_BLOCK tiles of the block are appended to
// perThread->tileIndexRemapEmpty. Otherwise each tile is traversed with
// traverseTiles(), which writes triangle indices into perThread->triIndices
// starting at perThread->triIndexCount. Tiles that produced triangles get a
// TileTriRange in perThread->tileTriRanges and an entry in
// perThread->tileIndexRemapOccupied; tiles that produced none go to
// perThread->tileIndexRemapEmpty.
//
// Parameters:
//   rayHierarchy - supplies blockFrusta[b] and tileFrusta[b * TILES_PER_BLOCK + i].
//   startBlock   - first block index to process (inclusive).
//   endBlock     - one past the last block index to process.
//   nodes        - BVH nodes handed to traverseBlocks().
//   perThread    - output container; triIndexCount is reset to 0 before the
//                  first block is processed.
//
// NOTE: when startBlock == endBlock the function returns before touching
// *perThread, so triIndexCount is NOT reset in that case.
static void cullThread(const RayHierarchy& rayHierarchy,
                       uint32_t startBlock,
                       uint32_t endBlock,
                       const BVHNode* nodes,
                       TaskData* perThread) {
#if DEBUG_STATS
    // Only consumed by the commented-out timing line in the stats section below.
    auto startTime = (double)__rdtsc();
#endif
    // Empty range: nothing to do (and nothing in *perThread is modified).
    if (startBlock == endBlock) {
        return;
    }
    perThread->triIndexCount = 0;
    BlockFrame blockFrame;
    for (uint32_t b = startBlock; b < endBlock; ++b) {
        const Frustum& blockFrustum = rayHierarchy.blockFrusta[b];
        uint32_t stackSize = traverseBlocks(blockFrame, nodes, blockFrustum);
#if VERIFY_TRAVERSAL
        // Run the reference block traversal alongside and accumulate both
        // stack sizes so the two implementations can be compared.
        traverse::ref::BlockFrame refBlockFrame;
        traverse::ref::Frustum refBlockFrustum(blockFrustum.pointOrigin, blockFrustum.pointDir);
        uint32_t refStackSize = traverse::ref::traverseBlocks(refBlockFrame, nodes, refBlockFrustum);
        impBlockStackSum += stackSize;
        refBlockStackSum += refStackSize;
        traverse::ref::TileFrame refTileFrameInit(refBlockFrame, refStackSize);
#endif
        if (!stackSize) { // we hit nothing?
            // The whole block is empty, so every one of its tiles is empty too.
            for (unsigned i = 0; i < TILES_PER_BLOCK; ++i) {
                auto globalTileIndex = b * TILES_PER_BLOCK + i;
                perThread->tileIndexRemapEmpty.push_back(globalTileIndex);
            }
            continue;
        }
#if TRAVERSAL_MODE == TRAVERSAL_REF
        TileFrame tileFrameInit(blockFrame, stackSize);
#elif TRAVERSAL_MODE == TRAVERSAL_AVX
        blockFrame.sort(stackSize);
#else
# error unknown traversal mode
#endif
        // i counts down from TILES_PER_BLOCK to 1; the tile index counts up from 0.
        auto i = TILES_PER_BLOCK;
        do {
            auto tileIndex = (TILES_PER_BLOCK - i);
            auto globalTileIndex = b * TILES_PER_BLOCK + tileIndex;
#if TRAVERSAL_MODE == TRAVERSAL_REF
            TileFrame tileFrame(tileFrameInit, stackSize);
#elif TRAVERSAL_MODE == TRAVERSAL_AVX
            // Each tile starts from a fresh copy of the block's sorted stack.
            TileFrame tileFrame;
            for (uint32_t slot = 0; slot != stackSize; ++slot)
                store(&(tileFrame.stack + slot)->tMin, load_m128(&(blockFrame.sortedStack + slot)->tMin));
#else
# error unknown traversal mode
#endif
            // Output window: the unused tail of the per-thread triangle index buffer.
            uint32_t* triIndices = perThread->triIndices.data() + perThread->triIndexCount;
            uint32_t maxTriCount = uint32_t(perThread->triIndices.size()) - perThread->triIndexCount;
            const Frustum& tileFrustum = rayHierarchy.tileFrusta[globalTileIndex];
            uint32_t outputTriCount = traverseTiles(triIndices, maxTriCount, tileFrame, stackSize, tileFrustum);
#if VERIFY_TRAVERSAL
            // NOTE(review): the reference tile pass is driven with stackSize, not
            // refStackSize, although refTileFrameInit was built with refStackSize
            // -- confirm this is intended when the two sizes differ.
            traverse::ref::TileFrame refTileFrame(refTileFrameInit, stackSize);
            traverse::ref::Frustum refTileFrustum(tileFrustum.pointOrigin, tileFrustum.pointDir);
            uint32_t refOutputTriCount = traverse::ref::traverseTiles(
                triIndices, maxTriCount, refTileFrame, stackSize, refTileFrustum);
            impTileTriSum += outputTriCount;
            refTileTriSum += refOutputTriCount;
            // we just overwrote the triangle buffer with our verification run...
            // so we need to use the verification triangle count to avoid artifacts
            outputTriCount = refOutputTriCount;
#endif
            if (outputTriCount) {
                // Record the [start, end) slice of triIndices owned by this tile
                // and advance the write cursor past it.
                TileTriRange triRange;
                triRange.start = perThread->triIndexCount;
                triRange.end = triRange.start + outputTriCount;
                perThread->tileTriRanges.push_back(triRange);
                perThread->tileIndexRemapOccupied.push_back(globalTileIndex);
                perThread->triIndexCount += outputTriCount;
            } else {
                perThread->tileIndexRemapEmpty.push_back(globalTileIndex);
            }
        } while (--i);
    }
#if DEBUG_STATS
    // double deltaTimeMs = ((double)__rdtsc() - startTime) * whunt::gRcpCPUFrequency * 1000.0;
    std::vector<double> blockFrustaAngle(endBlock - startBlock);
    std::vector<double> tileFrustaAngle((endBlock - startBlock) * TILES_PER_BLOCK);
    for (size_t b = startBlock; b < endBlock; ++b) {
        blockFrustaAngle[b - startBlock] = solidAngle(rayHierarchy.blockFrusta[b]);
        for (size_t t = 0; t < TILES_PER_BLOCK; ++t) {
            // Each block owns its own TILES_PER_BLOCK-sized slice of the array;
            // indexing by t alone would make every block overwrite the first
            // slice and leave the remaining entries at their initial zero.
            tileFrustaAngle[(b - startBlock) * TILES_PER_BLOCK + t] =
                solidAngle(rayHierarchy.tileFrusta[b * TILES_PER_BLOCK + t]);
        }
    }
    size_t validBlocks, validTiles;
    vector4 m4Block = minMaxMeanMedian(blockFrustaAngle, validBlocks);
    vector4 m4Tile = minMaxMeanMedian(tileFrustaAngle, validTiles);
    // printf("---- Block cull [%u,%u) solid angle: time %f, %u triangles, %g percent coverage\n", startBlock,
    // endBlock, deltaTimeMs, currentTriIdx, 100.0*m4X.z*validBlocks / (4 * M_PI));
    printf("%u, %u, %g, %g\n", startBlock, perThread->triIndexCount, 100.0 * m4Block.z * validBlocks / (4 * M_PI),
           100.0 * m4Tile.z * validTiles / (4 * M_PI));
#endif
}