in libraries/hvvr/raycaster/traversal.cpp [213:330]
// Culls the block frusta of rayHierarchy on the thread pool and gathers the per-task results into the
// streamed camera buffers.
//
// The blocks are split into tasks of blocksPerThread blocks each. Every task is given its own equally
// sized slice of streamed->triIndices (via TaskData::reset) and is run through cullThread. Tasks are then
// collected in task order and their results appended to the streamed buffers:
//   - tileIndexRemapEmpty / tileIndexRemapOccupied, advancing streamed->tileCountEmpty and
//     streamed->tileCountOccupied (these counters are incremented here, never reset),
//   - tileTriRanges, rebased from task-relative to whole-buffer triangle index offsets.
//
// rayHierarchy: source of blockFrusta (and tileFrusta for the debug statistics).
// streamed:     destination buffers; must not be null.
// Returns the sum of triIndexCount over all tasks, or 0 when there are no blocks to process.
uint64_t Raycaster::buildTileTriangleLists(const RayHierarchy& rayHierarchy, Camera_StreamedData* streamed) {
    const BVHNode* nodes = _nodes.data();
    ArrayView<uint32_t> triIndices(streamed->triIndices.dataHost(), streamed->triIndices.size());

#if DEBUG_STATS
    // Solid angle statistics for the block frusta.
    std::vector<double> blockFrustaAngle(rayHierarchy.blockFrusta.size());
    for (size_t i = 0; i < rayHierarchy.blockFrusta.size(); ++i) {
        blockFrustaAngle[i] = solidAngle(rayHierarchy.blockFrusta[i]);
    }
    size_t validBlocks;
    vector4 m4X = minMaxMeanMedian(blockFrustaAngle, validBlocks);
    printf("Block: Min,Max,Mean,Median: %g, %g, %g, %g\n", m4X.x, m4X.y, m4X.z, m4X.w);
    printf("Percent of sphere covered by block frusta: %g\n", 100.0 * m4X.z * validBlocks / (4 * M_PI));

    // Solid angle statistics for the tile frusta.
    std::vector<double> tileFrustaAngle(rayHierarchy.tileFrusta.size());
    for (size_t i = 0; i < rayHierarchy.tileFrusta.size(); ++i) {
        // Kept in steradians; the coverage printout below divides by the full sphere (4*pi sr)
        tileFrustaAngle[i] = solidAngle(rayHierarchy.tileFrusta[i]);
    }
    size_t validTiles;
    m4X = minMaxMeanMedian(tileFrustaAngle, validTiles);
    printf("Tile: Min,Max,Mean,Median: %g, %g, %g, %g\n", m4X.x, m4X.y, m4X.z, m4X.w);
    printf("Percent of sphere covered by tile frusta: %g\n", 100.0 * m4X.z * validTiles / (4 * M_PI));
#endif

#if DEBUG_STATS || TIME_BLOCK_CULL
    Timer timer;
#endif

#if VERIFY_TRAVERSAL
    impBlockStackSum = 0;
    refBlockStackSum = 0;
    impTileTriSum = 0;
    refTileTriSum = 0;
#endif

    enum { maxTasks = 4096 };

    // workload per task
    // 1 is too low, and incurs overhead from switching tasks too frequently inside the thread pool
    // 2 seems the fastest (though this could vary depending on the scene and sample distribution)
    // 3+ seems to become less efficient due to workload balancing
    enum { blocksPerThread = 2 };

    uint32_t blockCount = uint32_t(rayHierarchy.blockFrusta.size());
    uint32_t numTasks = (blockCount + blocksPerThread - 1) / blocksPerThread;
    assert(numTasks <= maxTasks);
    // NOTE(review): in builds without asserts this clamp means blocks past maxTasks * blocksPerThread
    // are never handed to a task - confirm callers never exceed that block count.
    numTasks = min<uint32_t>(maxTasks, numTasks);

    // No blocks means no tasks: nothing to cull, and the per-task split below would divide by zero.
    if (numTasks == 0) {
        return 0;
    }

    // Split the triangle index buffer evenly; any remainder at the end of the buffer stays unused.
    uint32_t triSpacePerThread = uint32_t(triIndices.size() / numTasks);
    assert(triSpacePerThread * numTasks <= triIndices.size());

    std::future<void> taskResults[maxTasks];
    // NOTE(review): static storage is shared by every call of this function - presumably it is only
    // ever invoked from one thread at a time; confirm before calling it concurrently.
    static TaskData taskData[maxTasks];

    // Launch one culling task per group of blocksPerThread blocks.
    for (uint32_t i = 0; i < numTasks; ++i) {
        uint32_t startTriIndex = i * triSpacePerThread;
        uint32_t endTriIndex = startTriIndex + triSpacePerThread;
        taskData[i].reset(triIndices, startTriIndex, endTriIndex);

        uint32_t startBlock = min(blockCount, i * blocksPerThread);
        uint32_t endBlock = min(blockCount, (i + 1) * blocksPerThread);
        if (i == numTasks - 1)
            assert(endBlock == blockCount);

        taskResults[i] = _threadPool->addTask(cullThread, rayHierarchy, startBlock, endBlock, nodes, &taskData[i]);
    }

    uint64_t triIndexCount = 0;
    uint32_t maxTaskTriCount = 0;
    uint32_t tileTriOffsetsStreamed = 0;
    uint32_t* streamTileIndexRemapEmpty = streamed->tileIndexRemapEmpty.dataHost();
    uint32_t* streamTileIndexRemapOccupied = streamed->tileIndexRemapOccupied.dataHost();
    TileTriRange* streamTileTriRanges = streamed->tileTriRanges.dataHost();

    // Wait for the tasks in launch order and append their output to the streamed buffers.
    for (uint32_t taskIndex = 0; taskIndex < numTasks; taskIndex++) {
        taskResults[taskIndex].get();
        const TaskData& task = taskData[taskIndex];

        for (auto emptyTileIndex : task.tileIndexRemapEmpty) {
            streamTileIndexRemapEmpty[streamed->tileCountEmpty++] = emptyTileIndex;
        }
        for (auto occupiedTileIndex : task.tileIndexRemapOccupied) {
            streamTileIndexRemapOccupied[streamed->tileCountOccupied++] = occupiedTileIndex;
        }

        // tileTriRange within the task is relative to the task's smaller view of the buffer and needs to be offset
        uint32_t taskTriOffset = uint32_t(task.triIndices.data() - streamed->triIndices.dataHost());
        for (auto tileTriRange : task.tileTriRanges) {
            TileTriRange triRangeGlobal;
            triRangeGlobal.start = taskTriOffset + tileTriRange.start;
            triRangeGlobal.end = taskTriOffset + tileTriRange.end;
            streamTileTriRanges[tileTriOffsetsStreamed++] = triRangeGlobal;
        }

        triIndexCount += task.triIndexCount;
        maxTaskTriCount = max(maxTaskTriCount, task.triIndexCount);
    }
    // Only read by the DEBUG_STATS printout; silence the unused-variable warning otherwise.
    (void)maxTaskTriCount;

#if DEBUG_STATS || TIME_BLOCK_CULL
    double deltaTime = timer.get();
    static double minDeltaTime = DBL_MAX;
    minDeltaTime = min(minDeltaTime, deltaTime);
    static uint64_t frameIndex = 0;
    // Report the timing only on every profileFrameSkip-th call.
    enum { profileFrameSkip = 64 };
    if (frameIndex % profileFrameSkip == 0) {
        printf("---- Block cull time: %.2fms, min %.2fms\n", deltaTime * 1000.0, minDeltaTime * 1000.0);
    }
    frameIndex++;
#endif

#if DEBUG_STATS
    // triIndexCount is 64-bit, so it must not be passed to a %u conversion
    printf("Total Triangle Idx Count %llu\n", static_cast<unsigned long long>(triIndexCount));
    printf("Max Triangle Idx Count Per Task %u\n", maxTaskTriCount);
#endif

#if VERIFY_TRAVERSAL
    printf("Traversal imp/ref blockStackSum, tileTriSum: %d/%d, %d/%d\n",
           int(impBlockStackSum), int(refBlockStackSum),
           int(impTileTriSum), int(refTileTriSum));
#endif

    return triIndexCount;
}