in libkineto/src/RoctracerActivityApi.cpp [266:436]
void RoctracerActivityApi::api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg)
{
RoctracerActivityApi *dis = &singleton();
if (domain == ACTIVITY_DOMAIN_HIP_API && dis->loggedIds_.contains(cid)) {
const hip_api_data_t* data = (const hip_api_data_t*)(callback_data);
// Pack callbacks into row structures
static timespec timestamp; // FIXME verify thread safety
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
clock_gettime(CLOCK_MONOTONIC, ×tamp); // record proper clock
}
else { // (data->phase == ACTIVITY_API_PHASE_EXIT)
timespec endTime;
timespec startTime { timestamp };
clock_gettime(CLOCK_MONOTONIC, &endTime); // record proper clock
switch (cid) {
case HIP_API_ID_hipLaunchKernel:
case HIP_API_ID_hipExtLaunchKernel:
case HIP_API_ID_hipLaunchCooperativeKernel: // Should work here
{
auto &args = data->args.hipLaunchKernel;
dis->kernelRows_.emplace_back(data->correlation_id,
domain,
cid,
processId(),
systemThreadId(),
timespec_to_ns(startTime),
timespec_to_ns(endTime),
args.function_address,
nullptr,
args.numBlocks.x,
args.numBlocks.y,
args.numBlocks.z,
args.dimBlocks.x,
args.dimBlocks.y,
args.dimBlocks.z,
args.sharedMemBytes,
args.stream
);
}
break;
case HIP_API_ID_hipHccModuleLaunchKernel:
case HIP_API_ID_hipModuleLaunchKernel:
case HIP_API_ID_hipExtModuleLaunchKernel:
{
auto &args = data->args.hipModuleLaunchKernel;
dis->kernelRows_.emplace_back(data->correlation_id,
domain,
cid,
processId(),
systemThreadId(),
timespec_to_ns(startTime),
timespec_to_ns(endTime),
nullptr,
args.f,
args.gridDimX,
args.gridDimY,
args.gridDimZ,
args.blockDimX,
args.blockDimY,
args.blockDimZ,
args.sharedMemBytes,
args.stream
);
}
break;
case HIP_API_ID_hipLaunchCooperativeKernelMultiDevice:
case HIP_API_ID_hipExtLaunchMultiKernelMultiDevice:
#if 0
{
auto &args = data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList__val;
dis->kernelRows_.emplace_back(data->correlation_id,
domain,
cid,
processId(),
systemThreadId(),
timespec_to_ns(startTime),
timespec_to_ns(endTime),
args.function_address,
nullptr,
args.numBlocks.x,
args.numBlocks.y,
args.numBlocks.z,
args.dimBlocks.x,
args.dimBlocks.y,
args.dimBlocks.z,
args.sharedMemBytes,
args.stream
);
}
#endif
break;
case HIP_API_ID_hipMalloc:
dis->mallocRows_.emplace_back(data->correlation_id,
domain,
cid,
processId(),
systemThreadId(),
timespec_to_ns(startTime),
timespec_to_ns(endTime),
data->args.hipMalloc.ptr__val,
data->args.hipMalloc.size
);
break;
case HIP_API_ID_hipFree:
dis->mallocRows_.emplace_back(data->correlation_id,
domain,
cid,
processId(),
systemThreadId(),
timespec_to_ns(startTime),
timespec_to_ns(endTime),
data->args.hipFree.ptr,
0
);
break;
case HIP_API_ID_hipMemcpy:
{
auto &args = data->args.hipMemcpy;
dis->copyRows_.emplace_back(data->correlation_id,
domain,
cid,
processId(),
systemThreadId(),
timespec_to_ns(startTime),
timespec_to_ns(endTime),
args.src,
args.dst,
args.sizeBytes,
args.kind,
static_cast<hipStream_t>(0) // use placeholder?
);
}
break;
case HIP_API_ID_hipMemcpyAsync:
case HIP_API_ID_hipMemcpyWithStream:
{
auto &args = data->args.hipMemcpyAsync;
dis->copyRows_.emplace_back(data->correlation_id,
domain,
cid,
processId(),
systemThreadId(),
timespec_to_ns(startTime),
timespec_to_ns(endTime),
args.src,
args.dst,
args.sizeBytes,
args.kind,
args.stream
);
}
break;
default:
dis->rows_.emplace_back(data->correlation_id,
domain,
cid,
processId(),
systemThreadId(),
timespec_to_ns(startTime),
timespec_to_ns(endTime)
);
break;
}
}
}
}