in libkineto/src/CuptiNvPerfMetric.cpp [370:467]
// Evaluates the requested GPU metrics for every range stored in a counter
// data image.
//
// chipName          chip name handed to the NVPW metrics context.
// counterDataImage  raw counter data image to read ranges from.
// metricNames       metrics to evaluate; copied into the result and used to
//                   size each range's value array (one value per metric).
// verbose           when true, logs every (range, metric, value) at INFO.
//
// Returns a default-constructed result if the image is empty, the metrics
// context cannot be created, or the number of ranges cannot be queried.
// Otherwise returns one measurement per range that was evaluated
// successfully; ranges whose counter data cannot be selected or whose
// metrics fail to evaluate are logged and skipped.
CuptiProfilerResult evalMetricValues(
    const std::string& chipName,
    const std::vector<uint8_t>& counterDataImage,
    const std::vector<std::string>& metricNames,
    bool verbose) {
  if (counterDataImage.empty()) {
    LOG(ERROR) << "Counter Data Image is empty!";
    return {};
  }

  NVPW_CUDA_MetricsContext_Create_Params metricsContextCreateParams = {
      NVPW_CUDA_MetricsContext_Create_Params_STRUCT_SIZE, nullptr};
  metricsContextCreateParams.pChipName = chipName.c_str();
  if (!NVPW_CALL(
          NVPW_CUDA_MetricsContext_Create(&metricsContextCreateParams))) {
    return {};
  }

  // Release the metrics context on every exit path below.
  // NOTE(review): the pointer cast looks like it exists to strip a const
  // introduced by how SCOPE_EXIT captures its argument - confirm against the
  // macro definition, and confirm the lambda passed here is actually invoked.
  NVPW_MetricsContext_Destroy_Params metricsContextDestroyParams = {
      NVPW_MetricsContext_Destroy_Params_STRUCT_SIZE, nullptr};
  metricsContextDestroyParams.pMetricsContext =
      metricsContextCreateParams.pMetricsContext;
  SCOPE_EXIT([&]() {
    NVPW_MetricsContext_Destroy(
        (NVPW_MetricsContext_Destroy_Params*)&metricsContextDestroyParams);
  });

  NVPW_CounterData_GetNumRanges_Params getNumRangesParams = {
      NVPW_CounterData_GetNumRanges_Params_STRUCT_SIZE, nullptr};
  getNumRangesParams.pCounterDataImage = counterDataImage.data();
  if (!NVPW_CALL(NVPW_CounterData_GetNumRanges(&getNumRangesParams))) {
    return {};
  }

  // TBD in the future support special chars in metric name
  // for now these are default
  const bool isolated = true;

  // The evaluation API takes an array of C strings. The pointers borrow from
  // metricNames, which outlives this function call.
  std::vector<const char*> metricNamePtrs;
  metricNamePtrs.reserve(metricNames.size());
  for (const auto& metric : metricNames) {
    metricNamePtrs.push_back(metric.c_str());
  }

  CuptiProfilerResult result{
      .metricNames = metricNames};

  for (size_t rangeIndex = 0; rangeIndex < getNumRangesParams.numRanges;
       ++rangeIndex) {
    CuptiRangeMeasurement rangeData{
        .rangeName = getRangeDescription(counterDataImage, rangeIndex)};
    rangeData.values.resize(metricNames.size());

    // First point the metrics context at the current range.
    NVPW_MetricsContext_SetCounterData_Params setCounterDataParams = {
        NVPW_MetricsContext_SetCounterData_Params_STRUCT_SIZE, nullptr};
    setCounterDataParams.pMetricsContext =
        metricsContextCreateParams.pMetricsContext;
    setCounterDataParams.pCounterDataImage = counterDataImage.data();
    setCounterDataParams.isolated = isolated;
    setCounterDataParams.rangeIndex = rangeIndex;
    // If selecting the range fails, the context may still reference the
    // previous range's data; evaluating now would attribute those values to
    // this range, so skip it instead.
    if (!NVPW_CALL(
            NVPW_MetricsContext_SetCounterData(&setCounterDataParams))) {
      LOG(WARNING) << "Failed to set counter data for range : "
                   << rangeData.rangeName;
      continue;
    }

    // Now we can evaluate GPU metrics; values are written straight into
    // rangeData.values, one slot per requested metric.
    NVPW_MetricsContext_EvaluateToGpuValues_Params evalToGpuParams = {
        NVPW_MetricsContext_EvaluateToGpuValues_Params_STRUCT_SIZE, nullptr};
    evalToGpuParams.pMetricsContext =
        metricsContextCreateParams.pMetricsContext;
    evalToGpuParams.numMetrics = metricNamePtrs.size();
    evalToGpuParams.ppMetricNames = metricNamePtrs.data();
    evalToGpuParams.pMetricValues = rangeData.values.data();
    if (!NVPW_CALL(
            NVPW_MetricsContext_EvaluateToGpuValues(&evalToGpuParams))) {
      LOG(WARNING) << "Failed to evaluate metris for range : "
                   << rangeData.rangeName;
      continue;
    }

    if (verbose) {
      for (size_t i = 0; i < metricNames.size(); i++) {
        LOG(INFO) << "rangeName: " << rangeData.rangeName
                  << "\tmetricName: " << metricNames[i]
                  << "\tgpuValue: " << rangeData.values[i];
      }
    }

    result.rangeVals.emplace_back(std::move(rangeData));
  }

  return result;
}