in src/gpu_healthcheck/gpu_healthcheck.py [0:0]
def convert_output_to_proto(output: str) -> dcgm_pb2.DiagnosticReport:
    """Build a ``DiagnosticReport`` proto from DCGM diagnostic JSON text.

    The top-level keys ``version``, ``Driver Version Detected``,
    ``GPU Device IDs``, ``GPU Device Serials`` and ``DCGM GPU Diagnostic``
    are all optional; absent ones leave the matching proto field at its
    default. Inside the diagnostic section, each category must carry
    ``category``, each test ``name``, each result ``status``, and each
    warning all four of its fields.

    Args:
        output: JSON document emitted by the DCGM diagnostic tool.

    Returns:
        The populated ``dcgm_pb2.DiagnosticReport`` message.

    Raises:
        json.JSONDecodeError: If ``output`` is not valid JSON.
        KeyError: If a required key is missing from a category, test,
            result, or warning entry.
    """
    parsed = json.loads(output)
    report = dcgm_pb2.DiagnosticReport()

    # Top-level scalar and collection fields.
    report.version = parsed.get("version", "")
    report.driver_version_detected = parsed.get("Driver Version Detected", "")
    report.gpu_device_ids.extend(parsed.get("GPU Device IDs", []))
    serials = parsed.get("GPU Device Serials", {})
    for index, serial_number in serials.items():
        report.gpu_device_serials[index] = serial_number

    # Fields copied onto a result only when the JSON entry provides them.
    optional_result_fields = ("gpu_id", "info")
    # Fields every warning entry must provide, in assignment order.
    warning_fields = (
        "error_category",
        "error_id",
        "error_severity",
        "warning",
    )

    diagnostic = parsed.get("DCGM GPU Diagnostic", {})
    for category_entry in diagnostic.get("test_categories", []):
        category_msg = report.dcgm_gpu_diagnostic.test_categories.add()
        category_msg.category = category_entry["category"]

        for test_entry in category_entry.get("tests", []):
            test_msg = category_msg.tests.add()
            test_msg.name = test_entry["name"]

            for result_entry in test_entry.get("results", []):
                result_msg = test_msg.results.add()
                result_msg.status = result_entry["status"]

                for field in optional_result_fields:
                    if field in result_entry:
                        setattr(result_msg, field, result_entry[field])

                for warning_entry in result_entry.get("warnings", []):
                    warning_msg = result_msg.warnings.add()
                    for field in warning_fields:
                        setattr(warning_msg, field, warning_entry[field])

    return report