def parse_nccl_result()

in src/nccl_healthcheck/nccl_startup.py [0:0]


def parse_nccl_result(test_result: str) -> NcclResults:
  """Extract per-size measurements and the average bus bandwidth.

  A line of the output contributes a result only when all of the following
  hold: it is neither blank nor a `#` comment, it splits into exactly
  `_NCCL_RESULT_LENGTH` whitespace-separated fields, its type field is
  "float", and its message size has a non-None entry in at least one of
  `MESSAGE_SIZE_TO_BANDWIDTH_LABEL` / `MESSAGE_SIZE_TO_LATENCY_LABEL`.

  Args:
    test_result (str): Raw text output of the NCCL test.

  Returns:
    NcclResults: The collected per-size results plus the average bus
      bandwidth. `success` is True only when the "# Avg bus bandwidth"
      summary line is present; otherwise `avg_bandwidth` is set to
      `_NO_BANDWIDTH_VALUE`.
  """
  results = []
  for raw_line in test_result.splitlines():
    stripped = raw_line.strip()
    # Blank lines and "#" comment lines carry no measurement data.
    if stripped == "" or stripped.startswith("#"):
      continue

    fields = stripped.split()
    # A data row must have the expected column count and the float type.
    if (
        len(fields) != _NCCL_RESULT_LENGTH
        or fields[_NCCL_RESULT_TYPE_INDEX] != "float"
    ):
      continue

    size = fields[_NCCL_RESULT_MESSAGE_SIZE_INDEX]
    has_bandwidth_label = (
        MESSAGE_SIZE_TO_BANDWIDTH_LABEL.get(size, None) is not None
    )
    has_latency_label = (
        MESSAGE_SIZE_TO_LATENCY_LABEL.get(size, None) is not None
    )
    # Only message sizes that map to some label are kept.
    if not (has_bandwidth_label or has_latency_label):
      continue

    # The columns are reported as floats; downstream logic currently assumes
    # ints, so the fractional part is dropped.
    in_place_bw = int(float(fields[_NCCL_RESULT_IN_PLACE_BW_INDEX]))
    in_place_time = int(float(fields[_NCCL_RESULTS_IN_PLACE_TIME_INDEX]))
    results.append(
        NcclResult(
            message_size=size,
            in_place_bw=in_place_bw,
            in_place_time=in_place_time,
        )
    )

  # The summary line holds the average bus bandwidth; only its leading run of
  # digits is captured.
  summary = re.search(r"# Avg bus bandwidth\s*:\s*(\d+)", test_result)
  success = summary is not None
  if success:
    bandwidth = int(summary.group(1))
    print(f"Found bandwidth: {bandwidth}")
  else:
    bandwidth = _NO_BANDWIDTH_VALUE

  nccl_results = NcclResults(
      avg_bandwidth=bandwidth,
      results=results,
      success=success,
  )
  print(f"results: {results}")
  return nccl_results