def determine_test_iterations()

in src/health_runner/health_runner.py [0:0]


def determine_test_iterations(num_nodes: int | None = None) -> int:
  """Determine the number of tests to run.

  This function will calculate the number of tests to deploy for a given test
  run. The behavior is determined by three environment variables:
  * BLAST_MODE_ENABLED - If set to "true", more than one test can be deployed.
  By default this will fill the cluster and run a test on every compatible node.
  * BLAST_MODE_NUM_TESTS_LIMIT - If set to an integer, places a limit on the
  number of tests that will be deployed.
  * NODES_CHECKED_PER_TEST -  This should be set to the number of discrete nodes
  that will be consumed in the YAML_PATH yaml.
  Used to calculate the number of tests to deploy. (Defaults to 1)

  Args:
    num_nodes: The number of nodes in the cluster. If not provided, it will be
      determined automatically.

  Returns:
    Integer of the number of tests that will be deployed.
  """
  is_blast_mode = str(os.environ.get("BLAST_MODE_ENABLED")).lower() in [
      "true",
      "1",
  ]

  if is_blast_mode:
    logging.info("Running blast mode")

    get_nodes_output = checker_common.run_command(
        _K_NUM_GPU_NODES_IN_CLUSTER_COMMAND
    )
    num_nodes = num_nodes if num_nodes else int(get_nodes_output.stdout)
    nodes_per_test = int(os.environ.get("NODES_CHECKED_PER_TEST", "1"))
    if num_nodes % nodes_per_test != 0:
      logging.warning(
          "Not all nodes can be checked. %d are present on the"
          " cluster. %d will be checked per test."
          " %d node(s) will be unchecked.",
          num_nodes,
          nodes_per_test,
          num_nodes % nodes_per_test,
      )
    max_num_tests = num_nodes // nodes_per_test

    manual_limit_str = os.environ.get("BLAST_MODE_NUM_TESTS_LIMIT")
    if manual_limit_str is not None:
      return min(int(manual_limit_str), max_num_tests)
    else:
      return max_num_tests

  else:
    logging.info("Running single test mode")
    return 1