def _get_workload_containers_on_node()

in cli/configcheck.py [0:0]


def _get_workload_containers_on_node(node_name: str) -> dict[str, str]:
  """Returns containers actively running on a node which request GPUs.

  Args:
    node_name: Name of the Kubernetes node whose pods are inspected.

  Returns:
    A mapping from pod name to the name of a running container in that pod
    whose resource requests include at least one 'nvidia.com/gpu'. If a pod
    has several such running containers, the last one encountered wins
    (matching the original single-entry-per-pod behavior).
  """
  config.load_kube_config()
  v1 = client.CoreV1Api()

  pods = v1.list_pod_for_all_namespaces(
      field_selector=f'spec.nodeName={node_name}'
  ).items

  pod_to_container_names = {}
  for pod in pods:
    # container_statuses is None for pods that have not started any
    # containers yet (e.g. Pending pods) — skip them instead of crashing
    # with a TypeError when iterating None.
    if not pod.status.container_statuses:
      continue
    pod_name = pod.metadata.name
    # Index the container specs by name once per pod so we don't rescan
    # the whole spec list for every container status (was O(n*m)).
    specs_by_name = {spec.name: spec for spec in pod.spec.containers}
    for container_status in pod.status.container_statuses:
      if not container_status.state.running:
        continue
      spec = specs_by_name.get(container_status.name)
      if spec is None or not spec.resources.requests:
        continue
      # GPU requests are reported as strings (e.g. '1'); treat a positive
      # count as "this container requested GPUs".
      if int(spec.resources.requests.get('nvidia.com/gpu', 0)) > 0:
        pod_to_container_names[pod_name] = spec.name

  return pod_to_container_names