in cli/configcheck.py
from kubernetes import client, config


def _get_workload_containers_on_node(node_name: str) -> dict[str, str]:
    """Returns containers actively running on a node which request GPUs, keyed by pod name."""
    config.load_kube_config()
    v1 = client.CoreV1Api()
    pods = v1.list_pod_for_all_namespaces(
        field_selector=f'spec.nodeName={node_name}'
    ).items
    pod_to_container_names: dict[str, str] = {}
    for pod in pods:
        pod_name = pod.metadata.name
        # container_statuses can be None for pods that have not started yet.
        for container_status in pod.status.container_statuses or []:
            if container_status.state.running:
                # Look up the matching container spec to access its resource requests.
                for container_spec in pod.spec.containers:
                    if (
                        container_spec.name == container_status.name
                        and container_spec.resources.requests
                        and int(
                            container_spec.resources.requests.get('nvidia.com/gpu', 0)
                        )
                        > 0
                    ):
                        pod_to_container_names[pod_name] = container_spec.name
                        break
    return pod_to_container_names
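

# A minimal usage sketch, not part of the original file: it assumes a reachable
# kubeconfig context, and the node name 'gpu-node-1' is a hypothetical
# placeholder for a real node (e.g. one listed by `kubectl get nodes`).
if __name__ == '__main__':
    gpu_containers = _get_workload_containers_on_node('gpu-node-1')
    for pod_name, container_name in gpu_containers.items():
        print(f'{pod_name}: GPU container {container_name}')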