def find_schedulable_pods()

in gpudirect-tcpxo/topology-scheduler/schedule-daemon.py [0:0]


def _sum_container_requests(containers):
  """Sums the CPU, memory and GPU requests over the given containers."""
  total_cpu = 0
  total_memory = 0
  total_gpu = 0

  for container in containers or []:
    # A container may carry no resources object, or one without requests;
    # both cases are treated as "requests nothing".
    resources = container.resources
    requests = (resources.requests if resources is not None else None) or {}
    total_cpu += parse_quantity(requests.get('cpu', 0))
    total_memory += parse_quantity(requests.get('memory', 0))
    total_gpu += int(requests.get('nvidia.com/gpu', 0))

  return total_cpu, total_memory, total_gpu


def find_schedulable_pods(pods, gate_name):
  """Finds pods that are held back by the given scheduling gate.

  A pod is selected when one of the entries in `pod.spec.scheduling_gates`
  has a `name` equal to `gate_name`. Missing job metadata (labels, job
  completion index, job name, creation timestamp) is reported on stdout and
  recorded as None; it does not exclude the pod from the result.

  Args:
    pods: Iterable of pod objects exposing `metadata` (name, namespace,
      labels, creation_timestamp) and `spec` (scheduling_gates, containers,
      node_selector). Presumably Kubernetes client pod objects - confirm
      against the caller.
    gate_name: Name of the scheduling gate to look for.

  Returns:
    A dict keyed by pod name. Each value is a dict with the keys 'name',
    'namespace', 'index', 'cpu', 'memory', 'gpu', 'node_selector', 'spec',
    'metadata', 'job_name' and 'creation_time'. 'cpu', 'memory' and 'gpu'
    are the requests summed over `pod.spec.containers`.
  """
  pods_to_schedule = {}

  for pod in pods:
    gates = pod.spec.scheduling_gates or []
    # any() stops at the first match, so a pod is evaluated (and logged) at
    # most once even if the gate name appears more than once in its list.
    if not any(gate.name == gate_name for gate in gates):
      continue

    pod_name = pod.metadata.name
    pod_namespace = pod.metadata.namespace

    pod_index = None
    job_name = None
    labels = pod.metadata.labels
    if labels is None:
      print('No labels on pod to extract job metadata from.')
    else:
      pod_index = labels.get('batch.kubernetes.io/job-completion-index')
      if pod_index is None:
        print('Unable to find index in metadata. Can not queue jobs')

      job_name = labels.get('job-name')
      if job_name is None:
        print('Unable to find job_name in metadata. Can not queue jobs')

    creation_time = pod.metadata.creation_timestamp
    if creation_time is None:
      print(
          'Unable to find creation_time in metadata. Can not queue jobs'
      )

    used_cpu, used_memory, used_gpu = _sum_container_requests(
        pod.spec.containers
    )

    # NOTE: the result is keyed by pod name only, so two gated pods with the
    # same name in different namespaces overwrite each other. The key is kept
    # as-is because callers index the result by pod name.
    pods_to_schedule[pod_name] = {
        'name': pod_name,
        'namespace': pod_namespace,
        'index': pod_index,
        'cpu': used_cpu,
        'memory': used_memory,
        'gpu': used_gpu,
        'node_selector': pod.spec.node_selector,
        'spec': pod.spec,
        'metadata': pod.metadata,
        'job_name': job_name,
        'creation_time': creation_time,
    }

    print(
        f'Found schedulable pod: {pod_namespace}/{pod_name}, CPU:'
        f' {used_cpu}, Memory: {used_memory}, GPU: {used_gpu}'
        f' Index: {pod_index}'
    )

  return pods_to_schedule