in gpudirect-tcpxo/topology-scheduler/schedule-daemon.py
def schedule_pod_with_gate(v1, pods, gate):
  """Schedules all pods held by `gate`, one job at a time, oldest job first."""
  pods_to_schedule = find_schedulable_pods(pods, gate)
  nodes = v1.list_node().items
  print(f'Pods to schedule: {len(pods_to_schedule)}')
  # Group the gated pods into jobs so each job is placed as a unit.
  jobs = split_pods_based_on_jobs(pods_to_schedule.values())
  sorted_jobs = sorted(jobs, key=sort_jobs_by_time)
  for job in sorted_jobs:
    job_name = job[0].get('job_name')
    creation_time = job[0].get('creation_time')
    print(f'Attempting to schedule job: {job_name} created: {creation_time}')
    # Only consider nodes whose taints every pod in the job tolerates.
    tolerated_taints = get_pods_taint_toleration(job)
    nodes_to_schedule = find_schedulable_nodes(nodes, pods, tolerated_taints)
    # Sort pods and nodes consistently so pod order lines up with topology order.
    sorted_pods = sorted(job, key=pod_sorting_key)
    sorted_nodes = sorted(nodes_to_schedule.values(), key=node_topology_key)
    print(f'Nodes to schedule: {len(nodes_to_schedule)}')
    best_assignment = calculate_pods_assignment(sorted_nodes, sorted_pods)
    if not best_assignment:
      print(
          f'No scheduling for job: {job_name} with gate {gate} has been found.'
          ' Skipping job.'
      )
      continue
    print(f'Assignment found, scheduling {job_name} with {len(sorted_pods)} pods.')
    # Place each pod on the node chosen for it by the assignment.
    for i, pod in enumerate(sorted_pods):
      node = sorted_nodes[best_assignment[i]]
      schedule_pod_on_node(v1, pod['name'], pod['namespace'], node, gate)
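
For context, here is a minimal sketch of how this function might be driven from the daemon's polling loop, assuming the standard `kubernetes` Python client. The gate name, poll interval, and `main` wiring below are illustrative assumptions, not taken from schedule-daemon.py:

# Hypothetical driver loop (sketch only): polls the API server and hands
# gated pods to schedule_pod_with_gate. SCHEDULING_GATE and the 10s poll
# interval are assumed values, not the file's actual configuration.
import time

from kubernetes import client, config

SCHEDULING_GATE = 'example.com/topology-scheduling'  # assumed gate name


def main():
  # Use load_kube_config() instead when running outside the cluster.
  config.load_incluster_config()
  v1 = client.CoreV1Api()
  while True:
    # list_pod_for_all_namespaces() returns a V1PodList; .items is the pod list.
    pods = v1.list_pod_for_all_namespaces().items
    schedule_pod_with_gate(v1, pods, SCHEDULING_GATE)
    time.sleep(10)


if __name__ == '__main__':
  main()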