in cluster-trace-gpu-v2020/simulator/cluster.py [0:0]
def tic_job(self, delta=1):
    """Advance the cluster clock by ``delta`` and update job state.

    The method adds ``delta`` to ``self.cur_time``, optionally records
    cluster utilisation, asks ``retrieve_job_from_full_list()`` to refresh
    ``self.job_list``, and then takes exactly one of four paths:

    1. Jobs are running: every job in ``self.job_runn_list`` gets its
       ``'on_time'`` and ``'progress'`` advanced.  Jobs that reach their
       ``'duration'`` are clamped to it, marked ``'done'``, released from
       their host node, given a ``'jct'`` and added to ``self.job_history``.
    2. Nothing is running but ``self.job_list`` is non-empty: the idle
       counter is bumped and the pending jobs are logged.
    3. Nothing is running or pending but ``self.job_full_list`` is
       non-empty: the clock is fast-forwarded to one ``delta`` before the
       ``'submit_time'`` of ``self.job_full_list[-1]``.
    4. Nothing is left at all: the simulator is told to exit.

    Unlike ``tic_svc()`` (per the original author's note), this method
    reports a ``cur_time`` value back to the simulator.

    Args:
        delta: Amount of simulated time to advance by.  Values greater
            than 1 are supported; a job finishing mid-stride has the
            overshoot deducted from its bookkeeping.

    Returns:
        ``self.cur_time`` while the simulation should keep going
        (exit_flag = 0), or ``-1`` when there is no running, pending or
        upcoming job (exit_flag = 1).

    Raises:
        AssertionError: if the host node fails to release a finished job,
            or if the clock is already past the ``'submit_time'`` of
            ``self.job_full_list[-1]`` in the fast-forward path.
    """
    self.cur_time += delta
    if self.export_cluster_util and self.cur_time % 10000 == 0:
        self.record_cluster_util()
    self.retrieve_job_from_full_list()  # update self.job_list

    job_runn_list = self.job_runn_list
    if len(job_runn_list) > 0:
        for job in job_runn_list:
            job['on_time'] += delta
            job['progress'] = job['on_time'] * job['num_gpu']

            if job['on_time'] < job['duration']:
                continue  # still running

            # Job done logic.  The overshoot is non-zero only if delta > 1;
            # it is removed so on_time/progress/jct reflect the exact
            # completion instant rather than the end of the stride.
            over_tic_time = job['on_time'] - job['duration']
            job['on_time'] -= over_tic_time
            job['progress'] -= over_tic_time * job['num_gpu']
            job['done'] = 1

            host_node = self.node_dict.get(job['node'])
            suc = host_node.release_job(job=job)
            assert suc

            # Completion time measured from submission (deduct submit_time).
            job['jct'] = self.cur_time - over_tic_time - job['submit_time']
            self.job_history.add_done_job(job)
            print_fn("%sDONE: %s || %s" % (self.log_prefix, _repr_job_done(job), job))
        return self.cur_time  # exit_flag = 0, still going

    # From here on: len(job_runn_list) <= 0.
    if len(self.job_list) > 0:  # empty cluster with job pending
        self.idle_cluster_counter += 1
        # Build the summary once; it feeds both log lines below.
        pending_repr = [_repr_job_preempt(e) for e in self.job_list]
        print_fn("%sIDLE cluster until jobs: %s" % (self.log_prefix, pending_repr))
        if self.idle_cluster_counter % 10000 == 0:
            print_fn('{} idle cluster: {}'.format(self.idle_cluster_counter, pending_repr), level=2)
        return self.cur_time  # exit_flag = 0, still going

    if len(self.job_full_list) > 0:  # empty cluster waiting for jobs to come
        # job_full_list[-1] is treated as the earliest upcoming job (per the
        # original author's note; the ordering is not visible in this block).
        next_submit = self.job_full_list[-1]['submit_time']
        assert self.cur_time <= next_submit, \
            "clock is already past the next job's submit_time"
        wake_time = next_submit - delta
        # With delta > 1 the next arrival can fall inside the stride that was
        # just taken, which makes wake_time < cur_time.  Never move the clock
        # backwards: only jump when it actually skips idle time.  If equal,
        # the stride is simply unnecessary.
        if wake_time > self.cur_time:
            self.cur_time = wake_time
        return self.cur_time  # exit_flag = 0, still going

    # No running job, no pending job, no coming job => exit.
    return -1