in smallpond/execution/scheduler.py [0:0]
def update_state(self, current_epoch: int) -> bool:
    """Re-evaluate this executor's state against the current probe epoch.

    The rules below are tried in strict priority order; only the first rule
    whose condition holds is considered, even if it ends up changing nothing:

    1. ``STOPPED`` is never left by this method.
    2. More than ``self.ctx.max_num_missed_probes`` epochs have elapsed since
       the last acknowledged probe -> ``FAIL``.
    3. Already ``STOPPING`` -> ``STOPPED`` once the stop request is acked.
    4. A stop request has been sent -> ``STOPPING``.
    5. The last acknowledged probe reports ``resource_low`` -> ``RESOURCE_LOW``.
    6. The last acknowledged probe reports ``WorkStatus.SUCCEED`` -> ``GOOD``.

    Args:
        current_epoch: Epoch number to compare with
            ``self.last_acked_probe.epoch``.

    Returns:
        ``True`` if ``self.state`` was changed by this call, ``False`` if the
        executor stays in the state it already had.
    """
    num_missed_probes = current_epoch - self.last_acked_probe.epoch

    # Rule 1: a stopped executor is left alone.
    if self.state == ExecutorState.STOPPED:
        return False

    # Rule 2: too many unanswered probes. This takes precedence over every
    # remaining rule, including an in-flight stop request.
    if num_missed_probes > self.ctx.max_num_missed_probes:
        if self.state == ExecutorState.FAIL:
            return False
        # State is assigned before logging so that `{self}` is formatted
        # after the transition, exactly as before.
        self.state = ExecutorState.FAIL
        logger.error(f"find failed executor: {self}, missed probes: {num_missed_probes}, current epoch: {current_epoch}")
        return True

    # Rule 3: a stop is in progress; it completes only when acknowledged.
    if self.state == ExecutorState.STOPPING:
        if not self.stop_request_acked:
            return False
        self.state = ExecutorState.STOPPED
        logger.info(f"find stopped executor: {self}")
        return True

    # Rule 4: a stop request went out but the state does not reflect it yet.
    if self.stop_request_sent:
        if self.state == ExecutorState.STOPPING:
            return False
        self.state = ExecutorState.STOPPING
        return True

    # Rule 5: the executor reported that it is running low on resources.
    if self.last_acked_probe.resource_low:
        if self.state == ExecutorState.RESOURCE_LOW:
            return False
        self.state = ExecutorState.RESOURCE_LOW
        logger.warning(f"find low-resource executor: {self}")
        return True

    # Rule 6: the last acknowledged probe succeeded.
    if self.last_acked_probe.status == WorkStatus.SUCCEED:
        if self.state == ExecutorState.GOOD:
            return False
        self.state = ExecutorState.GOOD
        logger.info(f"find working executor: {self}")
        return True

    # No rule matched: nothing changed.
    return False