in dora/executor.py [0:0]
def __exit__(self, exc_type, exc_value, traceback):
    """Supervise the spawned workers until all finish or one of them fails.

    Each entry of ``self.children`` is polled in turn with a short timeout
    and dropped from the list once it has exited. Polling stops as soon as
    the list is empty or ``self.failed`` is set. ``self.failed`` is set when
    the ``with`` body raised, when a worker returns a non-zero exit code, or
    when a ``KeyboardInterrupt`` arrives while polling. Every worker still
    listed in ``self.children`` afterwards receives ``terminate()``.

    Args:
        exc_type: exception class raised in the ``with`` body, if any (unused).
        exc_value: exception raised in the ``with`` body, or None.
        traceback: traceback of that exception (unused).

    Returns:
        None, so an exception raised in the ``with`` body is not suppressed.
    """
    # NOTE(review): workers look like `subprocess.Popen` objects carrying an
    # extra `rank` attribute (given `sp.TimeoutExpired`) - confirm at the
    # place where `self.children` is filled.
    if exc_value is not None:
        log(f"An exception happened while starting workers {exc_value}")
        self.failed = True

    try:
        while True:
            if not self.children or self.failed:
                break
            # Walk over a snapshot: finished workers are removed from the
            # live list inside the loop.
            for worker in list(self.children):
                try:
                    status = worker.wait(0.05)
                except sp.TimeoutExpired:
                    # Still running, move on to the next worker.
                    continue
                self.children.remove(worker)
                if status:
                    # The current pass still visits the remaining workers;
                    # the outer loop stops on its next check.
                    log(f"Worker {worker.rank} died, killing all workers")
                    self.failed = True
    except KeyboardInterrupt:
        log("Received keyboard interrupt, trying to kill all workers.")
        self.failed = True

    # Anything left here was not seen exiting; on the success path the list
    # is empty and this loop does nothing.
    for worker in self.children:
        worker.terminate()

    if self.failed:
        return
    log("All workers completed successfully")