in src/sagemaker_tensorflow_container/training.py [0:0]
def _wait_until_master_is_down(master):
    """Block until ``curl`` can no longer reach the master on port 2222.

    Runs ``curl <master>:2222`` in a loop. A zero exit status is treated as
    "master still up": the function logs that and sleeps 10 seconds before
    polling again. A non-zero exit status (surfaced by ``check_call`` as
    ``subprocess.CalledProcessError``) is treated as "master down": the
    function logs that and returns.

    Args:
        master: Host name or address of the master node; it is formatted
            into the ``<master>:2222`` target passed to ``curl``.

    Returns:
        None. The call only returns once ``curl`` exits with a non-zero
        status. Any other failure to launch ``curl`` (e.g. the executable
        is missing) is not handled here and propagates to the caller.
    """
    while True:
        try:
            # Only curl's exit status matters, so its output is discarded.
            # DEVNULL is used rather than PIPE because check_call never reads
            # from the pipes: a child that writes enough output would block
            # once the OS pipe buffer filled up, hanging this loop.
            subprocess.check_call(
                ["curl", "{}:2222".format(master)],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except subprocess.CalledProcessError:
            # Non-zero exit from curl: the master is no longer answering.
            logger.info("master {} is down, stopping parameter server".format(master))
            return
        else:
            # curl succeeded, so the master is still serving; poll again later.
            logger.info("master {} is still up, waiting for it to exit".format(master))
            time.sleep(10)