# Source: build_obelics/03_parallel_dl_images_create_dataset.py
import os
import signal
import subprocess
import sys
import numpy as np
# Per-machine driver: given a machine index on the command line, run the
# image-download stage and then the second stage of
# dl_images_create_dataset.py for every job index assigned to that machine,
# appending progress lines to PATH_LOG.

IDX_REMAINING = list(range(200))
NUM_MACHINES = 21
PATH_LOG = "/scratch/log.txt"
# Wall-clock budget for the download-only stage of a single job.
DOWNLOAD_TIMEOUT_SECONDS = 2 * 60 * 60
# How long a timed-out download gets to exit after SIGTERM before SIGKILL.
TERMINATE_GRACE_SECONDS = 60


def job_indices_for_machine(idx_machine):
    """Return the job indices assigned to machine ``idx_machine``.

    ``IDX_REMAINING`` is split into ``NUM_MACHINES`` contiguous, near-equal
    chunks with ``np.array_split``; the chunk at position ``idx_machine`` is
    returned as a list of plain Python ints.

    Raises:
        IndexError: if ``idx_machine`` is not a valid chunk position.
    """
    chunks = np.array_split(IDX_REMAINING, NUM_MACHINES)
    return chunks[idx_machine].tolist()


def append_log(message):
    """Append ``message`` verbatim to the log file at ``PATH_LOG``."""
    with open(PATH_LOG, "a") as log_file:
        log_file.write(message)


def signal_process_group(pgid, sig):
    """Send ``sig`` to process group ``pgid``, ignoring an already-gone group."""
    try:
        os.killpg(pgid, sig)
    except ProcessLookupError:
        # Every process in the group exited between the timeout and the
        # signal; there is nothing left to stop.
        pass


def run_download_with_timeout(idx):
    """Run the download-only stage for job ``idx`` with a time limit.

    The command is started in its own session so that it and any processes
    it spawns can be signalled together as one process group. If it has not
    finished after ``DOWNLOAD_TIMEOUT_SECONDS`` the group receives SIGTERM;
    if the child is still alive ``TERMINATE_GRACE_SECONDS`` later the group
    receives SIGKILL. The child is always waited on before returning, so it
    is reaped and no longer running when the caller starts the next stage.
    """
    p = subprocess.Popen(
        f"python3 m4/sourcing/data_collection/callers/dl_images_create_dataset.py {idx} --download_only 1",
        shell=True,
        # Equivalent to preexec_fn=os.setsid, without running Python code
        # between fork and exec.
        start_new_session=True,
    )
    try:
        p.wait(DOWNLOAD_TIMEOUT_SECONDS)
    except subprocess.TimeoutExpired:
        # The child is a session leader, so its pid is also its
        # process-group id.
        signal_process_group(p.pid, signal.SIGTERM)
        try:
            p.wait(TERMINATE_GRACE_SECONDS)
        except subprocess.TimeoutExpired:
            signal_process_group(p.pid, signal.SIGKILL)
            p.wait()


def main():
    """Process every job index assigned to the machine named in ``sys.argv[1]``.

    For each job: log the start, empty ``/var/log/syslog``, run the
    download-only stage (time-limited), log, run the second stage, log.
    Exit statuses of the external commands are not checked, so a failing
    job does not stop the remaining ones.
    """
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} IDX_MACHINE")
    idx_machine = int(sys.argv[1])

    for idx in job_indices_for_machine(idx_machine):
        append_log(f"Starting job {idx}\n")
        # NOTE(review): presumably keeps the system log from filling the disk
        # during long downloads -- confirm.
        os.system("sudo truncate -s 0 /var/log/syslog")
        run_download_with_timeout(idx)
        # Written whether the download finished or was stopped by the timeout.
        append_log(f"{idx} done with download only\n")
        # Second stage; judging by the log message below this builds the image
        # dataset from what was downloaded -- the meaning of `--U 1` is defined
        # by the callee.
        os.system(f"python3 m4/sourcing/data_collection/callers/dl_images_create_dataset.py {idx} --U 1")
        append_log(f"{idx} done with create image dataset only\n")


if __name__ == "__main__":
    main()