in floresv1/scripts/train.py [0:0]
def train(src, tgt, train_config, savedir, databin):
# Expects train_config to contain 'hyperparameters', 'src', 'tgt', 'databin', and 'gpu' (the 'gpu' entry is read below to size the GPU set).
os.makedirs(savedir, exist_ok=True)
logpath = os.path.join(savedir, 'train.log')
checkpoint = os.path.join(savedir, 'checkpoint_best.pt')
if check_last_line(logpath, 'done') and os.path.exists(checkpoint):
print(f"Training is finished. Best checkpoint: {checkpoint}")
return
cuda_visible_devices = list(range(torch.cuda.device_count()))
num_visible_gpu = len(cuda_visible_devices)
num_gpu = min(train_config['gpu'], 2**int(math.log2(num_visible_gpu)))
cuda_devices_clause = f"CUDA_VISIBLE_DEVICES={','.join([str(i) for i in cuda_visible_devices[:num_gpu]])}"
update_freq = train_config['gpu'] / num_gpu
call(f"""{cuda_devices_clause} fairseq-train {databin} \