in preprocess/_build_gym.py [0:0]
def md5_verify(args):
    """Check every expected ``.tsv`` file under ``args.output_dir`` against MD5SUM.

    Each subdirectory below ``args.output_dir`` is listed; for every ``.tsv``
    file whose name is a key of ``MD5SUM``, the value returned by ``get_md5``
    for that file is compared with ``MD5SUM[filename]``. Progress and a final
    summary are printed to stdout; nothing is returned.

    The summary reports inconsistent files and missing files independently,
    so a single run shows everything that needs to be regenerated.

    Args:
        args: Object exposing an ``output_dir`` attribute (the directory tree
            to verify).
    """
    failed = []
    flags = dict.fromkeys(MD5SUM, False)

    # The root directory sorts first (it is a prefix of every other walked
    # path), so the [1:] slice below skips it: only files that live inside
    # subdirectories are verified.
    subdirectories = sorted(dirpath for dirpath, _, _ in os.walk(args.output_dir))
    for subdirectory in subdirectories[1:]:
        print("Verifying {}".format(subdirectory))
        for filename in os.listdir(subdirectory):
            if not filename.endswith(".tsv"):
                continue
            if filename not in MD5SUM:
                print("Unexpected file ``{}``".format(filename))
                continue
            md5sum = get_md5(os.path.join(subdirectory, filename))
            if md5sum != MD5SUM[filename]:
                print("{} is not consistent ...".format(filename))
                failed.append(filename)
            else:
                flags[filename] = True

    print("\n===== Verification Finished =====")

    # A file that failed its check also has a False flag; exclude it here so
    # "missing" only lists files that were never found with a matching checksum
    # and were not already reported as inconsistent.
    inconsistent = set(failed)
    missing_files = [
        name for name, verified in flags.items()
        if not verified and name not in inconsistent
    ]

    if not failed and not missing_files:
        print("[Success] All files are consistent.")
        return

    # Report both categories independently: previously, missing files were
    # silently omitted whenever at least one file was inconsistent.
    if failed:
        print("[Failed] Some files are not consistent. \nPlease try re-running individual scripts for these tasks:\n{}".format(failed))
    if missing_files:
        print("[Failed] Some files are missing. \nPlease try re-running individual scripts for these tasks:")
        print(missing_files)