def md5_verify()

in preprocess/_build_gym.py [0:0]


def md5_verify(args):
    """Check the ``.tsv`` files under ``args.output_dir`` against ``MD5SUM``.

    Every sub-directory below ``args.output_dir`` is scanned (the top-level
    directory itself is skipped). For each ``.tsv`` file whose name is a key
    of ``MD5SUM``, the value returned by ``get_md5`` is compared with the
    expected checksum. Progress and a final summary are printed; nothing is
    returned.

    The summary reports inconsistent files and missing files independently,
    so a single run shows everything that has to be regenerated.

    Args:
        args: Object exposing an ``output_dir`` attribute that names the root
            directory to verify.
    """
    failed = []
    verified = set()

    # Sort by directory path: the root is a prefix of every other path, so it
    # sorts first and can be dropped with the [1:] slice below.
    walked = sorted(os.walk(args.output_dir), key=lambda entry: entry[0])
    for subdirectory, _, filenames in walked[1:]:
        print("Verifying {}".format(subdirectory))

        # os.walk already separated files from directories; sorting the names
        # keeps the log and the ``failed`` list deterministic.
        for filename in sorted(filenames):
            if not filename.endswith(".tsv"):
                continue

            if filename not in MD5SUM:
                print("Unexpected file ``{}``".format(filename))
                continue

            md5sum = get_md5(os.path.join(subdirectory, filename))
            if md5sum != MD5SUM[filename]:
                print("{} is not consistent ...".format(filename))
                failed.append(filename)
            else:
                verified.add(filename)

    # A file counts as missing only if it was never seen at all; files that
    # were found with a wrong checksum are reported as inconsistent instead.
    inconsistent = set(failed)
    missing_files = [
        k for k in MD5SUM if k not in verified and k not in inconsistent
    ]

    print("\n===== Verification Finished =====")
    if not failed and not missing_files:
        print("[Success] All files are consistent.")
        return

    if failed:
        print("[Failed] Some files are not consistent. \nPlease try re-running individual scripts for these tasks:\n{}".format(failed))
    if missing_files:
        print("[Failed] Some files are missing. \nPlease try re-running individual scripts for these tasks:")
        print(missing_files)