def main()

in bindings/python/scripts/convert.py


import argparse


def main():
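    # Default set of pretrained checkpoints whose tokenizers are converted and benchmarked.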
    pretraineds = [
        "albert-base-v1",
        "albert-large-v1",
        "albert-xlarge-v1",
        "albert-xxlarge-v1",
        "albert-base-v2",
        "albert-large-v2",
        "albert-xlarge-v2",
        "albert-xxlarge-v2",
        "camembert-base",
        "xlm-roberta-base",
        "xlm-roberta-large",
        "xlm-roberta-large-finetuned-conll02-dutch",
        "xlm-roberta-large-finetuned-conll02-spanish",
        "xlm-roberta-large-finetuned-conll03-english",
        "xlm-roberta-large-finetuned-conll03-german",
        "facebook/mbart-large-en-ro",
        "facebook/mbart-large-cc25",
        "xlnet-base-cased",
        "xlnet-large-cased",
        "google/reformer-crime-and-punishment",
        "t5-small",
        "google/pegasus-large",
    ]
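    # --filename points at a text file to encode with both tokenizer versions;
    # --models takes a comma-separated list and defaults to the full set above.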
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--filename",
        required=True,
        type=str,
        help="The filename that we are going to encode in both versions to check that conversion worked",
    )
    parser.add_argument(
        "--models",
        type=lambda s: s.split(","),
        default=pretraineds,
        help=f"The pretrained tokenizers you want to test against, (default: {pretraineds})",
    )
    args = parser.parse_args()

    print(args.filename)

    model_len = 50
    status_len = 6
    speedup_len = 8
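    # Print a Markdown-style table header with fixed column widths.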
    print(f"|{'Model':^{model_len}}|{'Status':^{status_len}}|{'Speedup':^{speedup_len}}|")
    print(f"|{'-'*model_len}|{'-'*status_len}|{'-'*speedup_len}|")
    for pretrained in args.models:
        status, speedup = check(pretrained, args.filename)
        print(f"|{pretrained:<{model_len}}|{status:^{status_len}}|{speedup:^{speedup_len - 1}.2f}x|")