in bindings/python/scripts/convert.py [0:0]
def main():
pretraineds = [
"albert-base-v1",
"albert-large-v1",
"albert-xlarge-v1",
"albert-xxlarge-v1",
"albert-base-v2",
"albert-large-v2",
"albert-xlarge-v2",
"albert-xxlarge-v2",
"camembert-base",
"xlm-roberta-base",
"xlm-roberta-large",
"xlm-roberta-large-finetuned-conll02-dutch",
"xlm-roberta-large-finetuned-conll02-spanish",
"xlm-roberta-large-finetuned-conll03-english",
"xlm-roberta-large-finetuned-conll03-german",
"facebook/mbart-large-en-ro",
"facebook/mbart-large-cc25",
"xlnet-base-cased",
"xlnet-large-cased",
"google/reformer-crime-and-punishment",
"t5-small",
"google/pegasus-large",
]
parser = argparse.ArgumentParser()
parser.add_argument(
"--filename",
required=True,
type=str,
help="The filename that we are going to encode in both versions to check that conversion worked",
)
parser.add_argument(
"--models",
type=lambda s: s.split(","),
default=pretraineds,
help=f"The pretrained tokenizers you want to test against, (default: {pretraineds})",
)
args = parser.parse_args()
print(args.filename)
model_len = 50
status_len = 6
speedup_len = 8
print(f"|{'Model':^{model_len}}|{'Status':^{status_len}}|{'Speedup':^{speedup_len}}|")
print(f"|{'-'*model_len}|{'-'*status_len}|{'-'*speedup_len}|")
for pretrained in args.models:
status, speedup = check(pretrained, args.filename)
print(f"|{pretrained:<{model_len}}|{status:^{status_len}}|{speedup:^{speedup_len - 1}.2f}x|")