in benchmark/benchmark_pytext_vocab.py [0:0]
def benchmark_experimental_vocab():
    """Benchmark construction and lookup for three vocabulary implementations.

    Steps, in order:

    1. Load the AG_NEWS training split and map every token id of every
       sample back to its string through ``vocab.itos``, collecting both a
       flat token list and one token list per sample.
    2. Build a frequency-sorted vocabulary from the flat token list.
    3. Time the construction of ``PytextVocabulary``,
       ``PytextScriptVocabulary`` and ``ExperimentalScriptVocabulary``; the
       latter two are additionally compiled with ``torch.jit.script``
       (outside the timed region).
    4. Hand each vocabulary to ``_run_benchmark_lookup`` and each scripted
       vocabulary to ``_run_benchmark_lookup_jit_for_loop`` with three
       inputs: the flat token list, that list wrapped in a single-element
       list, and the per-sample token lists.

    All results are written to stdout; nothing is returned.
    """
    (train,) = AG_NEWS(data_select='train')
    vocab = train.get_vocab()
    itos = vocab.itos

    # Decode the dataset back into string tokens.
    tokens: List[str] = []
    tokens_lists: List[List[str]] = []
    for _, text in train:
        sample_tokens = [itos[token_id] for token_id in text.tolist()]
        tokens_lists.append(sample_tokens)
        tokens.extend(sample_tokens)

    print("Tokens size:", len(tokens))
    print("Tokens list size:", len(tokens_lists))

    # (token, count) pairs, most frequent first; ties keep first-seen order.
    sorted_by_freq_tuples = Counter(tokens).most_common()
    # The list-based vocabularies get "<unk>" prepended at index 0.
    vocab_list = ["<unk>"] + [token for token, _ in sorted_by_freq_tuples]
    ordered_dict = OrderedDict(sorted_by_freq_tuples)

    # --- Construction timings -------------------------------------------
    print("Pytext Vocabulary")
    start = time.monotonic()
    pytext_vocab = PytextVocabulary(vocab_list)
    print("Construction time:", time.monotonic() - start)

    print("Pytext Script Vocabulary")
    start = time.monotonic()
    pytext_script_vocab = PytextScriptVocabulary(vocab_list)
    print("Construction time:", time.monotonic() - start)
    # Scripting happens after the timing print, so it is not measured.
    jit_pytext_script_vocab = torch.jit.script(pytext_script_vocab)

    print("Experimental Script Vocabulary")
    start = time.monotonic()
    experimental_script_vocab = ExperimentalScriptVocabulary(ordered_dict, unk_token="<unk>")
    print("Construction time:", time.monotonic() - start)
    jit_experimental_script_vocab = torch.jit.script(experimental_script_vocab)

    # --- Lookup benchmarks (eager objects first, then scripted ones) ----
    lookup_cases = (
        ("Pytext Vocabulary - Eager Mode", pytext_vocab),
        ("Pytext ScriptVocab - Eager Mode", pytext_script_vocab),
        ("Experimental ScriptVocab - Eager Mode", experimental_script_vocab),
        ("Pytext ScriptVocab - Jit Mode", jit_pytext_script_vocab),
        ("Experimental ScriptVocab - Jit Mode", jit_experimental_script_vocab),
    )
    for title, candidate in lookup_cases:
        print(title)
        # Flat list, flat list wrapped as a single batch, per-sample lists.
        for batch in (tokens, [tokens], tokens_lists):
            _run_benchmark_lookup(batch, candidate)

    # --- Lookup benchmarks through the JIT for-loop helper ---------------
    jit_loop_cases = (
        ("Pytext ScriptVocab - Jit For Loop", jit_pytext_script_vocab),
        ("Experimental ScriptVocab - Jit For Loop", jit_experimental_script_vocab),
    )
    for title, candidate in jit_loop_cases:
        print(title)
        for batch in (tokens, [tokens], tokens_lists):
            _run_benchmark_lookup_jit_for_loop(batch, candidate)