in tokenizers/python/baronetokenizer.py [0:0]
def tokenize_all(input_file):
    """Yield one record per tokenizable line of *input_file*.

    Each record is a dict with a synthetic ``filename`` key of the form
    ``dummyfile<i>`` (``i`` is the 0-based line number) and the line's
    ``tokens`` as produced by ``tokenize_line``. Lines whose tokenization
    comes back empty are skipped entirely. Progress is reported via
    ``tqdm``.

    :param input_file: path to a text file read line by line.
    :returns: generator of ``{'filename': str, 'tokens': list}`` dicts.
    """
    with open(input_file) as f:
        for i, line in tqdm(enumerate(f)):
            tokens = tokenize_line(line)
            # Skip lines that produce no tokens (presumably blank or
            # whitespace-only lines — depends on tokenize_line; confirm).
            if not tokens:
                continue
            yield dict(filename=f'dummyfile{i}', tokens=tokens)