in tokenizers/python/tokenizepythoncorpus.py [0:0]
import keyword
from tokenize import tokenize, NAME, STRING
from typing import Any, Dict


def tokenize_file(filepath: str, all_tokens: bool = False) -> Dict[str, Any]:
    """Tokenize a Python source file and return its path with the collected tokens."""
    tokens = []
    try:
        # tokenize() expects a readline callable that yields bytes, so open in binary mode.
        with open(filepath, 'rb') as f:
            for toknum, tokval, _, _, _ in tokenize(f.readline):
                # Keep only identifiers and string literals unless all_tokens is set.
                if all_tokens or toknum in {NAME, STRING}:
                    # Skip Python keywords in either mode.
                    if not keyword.iskeyword(tokval):
                        tokens.append(tokval)
    except Exception as e:
        print('Error tokenizing %s because %s' % (filepath, e))
    return dict(filename=filepath, tokens=tokens)
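

# Hypothetical usage sketch (not part of the original module): walk a corpus
# directory and tokenize every .py file with tokenize_file() above. The corpus
# path and the JSON-lines output below are illustrative assumptions.
if __name__ == '__main__':
    import json
    import os

    corpus_root = 'corpus'  # assumed location of the Python files to tokenize
    for dirpath, _, filenames in os.walk(corpus_root):
        for name in filenames:
            if name.endswith('.py'):
                result = tokenize_file(os.path.join(dirpath, name))
                # Emit one JSON object per file (one possible output format).
                print(json.dumps(result))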