scripts/convert_all_datasets.py
def convert_dataset(filepath, extend_with, vocab):
    # Tokenize the dataset and dump the train/val/test splits to a pickle.
    # Relies on module-level names defined elsewhere in this script:
    # maxlen, texts, labels, data, dset and format_pickle.
    print('-- Generating {} '.format(filepath))
    sys.stdout.flush()
    st = SentenceTokenizer(vocab, maxlen)
    tokenized, dicts, _ = st.split_train_val_test(texts,
                                                  labels,
                                                  [data['train_ind'],
                                                   data['val_ind'],
                                                   data['test_ind']],
                                                  extend_with=extend_with)
    pick = format_pickle(dset, tokenized[0], tokenized[1], tokenized[2],
                         dicts[0], dicts[1], dicts[2])
    # Pickle output needs a binary file handle.
    with open(filepath, 'wb') as f:
        pickle.dump(pick, f)
    cover = coverage(tokenized[2])
    print(' done. Coverage: {}'.format(cover))
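
For context, a minimal sketch of how this helper might be driven from the rest of the script. The vocabulary path, raw-pickle keys, maxlen, dset, the import path and the extend_with values below are illustrative assumptions, not taken from this file; convert_dataset reads texts, labels, data, dset and maxlen from module scope, and it also expects format_pickle to be defined in the same script.

# Hypothetical driver (assumed paths, keys and values).
import json
import pickle
import sys

from deepmoji.sentence_tokenizer import SentenceTokenizer, coverage  # assumed module path

maxlen = 150                      # assumed maximum sequence length
dset = 'SS-Twitter'               # assumed dataset name

with open('model/vocabulary.json', 'r') as f:
    vocab = json.load(f)

with open('raw.pickle', 'rb') as f:
    data = pickle.load(f)         # must provide 'train_ind', 'val_ind', 'test_ind'

texts = data['texts']             # assumed key layout of the raw pickle
labels = data['labels']           # assumed key layout of the raw pickle

convert_dataset('own_vocab.pickle', 0, vocab)            # keep the vocabulary as-is
convert_dataset('extended_vocab.pickle', 10000, vocab)   # extend with up to 10000 new words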