def convert_dataset()

in scripts/convert_all_datasets.py


def convert_dataset(filepath, extend_with, vocab):
    """Tokenize the dataset, pickle the splits and report test-set coverage.

    Relies on module-level state set up by the surrounding script:
    `texts`, `labels`, `data`, `dset`, `maxlen`, the `format_pickle` and
    `coverage` helpers, and the `sys`/`pickle`/`SentenceTokenizer` imports.
    """
    print('-- Generating {} '.format(filepath))
    sys.stdout.flush()
    st = SentenceTokenizer(vocab, maxlen)
    # Split into train/val/test using the indices stored in the raw dataset,
    # optionally extending the vocabulary with up to `extend_with` dataset-specific tokens.
    tokenized, dicts, _ = st.split_train_val_test(texts,
                                                  labels,
                                                  [data['train_ind'],
                                                   data['val_ind'],
                                                   data['test_ind']],
                                                  extend_with=extend_with)
    pick = format_pickle(dset, tokenized[0], tokenized[1], tokenized[2],
                         dicts[0], dicts[1], dicts[2])
    # Pickles must be written in binary mode ('wb'), not text mode.
    with open(filepath, 'wb') as f:
        pickle.dump(pick, f)
    cover = coverage(tokenized[2])

    print('     done. Coverage: {}'.format(cover))
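
For context, a minimal sketch of how this function might be invoked from the surrounding conversion script follows. The vocabulary path, output paths and `extend_with` values are illustrative assumptions, not taken from the source; `texts`, `labels`, `data`, `dset` and `maxlen` are expected to already exist as module-level globals when `convert_dataset` runs.

import json

# Load the pretrained vocabulary (path is an assumed example).
with open('model/vocabulary.json', 'r') as f:
    vocab = json.load(f)

# Hypothetical calls: one pickle restricted to the pretrained vocabulary,
# one extending it with up to 10000 dataset-specific tokens.
convert_dataset('datasets/example/own_vocab.pickle', extend_with=0, vocab=vocab)
convert_dataset('datasets/example/extended_vocab.pickle', extend_with=10000, vocab=vocab)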