in tseval/qats.py [0:0]
def extract_data_from_dataframe(df, aspect, tokenize=True):
    """Extract sentence pairs and integer labels for one aspect of ``df``.

    Args:
        df: Table indexable by column name. It must provide the columns
            'Original' and 'Simplified', plus the column selected by
            ``aspect`` ('G', 'M', 'S' or 'Overall'). Presumably a pandas
            DataFrame -- only ``df[...]`` and ``.values`` are used here.
        aspect: One of the names listed in ``ASPECTS``; it is translated to
            the matching column name of ``df``.
        tokenize: When true, ``nist_tokenize`` is applied to every sentence.

    Returns:
        A ``(sentences, labels)`` tuple. ``sentences`` is the string array
        built from the 'Original' and 'Simplified' columns (tokenized if
        requested). ``labels`` is an array with one value per row of the
        aspect column, where 'bad' -> 0, 'ok' -> 50 and 'good' -> 100.

    Raises:
        AssertionError: If ``aspect`` is not in ``ASPECTS``.
        KeyError: If the aspect column holds a label other than 'bad',
            'ok' or 'good'.
    """
    # NOTE: kept as an assert so the exception type seen by callers does not
    # change; be aware that asserts are stripped when running with -O.
    assert aspect in ASPECTS, f'Aspect must be one of: {", ".join(ASPECTS)}'
    column = {
        'grammaticality': 'G',
        'meaning_preservation': 'M',
        'simplicity': 'S',
        'overall': 'Overall',
    }[aspect]
    label_to_int = {'bad': 0, 'ok': 50, 'good': 100}
    sentences = df[['Original', 'Simplified']].values.astype(str)
    # np.vectorize needs a first element to infer its output type and raises
    # ValueError on size-0 input, so an empty table is passed through as is.
    if tokenize and sentences.size > 0:
        sentences = np.vectorize(nist_tokenize)(sentences)
    labels = np.array([label_to_int[label] for label in df[column].values])
    return sentences, labels