in src/utils/str.py [0:0]
from collections import Counter

import numpy as np
from tqdm import tqdm


def batched_f1_score(prediction_list, ground_truth_list, term='f1score', show_progress=True, prediction_counter_list=None):
    """Compute pairwise unigram precision/recall/F1 between every prediction and every ground truth.

    Returns the (n, m) score matrix selected by `term` along with the prediction counters,
    which can be passed back in via `prediction_counter_list` to skip re-tokenizing the predictions.
    """
    def generate_1gram(text):
        # normalize_statement is a text-cleaning helper defined elsewhere in this module.
        return Counter(normalize_statement(text).split())

    if prediction_counter_list is None:
        if show_progress:
            prediction_list = tqdm(prediction_list, desc='processing prediction')
        prediction_counter_list = [generate_1gram(prediction) for prediction in prediction_list]
    if show_progress:
        ground_truth_list = tqdm(ground_truth_list, desc='processing ground-truth')
    ground_truth_counter_list = [generate_1gram(ground_truth) for ground_truth in ground_truth_list]

    n, m = len(prediction_counter_list), len(ground_truth_counter_list)
    res_dict = {k: np.zeros((n, m)) for k in ['precision', 'recall', 'f1score']}
    pbar = tqdm(prediction_counter_list, desc='rouge computing') if show_progress else prediction_counter_list
    for i, prediction_counter in enumerate(pbar):
        for j, ground_truth_counter in enumerate(ground_truth_counter_list):
            # Overlap of unigram counts between prediction i and ground truth j.
            common = prediction_counter & ground_truth_counter
            num_same = sum(common.values())
            if num_same != 0:
                precision = 1.0 * num_same / sum(prediction_counter.values())
                recall = 1.0 * num_same / sum(ground_truth_counter.values())
                f1 = (2 * precision * recall) / (precision + recall)
                res_dict['precision'][i, j] = precision
                res_dict['recall'][i, j] = recall
                res_dict['f1score'][i, j] = f1
    return res_dict[term], prediction_counter_list
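

# Example usage (a minimal sketch; the strings and variable names below are illustrative,
# and normalize_statement is assumed to perform basic lowercasing/cleanup of the text):
#
#     predictions = ['the cat sat on the mat', 'a dog barked']
#     references = ['the cat is on the mat', 'the dog barked loudly']
#     f1_matrix, pred_counters = batched_f1_score(predictions, references, show_progress=False)
#     # f1_matrix has shape (2, 2); f1_matrix[i, j] is the unigram F1 of prediction i vs. reference j.
#
#     # Reuse the cached prediction counters when scoring the same predictions again:
#     recall_matrix, _ = batched_f1_score(predictions, references, term='recall',
#                                         show_progress=False, prediction_counter_list=pred_counters)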