# batched_f1_score()
# from: src/utils/str.py


def batched_f1_score(prediction_list, ground_truth_list, term='f1score', show_progress=True, prediction_counter_list=None):
    """Compute pairwise unigram precision/recall/F1 between predictions and ground truths.

    Every prediction is scored against every ground truth on overlapping
    unigram counts, yielding an (n, m) matrix where n = len(predictions)
    and m = len(ground truths).

    Args:
        prediction_list: iterable of prediction strings.
        ground_truth_list: iterable of ground-truth strings.
        term: which matrix to return — 'precision', 'recall', or 'f1score'.
        show_progress: wrap iterations in tqdm progress bars when True.
        prediction_counter_list: optional precomputed unigram Counters for the
            predictions (the second return value of an earlier call); when
            given, prediction_list is not re-tokenized.

    Returns:
        Tuple of (scores, prediction_counter_list): the (n, m) numpy matrix
        selected by `term`, plus the prediction Counters for reuse.
    """
    def generate_1gram(text):
        # Unigram bag-of-words after project-specific text normalization.
        return Counter(normalize_statement(text).split())

    # Tokenize predictions only when no precomputed counters were supplied.
    if prediction_counter_list is None:
        if show_progress:
            prediction_list = tqdm(prediction_list, desc='processing prediction')
        prediction_counter_list = [generate_1gram(prediction) for prediction in prediction_list]

    if show_progress:
        ground_truth_list = tqdm(ground_truth_list, desc='processing ground-truth')
    ground_truth_counter_list = [generate_1gram(ground_truth) for ground_truth in ground_truth_list]

    n, m = len(prediction_counter_list), len(ground_truth_counter_list)
    res_dict = {k: np.zeros((n, m)) for k in ['precision', 'recall', 'f1score']}

    # Hoist the loop-invariant token totals out of the O(n*m) inner loop;
    # the original recomputed both sums for every (i, j) pair.
    ground_truth_totals = [sum(counter.values()) for counter in ground_truth_counter_list]

    pbar = tqdm(prediction_counter_list, desc='rouge computing') if show_progress else prediction_counter_list
    for i, prediction_counter in enumerate(pbar):
        prediction_total = sum(prediction_counter.values())
        for j, ground_truth_counter in enumerate(ground_truth_counter_list):
            # Counter intersection keeps element-wise minimum counts, i.e.
            # the number of overlapping unigram occurrences.
            common = prediction_counter & ground_truth_counter
            num_same = sum(common.values())

            # num_same > 0 implies both totals > 0, so divisions are safe;
            # otherwise all three scores keep their zero defaults.
            if num_same != 0:
                precision = num_same / prediction_total
                recall = num_same / ground_truth_totals[j]
                f1 = (2 * precision * recall) / (precision + recall)

                res_dict['precision'][i, j] = precision
                res_dict['recall'][i, j] = recall
                res_dict['f1score'][i, j] = f1

    return res_dict[term], prediction_counter_list