def get_ranking_metrics()

in kilt/eval_retrieval.py [0:0]


def get_ranking_metrics(guess_item, gold_item, ks, rank_keys):
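    """
    Compute retrieval metrics for a single guess/gold item pair.

    Returns a dict with "Rprec" plus, for every k in ks: precision@k,
    recall@k (k > 1 only), success_rate@k (k > 1 only), answer_in_context@k
    and answer_and_ent_in_context@k, together with the entity_in_input value
    computed on the gold item. rank_keys lists the provenance fields used to
    match guessed against gold provenance (e.g. "wikipedia_id").
    """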

    Rprec = 0.0
    P_at_k = {"precision@{}".format(k): 0 for k in sorted(ks) if k > 0}
    R_at_k = {"recall@{}".format(k): 0 for k in sorted(ks) if k > 1}
    S_at_k = {"success_rate@{}".format(k): 0 for k in sorted(ks) if k > 1}
    A_at_k = {"answer_in_context@{}".format(k): 0 for k in sorted(ks) if k > 0}
    AE_at_k = {"answer_and_ent_in_context@{}".format(k): 0 for k in sorted(ks) if k > 0}

    assert (
        "output" in guess_item and len(guess_item["output"]) == 1
    ), f"guess should provide exactly one output for {guess_item['id']}"

    Rprec = rprecision(guess_item, gold_item, rank_keys=rank_keys)
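    # entity-in-input signal for the gold item, reported below as "entity_in_input"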
    eii = entity_in_input(gold_item)
    for k in ks:

        # 0. get rank
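        #    (rank: 0-based positions in the guessed provenance that match a
        #    gold evidence set; num_distinct_evidence_sets: number of distinct
        #    gold evidence sets; both as computed by get_rank)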
        rank, num_distinct_evidence_sets = get_rank(
            guess_item, gold_item, k, rank_keys=rank_keys
        )

        # P/R/S keep their initial 0 when the gold item has no evidence sets
        if num_distinct_evidence_sets > 0:

            # 1. precision
            P_at_k["precision@{}".format(k)] = _precision_at_k(rank, k)

            # 2. recall
            R_at_k["recall@{}".format(k)] = _recall_at_k(
                rank, num_distinct_evidence_sets, k
            )

            # 3. success rate
            S_at_k["success_rate@{}".format(k)] = _success_rate_at_k(rank, k)

        # 4. answer in context
        A_at_k["answer_in_context@{}".format(k)] = _answer_in_context_at_k(
            guess_item, gold_item, k
        )

        # 5. answer and entity in context
        AE_at_k[
            "answer_and_ent_in_context@{}".format(k)
        ] = _answer_and_ent_in_context_at_k(guess_item, gold_item, k)

    return {
        "Rprec": Rprec,
        **P_at_k,
        **R_at_k,
        **S_at_k,
        **A_at_k,
        **AE_at_k,
        "entity_in_input": eii,
    }
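
A minimal usage sketch, not taken from the repository: the item shapes below are an assumption based on the KILT data format (each output entry carries a "provenance" list keyed by fields such as "wikipedia_id"), and the exact fields expected by the answer/entity helpers (for instance passage "text") may differ in practice.

from kilt.eval_retrieval import get_ranking_metrics

# Illustrative gold item: one answer backed by a single provenance page.
gold_item = {
    "id": "0",
    "input": "who wrote the song hotel california",
    "output": [
        {"answer": "Don Felder", "provenance": [{"wikipedia_id": "84252"}]}
    ],
}

# Illustrative guess item: exactly one output (as the assert requires),
# with the retrieved passages in ranked order.
guess_item = {
    "id": "0",
    "output": [
        {
            "provenance": [
                {"wikipedia_id": "84252", "text": "Hotel California ... Don Felder ..."},
                {"wikipedia_id": "123", "text": "..."},
            ]
        }
    ],
}

metrics = get_ranking_metrics(
    guess_item, gold_item, ks=[1, 5], rank_keys=["wikipedia_id"]
)
# metrics["Rprec"], metrics["precision@1"], metrics["recall@5"], ...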