in lama/evaluation_metrics.py
import numpy as np
import torch


def get_ranking(log_probs, masked_indices, vocab, label_index=None,
                index_list=None, topk=1000, P_AT=10, print_generation=True):
    """Rank the gold label among the top-k predictions at the masked position
    and return MRR, P@P_AT, P@1, and the label's log-probability."""
    experiment_result = {}
    # Slice the log-probabilities at the masked position and keep the top-k.
    log_probs, index_max_probs, value_max_probs = __max_probs_values_indices(
        masked_indices, log_probs, topk=topk
    )
    result_masked_topk, return_msg = __print_top_k(
        value_max_probs, index_max_probs, vocab, topk, index_list
    )
    experiment_result['topk'] = result_masked_topk
    if print_generation:
        print(return_msg)
    MRR = 0.
    P_AT_X = 0.
    P_AT_1 = 0.
    PERPLEXITY = None
    if label_index is not None:
        # If a vocab subset is used, convert label_index to its position in it.
        if index_list is not None:
            label_index = index_list.index(label_index)
        # Find where the gold label sits in the top-k ranking.
        query = torch.full(value_max_probs.shape, label_index,
                           dtype=torch.long).numpy().astype(int)
        ranking_position = (index_max_probs == query).nonzero()
        # LABEL PERPLEXITY: despite the name, this stores the log-probability
        # assigned to the gold label (the original key is kept). Wrap
        # label_index in a list so the gather index is 1-D, matching the
        # 1-D log_probs tensor.
        tokens = torch.from_numpy(np.asarray([label_index]))
        label_perplexity = log_probs.gather(
            dim=0,
            index=tokens,
        )
        PERPLEXITY = label_perplexity.item()
        if len(ranking_position) > 0 and ranking_position[0].shape[0] != 0:
            # nonzero() returns a tuple of index arrays; the first entry holds
            # the positions where the gold label matched. Ranks are 1-based,
            # so the redundant rank >= 0 guards are dropped.
            rank = int(ranking_position[0][0]) + 1
            MRR = 1. / rank
            if rank <= P_AT:
                P_AT_X = 1.
            if rank == 1:
                P_AT_1 = 1.
experiment_result["MRR"] = MRR
experiment_result["P_AT_X"] = P_AT_X
experiment_result["P_AT_1"] = P_AT_1
experiment_result["PERPLEXITY"] = PERPLEXITY
    return MRR, P_AT_X, experiment_result, return_msg
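

# ---------------------------------------------------------------------------
# A minimal usage sketch, not part of the original module: the vocabulary,
# shapes, and label id below are hypothetical, and it assumes the module's
# private helpers __max_probs_values_indices / __print_top_k slice log_probs
# at the masked position and rank the top-k entries, as in the LAMA repo.
if __name__ == "__main__":
    vocab = ["[MASK]", "paris", "london", "rome", "berlin"]
    # Fake per-token log-probabilities, e.g. from a masked language model.
    log_probs = torch.log_softmax(torch.randn(3, len(vocab)), dim=-1)
    MRR, P_AT_X, result, msg = get_ranking(
        log_probs, masked_indices=[1], vocab=vocab,
        label_index=2,  # gold token id ("london"), hypothetical
        topk=5, print_generation=False,
    )
    print(result["MRR"], result["P_AT_1"], result["PERPLEXITY"])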