in lmms_eval/tasks/mmvet/utils.py [0:0]
def mmvet_aggregate_results(results):
    """
    Args:
        results: a list of dicts returned by process_results, each carrying a
            numeric "score" (0-1) and a comma-separated "capabilities" string
    Returns:
        The overall score as a percentage (0-100)

    Note: relies on the module-level cap_columns, cap_details_columns, and
    eval_logger defined elsewhere in this file.
    """
    # Calculate the overall score
    overall_score = sum([result["score"] for result in results]) / len(results) * 100
    eval_logger.info(f"Overall Score: {overall_score:.2f}")

    # Initialize dictionaries to store scores for each capability and detail
    cap_scores = {cap: 0 for cap in cap_columns.squeeze().tolist()}
    cap_details_scores = {detail: 0 for detail in cap_details_columns.squeeze().tolist()}

    # Count the number of results for each capability and detail
    cap_counts = {cap: 0 for cap in cap_scores}
    cap_details_counts = {detail: 0 for detail in cap_details_scores}

    # Aggregate scores for each capability and detail
    for result in results:
        for cap in cap_scores:
            if cap in result["capabilities"]:
                cap_scores[cap] += result["score"]
                cap_counts[cap] += 1
        for detail in cap_details_scores:
            detail_set = set(detail.split("_"))
            result_detail_set = set(result["capabilities"].split(","))
            if detail_set == result_detail_set:
                cap_details_scores[detail] += result["score"]
                cap_details_counts[detail] += 1

    # Calculate the average score for each capability
    for cap in cap_scores:
        if cap_counts[cap] > 0:
            cap_scores[cap] = cap_scores[cap] / cap_counts[cap] * 100
            eval_logger.info(f"Score for {cap}: {cap_scores[cap]:.2f}")

    # Calculate the average score for each detailed capability
    for detail in cap_details_scores:
        if cap_details_counts[detail] > 0:
            cap_details_scores[detail] = cap_details_scores[detail] / cap_details_counts[detail] * 100
            eval_logger.info(f"Score for {detail}: {cap_details_scores[detail]:.2f}")

    return overall_score
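
A minimal usage sketch (not part of the file above), assuming lmms_eval is installed so the function can be imported from this module. Each result dict is expected to carry a 0-1 "score" and a comma-separated "capabilities" string, matching how the function splits it; the capability names and scores below are illustrative only.

from lmms_eval.tasks.mmvet.utils import mmvet_aggregate_results

# Two hypothetical graded samples; real entries come from process_results.
example_results = [
    {"score": 1.0, "capabilities": "rec,ocr"},  # fully correct answer
    {"score": 0.5, "capabilities": "rec"},      # partially correct answer
]

overall = mmvet_aggregate_results(example_results)
print(f"{overall:.2f}")  # 75.00 -- the mean score scaled to 0-100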