in mlebench/competitions/3d-object-detection-for-autonomous-vehicles/grade.py
import multiprocessing
from functools import partial

import numpy as np
import pandas as pd
from tqdm import tqdm

from mlebench.grade_helpers import InvalidSubmissionError

# `parse_row` and `process_sample` are helpers defined elsewhere (not shown in this
# excerpt); hedged sketches of both are given after `grade` for illustration.


def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
    """
    `submission` and `answers` are dataframes with columns `Id` and `PredictionString`.
    We grade the submission using the 3D mAP metric described in the competition overview:
    https://www.kaggle.com/competitions/3d-object-detection-for-autonomous-vehicles/overview/evaluation
    The competition organizers provide a Python script (mAP_evaluation.py) that computes this metric
    (search the text for "3d version of the COCO mAP") for a (gt, predictions) pair from a single sample:
    https://www.kaggle.com/competitions/3d-object-detection-for-autonomous-vehicles/discussion/133895
    For each sample, we first parse the PredictionString into the format expected by mAP_evaluation.py,
    then calculate the mAP at each IoU threshold in the requested range and average over the thresholds
    to get a mean mAP for that sample. The final score is the mean over all samples (i.e. the mean of
    the per-sample mean mAPs).
    """
    if len(submission) != len(answers):
        raise InvalidSubmissionError("Submission and answers must have the same length")
    if "Id" not in submission.columns:
        raise InvalidSubmissionError("Submission must have an 'Id' column")
    if "PredictionString" not in submission.columns:
        raise InvalidSubmissionError("Submission must have a 'PredictionString' column")
    assert "Id" in answers.columns, "Answers must have an 'Id' column"
    assert "PredictionString" in answers.columns, "Answers must have a 'PredictionString' column"

    submission = submission.sort_values("Id")
    answers = answers.sort_values("Id")
    if (submission["Id"].values != answers["Id"].values).any():
        raise InvalidSubmissionError("Submission and answers must have the same ids")

    # Empty values in the PredictionString column are allowed, but must be filled with an
    # empty string (pandas converts them to NaN, so we convert them back)
    submission["PredictionString"] = submission["PredictionString"].fillna("")
    # Parse each row into the format expected by mAP_evaluation.py
    submission_samples = [
        parse_row(row["Id"], row["PredictionString"]) for _, row in submission.iterrows()
    ]
    answer_samples = [
        parse_row(row["Id"], row["PredictionString"]) for _, row in answers.iterrows()
    ]
    # IoU thresholds from
    # https://www.kaggle.com/competitions/3d-object-detection-for-autonomous-vehicles/overview/evaluation
    iou_thresholds = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]
    # Pair each ground-truth sample with its corresponding submission sample
    sample_pairs = list(zip(answer_samples, submission_samples))

    # Use multiprocessing to parallelize the per-sample mAP computation
    num_cpus = multiprocessing.cpu_count()
    with multiprocessing.Pool(processes=num_cpus) as pool:
        results = list(
            tqdm(
                pool.imap(partial(process_sample, iou_thresholds=iou_thresholds), sample_pairs),
                total=len(sample_pairs),
                desc="Processing samples",
            )
        )
    # Flatten: `results` holds one list of mAPs per sample (one mAP per IoU threshold)
    mAPs = [mAP for sample_mAPs in results for mAP in sample_mAPs]

    # Average over all samples and IoU thresholds
    final_mAP = float(np.mean(mAPs))
    return final_mAP
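

# ---------------------------------------------------------------------------
# Hedged sketches of the helpers used above. These are illustrations, not the
# repo's actual implementations: the PredictionString layout and the dict
# schema consumed by mAP_evaluation.py are assumptions taken from the
# competition's evaluation page and the organizers' script linked in the
# docstring of `grade`.
# ---------------------------------------------------------------------------
from math import cos, sin


def parse_row(sample_id: str, prediction_string: str) -> list[dict]:
    """Sketch: parse one PredictionString into the list-of-dicts format that
    mAP_evaluation.py consumes.

    Assumed layout (repeated groups of 9 tokens, as on the evaluation page):
    `confidence center_x center_y center_z width length height yaw class_name`.
    An empty string yields an empty list, matching the fillna("") handling
    in `grade`.
    """
    objects = []
    tokens = prediction_string.split()
    for i in range(0, len(tokens), 9):
        score, cx, cy, cz, width, length, height, yaw = map(float, tokens[i : i + 8])
        objects.append(
            {
                "sample_token": sample_id,
                "translation": [cx, cy, cz],
                "size": [width, length, height],
                # yaw -> quaternion [w, x, y, z] for a rotation about the vertical axis
                "rotation": [cos(yaw / 2), 0.0, 0.0, sin(yaw / 2)],
                "name": tokens[i + 8],
                "score": score,
            }
        )
    return objects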
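

def process_sample(pair: tuple[list[dict], list[dict]], iou_thresholds: list[float]) -> list[float]:
    """Sketch: compute one mAP per IoU threshold for a single (gt, predictions) pair.

    Assumes the organizers' mAP_evaluation.py is importable here and exposes
    `get_average_precisions(gt, predictions, class_names, iou_threshold)`
    returning one average precision per class; both names are assumptions.
    """
    from mAP_evaluation import get_average_precisions  # assumed module/function names

    gt, predictions = pair
    class_names = sorted({box["name"] for box in gt} | {box["name"] for box in predictions})
    if not class_names:
        # Scoring convention for samples with no boxes on either side is an assumption here
        return [1.0] * len(iou_thresholds)
    return [
        float(np.mean(get_average_precisions(gt, predictions, class_names, iou_threshold)))
        for iou_threshold in iou_thresholds
    ]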
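

if __name__ == "__main__":
    # Minimal smoke test on hypothetical data (two samples, one box each), using
    # the 9-token PredictionString layout assumed by the `parse_row` sketch above.
    example_answers = pd.DataFrame(
        {
            "Id": ["sample_a", "sample_b"],
            "PredictionString": [
                "1.0 10.0 10.0 -1.0 1.8 4.5 1.7 0.0 car",
                "1.0 20.0 5.0 -1.0 0.8 0.8 1.8 1.57 pedestrian",
            ],
        }
    )
    # Grading a copy of the answers against themselves should score ~1.0
    print(grade(example_answers.copy(), example_answers))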