# mlebench/competitions/3d-object-detection-for-autonomous-vehicles/grade.py
import multiprocessing
from functools import partial
from typing import Any, Dict, List

import numpy as np
import pandas as pd
from pyquaternion import Quaternion
from tqdm import tqdm

from mlebench.grade_helpers import InvalidSubmissionError
from .mAP_evaluation import get_average_precisions, get_class_names


def parse_row(sample_token: str, prediction_string: str) -> List[Dict[str, Any]]:
"""
    Parse a row consisting of a sample_token and a prediction_string.

    `prediction_string` is a space-separated sequence of objects, each of the form:
    `{confidence} center_x center_y center_z width length height yaw class_name`

    Kaggle describes `confidence` as optional, but this parser expects all nine tokens
    to be present for every object.
Example row:
```
97ce3ab08ccbc0baae0267cbf8d4da947e1f11ae1dbcb80c3f4408784cd9170c,1.0 2742.152625996093 673.1631800662494 -18.6561112411676 1.834 4.609 1.648 2.619835541569646 car 0.5 2728.9634555684484 657.8296521874645 -18.54676216218047 1.799 4.348 1.728 -0.5425527100619654 bus
```
    Returns a list of dicts with keys "sample_token", "translation", "size", "rotation",
    "name" and "score", e.g.
```
[{
'sample_token': '0f0e3ce89d2324d8b45aa55a7b4f8207fbb039a550991a5149214f98cec136ac',
'translation': [971.8343488872263, 1713.6816097857359, -25.82534357061308],
'size': [2.519726579986132, 7.810161372666739, 3.483438286096803],
'rotation': [0.10913582721095375, 0.04099572636992043, 0.01927712319721745, 1.029328402625659],
'name': 'car',
'score': 0.3077029437237213
}]
```
"""
if prediction_string.strip() == "":
return []
    tokens = prediction_string.split()
    single_prediction_length = 9
    if len(tokens) % single_prediction_length != 0:
        raise InvalidSubmissionError(
            f"Prediction string must have a multiple of {single_prediction_length} tokens "
            f"({single_prediction_length} per object), but got {len(tokens)} tokens"
        )
    n_predictions = len(tokens) // single_prediction_length
    preds = []
    for i in range(n_predictions):
        segment = tokens[i * single_prediction_length : (i + 1) * single_prediction_length]
        confidence = float(segment[0])
        # Parse the remaining fields: translation, size, yaw, class name
        translation = [float(x) for x in segment[1:4]]
        size = [float(x) for x in segment[4:7]]
        yaw = float(segment[7])
        name = segment[8]
quat = Quaternion(axis=[0, 0, 1], radians=yaw)
# (from https://www.kaggle.com/competitions/3d-object-detection-for-autonomous-vehicles/discussion/110884#638390)
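        # For a rotation of `yaw` radians about the z-axis this quaternion equals
        # (cos(yaw/2), 0, 0, sin(yaw/2)); `list(quat)` below yields the elements in
        # pyquaternion's [w, x, y, z] order.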
preds.append(
{
"sample_token": sample_token,
"translation": translation,
"size": size,
"rotation": list(quat),
"name": name,
"score": confidence,
}
)
return preds
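

# Illustrative example (hypothetical sample_token and box values): a single-object
# prediction string parses into one dict.
#
#     parse_row("abc123", "0.9 10.0 5.0 -1.0 1.8 4.6 1.6 0.0 car")
#     # -> [{"sample_token": "abc123",
#     #      "translation": [10.0, 5.0, -1.0],
#     #      "size": [1.8, 4.6, 1.6],
#     #      "rotation": [1.0, 0.0, 0.0, 0.0],  # identity quaternion, since yaw=0
#     #      "name": "car",
#     #      "score": 0.9}]

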
def process_sample(gt_pred_pair, iou_thresholds: List[float]) -> List[float]:
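    """
    Compute the mAP of a single sample's predictions against its ground truth, once per
    IoU threshold. Returns one mAP per threshold, where each mAP is the mean of the
    per-class average precisions from mAP_evaluation.py.
    """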
gt, predictions = gt_pred_pair
class_names = get_class_names(gt)
sample_mAPs = []
for iou_threshold in iou_thresholds:
average_precisions = get_average_precisions(gt, predictions, class_names, iou_threshold)
mAP = np.mean(average_precisions)
sample_mAPs.append(mAP)
    return sample_mAPs


def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
"""
    submission and answers are DataFrames with columns `Id` and `PredictionString`.

    We grade the submission using the 3D mAP metric described in the competition overview:
    https://www.kaggle.com/competitions/3d-object-detection-for-autonomous-vehicles/overview/evaluation

    The competition organizers provide a Python script (mAP_evaluation.py) that performs this
    metric calculation for a (gt, predictions) pair from a single sample (in the linked
    discussion, search for "3d version of the COCO mAP"):
    https://www.kaggle.com/competitions/3d-object-detection-for-autonomous-vehicles/discussion/133895

    For each sample, we first parse the PredictionString into the format expected by
    mAP_evaluation.py, then calculate the mAP at each of the requested IoU thresholds and
    average over thresholds to get a mean mAP for that sample. The final score is the mean
    over all samples (i.e. the mean of the mean mAPs).
"""
if len(submission) != len(answers):
raise InvalidSubmissionError("Submission and answers must have the same length")
if "Id" not in submission.columns:
raise InvalidSubmissionError("Submission must have an 'Id' column")
if "PredictionString" not in submission.columns:
raise InvalidSubmissionError("Submission must have a 'PredictionString' column")
assert "Id" in answers.columns, "Answers must have an 'Id' column"
assert "PredictionString" in answers.columns, "Answers must have a 'PredictionString' column"
submission = submission.sort_values("Id")
answers = answers.sort_values("Id")
if (submission["Id"].values != answers["Id"].values).any():
raise InvalidSubmissionError("Submission and answers must have the same ids")
# Empty values in the PredictionString column are allowed, but must be filled with an empty string
# (pandas converts them to NaN, so we convert them back)
submission["PredictionString"] = submission["PredictionString"].fillna("")
# Parse each row into the format expected by mAP_evaluation.py
submission_samples = [
parse_row(row["Id"], row["PredictionString"]) for _, row in submission.iterrows()
]
answer_samples = [
parse_row(row["Id"], row["PredictionString"]) for _, row in answers.iterrows()
]
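    # Both frames are sorted by Id above, so submission_samples[i] and answer_samples[i]
    # describe the same sample; zipping them below pairs each prediction with its ground truth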
    # IoU thresholds from the competition's evaluation page:
    # https://www.kaggle.com/competitions/3d-object-detection-for-autonomous-vehicles/overview/evaluation
    iou_thresholds = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]
# Prepare the data for parallel processing
sample_pairs = list(zip(answer_samples, submission_samples))
    # Use multiprocessing to parallelize the computation; pool.imap preserves input
    # order, so results line up with sample_pairs
num_cpus = multiprocessing.cpu_count()
with multiprocessing.Pool(processes=num_cpus) as pool:
results = list(
tqdm(
pool.imap(partial(process_sample, iou_thresholds=iou_thresholds), sample_pairs),
total=len(sample_pairs),
desc="Processing samples",
)
)
    # Flatten the per-sample lists of per-threshold mAPs into a single list
mAPs = [mAP for sample_mAPs in results for mAP in sample_mAPs]
# Average over all samples and IoU thresholds
    final_mAP = float(np.mean(mAPs))
return final_mAP
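

if __name__ == "__main__":
    # Minimal smoke test (illustrative only; requires running in a context where the
    # relative import of mAP_evaluation resolves). The Ids and boxes below are made up.
    # Grading a submission against an identical answers frame should score 1.0, since
    # every predicted box matches its ground truth exactly at every IoU threshold.
    _df = pd.DataFrame(
        {
            "Id": ["sample_a", "sample_b"],
            "PredictionString": [
                "1.0 10.0 5.0 -1.0 1.8 4.6 1.6 0.0 car",
                "1.0 -3.0 8.0 -1.2 2.9 12.0 3.4 1.57 bus",
            ],
        }
    )
    print(grade(_df, _df.copy()))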