mlebench/competitions/smartphone-decimeter-2022/notebook.py (60 lines of code) (raw):
"""
Helper functions from the Kaggle notebook `GSDC2 - baseline submission`.
Adapted from https://www.kaggle.com/code/saitodevel01/gsdc2-baseline-submission.
"""
from dataclasses import dataclass
import numpy as np
# WGS84 ellipsoid parameters read by ECEF_to_BLH.
# NOTE(review): the two axis lengths are presumably in metres — confirm.
WGS84_SEMI_MAJOR_AXIS = 6378137.0
WGS84_SEMI_MINOR_AXIS = 6356752.314245
# Squared first and second eccentricity of the same ellipsoid.
WGS84_SQUARED_FIRST_ECCENTRICITY = 6.69437999013e-3
WGS84_SQUARED_SECOND_ECCENTRICITY = 6.73949674226e-3
# Sphere radius that haversine_distance scales the central angle by
# (presumably metres — confirm).
HAVERSINE_RADIUS = 6_371_000
@dataclass
class ECEF:
    """Cartesian (x, y, z) position stored as three parallel coordinate arrays.

    The three fields are expected to share one shape; a single point may be
    stored as 0-d values.
    """

    x: np.ndarray
    y: np.ndarray
    z: np.ndarray

    def to_numpy(self):
        """Return the coordinates stacked along a new leading axis.

        For 1-D fields of length ``N`` the result has shape ``(3, N)``.
        Note that this is the transpose of the ``(N, 3)`` layout accepted
        by ``from_numpy``.
        """
        return np.stack([self.x, self.y, self.z], axis=0)

    @classmethod
    def from_numpy(cls, pos):
        """Build an instance from an array whose last axis holds (x, y, z).

        Args:
            pos: Array-like of shape ``(..., 3)``.

        Returns:
            An instance whose fields each have shape ``pos.shape[:-1]``.

        Raises:
            ValueError: If the last axis of ``pos`` does not have length 3.
        """
        pos = np.asarray(pos)
        # Squeeze only the split axis: a bare np.squeeze would also drop a
        # length-1 leading axis, turning a single-row (1, 3) input into 0-d
        # scalars instead of length-1 arrays.
        x, y, z = (np.squeeze(part, axis=-1) for part in np.split(pos, 3, axis=-1))
        return cls(x=x, y=y, z=z)
@dataclass
class BLH:
    """Geodetic position: latitude, longitude and height.

    Every place in this file that builds a BLH supplies the angles in
    radians (results of ``np.arctan2`` or ``np.deg2rad``).
    """

    # Annotated with the array type; ``np.array`` is a factory function,
    # not a type.
    lat: np.ndarray  # latitude, radians
    lng: np.ndarray  # longitude, radians
    hgt: np.ndarray  # height; pandas_haversine_distance passes a scalar 0 here
def ECEF_to_BLH(ecef):
    """Convert ECEF coordinates to geodetic latitude, longitude and height.

    Uses a single-pass closed-form formula (no iteration) on the WGS84
    ellipsoid constants defined at module level.

    Args:
        ecef: Object exposing ``x``, ``y`` and ``z`` coordinate arrays.

    Returns:
        BLH with ``lat`` and ``lng`` in radians and ``hgt`` in the same
        length unit as the inputs.
    """
    semi_major = WGS84_SEMI_MAJOR_AXIS
    semi_minor = WGS84_SEMI_MINOR_AXIS
    first_ecc_sq = WGS84_SQUARED_FIRST_ECCENTRICITY
    second_ecc_sq = WGS84_SQUARED_SECOND_ECCENTRICITY
    x, y, z = ecef.x, ecef.y, ecef.z

    # Distance from the z axis.
    axis_dist = np.sqrt(x**2 + y**2)
    # Auxiliary angle feeding the latitude formula.
    aux = np.arctan2(z * (semi_major / semi_minor), axis_dist)

    lat_numer = z + (second_ecc_sq * semi_minor) * np.sin(aux) ** 3
    lat_denom = axis_dist - (first_ecc_sq * semi_major) * np.cos(aux) ** 3
    latitude = np.arctan2(lat_numer, lat_denom)
    longitude = np.arctan2(y, x)

    # Radius term subtracted to turn the slant distance into a height.
    curvature_radius = semi_major / np.sqrt(1 - first_ecc_sq * np.sin(latitude) ** 2)
    height = (axis_dist / np.cos(latitude)) - curvature_radius
    return BLH(lat=latitude, lng=longitude, hgt=height)
def haversine_distance(blh_1, blh_2):
    """Return the haversine (great-circle) distance between two positions.

    Args:
        blh_1: Object with ``lat`` and ``lng`` in radians.
        blh_2: Object with ``lat`` and ``lng`` in radians, broadcastable
            against ``blh_1``.

    Returns:
        Distance on a sphere of radius ``HAVERSINE_RADIUS``, element-wise,
        in the unit of that radius. Heights are ignored.
    """
    dlat = blh_2.lat - blh_1.lat
    dlng = blh_2.lng - blh_1.lng
    a = np.sin(dlat / 2) ** 2 + np.cos(blh_1.lat) * np.cos(blh_2.lat) * np.sin(dlng / 2) ** 2
    # For (near-)antipodal points floating-point rounding can push `a`
    # marginally above 1, which would make arcsin return NaN. Cap it at 1;
    # negative or NaN values are left alone so invalid input still surfaces.
    a = np.minimum(a, 1.0)
    return 2 * HAVERSINE_RADIUS * np.arcsin(np.sqrt(a))
def pandas_haversine_distance(df1, df2):
    """Row-wise haversine distance between two coordinate tables.

    Args:
        df1: Table with ``LatitudeDegrees`` and ``LongitudeDegrees`` columns.
        df2: Table with the same two columns, compared positionally with
            ``df1``.

    Returns:
        Whatever ``haversine_distance`` returns for the converted positions.
    """

    def _to_blh(frame):
        # Degrees -> radians; height is not used by the distance, so it is 0.
        return BLH(
            lat=np.deg2rad(frame["LatitudeDegrees"].to_numpy()),
            lng=np.deg2rad(frame["LongitudeDegrees"].to_numpy()),
            hgt=0,
        )

    first = _to_blh(df1)
    second = _to_blh(df2)
    return haversine_distance(first, second)
def calc_score(pred_df, gt_df):
    """Score predictions as the mean of the 50th and 95th percentile errors.

    Args:
        pred_df: Predicted positions, as accepted by
            ``pandas_haversine_distance``.
        gt_df: Ground-truth positions, compared positionally with
            ``pred_df``.

    Returns:
        Average of the 0.50 and 0.95 quantiles of the per-row distances.
    """
    errors = pandas_haversine_distance(pred_df, gt_df)
    median_error = np.quantile(errors, 0.50)
    tail_error = np.quantile(errors, 0.95)
    return np.mean([median_error, tail_error])