mlebench/competitions/smartphone-decimeter-2022/notebook.py (60 lines of code) (raw):

"""
Helper functions from the Kaggle notebook `GSDC2 - baseline submission`.

Adapted from https://www.kaggle.com/code/saitodevel01/gsdc2-baseline-submission.
"""

from dataclasses import dataclass

import numpy as np

# WGS84 reference-ellipsoid parameters (axis lengths in metres).
WGS84_SEMI_MAJOR_AXIS = 6378137.0
WGS84_SEMI_MINOR_AXIS = 6356752.314245
WGS84_SQUARED_FIRST_ECCENTRICITY = 6.69437999013e-3
WGS84_SQUARED_SECOND_ECCENTRICITY = 6.73949674226e-3

# Sphere radius (metres) used by the haversine great-circle formula.
HAVERSINE_RADIUS = 6_371_000


@dataclass
class ECEF:
    """Earth-centred, Earth-fixed Cartesian position, one array per axis."""

    x: np.ndarray
    y: np.ndarray
    z: np.ndarray

    def to_numpy(self) -> np.ndarray:
        """Stack the components along a new leading axis.

        Returns:
            Array of shape ``(3, *x.shape)`` ordered x, y, z.
        """
        return np.stack([self.x, self.y, self.z], axis=0)

    @staticmethod
    def from_numpy(pos: np.ndarray) -> "ECEF":
        """Build an ``ECEF`` from an array whose last axis holds (x, y, z).

        The last axis is split into three equal sections, so its length must
        be divisible by three (normally exactly 3).

        NOTE: this layout ``(..., 3)`` is not the one produced by
        ``to_numpy`` (``(3, ...)``); transpose before round-tripping.
        """
        # np.squeeze without an axis drops *every* length-1 axis, so a batch
        # of one position comes back as 0-d components. Kept as-is so existing
        # callers see the same shapes.
        x, y, z = (np.squeeze(part) for part in np.split(pos, 3, axis=-1))
        return ECEF(x=x, y=y, z=z)


@dataclass
class BLH:
    """Geodetic position: latitude, longitude (radians) and height."""

    lat: np.ndarray
    lng: np.ndarray
    # Callers in this module also pass a plain scalar (0) when height is unused.
    hgt: np.ndarray


def ECEF_to_BLH(ecef: ECEF) -> BLH:
    """Convert ECEF coordinates to geodetic latitude, longitude and height.

    Uses a closed-form (non-iterative) latitude estimate on the WGS84
    ellipsoid.

    Args:
        ecef: Cartesian position(s); components are in the same length unit
            as the WGS84 axis constants.

    Returns:
        ``BLH`` with ``lat``/``lng`` in radians and ``hgt`` above the
        ellipsoid, each shaped like the broadcast input components.
    """
    a = WGS84_SEMI_MAJOR_AXIS
    b = WGS84_SEMI_MINOR_AXIS
    e2 = WGS84_SQUARED_FIRST_ECCENTRICITY
    e2_ = WGS84_SQUARED_SECOND_ECCENTRICITY
    x, y, z = ecef.x, ecef.y, ecef.z

    r = np.sqrt(x**2 + y**2)  # distance from the rotation axis
    t = np.arctan2(z * (a / b), r)  # parametric-latitude seed
    lat = np.arctan2(z + (e2_ * b) * np.sin(t) ** 3, r - (e2 * a) * np.cos(t) ** 3)
    lng = np.arctan2(y, x)

    # Height via h = r*cos(lat) + z*sin(lat) - a*sqrt(1 - e2*sin(lat)^2).
    # This is algebraically equal to r/cos(lat) - N but has no division, so it
    # stays correct on the polar axis where r == 0 and cos(lat) ~ 0 (the
    # quotient form collapses to -N there).
    sin_lat = np.sin(lat)
    hgt = r * np.cos(lat) + z * sin_lat - a * np.sqrt(1 - e2 * sin_lat**2)
    return BLH(lat=lat, lng=lng, hgt=hgt)


def haversine_distance(blh_1: BLH, blh_2: BLH) -> np.ndarray:
    """Return the great-circle distance between two sets of positions.

    Args:
        blh_1: First position(s); ``lat``/``lng`` in radians.
        blh_2: Second position(s); must broadcast against ``blh_1``.

    Returns:
        Distance on a sphere of radius ``HAVERSINE_RADIUS``; heights are
        ignored.
    """
    dlat = blh_2.lat - blh_1.lat
    dlng = blh_2.lng - blh_1.lng
    a = np.sin(dlat / 2) ** 2 + np.cos(blh_1.lat) * np.cos(blh_2.lat) * np.sin(dlng / 2) ** 2
    # Rounding can push `a` a hair above 1 for (near-)antipodal points, which
    # would make arcsin return NaN; clamp to the mathematically valid range.
    a = np.clip(a, 0.0, 1.0)
    return 2 * HAVERSINE_RADIUS * np.arcsin(np.sqrt(a))


def _frame_to_blh(df) -> BLH:
    """Read the degree columns of ``df`` into a ``BLH`` in radians (height 0)."""
    return BLH(
        lat=np.deg2rad(df["LatitudeDegrees"].to_numpy()),
        lng=np.deg2rad(df["LongitudeDegrees"].to_numpy()),
        hgt=0,
    )


def pandas_haversine_distance(df1, df2) -> np.ndarray:
    """Row-wise haversine distance between two frames.

    Both frames must provide ``LatitudeDegrees`` and ``LongitudeDegrees``
    columns; rows are paired by position, not by index.
    """
    return haversine_distance(_frame_to_blh(df1), _frame_to_blh(df2))


def calc_score(pred_df, gt_df):
    """Return the mean of the 50th and 95th percentile haversine errors.

    Args:
        pred_df: Predictions with ``LatitudeDegrees``/``LongitudeDegrees``.
        gt_df: Ground truth with the same columns, row-aligned to ``pred_df``.
    """
    d = pandas_haversine_distance(pred_df, gt_df)
    return np.mean(np.quantile(d, [0.50, 0.95]))