in competitions/leaderboard.py [0:0]
def fetch(self, private=False):
    """Build the ranked leaderboard as a pandas DataFrame.

    Submissions come from ``self._process_private_lb()`` when ``private`` is
    true and from ``self._process_public_lb()`` otherwise. Rows whose
    ``submission_datetime`` is not strictly before ``self.end_date`` are
    dropped, the rest are ordered by ``self.scoring_metric`` (descending when
    ``self.eval_higher_is_better`` is truthy, ascending otherwise) with ties
    broken by the earliest submission, and a 1-based ``rank`` column is added.

    Args:
        private: Selects the private leaderboard when true, the public one
            otherwise.

    Returns:
        A DataFrame with ``rank`` as the first column and
        ``submission_datetime`` (formatted as ``%Y-%m-%d %H:%M:%S`` strings)
        as the last column, and with ``id`` replaced by the team name read
        from ``teams.json``. A completely empty DataFrame (no columns) is
        returned when there are no submissions at all. When every submission
        is filtered out by the end date, the empty frame keeps its columns
        and ``teams.json`` is not downloaded.

    Raises:
        KeyError: If an ``id`` in the leaderboard has no entry (or no
            ``"name"`` field) in ``teams.json``.
    """
    if private:
        submissions = self._process_private_lb()
    else:
        submissions = self._process_public_lb()
    if len(submissions) == 0:
        return pd.DataFrame()

    df = pd.DataFrame(submissions)
    df["submission_datetime"] = pd.to_datetime(df["submission_datetime"], format="%Y-%m-%d %H:%M:%S")
    # Only submissions made strictly before the end date are ranked.
    df = df[df["submission_datetime"] < self.end_date].reset_index(drop=True)

    # Best score first; equal scores are ordered by earliest submission.
    # The sort is the same for the public and private leaderboards; only the
    # direction depends on whether a higher score is better.
    df = df.sort_values(
        by=[self.scoring_metric, "submission_datetime"],
        ascending=[not self.eval_higher_is_better, True],
    )

    # Round every score column to 4 decimal places.
    for col in df.columns:
        if col in self.non_score_columns:
            continue
        df[col] = df[col].round(4)

    # Rank follows the sorted order, starting at 1.
    df = df.reset_index(drop=True)
    df["rank"] = df.index + 1
    df["submission_datetime"] = df["submission_datetime"].dt.strftime("%Y-%m-%d %H:%M:%S")

    # Final layout: rank first, submission_datetime last, everything else in
    # its existing order in between.
    middle = [c for c in df.columns if c not in ("rank", "submission_datetime")]
    df = df[["rank", *middle, "submission_datetime"]]

    # Nothing to rename: skip the network round-trip for teams.json.
    if df.empty:
        return df

    teams_path = hf_hub_download(
        repo_id=self.competition_id,
        filename="teams.json",
        token=self.token,
        repo_type="dataset",
    )
    with open(teams_path, "r", encoding="utf-8") as f:
        teams = json.load(f)
    # Replace each team id with its display name.
    df["id"] = df["id"].apply(lambda team_id: teams[team_id]["name"])
    return df