in src/sal/utils/score.py [0:0]
def score(dataset: Dataset, config: Config) -> Dataset:
dataset = dataset.map(
lambda x: {"agg_scores": [aggregate_scores(s, "last") for s in x["scores"]]}
)
subsets = [2**i for i in range(config.n) if 2**i <= config.n]
for n in tqdm(subsets, desc="Computing majority & weighted predictions"):
dataset = dataset.map(
subsample_completions,
fn_kwargs={"n": n},
num_proc=config.num_proc,
desc=f"Subsample {n}",
)
dataset = dataset.map(
extract_completion_answers,
fn_kwargs={"n": n},
num_proc=config.num_proc,
desc=f"Extract answers {n}",
)
dataset = dataset.map(
compute_weighted_pred,
fn_kwargs={"n": n},
num_proc=config.num_proc,
desc=f"Compute weighted pred {n}",
)
dataset = dataset.map(
compute_maj_pred,
fn_kwargs={"n": n},
num_proc=config.num_proc,
desc=f"Compute majority pred {n}",
)
dataset = dataset.map(
compute_naive_pred,
fn_kwargs={"n": n},
num_proc=config.num_proc,
desc=f"Compute naive pred {n}",
)
# Nuke unused columns to keep dataset lean
dataset = dataset.remove_columns(
[f"completions@{n}", f"agg_scores@{n}", f"preds@{n}"]
)
return dataset