# evals/elsuite/identifying_variables/scripts/make_plots.py
from pathlib import Path
from typing import Dict, Tuple

import pandas as pd
from tqdm import tqdm

from evals.utils import log_utils

# handle_cot_double_sampling, make_token_per_sample_df, np_nan_if_none, and
# zero_if_none are helpers assumed to be defined elsewhere in this script.
def count_tokens(results_dir: Path, total: int) -> Tuple[Dict, pd.DataFrame]:
    """Tally the token usage recorded in the sampling events of each .log file.

    `total` is the expected number of log files, used only to size the
    progress bar. Returns overall input/output token totals and a
    per-sample token-usage DataFrame.
    """
    eval_names = [
        "identifying_variables.corrset.default",
        "identifying_variables.language-tabular.default",
    ]
    solver_names = [
        "generation/hhh/gpt-4-base",
        "generation/direct/gpt-3.5-turbo",
        "generation/direct/gpt-4-1106-preview",
        "generation/cot_hhh/gpt-4-base",
        "generation/cot/gpt-3.5-turbo",
        "generation/cot/gpt-4-1106-preview",
    ]
    # CoT solvers are counted on the language-tabular eval with the tree shown;
    # all other solvers on the corrset eval without it.
    solver_to_eval = {
        solver: eval_names[1] if "cot" in solver else eval_names[0]
        for solver in solver_names
    }
    solver_to_tree = {solver: "cot" in solver for solver in solver_names}
    solver_to_tokens = {
        solver: {"input": [], "output": [], "total": []} for solver in solver_names
    }
    total_input = 0
    total_output = 0
    for log in tqdm(results_dir.glob("*.log"), total=total):
        spec = log_utils.extract_spec(log)
        solver = spec["completion_fns"][0]
        if solver not in solver_names:
            print(f"Skipping {solver}: token counting not supported.")
            continue
        eval_name = spec["eval_name"]
        seed = spec["run_config"]["seed"]
        tree = "show_tree=True" in spec["run_config"]["command"]
        samplings = log_utils.extract_individual_results(log, "sampling")
        # Account for the extra sampling events logged by CoT solvers.
        samplings = handle_cot_double_sampling(samplings, solver)
        for sampling in samplings:
            usage = sampling["usage"]
            # Record per-sample stats only for each solver's canonical run:
            # its designated eval, seed 1, and its designated show_tree setting.
            if (
                solver in solver_to_eval
                and eval_name == solver_to_eval[solver]
                and seed == 1
                and tree == solver_to_tree[solver]
            ):
                solver_to_tokens[solver]["input"].append(
                    np_nan_if_none(usage["prompt_tokens"])
                )
                solver_to_tokens[solver]["output"].append(
                    np_nan_if_none(usage["completion_tokens"])
                )
                solver_to_tokens[solver]["total"].append(
                    np_nan_if_none(usage["total_tokens"])
                )
            # Overall totals accumulate across every sampling, not just the
            # canonical runs selected above.
            total_input += zero_if_none(usage["prompt_tokens"])
            total_output += zero_if_none(usage["completion_tokens"])
    total_tokens = {"input": total_input, "output": total_output}
    tokens_per_sample_df = make_token_per_sample_df(solver_to_eval, solver_to_tokens)
    return total_tokens, tokens_per_sample_df
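
# The coercion helpers called in count_tokens exist elsewhere in this script;
# below is a minimal sketch of what their bodies are assumed to look like,
# inferred purely from how count_tokens uses them (the names come from the
# original code, but these implementations are assumptions, not the script's
# confirmed versions):
import numpy as np


def np_nan_if_none(value):
    # Assumed: map a missing usage field to NaN so pandas aggregations
    # skip it rather than counting it as zero.
    return np.nan if value is None else value


def zero_if_none(value):
    # Assumed: map a missing usage field to 0 so the running totals
    # stay well-defined.
    return 0 if value is None else value


# Usage sketch (hypothetical results directory):
#   logs = list(Path("./results").glob("*.log"))
#   totals, per_sample_df = count_tokens(Path("./results"), total=len(logs))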