# count_tokens()
# in evals/elsuite/identifying_variables/scripts/make_plots.py [0:0]

def count_tokens(results_dir: Path, total: int) -> Tuple[Dict, pd.DataFrame]:
    """Tally token usage across eval result logs.

    Scans every ``*.log`` file in ``results_dir``, accumulating the grand
    total of input (prompt) and output (completion) tokens across all
    recognized solvers, and collecting per-sample token counts for the
    specific (solver, eval, seed=1, tree-setting) combinations of interest.

    Args:
        results_dir: Directory containing the ``.log`` result files.
        total: Expected number of log files (used only for the tqdm bar).

    Returns:
        A tuple of:
          - dict with keys ``"input"`` and ``"output"`` holding grand totals
            across all logs and solvers;
          - DataFrame of per-sample token counts built by
            ``make_token_per_sample_df``.
    """
    eval_names = [
        "identifying_variables.corrset.default",
        "identifying_variables.language-tabular.default",
    ]
    solver_names = [
        "generation/hhh/gpt-4-base",
        "generation/direct/gpt-3.5-turbo",
        "generation/direct/gpt-4-1106-preview",
        "generation/cot_hhh/gpt-4-base",
        "generation/cot/gpt-3.5-turbo",
        "generation/cot/gpt-4-1106-preview",
    ]
    # CoT solvers are tracked on the language-tabular eval; others on corrset.
    solver_to_eval = {
        solver: eval_names[1] if "cot" in solver else eval_names[0]
        for solver in solver_names
    }
    # Per-sample counts are kept for runs whose tree setting *differs* from
    # this expected value (see the `keep` condition below).
    solver_to_tree = {solver: "cot" in solver for solver in solver_names}
    solver_to_tokens = {
        solver: {"input": [], "output": [], "total": []} for solver in solver_names
    }
    total_input = 0
    total_output = 0
    for log in tqdm(results_dir.glob("*.log"), total=total):
        spec = log_utils.extract_spec(log)
        solver = spec["completion_fns"][0]
        if solver not in solver_names:
            print(f"Skipping {solver}: token counting not supported.")
            continue
        eval_name = spec["eval_name"]
        seed = spec["run_config"]["seed"]
        tree = "show_tree=True" in spec["run_config"]["command"]
        samplings = log_utils.extract_individual_results(log, "sampling")
        samplings = handle_cot_double_sampling(samplings, solver)
        # The keep-condition depends only on per-log values, so compute it
        # once per log rather than once per sampling.
        keep = (
            solver in solver_to_eval
            and eval_name == solver_to_eval[solver]
            and seed == 1
            and tree != solver_to_tree[solver]
        )
        for sampling in samplings:
            usage = sampling["usage"]
            if keep:
                solver_to_tokens[solver]["input"].append(
                    np_nan_if_none(usage["prompt_tokens"])
                )
                solver_to_tokens[solver]["output"].append(
                    np_nan_if_none(usage["completion_tokens"])
                )
                solver_to_tokens[solver]["total"].append(
                    np_nan_if_none(usage["total_tokens"])
                )
            # Grand totals include every sampling from every recognized
            # solver, regardless of the per-sample filter above.
            total_input += zero_if_none(usage["prompt_tokens"])
            total_output += zero_if_none(usage["completion_tokens"])

    total_tokens = {"input": total_input, "output": total_output}
    tokens_per_sample_df = make_token_per_sample_df(solver_to_eval, solver_to_tokens)

    return total_tokens, tokens_per_sample_df