Path Lines of Code LICENSE.md 104 README.md 60 SECURITY.md 3 evals/elsuite/already_said_that/README.md 141 evals/elsuite/ballots/readme.md 59 evals/elsuite/cant_do_that_anymore/README.md 43 evals/elsuite/error_recovery/README.md 62 evals/elsuite/function_deduction/scripts/dataset/raw_code.txt 124 evals/elsuite/hr_ml_agent_bench/README.md 182 evals/elsuite/hr_ml_agent_bench/benchmarks/ant/env/environment.txt 1 evals/elsuite/hr_ml_agent_bench/benchmarks/bipedal_walker/env/environment.txt 1 evals/elsuite/hr_ml_agent_bench/benchmarks/bipedal_walker/scripts/requirements.txt 2 evals/elsuite/hr_ml_agent_bench/benchmarks/cartpole/env/environment.txt 1 evals/elsuite/hr_ml_agent_bench/benchmarks/cartpole/scripts/requirements.txt 1 evals/elsuite/hr_ml_agent_bench/benchmarks/cifar10/scripts/read_only_files.txt 1 evals/elsuite/hr_ml_agent_bench/benchmarks/cifar10/scripts/requirements.txt 1 evals/elsuite/hr_ml_agent_bench/benchmarks/feedback/env/data_description.txt 7 evals/elsuite/hr_ml_agent_bench/benchmarks/feedback/env/evaluation_details.txt 14 evals/elsuite/hr_ml_agent_bench/benchmarks/feedback/scripts/read_only_files.txt 2 evals/elsuite/hr_ml_agent_bench/benchmarks/feedback/scripts/source_code.txt 1 evals/elsuite/hr_ml_agent_bench/benchmarks/house_price/env/data_description.txt 399 evals/elsuite/hr_ml_agent_bench/benchmarks/house_price/scripts/read_only_files.txt 2 evals/elsuite/hr_ml_agent_bench/benchmarks/humanoid/env/environment.txt 1 evals/elsuite/hr_ml_agent_bench/benchmarks/humanoid/scripts/requirements.txt 3 evals/elsuite/hr_ml_agent_bench/benchmarks/imdb/scripts/requirements.txt 1 evals/elsuite/hr_ml_agent_bench/benchmarks/inverted_pendulum/env/environment.txt 1 evals/elsuite/hr_ml_agent_bench/benchmarks/ogbn_arxiv/scripts/read_only_files.txt 1 evals/elsuite/hr_ml_agent_bench/benchmarks/ogbn_arxiv/scripts/requirements.txt 16 evals/elsuite/hr_ml_agent_bench/benchmarks/parkinsons_disease/env/data_description.txt 26 evals/elsuite/hr_ml_agent_bench/benchmarks/parkinsons_disease/env/evaluation_details.txt 9 evals/elsuite/hr_ml_agent_bench/benchmarks/parkinsons_disease/scripts/read_only_files.txt 5 evals/elsuite/hr_ml_agent_bench/benchmarks/parkinsons_disease/scripts/source_code.txt 2 evals/elsuite/hr_ml_agent_bench/benchmarks/pong/env/environment.txt 1 evals/elsuite/hr_ml_agent_bench/benchmarks/pusher/env/environment.txt 1 evals/elsuite/hr_ml_agent_bench/benchmarks/spaceship_titanic/env/task_descriptor.txt 18 evals/elsuite/hr_ml_agent_bench/benchmarks/spaceship_titanic/scripts/read_only_files.txt 2 evals/elsuite/hr_ml_agent_bench/benchmarks/spaceship_titanic/scripts/requirements.txt 1 evals/elsuite/hr_ml_agent_bench/benchmarks/spaceship_titanic/scripts/source_code.txt 1 evals/elsuite/hr_ml_agent_bench/devcontainer.json 15 evals/elsuite/hr_ml_agent_bench/requirements.txt 6 evals/elsuite/identifying_variables/README.md 127 evals/elsuite/incontext_rl/README.md 48 evals/elsuite/incontext_rl/requirements.txt 3 evals/elsuite/make_me_say/readme.md 51 evals/elsuite/multistep_web_tasks/README.md 58 evals/elsuite/multistep_web_tasks/docker/homepage/requirements.txt 1 evals/elsuite/multistep_web_tasks/reproducibility/all_tasks.json 280 evals/elsuite/sandbagging/README.md 70 evals/elsuite/schelling_point/README.md 64 evals/elsuite/skill_acquisition/readme.md 44 evals/elsuite/steganography/scripts/dataset/README.md 8 evals/elsuite/steganography/scripts/dataset/requirements.txt 6 evals/elsuite/text_compression/readme.md 58 evals/elsuite/text_compression/scripts/dataset/README.md 8 evals/elsuite/text_compression/scripts/dataset/requirements.txt 6 evals/elsuite/theory_of_mind/readme.md 18 evals/elsuite/track_the_stat/README.md 103 evals/elsuite/twenty_questions/readme.md 57 evals/registry/data/README.md 37 evals/registry/data/german-part-of-speech/promptDe.txt 10 evals/registry/data/integer-sequence-predictions/misc-recent-sequences-info.txt 636 evals/registry/data/integer-sequence-predictions/notable-sequences-info.txt 246 evals/registry/data/integer-sequence-predictions/obscure-sequences-info.txt 107 evals/registry/data/solve-for-variable/tools/README.md 38 evals/registry/data/theory_of_mind/hitom/license.md 169 evals/registry/data/theory_of_mind/socialiqa/license.md 305 evals/registry/data/theory_of_mind/tomi/license.md 318 evals/registry/data/ukraine_gec/README.md 2 evals/solvers/README.md 52 evals/solvers/postprocessors/README.md 23 evals/solvers/providers/google/requirements.txt 1 examples/lafand-mt.ipynb 250 examples/lambada.ipynb 105 examples/mmlu.ipynb 182 examples/retrieval-completionfn.ipynb 113 mypy.ini 36