Pairs |
# same commits |
# commits 1 |
# commits 2 |
latest commit |
run_multilingual_mmlu.py simple_evals.py |
5 |
7 (71%) |
22 (22%) |
2025-02-01 |
simple_evals.py sampler/responses_sampler.py |
4 |
22 (18%) |
7 (57%) |
2025-04-21 |
simpleqa_eval.py simple_evals.py |
4 |
10 (40%) |
22 (18%) |
2025-04-10 |
gpqa_eval.py simple_evals.py |
3 |
7 (42%) |
22 (13%) |
2025-01-31 |
browsecomp_eval.py simple_evals.py |
3 |
6 (50%) |
22 (13%) |
2025-04-10 |
drop_eval.py simple_evals.py |
2 |
3 (66%) |
22 (9%) |
2025-01-31 |
gpqa_eval.py run_multilingual_mmlu.py |
2 |
7 (28%) |
7 (28%) |
2025-01-31 |
gpqa_eval.py common.py |
2 |
7 (28%) |
9 (22%) |
2025-01-31 |
simpleqa_eval.py mgsm_eval.py |
2 |
10 (20%) |
6 (33%) |
2025-01-31 |
sampler/o_chat_completion_sampler.py drop_eval.py |
2 |
2 (100%) |
3 (66%) |
2025-01-31 |
gpqa_eval.py simpleqa_eval.py |
2 |
7 (28%) |
10 (20%) |
2025-01-31 |
mmlu_eval.py simple_evals.py |
2 |
5 (40%) |
22 (9%) |
2025-01-31 |
sampler/o_chat_completion_sampler.py run_multilingual_mmlu.py |
2 |
2 (100%) |
7 (28%) |
2025-01-31 |
math_eval.py simpleqa_eval.py |
2 |
6 (33%) |
10 (20%) |
2025-01-31 |
drop_eval.py run_multilingual_mmlu.py |
2 |
3 (66%) |
7 (28%) |
2025-01-31 |
sampler/o_chat_completion_sampler.py simpleqa_eval.py |
2 |
2 (100%) |
10 (20%) |
2025-01-31 |
mmlu_eval.py drop_eval.py |
2 |
5 (40%) |
3 (66%) |
2025-01-31 |
math_eval.py mgsm_eval.py |
2 |
6 (33%) |
6 (33%) |
2025-01-31 |
mmlu_eval.py gpqa_eval.py |
2 |
5 (40%) |
7 (28%) |
2025-01-31 |
gpqa_eval.py mgsm_eval.py |
2 |
7 (28%) |
6 (33%) |
2025-01-31 |
sampler/o_chat_completion_sampler.py mgsm_eval.py |
2 |
2 (100%) |
6 (33%) |
2025-01-31 |
common.py simpleqa_eval.py |
2 |
9 (22%) |
10 (20%) |
2025-01-31 |
math_eval.py gpqa_eval.py |
2 |
6 (33%) |
7 (28%) |
2025-01-31 |
mmlu_eval.py math_eval.py |
2 |
5 (40%) |
6 (33%) |
2025-01-31 |
sampler/o_chat_completion_sampler.py math_eval.py |
2 |
2 (100%) |
6 (33%) |
2025-01-31 |
common.py mgsm_eval.py |
2 |
9 (22%) |
6 (33%) |
2025-01-31 |
mmlu_eval.py run_multilingual_mmlu.py |
2 |
5 (40%) |
7 (28%) |
2025-01-31 |
common.py simple_evals.py |
2 |
9 (22%) |
22 (9%) |
2025-01-31 |
simpleqa_eval.py browsecomp_eval.py |
2 |
10 (20%) |
6 (33%) |
2025-04-10 |
sampler/o_chat_completion_sampler.py simple_evals.py |
2 |
2 (100%) |
22 (9%) |
2025-01-31 |
drop_eval.py mgsm_eval.py |
2 |
3 (66%) |
6 (33%) |
2025-01-31 |
mgsm_eval.py simple_evals.py |
2 |
6 (33%) |
22 (9%) |
2025-01-31 |
mmlu_eval.py simpleqa_eval.py |
2 |
5 (40%) |
10 (20%) |
2025-01-31 |
gpqa_eval.py drop_eval.py |
2 |
7 (28%) |
3 (66%) |
2025-01-31 |
sampler/o_chat_completion_sampler.py gpqa_eval.py |
2 |
2 (100%) |
7 (28%) |
2025-01-31 |
mmlu_eval.py mgsm_eval.py |
2 |
5 (40%) |
6 (33%) |
2025-01-31 |
sampler/o_chat_completion_sampler.py mmlu_eval.py |
2 |
2 (100%) |
5 (40%) |
2025-01-31 |
common.py run_multilingual_mmlu.py |
2 |
9 (22%) |
7 (28%) |
2025-01-31 |
mmlu_eval.py common.py |
2 |
5 (40%) |
9 (22%) |
2025-01-31 |
run_multilingual_mmlu.py simpleqa_eval.py |
2 |
7 (28%) |
10 (20%) |
2025-01-31 |
drop_eval.py common.py |
2 |
3 (66%) |
9 (22%) |
2025-01-31 |
drop_eval.py simpleqa_eval.py |
2 |
3 (66%) |
10 (20%) |
2025-01-31 |
math_eval.py drop_eval.py |
2 |
6 (33%) |
3 (66%) |
2025-01-31 |
sampler/o_chat_completion_sampler.py common.py |
2 |
2 (100%) |
9 (22%) |
2025-01-31 |
math_eval.py simple_evals.py |
2 |
6 (33%) |
22 (9%) |
2025-01-31 |
math_eval.py run_multilingual_mmlu.py |
2 |
6 (33%) |
7 (28%) |
2025-01-31 |
run_multilingual_mmlu.py mgsm_eval.py |
2 |
7 (28%) |
6 (33%) |
2025-01-31 |
math_eval.py common.py |
2 |
6 (33%) |
9 (22%) |
2025-01-31 |