in src/lighteval/tasks/extended/lcb/codegen_metrics.py
def run_test(sample: dict[str, str], test=None, timeout: int = 6) -> list[int | bool]:
    """Grade generated code against the test cases stored in ``sample``.

    If ``test`` (the generated code) is provided, it is executed against the
    input/output pairs parsed from ``sample["input_output"]``; otherwise the
    parsed pairs are returned as-is. In the result list, ``True`` means a test
    case passed, ``False`` means it did not, and negative codes signal errors
    (e.g. ``-4`` when grading raises or the test specification is unusable).
    """
    signal.signal(signal.SIGALRM, timeout_handler)
    # Disable functionality that could make destructive changes to the host
    # while running untrusted code; max memory is capped at 4GB.
    reliability_guard()
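    # NOTE (assumption): `timeout_handler`, registered above, is defined elsewhere
    # in this module and raises when SIGALRM fires, so a candidate program that
    # hangs is turned into an exception and surfaces through the error branches below.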
    try:
        in_outs = json.loads(sample["input_output"])
    except ValueError:
        in_outs = None

    if in_outs:
        if in_outs.get("fn_name") is None:
            which_type = CODE_TYPE.standard_input  # Standard input
            method_name = None
        else:
            which_type = CODE_TYPE.call_based  # Call-based
            method_name = in_outs["fn_name"]

        if test is None:
            # assert False, "should not happen: test code is none"
            return in_outs
        if which_type == CODE_TYPE.call_based:
            signal.alarm(timeout)
            try:
                return grade_call_based(
                    code=test,
                    all_inputs=in_outs["inputs"],
                    all_outputs=in_outs["outputs"],
                    fn_name=method_name,
                    timeout=timeout,
                )
            except Exception:
                return [-4]
            finally:
                signal.alarm(0)

        elif which_type == CODE_TYPE.standard_input:
            signal.alarm(timeout)
            try:
                return grade_stdio(
                    code=test,
                    all_inputs=in_outs["inputs"],
                    all_outputs=in_outs["outputs"],
                    timeout=timeout,
                )
            except Exception:
                return [-4]
            finally:
                signal.alarm(0)
    # Fallback: the input/output spec was missing, unparsable, or of an unknown type.
    return [-4]
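
# --- Usage sketch (not part of the module) ----------------------------------
# A minimal, hypothetical example of calling run_test, assuming the helpers it
# relies on (timeout_handler, reliability_guard, grade_stdio, grade_call_based)
# behave as described above. The sample dict and candidate program below are
# made up for illustration; real samples come from the LiveCodeBench rows that
# lighteval loads elsewhere.
import json

from lighteval.tasks.extended.lcb.codegen_metrics import run_test

# Hypothetical stdin/stdout task: read n, then n integers, and print their sum.
sample = {
    "input_output": json.dumps(
        {
            "inputs": ["3\n1 2 3\n"],  # stdin fed to the candidate program
            "outputs": ["6\n"],        # expected stdout
            # no "fn_name" key, so run_test takes the standard-input path
        }
    )
}

candidate_code = "n = int(input())\nprint(sum(map(int, input().split())))\n"

results = run_test(sample, test=candidate_code, timeout=6)
# Expected shape: one entry per test case -- True for a pass, False otherwise,
# and negative codes such as -4 for errors or an unusable spec.
print(results)

# Note: reliability_guard permanently restricts the calling process (and
# SIGALRM is Unix-only), which is why the surrounding metric code typically
# runs run_test in a separate process rather than in the main evaluation loop.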