def run_test()

in src/lighteval/tasks/extended/lcb/codegen_metrics.py [0:0]


def run_test(sample: dict[str, str], test=None, timeout: int = 6) -> list[int | bool]:
    """Grade generated code (``test``) against the I/O pairs stored in ``sample``.

    If ``test`` is None, no grading happens and the parsed input/output data is
    returned as-is (it may be None if ``sample["input_output"]`` did not parse).

    Otherwise returns a list of per-test results: True means the test passed,
    False means it produced a wrong answer, and negative integers are error
    codes (e.g. -4 for a runtime error / missing test data).
    """
    # Install the SIGALRM handler so signal.alarm() below can interrupt
    # long-running generated code.
    signal.signal(signal.SIGALRM, timeout_handler)

    # Disable functionalities that can make destructive changes to the test.
    # max memory is set to 4GB
    reliability_guard()

    try:
        in_outs = json.loads(sample["input_output"])
    except ValueError:
        in_outs = None

    if test is None:
        # No generated code to grade: return the parsed I/O data (possibly None).
        # assert False, "should not happen: test code is none"
        return in_outs

    if not in_outs:
        # BUGFIX: previously this fell through with `which_type` unbound and
        # raised NameError. Without test data we cannot grade anything, so
        # report the generic error code instead.
        return [-4]

    if in_outs.get("fn_name") is None:
        which_type = CODE_TYPE.standard_input  # Standard input
        method_name = None
    else:
        which_type = CODE_TYPE.call_based  # Call-based
        method_name = in_outs["fn_name"]

    if which_type == CODE_TYPE.call_based:
        signal.alarm(timeout)
        try:
            return grade_call_based(
                code=test,
                all_inputs=in_outs["inputs"],
                all_outputs=in_outs["outputs"],
                fn_name=method_name,
                timeout=timeout,
            )

        except Exception:
            # Any failure while running the candidate code maps to -4.
            return [-4]

        finally:
            # Always clear the pending alarm so it cannot fire later.
            signal.alarm(0)

    elif which_type == CODE_TYPE.standard_input:
        signal.alarm(timeout)
        try:
            return grade_stdio(
                code=test,
                all_inputs=in_outs["inputs"],
                all_outputs=in_outs["outputs"],
                timeout=timeout,
            )

        except Exception:
            return [-4]

        finally:
            signal.alarm(0)

    # Unreachable with the current CODE_TYPE members, kept as a safe fallback.
    return [-4]