in compiler_gym/envs/llvm/datasets/cbench.py [0:0]
def validator_cb(env: "LlvmEnv") -> Optional[ValidationError]:  # noqa: F821
    """The validation callback."""
    with _CBENCH_DOWNLOAD_THREAD_LOCK:
        with fasteners.InterProcessLock(cache_path(".cbench-v1-runtime-data.LOCK")):
            download_cBench_runtime_data()
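
    # Locate the unpacked runtime data and check that every input file
    # required by this benchmark is present.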
    cbench_data = site_data_path("llvm-v0/cbench-v1-runtime-data/runtime_data")
    for input_file_name in input_files:
        path = cbench_data / input_file_name
        if not path.is_file():
            raise FileNotFoundError(f"Required benchmark input not found: {path}")

    # Create a temporary working directory to execute the benchmark in.
    with tempfile.TemporaryDirectory(dir=env.service.connection.working_dir) as d:
        cwd = Path(d)

        # Expand shell variable substitutions in the benchmark command.
        expanded_command = cmd.replace("$D", str(cbench_data))

        # Translate the output file names into paths inside the working
        # directory.
        output_paths = [cwd / o for o in output_files]

        if pre_execution_callback:
            pre_execution_callback(cwd)

        # Produce a gold-standard output using a reference version of
        # the benchmark.
        if compare_output or output_files:
            gs_env = env.fork()
            try:
                # Reset to the original benchmark state and compile it.
                gs_env.reset(benchmark=env.benchmark)
                gs_env.write_bitcode(cwd / "benchmark.bc")
                gold_standard = _compile_and_run_bitcode_file(
                    bitcode_file=cwd / "benchmark.bc",
                    cmd=expanded_command,
                    cwd=cwd,
                    num_runs=1,
                    # Use default optimizations for gold standard.
                    linkopts=linkopts + ["-O2"],
                    # Always assume safe.
                    sanitizer=None,
                    env=os_env,
                )
                if gold_standard.error:
                    return ValidationError(
                        type=f"Gold standard: {gold_standard.error.type}",
                        data=gold_standard.error.data,
                    )
            finally:
                gs_env.close()

            # Check that the reference run produced the expected output
            # files.
            for path in output_paths:
                if not path.is_file():
                    try:
                        output = gold_standard.output
                    except UnicodeDecodeError:
                        output = "<binary>"
                    raise FileNotFoundError(
                        f"Expected file '{path.name}' not generated\n"
                        f"Benchmark: {env.benchmark}\n"
                        f"Command: {cmd}\n"
                        f"Output: {output}"
                    )
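                # Keep the reference output out of the way of the optimized
                # run by renaming it with a .gold_standard suffix.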
                path.rename(f"{path}.gold_standard")

        # Serialize the benchmark to a bitcode file that will then be
        # compiled to a binary.
        env.write_bitcode(cwd / "benchmark.bc")
        outcome = _compile_and_run_bitcode_file(
            bitcode_file=cwd / "benchmark.bc",
            cmd=expanded_command,
            cwd=cwd,
            num_runs=num_runs,
            linkopts=linkopts,
            sanitizer=sanitizer,
            env=os_env,
        )
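
        # Propagate any compilation or runtime error from the benchmark run.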
        if outcome.error:
            return outcome.error

        # Run a user-specified validation hook.
        if validate_result:
            validate_result(outcome)

        # Difftest the console output.
        if compare_output and gold_standard.output != outcome.output:
            return ValidationError(
                type="Wrong output",
                data={"expected": gold_standard.output, "actual": outcome.output},
            )

        # Difftest the output files.
        for path in output_paths:
            if not path.is_file():
                return ValidationError(
                    type="Output not generated",
                    data={"path": path.name, "command": cmd},
                )
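            # Byte-compare the generated file against the gold-standard copy
            # using diff(1), with a five-minute timeout.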
            with subprocess.Popen(
                ["diff", str(path), f"{path}.gold_standard"],
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
            ) as diff:
                stdout, _ = diff.communicate(timeout=300)
                if diff.returncode:
                    try:
                        stdout = stdout.decode("utf-8")
                        return ValidationError(
                            type="Wrong output (file)",
                            data={"path": path.name, "diff": stdout},
                        )
                    except UnicodeDecodeError:
                        return ValidationError(
                            type="Wrong output (file)",
                            data={"path": path.name, "diff": "<binary>"},
                        )