def eval_llvm_instcount_policy()

in compiler_gym/leaderboard/llvm_instcount.py


def eval_llvm_instcount_policy(policy: Policy) -> None:
    """Evaluate an LLVM codesize policy and generate results for a leaderboard
    submission.

    To use it, you define your policy as a function that takes an
    :class:`LlvmEnv <compiler_gym.envs.LlvmEnv>` instance as input and modifies
    it in place. For example, for a trivial random policy:

        >>> from compiler_gym.envs import LlvmEnv
        >>> def my_policy(env: LlvmEnv) -> None:
        ...    # Defines a policy that takes 10 random steps.
        ...    for _ in range(10):
        ...        _, _, done, _ = env.step(env.action_space.sample())
        ...        if done: break

    If your policy is stateful, you can use a class and override the
    :code:`__call__()` method:

        >>> class MyPolicy:
        ...     def __init__(self):
        ...         self.my_stateful_vars = {}  # or similar
        ...     def __call__(self, env: LlvmEnv) -> None:
        ...         pass # ... do fun stuff!
        >>> my_policy = MyPolicy()

    The role of your policy is to perform a sequence of actions on the supplied
    environment so as to maximize cumulative reward. By default, no observation
    space is set on the environment, so :meth:`env.step()
    <compiler_gym.envs.CompilerEnv.step>` will return :code:`None` for the
    observation. You may set a new observation space:

        >>> env.observation_space = "InstCount"  # Set a new space for env.step()
        >>> env.observation["InstCount"]  # Calculate a one-off observation.

    However, the policy may not change the environment's reward space or its
    benchmark.

    Once you have defined your policy, call the
    :func:`eval_llvm_instcount_policy()
    <compiler_gym.leaderboard.llvm_instcount.eval_llvm_instcount_policy>` helper
    function, passing it your policy as its only argument:

        >>> eval_llvm_instcount_policy(my_policy)

    The :func:`eval_llvm_instcount_policy()
    <compiler_gym.leaderboard.llvm_instcount.eval_llvm_instcount_policy>`
    function calls the policy function for each benchmark in the dataset, one at
    a time, from a single thread. Stateful policies can assume thread-safe
    access to member variables.
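
    Conceptually, each evaluation episode behaves like the simplified sketch
    below. The dataset name is an assumption for illustration, and
    :code:`my_policy` refers to a policy function defined as above; the real
    implementation additionally records walltimes, supports resuming
    interrupted runs, and reports progress to the terminal:

    .. code-block:: python

        # Simplified sketch of the evaluation loop, not the actual implementation.
        import compiler_gym

        with compiler_gym.make("llvm-ic-v0") as env:
            # Evaluate the policy once on every benchmark in the test set
            # (assumed here to be cBench).
            for benchmark in env.datasets["benchmark://cbench-v1"].benchmark_uris():
                env.reset(benchmark=benchmark)
                my_policy(env)  # The policy takes its actions here.
                print(benchmark, env.episode_reward)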

    Put together as a complete example, a leaderboard submission script may look
    like:

    .. code-block:: python

        # my_policy.py
        from compiler_gym.leaderboard.llvm_instcount import eval_llvm_instcount_policy
        from compiler_gym.envs import LlvmEnv

        def my_policy(env: LlvmEnv) -> None:
            env.observation_space = "InstCount"  # we're going to use instcount space
            pass # ... do fun stuff!

        if __name__ == "__main__":
            eval_llvm_instcount_policy(my_policy)

    The :func:`eval_llvm_instcount_policy()
    <compiler_gym.leaderboard.llvm_instcount.eval_llvm_instcount_policy>` helper
    defines a number of commandline flags that can be overridden to control the
    behavior of the evaluation. For example, the flag :code:`--n` determines the
    number of times the policy is run on each benchmark (default is 10), and
    :code:`--leaderboard_results` determines the path of the generated results
    file:

    .. code-block::

        $ python my_policy.py --n=5 --leaderboard_results=my_policy_results.csv

    You can use the :code:`--helpfull` flag to list all of the flags that are
    defined:

    .. code-block::

        $ python my_policy.py --helpfull

    Once you are happy with your approach, see the `contributing guide
    <https://github.com/facebookresearch/CompilerGym/blob/development/CONTRIBUTING.md#leaderboard-submissions>`_
    for instructions on preparing a submission to the leaderboard.
    """

    def main(argv):
        assert len(argv) == 1, f"Unknown args: {argv[1:]}"
        assert FLAGS.n > 0, "n must be > 0"

        with gym.make("llvm-ic-v0") as env:

            # Stream verbose CompilerGym logs to file.
            logger = logging.getLogger("compiler_gym")
            logger.setLevel(logging.DEBUG)
            log_handler = logging.FileHandler(FLAGS.leaderboard_logfile)
            logger.addHandler(log_handler)
            logger.propagate = False

            print(f"Writing results to {FLAGS.leaderboard_results}")
            print(f"Writing logs to {FLAGS.leaderboard_logfile}")

            # Build the list of benchmarks to evaluate.
            benchmarks = env.datasets[FLAGS.test_dataset].benchmark_uris()
            if FLAGS.max_benchmarks:
                benchmarks = islice(benchmarks, FLAGS.max_benchmarks)
            benchmarks = list(benchmarks)

            # Repeat the searches for the requested number of iterations.
            benchmarks *= FLAGS.n
            total_count = len(benchmarks)

            # If we are resuming from a previous job, read the states that have
            # already been processed and remove those benchmarks from the list
            # of benchmarks to evaluate.
            init_states = []
            if FLAGS.resume and Path(FLAGS.leaderboard_results).is_file():
                with CompilerEnvStateReader(open(FLAGS.leaderboard_results)) as reader:
                    for state in reader:
                        init_states.append(state)
                        if state.benchmark in benchmarks:
                            benchmarks.remove(state.benchmark)

            # Run the benchmark loop in background so that we can asynchronously
            # log progress.
            worker = _EvalPolicyWorker(env, benchmarks, policy, init_states)
            worker.start()
            timer = Timer().reset()
            try:
                print(
                    f"=== Evaluating policy on "
                    f"{humanize.intcomma(total_count)} "
                    f"{FLAGS.test_dataset} benchmarks ==="
                    "\n\n"  # Blank lines will be filled below
                )
                while worker.is_alive():
                    done_count = len(worker.states)
                    remaining_count = total_count - done_count
                    time = timer.time
                    gmean_reward = geometric_mean([s.reward for s in worker.states])
                    mean_walltime = (
                        arithmetic_mean([s.walltime for s in worker.states]) or time
                    )
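                    # ANSI escape sequences: "\r\033[2A" returns the cursor to
                    # the start of the line and moves it up two lines, and
                    # "\033[K" clears from the cursor to the end of the line,
                    # so the three-line progress display is redrawn in place.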
                    print(
                        "\r\033[2A"
                        "\033[K"
                        f"Runtime: {humanize_duration_hms(time)}. "
                        f"Estimated completion: {humanize_duration_hms(mean_walltime * remaining_count)}. "
                        f"Completed: {humanize.intcomma(done_count)} / {humanize.intcomma(total_count)} "
                        f"({done_count / total_count:.1%})."
                        "\n\033[K"
                        f"Current mean walltime: {mean_walltime:.3f}s / benchmark."
                        "\n\033[K"
                        f"Current geomean reward: {gmean_reward:.4f}.",
                        flush=True,
                        end="",
                    )
                    sleep(1)
            except KeyboardInterrupt:
                print("\nkeyboard interrupt", flush=True)
                worker.alive = False
                # User interrupt, don't validate.
                FLAGS.validate = False

        if FLAGS.validate:
            FLAGS.env = "llvm-ic-v0"
            validate(["argv0", FLAGS.leaderboard_results])

    app.run(main)