ax/benchmark2/benchmark_result.py (109 lines of code) (raw):

# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from __future__ import annotations

from dataclasses import dataclass
from typing import List, Tuple, Iterable

import numpy as np
import pandas as pd
from ax.core.experiment import Experiment
from ax.core.utils import get_model_times
from ax.service.scheduler import Scheduler
from ax.utils.common.typeutils import not_none


@dataclass(frozen=True)
class BenchmarkResult:
    """The result of a single optimization loop from one
    (BenchmarkProblem, BenchmarkMethod) pair. More information will be
    added to the BenchmarkResult as the suite develops.
    """

    name: str
    experiment: Experiment

    # Tracks best point if single-objective problem, max hypervolume if MOO
    optimization_trace: np.ndarray
    fit_time: float
    gen_time: float

    @classmethod
    def from_scheduler(cls, scheduler: Scheduler) -> BenchmarkResult:
        fit_time, gen_time = get_model_times(experiment=scheduler.experiment)

        return cls(
            name=scheduler.experiment.name,
            experiment=scheduler.experiment,
            optimization_trace=cls._get_trace(scheduler=scheduler),
            fit_time=fit_time,
            gen_time=gen_time,
        )

    @staticmethod
    def _get_trace(scheduler: Scheduler) -> np.ndarray:
        if scheduler.experiment.is_moo_problem:
            return np.array(
                [
                    scheduler.get_hypervolume(
                        trial_indices=[*range(i + 1)], use_model_predictions=False
                    )
                    if i != 0
                    else 0
                    # TODO[mpolson64] on i=0 we get an error with SearchspaceToChoice
                    for i in range(len(scheduler.experiment.trials))
                ],
            )

        best_trials = [
            scheduler.get_best_trial(
                trial_indices=[*range(i + 1)], use_model_predictions=False
            )
            for i in range(len(scheduler.experiment.trials))
        ]

        return np.array(
            [
                not_none(not_none(trial)[2])[0][
                    not_none(
                        scheduler.experiment.optimization_config
                    ).objective.metric.name
                ]
                for trial in best_trials
                if trial is not None and not_none(trial)[2] is not None
            ]
        )


@dataclass(frozen=True)
class AggregatedBenchmarkResult:
    """The result of a benchmark test, or series of replications. Scalar data
    present in the BenchmarkResult is here represented as (mean, sem) pairs.
    More information will be added to the AggregatedBenchmarkResult as the
    suite develops.
    """

    name: str
    experiments: Iterable[Experiment]

    # mean, sem columns
    optimization_trace: pd.DataFrame

    # (mean, sem) pairs
    fit_time: Tuple[float, float]
    gen_time: Tuple[float, float]

    @classmethod
    def from_benchmark_results(
        cls,
        results: List[BenchmarkResult],
    ) -> AggregatedBenchmarkResult:
        return cls(
            name=results[0].name,
            experiments=[result.experiment for result in results],
            optimization_trace=pd.DataFrame(
                {
                    "mean": [
                        np.mean(
                            [
                                results[j].optimization_trace[i]
                                for j in range(len(results))
                            ]
                        )
                        for i in range(len(results[0].optimization_trace))
                    ],
                    "sem": [
                        cls._series_to_sem(
                            series=[
                                results[j].optimization_trace[i]
                                for j in range(len(results))
                            ]
                        )
                        for i in range(len(results[0].optimization_trace))
                    ],
                }
            ),
            fit_time=cls._series_to_mean_sem(
                series=[result.fit_time for result in results]
            ),
            gen_time=cls._series_to_mean_sem(
                series=[result.gen_time for result in results]
            ),
        )

    @staticmethod
    def _series_to_mean_sem(series: List[float]) -> Tuple[float, float]:
        return (
            np.mean(series),
            AggregatedBenchmarkResult._series_to_sem(series=series),
        )

    @staticmethod
    def _series_to_sem(series: List[float]) -> float:
        return np.std(series, ddof=1) / np.sqrt(len(series))
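

# A minimal usage sketch (illustrative only, kept as a comment so it is not
# executed on import): given several completed ``Scheduler`` instances from
# repeated runs of the same benchmark problem/method pair, collected here in a
# hypothetical list named ``schedulers``, each replication is first wrapped in
# a BenchmarkResult and the replications are then aggregated into (mean, sem)
# summaries.
#
#     results = [BenchmarkResult.from_scheduler(scheduler=s) for s in schedulers]
#     aggregated = AggregatedBenchmarkResult.from_benchmark_results(results=results)
#     aggregated.optimization_trace  # DataFrame with "mean" and "sem" columns
#     aggregated.fit_time            # (mean, sem) tuple across replications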