def evaluate_arm()

in kats/utils/time_series_parameter_tuning.py [0:0]


    def evaluate_arm(self, arm) -> Dict:
        """Evaluates the performance of an arm.

        Calls ``self.evaluation_function`` with ``arm.parameters`` and
        reformats whatever it returns into records with the keys
        ``metric_name``, ``arm_name``, ``mean`` and ``sem``.

        The evaluation function may return:
            * a single number, used as the mean with a standard error
              of 0.0;
            * a tuple of exactly two numbers, ``(mean, sem)``;
            * a dict mapping a metric name to a value. A bare number is
              used as the mean with a standard error of 0.0; any other
              value is indexed, with element 0 as the mean and element 1
              as the standard error.

        Args:
            arm: The arm object to be evaluated. Its ``parameters``
                attribute is passed to the evaluation function and its
                ``name`` attribute is copied into every record.

        Returns:
            A single dict, reported under ``self.name``, when the
            evaluation function returns a number or a two-number tuple.
            A list of dicts, one per metric, when it returns a dict;
            note that the ``Dict`` annotation only describes the
            single-metric case.

        Raises:
            TypeError: If the evaluation result is not a dict, a number
                or a tuple of two numbers.
        """

        evaluation_result = self.evaluation_function(arm.parameters)

        # Multiple metrics: one record per (metric name, value) pair.
        if isinstance(evaluation_result, dict):
            records = []
            for metric_name, value in evaluation_result.items():
                if isinstance(value, Number):
                    # A bare number carries no error estimate; treat it the
                    # same way as a bare number returned at the top level.
                    mean, sem = value, 0.0
                else:
                    # Indexing (rather than unpacking) keeps accepting every
                    # sequence-like value that was accepted before.
                    mean, sem = value[0], value[1]
                records.append(
                    {
                        "metric_name": metric_name,
                        "arm_name": arm.name,
                        "mean": mean,
                        "sem": sem,
                    }
                )
            return records

        # Single metric: arm evaluation requires a mean and a standard error.
        if isinstance(evaluation_result, Number):
            mean, sem = evaluation_result, 0.0
        elif (
            isinstance(evaluation_result, tuple)
            and len(evaluation_result) == 2
            and all(isinstance(n, Number) for n in evaluation_result)
        ):
            mean, sem = evaluation_result
        else:
            raise TypeError(
                "Evaluation function should either return a single numeric "
                "value that represents the error or a tuple of two numeric "
                "values, one for the mean of error and the other for the "
                "standard error of the mean of the error."
            )
        return {
            "metric_name": self.name,
            "arm_name": arm.name,
            "mean": mean,
            "sem": sem,
        }