in kats/utils/time_series_parameter_tuning.py [0:0]
def evaluate_arm(self, arm) -> Dict:
    """Evaluates an arm and formats the outcome as metric record(s).

    Calls ``self.evaluation_function`` with ``arm.parameters`` and
    converts whatever it returns into record(s) with the keys
    ``metric_name``, ``arm_name``, ``mean`` and ``sem``.

    The evaluation function may return:
      * a single number, used as the mean with a standard error of 0.0;
      * a tuple of exactly two numbers, read as ``(mean, sem)``;
      * a dict mapping metric names to per-metric values, where each
        value is either a single number (standard error 0.0) or an
        indexable whose first two items are the mean and the standard
        error.

    Args:
        arm: The arm object to be evaluated. Its ``parameters`` are
            passed to the evaluation function and its ``name`` is
            copied into every record.

    Returns:
        For a number or a two-number tuple, a single dict whose
        ``metric_name`` is ``self.name``. For a dict result, a list with
        one dict per metric, in the iteration order of that dict (an
        empty dict yields an empty list). Note that the ``Dict`` return
        annotation only describes the first of these two shapes.

    Raises:
        TypeError: If the evaluation function returns something other
            than a dict, a number, or a tuple of two numbers.
    """
    evaluation_result = self.evaluation_function(arm.parameters)

    # Multiple metrics: one record per (metric name, value) entry.
    if isinstance(evaluation_result, dict):
        records = []
        for metric_name, value in evaluation_result.items():
            # A bare number carries no uncertainty estimate; handle it the
            # same way as the single-metric scalar case instead of failing
            # on the indexing below.
            if isinstance(value, Number):
                value = (value, 0.0)
            records.append(
                {
                    "metric_name": metric_name,
                    "arm_name": arm.name,
                    "mean": value[0],
                    "sem": value[1],
                }
            )
        return records

    # Single metric: normalize to a (mean, sem) pair.
    if isinstance(evaluation_result, Number):
        mean, sem = evaluation_result, 0.0
    elif (
        isinstance(evaluation_result, tuple)
        and len(evaluation_result) == 2
        and all(isinstance(n, Number) for n in evaluation_result)
    ):
        mean, sem = evaluation_result
    else:
        raise TypeError(
            "Evaluation function should either return a single numeric "
            "value that represents the error or a tuple of two numeric "
            "values, one for the mean of error and the other for the "
            "standard error of the mean of the error."
        )

    return {
        "metric_name": self.name,
        "arm_name": arm.name,
        "mean": mean,
        "sem": sem,
    }