in prediction_generation/old-code/cpdbench_mozilla_rep_debug.py [0:0]
def analyze(revision_data, weight_fn=None):
    """Return the weighted average, sample count and sample variance of a series.

    Each element of `revision_data` must expose a `values` attribute holding
    the (possibly several) measurements recorded for that revision.

    `weight_fn` is a function that takes a list index and a window width
    (here: the number of revisions) and returns the weight applied to every
    value of the revision at that index. If no function is passed,
    `default_weights` is used.

    Returns a dict with three keys:
      * "avg": the weighted average of all values, or 0.0 when there is
        nothing to average (no revisions, no values, or a total weight of
        zero).
      * "n": the total number of values across all revisions.
      * "variance": the sample variance (s**2, divisor n - 1) of all values
        around "avg", or 0.0 when there are fewer than two values. The
        squared deviations themselves are not weighted.
    """
    if weight_fn is None:
        weight_fn = default_weights

    # Get a weighted average for the full set of data. A revision may carry
    # several data points, so the weight is computed once per revision
    # (bucket) and applied to every value inside that bucket.
    num_revisions = len(revision_data)
    weighted_sum = 0
    sum_of_weights = 0
    for i, datum in enumerate(revision_data):
        weight = weight_fn(i, num_revisions)
        weighted_sum += sum(value * weight for value in datum.values)
        sum_of_weights += weight * len(datum.values)

    # The total weight is zero when there are no revisions, when every
    # revision is empty, or when the weights cancel out / are all zero.
    # Dividing would raise ZeroDivisionError, so fall back to 0.0 instead.
    if sum_of_weights != 0:
        weighted_avg = weighted_sum / sum_of_weights
    else:
        weighted_avg = 0.0

    # Now that we have a weighted average, calculate the variance of the
    # whole (flattened) series around it.
    all_data = [v for datum in revision_data for v in datum.values]
    n = len(all_data)
    if n > 1:
        variance = sum(pow(d - weighted_avg, 2) for d in all_data) / (n - 1)
    else:
        variance = 0.0

    return {"avg": weighted_avg, "n": n, "variance": variance}