in treeherder/perfalert/perfalert/__init__.py [0:0]
def detect_changes(data, min_back_window=12, max_back_window=24, fore_window=12, t_threshold=7):
    """Score every datum with a t-test and flag the likely change points.

    The input is sorted into a new list, so its elements must be mutually
    orderable.  Each element from index 1 onwards is compared against the
    data just before it and the data starting at it, and is annotated in
    place with:

    * ``amount_prev_data`` / ``amount_next_data`` -- how many values
      (summed ``len(datum.values)``) went into the backward / forward window;
    * ``historical_stats`` / ``forward_stats`` -- ``analyze()`` of each window;
    * ``t`` -- absolute value of ``calc_t(backward, forward, linear_weights)``.

    A second pass sets ``change_detected = True`` on every datum that has
    full windows on both sides, a ``t`` above ``t_threshold`` and a ``t`` at
    least as large as both of its direct neighbours.

    NOTE(review): the element at index 0 is never scored here, yet its ``t``
    is read when index 1 is a candidate -- presumably the datum type supplies
    a default ``t``; confirm against its definition.

    :param data: iterable of datum objects exposing a sized ``values``.
    :param min_back_window: minimum number of preceding datums to look at,
        and minimum amount of preceding values needed to flag a change.
    :param max_back_window: cap on both the number of preceding datums and
        the amount of preceding values gathered.
    :param fore_window: amount of current-and-following values to gather,
        and the minimum needed to flag a change.
    :param t_threshold: ``t`` must exceed this to count as a change.
    :returns: the sorted list of (annotated) datums.
    """
    data = sorted(data)
    total = len(data)

    # First pass: compute a t score for each datum against its neighbourhood.
    quiet_streak = 0  # datums seen since the last score above the threshold
    for idx in range(1, total):
        current = data[idx]

        # The look-back distance widens while no likely change has been
        # seen, but never leaves [min_back_window, max_back_window].
        lookback_limit = min(max(quiet_streak, min_back_window), max_back_window)

        # Walk backwards, collecting datums until the distance limit, the
        # start of the series or the cap on gathered values is reached.
        history = []
        current.amount_prev_data = 0
        back_idx = idx - 1
        while (
            back_idx >= 0
            and idx - back_idx <= lookback_limit
            and current.amount_prev_data < max_back_window
        ):
            older = data[back_idx]
            history.append(older)
            current.amount_prev_data += len(older.values)
            back_idx -= 1

        # Walk forwards from the current datum (inclusive) until enough
        # values are gathered or the series ends.
        upcoming = []
        current.amount_next_data = 0
        fwd_idx = idx
        while fwd_idx < total and current.amount_next_data < fore_window:
            newer = data[fwd_idx]
            upcoming.append(newer)
            current.amount_next_data += len(newer.values)
            fwd_idx += 1

        current.historical_stats = analyze(history)
        current.forward_stats = analyze(upcoming)
        current.t = abs(calc_t(history, upcoming, linear_weights))

        # A score above the threshold restarts the streak; otherwise the
        # next datum is allowed to look one step further back.
        quiet_streak = 0 if current.t > t_threshold else quiet_streak + 1

    # Second pass: with all scores known, keep only the local peaks.
    for idx in range(1, total):
        current = data[idx]

        # Not enough data on either side yet (more may arrive later), or
        # the score is simply too low.
        lacks_data = (
            current.amount_prev_data < min_back_window
            or current.amount_next_data < fore_window
        )
        if lacks_data or current.t <= t_threshold:
            continue

        # A neighbour with a strictly higher score is the better candidate.
        if data[idx - 1].t > current.t:
            continue
        # The following neighbour does not exist for the last datum.
        if idx + 1 < total and data[idx + 1].t > current.t:
            continue

        # Above the threshold and not beaten by either neighbour: mark this
        # datum as the point where the change happened.
        current.change_detected = True

    return data