in pageload-summary/summarize.py [0:0]
def temporal_aggregation(times, timespan=24):
    """Aggregate times formatted like `YYYY-mm-dd HH:MM` into windows.

    Walking from the newest data point to the oldest, the first point of a
    group anchors a window; every older point that is less than `timespan`
    hours before that anchor is merged into the group. The first point
    falling outside the window becomes the anchor of the next group.

    Args:
        times: Iterable of timestamp strings in `%Y-%m-%d %H:%M` format.
        timespan: Size of the aggregation window, in hours.

    Returns:
        A list of groups ordered from the oldest group to the newest one.
        Each group is a list of `%Y-%m-%d %H:%M` strings ordered from the
        newest point to the oldest. Every input point ends up in exactly
        one group; an empty input yields an empty list.

    Raises:
        ValueError: If a timestamp does not match the expected format.
    """
    time_format = "%Y-%m-%d %H:%M"
    window = datetime.timedelta(hours=timespan)

    # Sort the parsed values rather than the raw strings so the order is
    # chronological even when the fields are not zero-padded.
    points = sorted(
        (datetime.datetime.strptime(t, time_format) for t in times),
        reverse=True,
    )

    groups = []
    current = []
    for point in points:
        if current and current[0] - point >= window:
            # Outside the window anchored at the group's newest point:
            # close the group and start a new one from this point.
            groups.append(current)
            current = [point]
        else:
            current.append(point)

    # Always flush the trailing (oldest) group; otherwise its points would be
    # lost whenever more than one window exists.
    if current:
        groups.append(current)

    # Groups were collected newest-first; callers get them oldest-first.
    return [
        [point.strftime(time_format) for point in group]
        for group in reversed(groups)
    ]