def temporal_aggregation()

in pageload-summary/summarize.py [0:0]


def temporal_aggregation(times, timespan=24):
    """Aggregates times formatted like `YYYY-mm-dd HH:MM`.

    Points are walked from newest to oldest. The newest point opens a
    group, and every older point that is strictly less than `timespan`
    hours away from the group's newest point is merged into it. The first
    point that falls outside the window opens the next group.

    Args:
        times: Iterable of timestamp strings formatted `YYYY-mm-dd HH:MM`.
        timespan: Width of the aggregation window, in hours.

    Returns:
        A list of groups ordered from the oldest group to the newest one.
        Each group is a list of timestamp strings ordered from newest to
        oldest. Every input point appears in exactly one group; an empty
        input produces an empty list.

    Raises:
        ValueError: If a timestamp does not match `YYYY-mm-dd HH:MM`.
    """
    time_format = "%Y-%m-%d %H:%M"
    window = datetime.timedelta(hours=timespan)

    # Sort the parsed datetimes rather than the raw strings so the ordering
    # is chronological even if a timestamp is not zero-padded.
    parsed = sorted(
        (datetime.datetime.strptime(t, time_format) for t in times),
        reverse=True,
    )

    groups = []
    curr = []
    for dt in parsed:
        # curr[0] is the newest point of the group being built; a point that
        # is a full `timespan` (or more) older closes the group.
        if curr and curr[0] - dt >= window:
            groups.append(curr)
            curr = []
        curr.append(dt)

    # Always flush the trailing (oldest) group. Flushing it only when no
    # other group existed would silently drop the oldest data points.
    if curr:
        groups.append(curr)

    # Groups were collected newest-first; return them oldest-first.
    return [
        [dt.strftime(time_format) for dt in group] for group in reversed(groups)
    ]