# compute_temporal_loss
#
# from: next_steps/data_science/diagnose/diagnose.py

def compute_temporal_loss(df, freq, method, hist_len):
    """Measure temporal distribution shift of per-item activity.

    Buckets events into time bins of width ``freq``, builds a sparse
    (time x item) count matrix ``Y``, and computes ``X`` where row ``t``
    is the summed counts over the ``hist_len`` bins strictly before ``t``.
    Both are forwarded to ``compute_distribution_shift`` (defined
    elsewhere in this module) for the actual shift metric.

    Parameters
    ----------
    df : pd.DataFrame
        Event log indexed by timestamp, with an ``'ITEM_ID'`` column.
        (Assumes a DatetimeIndex so ``pd.Grouper(freq=...)`` applies —
        TODO confirm against callers.)
    freq : str
        Pandas offset alias for the bin width (e.g. ``'D'``, ``'W'``).
    method : str
        Shift-metric selector, passed through to compute_distribution_shift.
    hist_len : int
        Number of preceding bins aggregated into the history window.

    Returns
    -------
    Whatever ``compute_distribution_shift`` returns.
    """
    tic = time.time()

    # Per-(time-bin, item) event counts.
    df_cnt = df.groupby([pd.Grouper(freq=freq), 'ITEM_ID']).size()
    df_cnt = df_cnt.to_frame('_cnt').reset_index(level=1)

    # Dense, gap-free time axis; bins with no events get weight 0.
    index = pd.date_range(
        df_cnt.index.min(),
        df_cnt.index.max(),
        freq=freq)
    df_wgt = df.groupby(pd.Grouper(freq=freq)).size().reindex(index, fill_value=0)

    # Integer coordinates for the sparse matrix: row = time bin, col = item.
    df_cnt['_i'] = np.searchsorted(index, df_cnt.index)
    df_cnt['_j'] = df_cnt['ITEM_ID'].astype('category').cat.codes
    N = df_cnt['ITEM_ID'].nunique()

    # Sparse (time x item) count matrix.
    Y = ss.coo_matrix((
        df_cnt['_cnt'], (df_cnt['_i'], df_cnt['_j'])
    ), shape=(len(index), N)).tocsr()

    try:
        # Binary rolling sum: decompose hist_len into powers of two so the
        # trailing-window sum costs O(log hist_len) sparse adds instead of
        # O(hist_len). Loop invariants (bits scanned LSB-first):
        #   B = Y summed over downward shifts 0 .. 2**p - 1
        #   c = 1 + (total shift already folded into X), so the next set
        #       bit contributes B shifted down by exactly c rows.
        B = Y
        c = 1
        X = Y*0
        X.eliminate_zeros()
        for p, b in enumerate(reversed('{0:b}'.format(hist_len))):
            if b == '1' and c < len(index):
                # Prepending c empty rows == shifting B down by c bins.
                X = X + ss.vstack([ss.csr_matrix((c, N)), B[:-c]])
                c = c + 2**p
            if 2**p < len(index):
                B = B + ss.vstack([ss.csr_matrix((2**p, N)), B[:-2**p]])  # sum 0 .. 2**(p+1)-1

        # Self-check against a direct window sum on the final row.
        # Explicit raise rather than `assert` so the check survives
        # `python -O`; the except below then falls back to the plain
        # O(hist_len) construction instead of returning a wrong X.
        if not np.allclose(X[-1:].sum(axis=0), Y[-hist_len-1:-1].sum(axis=0)):
            raise ValueError("binary rolling sum self-check failed")

    except Exception:
        traceback.print_exc()
        warnings.warn("falling back to plain rolling sum")

        # Plain rolling sum: sum of subdiagonal shift matrices, so
        # (rolling @ Y)[t] = Y[t-1] + ... + Y[t-hist_len].
        rolling = 0
        for t in range(hist_len):
            rolling = rolling + ss.eye(len(index), k=-t-1)
        X = rolling.dot(Y)

    return compute_distribution_shift(index, df_wgt, Y, X, method, hist_len, freq, tic)