def compute_bootstrap_loss()

in next_steps/data_science/diagnose/diagnose.py [0:0]


def compute_bootstrap_loss(df, freq, method):
    """Compare two random halves of ``df`` via ``compute_distribution_shift``.

    Every row of ``df`` is assigned at random to one of two halves. For each
    half, rows are counted per (time bin, ``ITEM_ID``) and the counts are
    stored in a sparse matrix with one row per time bin and one column per
    distinct ``ITEM_ID``. Both matrices, the time axis and the per-bin row
    totals of the whole frame are then passed to
    ``compute_distribution_shift``.

    Args:
        df: Frame with an ``ITEM_ID`` column. It is binned with
            ``pd.Grouper(freq=freq)`` without a ``key``, so the index is
            presumably a ``DatetimeIndex`` -- TODO confirm against callers.
            The input is copied and not modified.
        freq: Frequency used both for the time bins and for the regular
            ``pd.date_range`` axis.
        method: Forwarded unchanged to ``compute_distribution_shift``.

    Returns:
        Whatever ``compute_distribution_shift`` returns.
    """
    # Taken first and forwarded at the end; presumably used by the callee
    # to report elapsed time -- verify in compute_distribution_shift.
    tic = time.time()

    # Work on a copy so the helper column never leaks into the caller's frame.
    df = df.copy()
    df['_bs'] = np.random.rand(len(df)) < 0.5

    # Rows per (half, time bin, item); afterwards only the time bin remains
    # in the index while '_bs' and 'ITEM_ID' become ordinary columns.
    counts = (
        df.groupby(['_bs', pd.Grouper(freq=freq), 'ITEM_ID'])
        .size()
        .to_frame('_cnt')
        .reset_index(level=(0, 2))
    )

    # Regular time axis spanning the observed bins; bins with no rows get a
    # weight of zero.
    index = pd.date_range(counts.index.min(), counts.index.max(), freq=freq)
    df_wgt = (
        df.groupby(pd.Grouper(freq=freq))
        .size()
        .reindex(index, fill_value=0)
    )

    # Integer coordinates for the sparse matrices: row = position of the
    # time bin on the axis, column = categorical code of the item.
    counts['_i'] = np.searchsorted(index, counts.index)
    counts['_j'] = counts['ITEM_ID'].astype('category').cat.codes
    n_items = len(counts['ITEM_ID'].unique())
    shape = (len(index), n_items)

    # One matrix per half, in the order '_bs' == 0 then '_bs' == 1.
    halves = []
    for split in (0, 1):
        part = counts[counts['_bs'] == split]
        coo = ss.coo_matrix(
            (part['_cnt'], (part['_i'], part['_j'])),
            shape=shape,
        )
        halves.append(coo.tocsr())
    Y, X = halves

    return compute_distribution_shift(index, df_wgt, Y, X, method, 0, freq, tic)