in next_steps/data_science/diagnose/diagnose.py [0:0]
import time

import numpy as np
import pandas as pd
import scipy.sparse as ss


def compute_bootstrap_loss(df, freq, method):
    tic = time.time()
    df = df.copy()
    # Randomly assign each event to one of two bootstrap halves.
    df['_bs'] = np.random.rand(len(df)) < 0.5
    # Count events per (split, time bucket, item); the Grouper buckets the
    # DatetimeIndex at the requested frequency.
    df_cnt = df.groupby(['_bs', pd.Grouper(freq=freq), 'ITEM_ID']).size()
    df_cnt = df_cnt.to_frame('_cnt').reset_index(level=(0, 2))
    # Full time index covering the observed range, plus per-bucket totals
    # with empty buckets filled as zero.
    index = pd.date_range(df_cnt.index.min(), df_cnt.index.max(), freq=freq)
    df_wgt = df.groupby(pd.Grouper(freq=freq)).size().reindex(index, fill_value=0)
    # Map each row to (time, item) coordinates for the sparse count matrices.
    df_cnt['_i'] = np.searchsorted(index, df_cnt.index)
    df_cnt['_j'] = df_cnt['ITEM_ID'].astype('category').cat.codes
    N = len(df_cnt['ITEM_ID'].unique())
    # Build one time-by-item count matrix per bootstrap half.
    Y, X = [ss.coo_matrix((
        df_cnt[df_cnt['_bs'] == split]['_cnt'],
        (df_cnt[df_cnt['_bs'] == split]['_i'],
         df_cnt[df_cnt['_bs'] == split]['_j'])
    ), shape=(len(index), N)).tocsr() for split in [0, 1]]
    # compute_distribution_shift is assumed to be defined elsewhere in this module.
    return compute_distribution_shift(index, df_wgt, Y, X, method, 0, freq, tic)
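A minimal usage sketch (not from the repository), assuming the event log is indexed by timestamp and carries an ITEM_ID column, since pd.Grouper(freq=...) groups on the DatetimeIndex. The method argument is forwarded unchanged to compute_distribution_shift, so the accepted values depend on that function; 'chisq' below is only a placeholder.

import numpy as np
import pandas as pd

# Hypothetical toy event log: 1,000 events spread over ~90 days, three items.
rng = np.random.default_rng(0)
events = pd.DataFrame(
    {'ITEM_ID': rng.choice(['a', 'b', 'c'], size=1000)},
    index=pd.Timestamp('2024-01-01')
    + pd.to_timedelta(rng.integers(0, 90, size=1000), unit='D'),
)

# freq='W' buckets events by week; 'chisq' is only a placeholder method name.
loss = compute_bootstrap_loss(events, freq='W', method='chisq')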