in next_steps/data_science/diagnose/diagnose.py [0:0]
# Module-level imports this function relies on.
import time
import traceback
import warnings

import numpy as np
import pandas as pd
import scipy.sparse as ss


def compute_temporal_loss(df, freq, method, hist_len):
    tic = time.time()
    # Per-period, per-item event counts; df is expected to carry a DatetimeIndex
    # and an ITEM_ID column.
    df_cnt = df.groupby([pd.Grouper(freq=freq), 'ITEM_ID']).size()
    df_cnt = df_cnt.to_frame('_cnt').reset_index(level=1)
    # Contiguous time index covering the observed range at the requested frequency.
    index = pd.date_range(
        df_cnt.index.min(),
        df_cnt.index.max(),
        freq=freq)
    # Total event count per period, with empty periods filled with zero.
    df_wgt = df.groupby(pd.Grouper(freq=freq)).size().reindex(index, fill_value=0)
    # Row (time) and column (item) coordinates for the sparse count matrix.
    df_cnt['_i'] = np.searchsorted(index, df_cnt.index)
    df_cnt['_j'] = df_cnt['ITEM_ID'].astype('category').cat.codes
    N = len(df_cnt['ITEM_ID'].unique())
    # Sparse time-by-item count matrix.
    Y = ss.coo_matrix((
        df_cnt['_cnt'], (df_cnt['_i'], df_cnt['_j'])
    ), shape=(len(index), N)).tocsr()
    try:  # binary (doubling) rolling sum: X[t] = sum of Y[t-hist_len : t]
        # B accumulates the sum of Y shifted by 0 .. 2**p - 1; c is the next
        # shift offset still to be covered by the binary digits of hist_len.
        B = Y
        c = 1
        X = Y * 0
        X.eliminate_zeros()
        for p, b in enumerate(reversed('{0:b}'.format(hist_len))):
            if b == '1' and c < len(index):
                X = X + ss.vstack([ss.csr_matrix((c, N)), B[:-c]])
                c = c + 2**p
            if 2**p < len(index):
                B = B + ss.vstack([ss.csr_matrix((2**p, N)), B[:-2**p]])  # sum of shifts 0 .. 2**(p+1)-1
        # Sanity check: the last row of X equals the sum of the hist_len rows
        # of Y preceding the last one.
        assert np.allclose(X[-1:].sum(axis=0), Y[-hist_len-1:-1].sum(axis=0))
    except Exception:
        traceback.print_exc()
        warnings.warn("falling back to plain rolling sum")
        # Plain O(hist_len) rolling sum via shifted identity matrices.
        rolling = 0
        for t in range(hist_len):
            rolling = rolling + ss.eye(len(index), k=-t-1)
        X = rolling.dot(Y)
    return compute_distribution_shift(index, df_wgt, Y, X, method, hist_len, freq, tic)
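
The first half of the function turns an event log into the time-by-item count matrix Y. Below is a minimal standalone illustration of that construction (not part of diagnose.py); the ITEM_ID column name comes from the code above, while the dates and item ids are made up, and compute_distribution_shift is not shown here, so only this portion is exercised:

    import numpy as np
    import pandas as pd
    import scipy.sparse as ss

    events = pd.DataFrame(
        {'ITEM_ID': ['a', 'b', 'a', 'c', 'b', 'a']},
        index=pd.to_datetime(['2021-01-01', '2021-01-01', '2021-01-03',
                              '2021-01-08', '2021-01-09', '2021-01-15']))

    freq = 'W'
    df_cnt = events.groupby([pd.Grouper(freq=freq), 'ITEM_ID']).size()
    df_cnt = df_cnt.to_frame('_cnt').reset_index(level=1)
    index = pd.date_range(df_cnt.index.min(), df_cnt.index.max(), freq=freq)
    df_cnt['_i'] = np.searchsorted(index, df_cnt.index)
    df_cnt['_j'] = df_cnt['ITEM_ID'].astype('category').cat.codes
    N = df_cnt['ITEM_ID'].nunique()
    Y = ss.coo_matrix((df_cnt['_cnt'], (df_cnt['_i'], df_cnt['_j'])),
                      shape=(len(index), N)).tocsr()
    print(Y.toarray())  # rows: weekly periods, columns: items a, b, c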
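
The try block computes a trailing-window sum with O(log hist_len) sparse shifts by decomposing hist_len into its binary digits. As a sanity check on that doubling trick, here is a small self-contained sketch that applies the same decomposition to a random sparse matrix and compares it with a naive shifted sum (equivalent to the fallback branch):

    import numpy as np
    import scipy.sparse as ss

    rng = np.random.default_rng(0)
    T, N, hist_len = 50, 7, 11
    Y = ss.csr_matrix(rng.poisson(0.3, size=(T, N)).astype(float))

    # Doubling pass: B accumulates shifts 0 .. 2**p - 1, and X collects the
    # contiguous shift ranges selected by the binary digits of hist_len.
    B, c, X = Y, 1, Y * 0
    for p, b in enumerate(reversed('{0:b}'.format(hist_len))):
        if b == '1' and c < T:
            X = X + ss.vstack([ss.csr_matrix((c, N)), B[:-c]])
            c = c + 2**p
        if 2**p < T:
            B = B + ss.vstack([ss.csr_matrix((2**p, N)), B[:-2**p]])

    # Naive reference: sum of Y shifted down by 1 .. hist_len rows.
    ref = sum(ss.eye(T, k=-t - 1) for t in range(hist_len)).dot(Y)
    assert np.allclose(X.toarray(), ref.toarray())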