in afa/core.py [0:0]
def make_demand_classification(df, freq):
    """Compute per-timeseries demand statistics and a coarse category.

    Run analyses on each timeseries, e.g. to determine
    retired/intermittent/continuous timeseries. This needs to be run on a
    non-normalized dataframe (i.e. prior to pre-processing in the pipeline).

    Args:
        df: Dataframe containing a "demand" column. It is split into
            individual timeseries by ``ts_groups``.
        freq: Key into ``TAIL_LEN``; selects how many trailing periods are
            inspected when deciding whether a series is retired.

    Returns:
        A dataframe with one row per group produced by ``ts_groups`` and
        the following columns:

        - "retired": True when the demand in the last ``TAIL_LEN[freq]``
          periods sums to less than 1.
        - "life_periods": number of periods with strictly positive demand.
        - "len": number of rows in the series.
        - "spectral_entropy": base-2 entropy of the normalised periodogram,
          rounded to 2 decimals (0.0 when the periodogram has no power).
        - "intermittent": True when "spectral_entropy" is greater than 5.0.
        - "category": "short", "medium" or "continuous".

    Raises:
        AssertionError: If ``df`` has no "demand" column.
    """
    assert "demand" in df, 'df must contain a "demand" column'

    tail_len = TAIL_LEN[freq]

    # NOTE: the names of the aggregation helpers below are significant.
    # After the leading underscore is stripped they become the names of the
    # output columns ("retired", "life_periods", "spectral_entropy").

    def _retired(y):
        # No (or less than one unit of) demand in the trailing window.
        return y[-tail_len:].sum() < 1

    def _life_periods(y):
        # Count periods with strictly positive demand. `NaN > 0` is False,
        # so missing values and zeros (leading, trailing or interior) are
        # all excluded by the comparison itself.
        return int((y > 0).sum())

    def _spectral_entropy(y):
        y = y[np.logical_not(np.isnan(y))]
        _, power = signal.periodogram(y)
        total_power = power.sum()

        # A series without any spectral content (e.g. all-zero, constant or
        # single-observation) has zero total power. Normalising would be
        # 0/0, yielding NaN and a RuntimeWarning, so report zero entropy.
        if total_power == 0:
            return 0.0

        psd_norm = np.divide(power, total_power)
        psd_norm += 1e-6  # keep log2 finite for zero-power frequency bins
        return -np.multiply(psd_norm, np.log2(psd_norm)).sum().round(2)

    def _category(r):
        if not r["retired"]:
            return "continuous"
        # Retired series: "short" when demand was positive in fewer than a
        # quarter of the observed periods, otherwise "medium".
        if r["life_periods"] < r["len"] / 4.0:
            return "short"
        return "medium"

    df_analysis = ts_groups(df).agg(
        {"demand": [_retired, _life_periods, len, _spectral_entropy]}
    )

    # Flatten the two-level ("demand", <function name>) column labels into
    # plain names, e.g. ("demand", "_retired") -> "retired".
    df_analysis.columns = [
        "|".join([c.lstrip("_") for c in col])
        .strip(" |")
        .replace("demand|", "")
        for col in df_analysis.columns.values
    ]

    df_analysis["intermittent"] = df_analysis["spectral_entropy"] > 5.0

    # classify series as "short", "medium", or "continuous"
    df_analysis["category"] = df_analysis.apply(_category, axis=1)
    df_analysis = df_analysis.astype({"life_periods": int, "len": int})

    return df_analysis