def make_demand_classification()

in afa/core.py [0:0]


def make_demand_classification(df, freq):
    """Run analyses on each timeseries, e.g. to determine
    retired/intermittent/continuous timeseries. This needs to be run on a
    non-normalized dataframe (i.e. prior to pre-processing in the pipeline).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a "demand" column. The frame is grouped with the
        module-level ``ts_groups`` helper (presumably one group per
        timeseries -- confirm against that helper).
    freq
        Key into the module-level ``TAIL_LEN`` lookup, which supplies the
        number of trailing periods inspected by the "retired" check.

    Returns
    -------
    pandas.DataFrame
        One row per group produced by ``ts_groups`` with the columns:

        - ``retired``: True when demand over the last ``TAIL_LEN[freq]``
          periods sums to less than 1.
        - ``life_periods``: number of periods with strictly positive demand.
        - ``len``: number of rows in the group.
        - ``spectral_entropy``: entropy (base 2) of the normalized
          periodogram of the non-NaN demand values, rounded to 2 decimals.
        - ``intermittent``: True when ``spectral_entropy`` exceeds 5.0.
        - ``category``: "short", "medium" or "continuous".

    Raises
    ------
    AssertionError
        If ``df`` has no "demand" column.

    """
    assert "demand" in df, 'df must contain a "demand" column'

    tail_len = TAIL_LEN[freq]

    # NOTE: the names of the aggregation helpers below become the column
    # names of the result (minus the leading underscore), so they must not
    # be renamed without also updating the column references further down.

    def _retired(y):
        # (Almost) no demand across the trailing window.
        return y[-tail_len:].sum() < 1

    def _life_periods(y):
        # Count periods with strictly positive demand. `NaN > 0` is False,
        # so missing values are excluded by the same comparison, and
        # leading/trailing zeros need no separate trimming.
        return len(y[y > 0])

    def _category(r):
        if not r["retired"]:
            return "continuous"
        # Retired series: "short" if demand occurred in fewer than a
        # quarter of the observed periods, otherwise "medium".
        if r["life_periods"] < r["len"] / 4.0:
            return "short"
        return "medium"

    def _spectral_entropy(y):
        y = y[np.logical_not(np.isnan(y))]
        _, psd = signal.periodogram(y)
        # Normalize the power spectrum so it can be treated as a
        # probability distribution. NOTE(review): when the total power is
        # zero (e.g. an all-zero series) this is 0/0 and the entropy comes
        # out as NaN, which then compares False against the threshold.
        psd_norm = np.divide(psd, psd.sum())
        # Small offset keeps log2 finite for zero-power frequency bins.
        psd_norm += 1e-6
        return -np.multiply(psd_norm, np.log2(psd_norm)).sum().round(2)

    df_analysis = ts_groups(df).agg(
        {"demand": [_retired, _life_periods, len, _spectral_entropy]}
    )

    # Flatten the two-level ("demand", <function name>) column labels into
    # plain names such as "retired" or "life_periods".
    df_analysis.columns = [
        "|".join([c.lstrip("_") for c in col])
        .strip(" |")
        .replace("demand|", "")
        for col in df_analysis.columns.values
    ]

    df_analysis["intermittent"] = df_analysis["spectral_entropy"] > 5.0

    # classify series as "short", "medium", or "continuous"
    df_analysis["category"] = df_analysis.apply(_category, axis=1)

    df_analysis = df_analysis.astype({"life_periods": int, "len": int})

    return df_analysis