def transform()

in sa-dsml-many-models/code/util/timeseries_utilities.py [0:0]


    def transform(self, X):
        """
        Create lag features of the target for the input data.
        The transform uses data cached at fit time, if necessary, to provide
        continuity of lag features.
        """
        X_trans = X.copy()
        added_target = False
        if self.target_column_name not in X_trans.columns:
            X_trans[self.target_column_name] = np.nan
            added_target = True

        # decide if we need to use the training cache i.e. are we in a test scenario?
        train_latest = self._train_tail.index.max()
        X_earliest = X_trans.index.min()
        if train_latest < X_earliest:
            # X data is later than the training period - append the cached tail of training data
            X_trans = pd.concat((self._train_tail, X_trans[self._column_order]))

        # Ensure data is sorted by time before making lags
        X_trans.sort_index(ascending=True, inplace=True)

        # Make the lag features
        for lag_order in self.lag_orders:
            X_trans['lag_' + str(lag_order)] = X_trans[self.target_column_name].shift(lag_order)

        # Return transformed dataframe with the same time range as X
        if added_target:
            X_trans.drop(columns=[self.target_column_name], inplace=True)
        return X_trans.loc[X.index]