in sa-dsml-many-models/code/util/timeseries_utilities.py [0:0]
def transform(self, X):
    """
    Add lagged copies of the target column to a copy of ``X``.

    One column named ``'lag_<k>'`` is produced for every ``k`` in
    ``self.lag_orders`` by shifting the target column ``k`` rows after the
    frame has been sorted by its index.

    When the newest index value of the cached ``self._train_tail`` is
    strictly earlier than the oldest index value of ``X``, the cached rows
    are placed in front of ``X`` (restricted to ``self._column_order``) so
    that the first rows of ``X`` can take their lags from the cache.

    If ``X`` has no target column, a temporary all-NaN one is added for the
    computation and removed again before returning.

    Parameters
    ----------
    X : pandas.DataFrame
        Input rows. ``X`` itself is not modified; work happens on a copy.

    Returns
    -------
    pandas.DataFrame
        The rows selected by the index labels of ``X`` (in the order of
        ``X.index``) with the lag columns appended.
    """
    frame = X.copy()
    target = self.target_column_name

    # A missing target (e.g. at prediction time) is stood in for by NaNs so
    # the shifting below can run unchanged.
    target_was_missing = target not in frame.columns
    if target_was_missing:
        frame[target] = np.nan

    # Only reach for the cached training tail when X lies entirely after it.
    last_cached_stamp = self._train_tail.index.max()
    first_input_stamp = frame.index.min()
    if last_cached_stamp < first_input_stamp:
        aligned_input = frame[self._column_order]
        frame = pd.concat((self._train_tail, aligned_input))

    # Shifting is positional, so rows must be in index order first.
    frame = frame.sort_index(ascending=True)

    for order in self.lag_orders:
        lag_name = 'lag_' + str(order)
        frame[lag_name] = frame[target].shift(order)

    # Remove the placeholder target so the output mirrors the input columns.
    if target_was_missing:
        frame = frame.drop(columns=[target])

    # Hand back only the rows the caller passed in, in their original order.
    return frame.loc[X.index]