in kats/models/nowcasting/dynamic_nowcasting.py [0:0]
def feature_extraction(self) -> None:
    """
    Extract features for training.

    Works on two DataFrames held on the instance and replaces both with
    their feature-augmented, filtered versions:

    * ``self.df_poly`` (must have ``time`` and ``y`` columns): gains
      ``hour``, ``minute`` and ``x`` (minute of the day) columns, is passed
      through ``poly`` for degrees 0..5, and a ``LinearRegression`` of
      ``y`` on the ``poly_<degree>`` columns is fitted on it.
    * ``self.df`` (must have a ``y`` column): is passed through the
      ``ROC`` / ``MOM`` / ``LAG`` / ``MA`` helpers; which helpers and
      windows are used depends on whether ``y`` contains a zero.

    Rows holding NaN or +/-inf in any column are dropped from both frames.

    Attributes set:
        poly_feature_names: ``["poly_0", ..., "poly_5"]``.
        poly_model: the fitted ``LinearRegression``.
        y_train_season_obj: predictions of ``poly_model`` on its own
            training rows.
        feature_names: names of the features added to ``self.df``, in the
            order they were generated.
    """

    def _drop_non_finite(frame):
        # Keep only the rows in which no column is NaN, +inf or -inf.
        # ``axis`` is passed by keyword on purpose: recent pandas releases
        # (2.0+) reject the positional form ``any(1)`` with a TypeError.
        return frame[~frame.isin([np.nan, np.inf, -np.inf]).any(axis=1)]

    # Minute-of-day column ``x`` (0..1439) derived from the timestamp.
    self.df_poly["hour"] = self.df_poly["time"].apply(lambda ts: ts.hour)
    self.df_poly["minute"] = self.df_poly["time"].apply(lambda ts: ts.minute)
    self.df_poly["x"] = self.df_poly["hour"] * 60 + self.df_poly["minute"]

    # The names recorded here are used as column selectors below, so
    # ``poly(df, degree)`` has to return a frame with a ``poly_<degree>``
    # column.
    poly_feature_names = []
    for degree in range(6):
        self.df_poly = poly(self.df_poly, degree)
        poly_feature_names.append(f"poly_{degree}")
    self.df_poly = _drop_non_finite(self.df_poly)
    self.poly_feature_names = poly_feature_names

    # Polynomial regression of y on the poly_* columns. ``self.df_poly``
    # has just been filtered, so every remaining row is a training row.
    X_train_poly = self.df_poly[poly_feature_names]
    y_train_poly = self.df_poly["y"]
    lin_reg = LinearRegression()
    lin_reg.fit(X_train_poly, y_train_poly)
    self.poly_model = lin_reg
    self.y_train_season_obj = lin_reg.predict(X_train_poly)

    # Each entry is (helper, name prefix, window sizes); the order of the
    # entries fixes the order of ``self.feature_names``.
    if self.df["y"].isin([0.0]).any():
        # A zero in y: fall back to the basic nowcasting feature set,
        # without ROC and MA.
        # NOTE(review): presumably because ratio-style features misbehave
        # on zeros -- confirm against the ROC / MA helpers.
        feature_plan = [
            (MOM, "MOM", [10, 15, 20, 25, 30]),
            (LAG, "LAG", [10, 15, 20, 25, 30]),
        ]
    else:
        feature_plan = [
            (ROC, "ROC", [10, 15, 20, 25, 30]),
            (MOM, "MOM", [10, 15, 20, 25, 30]),
            (LAG, "LAG", [1, 2, 3, 4, 5, 20]),
            (MA, "MA", [10, 20, 30, 40]),
        ]

    feature_names = []
    for transform, prefix, windows in feature_plan:
        for n in windows:
            self.df = transform(self.df, n)
            feature_names.append(f"{prefix}_{n}")

    self.df = _drop_non_finite(self.df)
    self.feature_names = feature_names