def feature_extraction()

in kats/models/nowcasting/nowcastingplus.py [0:0]


    def feature_extraction(self) -> None:
        """
        Extracts features for time series data.

        Works on two frames held by the instance:

        * ``self.df_poly`` gains ``hour``, ``minute`` and ``x`` (minutes since
          midnight) columns, plus whatever ``poly`` adds for degrees 0-5.
          Rows containing NaN or +/-inf are dropped, then a
          ``LinearRegression`` of ``y`` on the polynomial columns is fitted.
        * ``self.df`` is passed through ``MOM``, ``ROC``, ``LAG`` and ``MA``
          for a fixed set of window sizes; rows containing NaN or +/-inf are
          dropped afterwards.

        Side effects (nothing is returned): sets ``self.poly_feature_names``,
        ``self.poly_model``, ``self.y_train_season_obj`` and
        ``self.feature_names``, and rebinds ``self.df_poly`` and ``self.df``.
        """
        # Values that mark a row as unusable for model fitting.
        non_finite = [np.nan, np.inf, -np.inf]

        # Add the hour, minute, and x (minutes since midnight) columns.
        self.df_poly["hour"] = self.df_poly["time"].apply(lambda y: y.hour)
        self.df_poly["minute"] = self.df_poly["time"].apply(lambda y: y.minute)
        self.df_poly["x"] = self.df_poly["hour"] * 60 + self.df_poly["minute"]

        # Add the poly columns to df_poly. The names recorded here are used
        # below to select columns, so poly() is expected to add a column
        # called "poly_<degree>" for each degree.
        poly_feature_names = []
        for degree in range(6):
            self.df_poly = poly(self.df_poly, degree)
            poly_feature_names.append("poly_" + str(degree))

        # Filter out rows containing +/-inf or NaN. `axis` must be passed by
        # keyword: pandas 2.x rejects positional arguments to DataFrame.any.
        self.df_poly = self.df_poly[
            ~self.df_poly.isin(non_finite).any(axis=1)
        ]

        # Save the poly feature names
        self.poly_feature_names = poly_feature_names

        # df_poly has just been filtered, so every remaining row is used for
        # training; no second non-finite pass is required.
        X_train_poly = self.df_poly[poly_feature_names]
        y_train_poly = self.df_poly["y"]

        # Build the Polynomial Regression Model and keep its in-sample fit.
        lin_reg = LinearRegression()
        lin_reg.fit(X_train_poly, y_train_poly)
        self.poly_model = lin_reg
        self.y_train_season_obj = lin_reg.predict(X_train_poly)

        # Apply each indicator helper to self.df for its window sizes, in the
        # fixed order MOM, ROC, LAG, MA, recording "<PREFIX>_<n>" per call.
        # NOTE(review): the helpers are defined elsewhere; presumably each adds
        # a column with exactly that name — confirm against their definitions.
        indicator_specs = (
            (MOM, "MOM_", (10, 15, 20, 25, 30)),
            (ROC, "ROC_", (10, 15, 20, 25, 30)),
            (LAG, "LAG_", (1, 2, 3, 4, 5)),
            (MA, "MA_", (10, 20, 30)),
        )
        feature_names = []
        for transform, prefix, windows in indicator_specs:
            for n in windows:
                self.df = transform(self.df, n)
                feature_names.append(prefix + str(n))

        # Filter out rows containing +/-inf or NaN.
        self.df = self.df[~self.df.isin(non_finite).any(axis=1)]
        self.feature_names = feature_names