def fit()

in sa-dsml-many-models/code/util/timeseries_utilities.py [0:0]


    def fit(self, X, y=None):
        """
        Fit the sklearn model on the input dataframe.

        The target values are read from the ``self.target_column_name``
        column of ``X``; all remaining columns are used as features. Rows
        with missing values are dropped before fitting, and the feature
        column order used for fitting is stored in ``self._column_order``.

        :param X: dataframe holding the feature columns and the target
            column. Every column must have a numeric dtype.
        :param y: not used by this method; the target is taken from ``X``.
        :return: ``self``, after ``self.sklearn_model`` has been fitted.
        :raises AssertionError: if the target column is absent, if no rows
            remain after dropping missing values, or if any column is
            non-numeric.
        """
        # Validation is raised explicitly instead of via ``assert`` so the
        # checks still run when Python is started with -O. AssertionError is
        # kept as the exception type so existing callers keep working.
        if self.target_column_name not in X.columns:
            raise AssertionError(
                "Target column is missing from the input dataframe.")

        # Drop rows with missing values and check that we still have data left.
        # dropna() hands back a new frame, so the pop() below works on that
        # result rather than on the frame object the caller passed in.
        X_fit = X.dropna()
        if len(X_fit) == 0:
            raise AssertionError(
                'Training dataframe is empty after dropping NA values')

        # Check that data is all numeric type
        # This simple pipeline does not handle categoricals or other non-numeric types
        full_col_set = set(X_fit.columns)
        numeric_col_set = set(X_fit.select_dtypes(include=[np.number]).columns)
        if full_col_set != numeric_col_set:
            raise AssertionError(
                'Found non-numeric columns {} in the input dataframe. Please drop them prior to modeling.'
                .format(full_col_set - numeric_col_set))

        # Split off the target, remember the feature column order, then fit
        # the scikit model on the raw arrays.
        y_fit = X_fit.pop(self.target_column_name)
        self._column_order = X_fit.columns
        self.sklearn_model.fit(X_fit.values, y_fit.values)
        return self