in sa-dsml-many-models/code/util/timeseries_utilities.py [0:0]
def fit(self, X, y=None):
    """
    Fit the wrapped sklearn model on the input dataframe.

    The target values are taken from the ``self.target_column_name`` column
    of ``X``; every remaining column is used as a feature. Rows containing
    missing values are removed with ``X.dropna()`` before fitting. The
    feature columns (in the order passed to the model) are stored in
    ``self._column_order``.

    Parameters
    ----------
    X : pandas.DataFrame
        Training data containing the feature columns and the target column.
        All columns must have a numeric dtype.
    y : optional
        Not used by this method; the target is read from ``X``.

    Returns
    -------
    self
        The same instance, after ``self.sklearn_model.fit`` has been called.

    Raises
    ------
    AssertionError
        If the target column is absent from ``X``, if no rows remain after
        dropping missing values, or if any column is non-numeric.
    """
    # The checks below raise AssertionError explicitly rather than using
    # ``assert`` statements: ``assert`` is removed when Python runs with -O,
    # which would silently disable the validation. The exception type and
    # messages are the same ones the ``assert`` form produced.
    target_name = self.target_column_name
    if target_name not in X.columns:
        raise AssertionError("Target column is missing from the input dataframe.")

    # Drop rows with missing values and check that we still have data left
    X_fit = X.dropna()
    if len(X_fit) == 0:
        raise AssertionError('Training dataframe is empty after dropping NA values')

    # Check that data is all numeric type
    # This simple pipeline does not handle categoricals or other non-numeric types
    full_col_set = set(X_fit.columns)
    numeric_col_set = set(X_fit.select_dtypes(include=[np.number]).columns)
    if full_col_set != numeric_col_set:
        raise AssertionError(
            'Found non-numeric columns {} in the input dataframe. Please drop them prior to modeling.'
            .format(full_col_set - numeric_col_set))

    # Fit the scikit model.
    # ``pop`` removes the target from X_fit, so what is left is the feature set.
    y_fit = X_fit.pop(target_name)
    self._column_order = X_fit.columns
    self.sklearn_model.fit(X_fit.values, y_fit.values)
    return self