in src/sagemaker_sklearn_extension/feature_extraction/date_time.py [0:0]
def fit(self, X, y=None):
    """Decide which datetime fields to extract, skipping fields that never vary.

    The input is validated with ``check_array`` (NaN entries are allowed), copied
    into a numpy array and passed through ``self._to_datetime_array``. The fields
    to extract start out as ``self.extract``, or ``self.default_data`` when
    ``self.extract`` is falsy. When ``self.ignore_constant_columns`` is set, each
    input column is converted in ``"ordinal"`` mode and only the fields whose
    NaN-ignoring variance is strictly positive for at least one input column are
    kept. If no field varies at all, the first field is kept so that the result
    is never empty.

    Parameters
    ----------
    X : {array-like}, datetime.datetime or str
        Data used to determine which fields are non-constant.
    y : ignored
        Not referenced by this method.

    Notes
    -----
    If fitting with a 2d array with more than one column, any field that is
    non-constant in at least one column is kept for every column. If, for
    example, column 1 has year=1999 for all rows but column 2 has two or more
    possible year values, the year information from column 1 is still produced.
    To avoid this, run fit on each column separately and obtain a separate
    DateTimeVectorizer for each column.

    Kept fields are stored in the order in which they are first found to vary
    while scanning the input columns from left to right.

    Returns
    -------
    self : DateTimeVectorizer

    Raises
    ------
    ValueError
        If ``self.mode`` is neither ``"cyclic"`` nor ``"ordinal"``.
    """
    checked = check_array(X, dtype=None, force_all_finite="allow-nan")
    datetimes = self._to_datetime_array(np.array(checked))

    if self.mode not in ("cyclic", "ordinal"):
        raise ValueError("mode must be either cyclic or ordinal. Current value is {}".format(self.mode))

    # Assigned before any call to self._convert, matching the original statement
    # order in case the conversion reads this attribute.
    self.extract_ = self.extract or self.default_data

    if not self.ignore_constant_columns:
        return self

    varying_fields = []
    n_input_columns = datetimes.shape[1]
    for col_idx in range(n_input_columns):
        # Convert one input column at a time; the result is treated as one
        # output column per extracted field.
        single_column = datetimes[:, col_idx].reshape((-1, 1))
        ordinal_values = self._convert(single_column, mode="ordinal")

        # Variance per extracted field, ignoring NaN entries.
        field_variances = np.nanvar(ordinal_values, axis=0)
        for field_idx, variance in enumerate(field_variances):
            # Written as "not > 0" on purpose: a NaN variance counts as constant.
            if not variance > 0:
                continue
            field = self.extract_[field_idx]
            if field not in varying_fields:
                varying_fields.append(field)

    # Never leave the extraction list empty: fall back to the first field.
    self.extract_ = varying_fields if varying_fields else [self.extract_[0]]
    return self