def fit()

in src/sagemaker_sklearn_extension/feature_extraction/date_time.py [0:0]


    def fit(self, X, y=None):
        """Select the datetime fields to extract, optionally discarding constant ones.

        Parameters
        ----------
        X : {array-like}, datetime.datetime or str
            Validated with ``check_array`` (NaN entries are allowed) and then converted through
            ``self._to_datetime_array``.
        y : None
            Not used by this method.

        Notes
        -----
        The candidate fields are ``self.extract`` when it is truthy, otherwise ``self.default_data``.

        When ``ignore_constant_columns`` is set, each column of ``X`` is examined on its own and a field is kept as
        soon as it varies in at least one column. Consequently, when fitting a 2d array with several columns, a field
        that is constant in column 1 (e.g. year=1999 for every row) but takes two or more values in column 2 is still
        produced for column 1 as well. To avoid this, fit a separate DateTimeVectorizer on each column. If no field
        varies in any column, only the first candidate field is kept.

        Returns
        -------
        self : DateTimeVectorizer

        Raises
        ------
        ValueError
            If ``self.mode`` is neither "cyclic" nor "ordinal".
        """

        validated = check_array(X, dtype=None, force_all_finite="allow-nan")
        X = self._to_datetime_array(np.array(validated))

        if self.mode not in ("cyclic", "ordinal"):
            raise ValueError("mode must be either cyclic or ordinal. Current value is {}".format(self.mode))

        self.extract_ = self.extract or self.default_data

        # Nothing to filter: keep every candidate field.
        if not self.ignore_constant_columns:
            return self

        varying_fields = []
        for col_idx in range(X.shape[1]):
            # Encode this single column in ordinal mode so that each field can be checked for variation.
            single_column = X[:, col_idx].reshape((-1, 1))
            ordinal_values = self._convert(single_column, mode="ordinal")
            variances = np.nanvar(ordinal_values, axis=0)
            # NOTE(review): presumably _convert yields one output column per entry of self.extract_,
            # which is what the positional lookup below relies on - confirm against _convert.
            for field_idx, variance in enumerate(variances):
                # `not variance > 0` (rather than `variance <= 0`) also skips NaN variances.
                if not variance > 0:
                    continue
                field = self.extract_[field_idx]
                if field not in varying_fields:
                    varying_fields.append(field)

        # Fall back to the first candidate field when every field is constant.
        self.extract_ = varying_fields or [self.extract_[0]]

        return self