def add_features

def add_features_from()

in python-package/lightgbm/basic.py [0:0]
74 lines of code
28 McCabe index (conditional complexity)

    def add_features_from(self, other):
        """Add features from other Dataset to the current Dataset.

        Both Datasets must be constructed before calling this method.

        Parameters
        ----------
        other : Dataset
            The Dataset to take features from.

        Returns
        -------
        self : Dataset
            Dataset with the new features added.
        """
        if self.handle is None or other.handle is None:
            raise ValueError('Both source and target Datasets must be constructed before adding features')
        _safe_call(_LIB.LGBM_DatasetAddFeaturesFrom(self.handle, other.handle))
        was_none = self.data is None
        old_self_data_type = type(self.data).__name__
        if other.data is None:
            self.data = None
        elif self.data is not None:
            if isinstance(self.data, np.ndarray):
                if isinstance(other.data, np.ndarray):
                    self.data = np.hstack((self.data, other.data))
                elif scipy.sparse.issparse(other.data):
                    self.data = np.hstack((self.data, other.data.toarray()))
                elif isinstance(other.data, pd_DataFrame):
                    self.data = np.hstack((self.data, other.data.values))
                elif isinstance(other.data, dt_DataTable):
                    self.data = np.hstack((self.data, other.data.to_numpy()))
                else:
                    self.data = None
            elif scipy.sparse.issparse(self.data):
                sparse_format = self.data.getformat()
                if isinstance(other.data, np.ndarray) or scipy.sparse.issparse(other.data):
                    self.data = scipy.sparse.hstack((self.data, other.data), format=sparse_format)
                elif isinstance(other.data, pd_DataFrame):
                    self.data = scipy.sparse.hstack((self.data, other.data.values), format=sparse_format)
                elif isinstance(other.data, dt_DataTable):
                    self.data = scipy.sparse.hstack((self.data, other.data.to_numpy()), format=sparse_format)
                else:
                    self.data = None
            elif isinstance(self.data, pd_DataFrame):
                if not PANDAS_INSTALLED:
                    raise LightGBMError("Cannot add features to DataFrame type of raw data "
                                        "without pandas installed. "
                                        "Install pandas and restart your session.")
                if isinstance(other.data, np.ndarray):
                    self.data = concat((self.data, pd_DataFrame(other.data)),
                                       axis=1, ignore_index=True)
                elif scipy.sparse.issparse(other.data):
                    self.data = concat((self.data, pd_DataFrame(other.data.toarray())),
                                       axis=1, ignore_index=True)
                elif isinstance(other.data, pd_DataFrame):
                    self.data = concat((self.data, other.data),
                                       axis=1, ignore_index=True)
                elif isinstance(other.data, dt_DataTable):
                    self.data = concat((self.data, pd_DataFrame(other.data.to_numpy())),
                                       axis=1, ignore_index=True)
                else:
                    self.data = None
            elif isinstance(self.data, dt_DataTable):
                if isinstance(other.data, np.ndarray):
                    self.data = dt_DataTable(np.hstack((self.data.to_numpy(), other.data)))
                elif scipy.sparse.issparse(other.data):
                    self.data = dt_DataTable(np.hstack((self.data.to_numpy(), other.data.toarray())))
                elif isinstance(other.data, pd_DataFrame):
                    self.data = dt_DataTable(np.hstack((self.data.to_numpy(), other.data.values)))
                elif isinstance(other.data, dt_DataTable):
                    self.data = dt_DataTable(np.hstack((self.data.to_numpy(), other.data.to_numpy())))
                else:
                    self.data = None
            else:
                self.data = None
        if self.data is None:
            err_msg = (f"Cannot add features from {type(other.data).__name__} type of raw data to "
                       f"{old_self_data_type} type of raw data.\n")
            err_msg += ("Set free_raw_data=False when construct Dataset to avoid this"
                        if was_none else "Freeing raw data")
            _log_warning(err_msg)
        self.feature_name = self.get_feature_name()
        _log_warning("Reseting categorical features.\n"
                     "You can set new categorical features via ``set_categorical_feature`` method")
        self.categorical_feature = "auto"
        self.pandas_categorical = None
        return self