def _build_combinations()

in src/sagemaker_sklearn_extension/preprocessing/data.py [0:0]
23 lines of code
13 McCabe index (conditional complexity)

    def _build_combinations(self, n_features, random_state):
        """Select the column-index pairs whose products will be appended to the input.

        The number of extra columns that may be added is ``max_n_features`` minus the input width, minus one more
        if ``include_bias`` is set. Squared features ``(i, i)`` are served first unless ``interaction_only`` is
        ``True``; whatever room remains goes to interaction features ``(i, j)`` with ``i < j``. Whenever one kind
        does not fit completely, a random subset of it is drawn from ``random_state``.

        Parameters
        ----------
        n_features : int
            The number of columns in the input vector.
        random_state : RandomState
            The prepared (using ``check_random_state``) ``RandomState`` instance. Only its ``choice`` and
            ``shuffle`` methods are used, and only when sub-sampling is required.

        Returns
        -------
        list of tuple
            The squared pairs (if any) followed by the interaction pairs (if any).

        Raises
        ------
        ValueError
            If there is no room for even a single added feature.
        """
        room = self.max_n_features - n_features - int(self.include_bias)
        if room <= 0:
            suffix = " and bias column" if self.include_bias else ""
            raise ValueError(
                "max_n_features must be large enough for the output to contain more than the original dataset"
                + suffix
            )

        # Split the available room: squares get first pick, interactions get the remainder.
        if self.interaction_only:
            n_squares = 0
        else:
            n_squares = min(room, n_features)
        n_interactions = max(0, room - n_squares)

        # Decide which columns get squared.
        if n_squares == n_features:
            # Everything fits, so keep the natural column order and leave the random state untouched.
            squared_columns = range(n_features)
        elif n_squares > 0:
            # Only some fit: draw distinct columns at random.
            squared_columns = random_state.choice(range(n_features), size=n_squares, replace=False)
        else:
            squared_columns = ()
        pairs = [(column, column) for column in squared_columns]

        # Append interaction pairs, shuffling first only when they cannot all fit.
        if n_interactions > 0:
            candidates = list(combinations(range(n_features), 2))
            if len(candidates) > n_interactions:
                random_state.shuffle(candidates)
            pairs.extend(candidates[:n_interactions])

        return pairs