in keras/engine/training.py [0:0]
def _standardize_user_data(self, x,
                           y=None,
                           sample_weight=None,
                           class_weight=None,
                           check_array_lengths=True,
                           batch_size=None):
    """Normalize user-provided inputs/targets/weights into lists of arrays.

    Also builds the model from `x` if it has not been built yet, and
    compiles it on the fly from `y` if it has not been compiled yet
    (supporting symbolic target tensors).

    # Arguments
        x: Model inputs: a Numpy array, a list/tuple of Numpy arrays,
            or symbolic tensor(s). Dictionaries are rejected.
        y: Model targets, same accepted forms as `x`, or None.
        sample_weight: Optional sample weights (array or list/dict of
            arrays, one per output).
        class_weight: Optional class weight dict (or dict of dicts,
            one per output).
        check_array_lengths: If True, check that `x`, `y` and the
            sample weights all have the same number of samples.
        batch_size: Optional static batch size; only used to validate
            sample counts for stateful networks.

    # Returns
        A tuple `(x, y, sample_weights)` of lists of Numpy arrays.
        If all inputs/targets were symbolic tensors, returns
        `([], [], [])` since the model should not be fed any data.

    # Raises
        ValueError: in case of invalid user-provided data (wrong types,
            mixed Numpy/symbolic inputs, incompatible lengths/shapes,
            or a stateful-network sample count not divisible by
            `batch_size`).
        RuntimeError: if `y` is passed but the model was never compiled.
    """
    all_inputs = []
    if not self.built:
        # We need to use `x` to set the model inputs.
        # We type-check that `x` and `y` are either single arrays
        # or lists of arrays.
        if isinstance(x, (list, tuple)):
            if not all(isinstance(v, np.ndarray) or
                       K.is_tensor(v) for v in x):
                raise ValueError('Please provide as model inputs '
                                 'either a single '
                                 'array or a list of arrays. '
                                 'You passed: x=' + str(x))
            all_inputs += list(x)
        elif isinstance(x, dict):
            raise ValueError('Please do not pass a dictionary '
                             'as model inputs.')
        else:
            if not isinstance(x, np.ndarray) and not K.is_tensor(x):
                raise ValueError('Please provide as model inputs '
                                 'either a single '
                                 'array or a list of arrays. '
                                 'You passed: x=' + str(x))
            all_inputs.append(x)

        # Build the model using the retrieved inputs (value or symbolic).
        # If values, then in symbolic-mode placeholders will be created
        # to match the value shapes.
        if not self.inputs:
            self._set_inputs(x)

    if y is not None:
        if not self.optimizer:
            raise RuntimeError('You must compile a model before '
                               'training/testing. '
                               'Use `model.compile(optimizer, loss)`.')
        if not self._is_compiled:
            # On-the-fly compilation of the model.
            # We need to use `y` to set the model targets.
            if isinstance(y, (list, tuple)):
                if not all(isinstance(v, np.ndarray) or
                           K.is_tensor(v) for v in y):
                    raise ValueError('Please provide as model targets '
                                     'either a single '
                                     'array or a list of arrays. '
                                     'You passed: y=' + str(y))
            elif isinstance(y, dict):
                raise ValueError('Please do not pass a dictionary '
                                 'as model targets.')
            else:
                if not isinstance(y, np.ndarray) and not K.is_tensor(y):
                    raise ValueError('Please provide as model targets '
                                     'either a single '
                                     'array or a list of arrays. '
                                     'You passed: y=' + str(y))
            # Typecheck that all inputs are *either* value *or* symbolic.
            if y is not None:
                all_inputs += to_list(y, allow_tuple=True)
                if any(K.is_tensor(v) for v in all_inputs):
                    if not all(K.is_tensor(v) for v in all_inputs):
                        raise ValueError('Do not pass inputs that mix Numpy '
                                         'arrays and symbolic tensors. '
                                         'You passed: x=' + str(x) +
                                         '; y=' + str(y))

            # Handle target tensors if any passed.
            y = to_list(y, allow_tuple=True)
            target_tensors = [v for v in y if K.is_tensor(v)]
            if not target_tensors:
                target_tensors = None
            self.compile(optimizer=self.optimizer,
                         loss=self.loss,
                         metrics=self.metrics,
                         loss_weights=self.loss_weights,
                         target_tensors=target_tensors)

    # If `x` and `y` were all symbolic,
    # then the model should not be fed any inputs and targets.
    # Note: in this case, `any` and `all` are equivalent since we disallow
    # mixed symbolic/value inputs.
    if any(K.is_tensor(v) for v in all_inputs):
        return [], [], []

    # What follows is input validation and standardization to list format,
    # in the case where all inputs are value arrays.
    if not self._is_graph_network:
        # Case: symbolic-mode subclassed network.
        # Do not do shape validation.
        feed_input_names = self._feed_input_names
        feed_input_shapes = None
    else:
        # Case: symbolic-mode graph network.
        # In this case, we run extensive shape validation checks.
        feed_input_names = self._feed_input_names
        feed_input_shapes = self._feed_input_shapes

    # Standardize the inputs.
    x = standardize_input_data(
        x,
        feed_input_names,
        feed_input_shapes,
        check_batch_axis=False,  # Don't enforce the batch size.
        exception_prefix='input')

    if y is not None:
        if not self._is_graph_network:
            feed_output_names = self._feed_output_names
            feed_output_shapes = None
            # Sample weighting not supported in this case.
            # TODO: consider supporting it.
            feed_sample_weight_modes = [None for _ in self.outputs]
        else:
            feed_output_names = self._feed_output_names
            feed_sample_weight_modes = self._feed_sample_weight_modes
            feed_output_shapes = []
            for output_shape, loss_fn in zip(self._feed_output_shapes,
                                             self._feed_loss_fns):
                if loss_fn is losses.sparse_categorical_crossentropy:
                    # Sparse targets have a single (class-index) channel:
                    # replace the class axis of the output shape with 1.
                    if K.image_data_format() == 'channels_first' and len(
                            output_shape) in [4, 5]:
                        feed_output_shapes.append(
                            (output_shape[0], 1) + output_shape[2:])
                    else:
                        feed_output_shapes.append(output_shape[:-1] + (1,))
                elif (not hasattr(loss_fn, '__name__') or
                        getattr(losses, loss_fn.__name__, None) is None):
                    # If `loss_fn` is not a function (e.g. callable class)
                    # or if it not in the `losses` module, then
                    # it is a user-defined loss and we make no assumptions
                    # about it.
                    feed_output_shapes.append(None)
                else:
                    feed_output_shapes.append(output_shape)

        check_last_layer_shape = True
        # multi_hot_sparse_categorical_crossentropy only available in
        # mxnet backend
        if K.backend() == 'mxnet':
            for loss_fn in self.loss_functions:
                if loss_fn is losses.multi_hot_sparse_categorical_crossentropy:
                    # does not check the last layer shape when
                    # multi_hot_sparse_categorical_crossentropy is used,
                    # since we reduce the dimension of sparse labels.
                    check_last_layer_shape = False

        # Standardize the outputs.
        y = standardize_input_data(
            y,
            feed_output_names,
            feed_output_shapes,
            check_batch_axis=False,  # Don't enforce the batch size.
            exception_prefix='target',
            check_last_layer_shape=check_last_layer_shape)

        # Generate sample-wise weight values given the `sample_weight` and
        # `class_weight` arguments.
        sample_weights = standardize_sample_weights(
            sample_weight, feed_output_names)
        class_weights = standardize_class_weights(
            class_weight, feed_output_names)
        sample_weights = [
            standardize_weights(ref, sw, cw, mode)
            for (ref, sw, cw, mode) in
            zip(y, sample_weights, class_weights,
                feed_sample_weight_modes)
        ]
        # Check that all arrays have the same length.
        # Fix: honor `check_array_lengths` — previously the check ran
        # unconditionally and the parameter was silently ignored.
        if check_array_lengths:
            check_array_length_consistency(x, y, sample_weights)
        if self._is_graph_network:
            # Additional checks to avoid users mistakenly
            # using improper loss fns.
            check_loss_and_target_compatibility(
                y, self._feed_loss_fns, feed_output_shapes)
    else:
        y = []
        sample_weights = []

    if self.stateful and batch_size:
        # Check that for stateful networks, number of samples is a multiple
        # of the static batch size.
        if x[0].shape[0] % batch_size != 0:
            raise ValueError('In a stateful network, '
                             'you should only pass inputs with '
                             'a number of samples that can be '
                             'divided by the batch size. Found: ' +
                             str(x[0].shape[0]) + ' samples')
    return x, y, sample_weights