in tensorflow_decision_forests/keras/core.py [0:0]
def fit(self,
        x=None,
        y=None,
        callbacks=None,
        verbose: Optional[int] = None,
        **kwargs) -> tf.keras.callbacks.History:
"""Trains the model.
The following dataset formats are supported:
1. "x" is a tf.data.Dataset containing a tuple "(features, labels)".
"features" can be a dictionary a tensor, a list of tensors or a
dictionary of tensors (recommended). "labels" is a tensor.
2. "x" is a tensor, list of tensors or dictionary of tensors containing
the input features. "y" is a tensor.
3. "x" is a numpy-array, list of numpy-arrays or dictionary of
numpy-arrays containing the input features. "y" is a numpy-array.
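
  For example, format 1 could look as follows (the feature names and values
  are purely illustrative):

    # Illustrative feature names and values.
    features = {"f1": [1.0, 2.0, 3.0], "f2": ["a", "b", "c"]}
    labels = [0, 1, 0]
    train_ds = tf.data.Dataset.from_tensor_slices((features, labels))
    model.fit(train_ds.batch(64))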

  Unlike classical neural networks, the learning algorithm needs to scan the
  training dataset exactly once. Therefore, the dataset should not be
  repeated. The algorithm also does not benefit from shuffling the dataset.

  Input features generally do not need to be normalized (numerical features)
  or indexed (categorical features stored as strings). Missing values are
  also supported natively (i.e. there is no need to impute them); see the
  sketch below.
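
  For example, a raw string categorical feature and a numerical feature with
  a missing value (assumed here to be encoded as NaN) can be fed directly:

    # NaN is assumed here to encode a missing numerical value.
    features = {"num": [1.0, float("nan"), 3.0],
                "cat": ["red", "blue", "red"]}
    dataset = tf.data.Dataset.from_tensor_slices((features, [0, 1, 0]))
    model.fit(dataset.batch(3))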

  pandas DataFrames can be prepared with "pd_dataframe_to_tf_dataset":

    dataframe = pandas.DataFrame(...)
    model.fit(pd_dataframe_to_tf_dataset(dataframe, label="my_label"))

  Some of the learning algorithms support distributed training with the
  ParameterServerStrategy, e.g.:

    with tf.distribute.experimental.ParameterServerStrategy(...).scope():
      model = DistributedGradientBoostedTreesModel()
      model.fit(...)

  Args:
    x: Training dataset (see details above for the supported formats).
    y: Labels of the training dataset. Only used if "x" does not contain the
      labels.
    callbacks: Callbacks triggered during the training.
    verbose: Verbosity mode. 0 = silent, 1 = small details, 2 = full details.
    **kwargs: Arguments passed to the core Keras model's fit.

  Returns:
    A `History` object. Its `History.history` attribute is not yet
    implemented for decision forests algorithms, and will return empty. All
    other fields are filled as usual for `keras.Model.fit()`.
  """
  if verbose is not None:
    self._verbose = verbose
    self._clear_function_cache()

  # Detect a pandas DataFrame without importing pandas as a dependency.
  if str(type(x)) == "<class 'pandas.core.frame.DataFrame'>":
    raise ValueError(
        "`fit` cannot consume pandas DataFrames directly. Instead, use the "
        "`pd_dataframe_to_tf_dataset` utility function. For example: "
        "`model.fit(tfdf.keras.pd_dataframe_to_tf_dataset(train_dataframe, "
        "label=\"label_column\"))`")

  # If the dataset was created with "pd_dataframe_to_tf_dataset", ensure that
  # the task is correctly set.
  if hasattr(x, "_tfdf_task"):
    dataset_task = getattr(x, "_tfdf_task")
    if dataset_task != self._task:
      raise ValueError(
          f"The model's `task` attribute ({Task.Name(self._task)}) does "
          "not match the `task` attribute passed to "
          f"`pd_dataframe_to_tf_dataset` ({Task.Name(dataset_task)}).")

  # Check the dataset.
  if self._check_dataset and isinstance(x, tf.data.Dataset):
    _check_dataset(x)

  # Call "compile" if the user forgot to do so.
  if not self._is_compiled:
    self.compile()
if "epochs" in kwargs:
if kwargs["epochs"] != 1:
raise ValueError("all decision forests algorithms train with only 1 " +
"epoch, epochs={} given".format(kwargs["epochs"]))
del kwargs["epochs"] # Not needed since we force it to 1 below.

  # This callback will trigger the training at the end of the first epoch.
  callbacks = [_TrainerCallBack(self)] + (callbacks if callbacks else [])

  # We want the model trained before any evaluation is done at the end of the
  # epoch. This may fail if any of the `on_train_batch_*` callbacks calls
  # `evaluate()` before the end of the first epoch.
  self._train_on_evaluate = True

  # Reset the training status.
  self._is_trained.assign(False)

  # Keras' verbose mode cannot be 1 during distributed training (for
  # performance reasons), so map 1 to "auto".
  keras_verbose = "auto" if self._verbose == 1 else self._verbose

  try:
    history = super(CoreModel, self).fit(
        x=x,
        y=y,
        epochs=1,
        callbacks=callbacks,
        verbose=keras_verbose,
        **kwargs)
  finally:
    self._train_on_evaluate = False

  self._build(x)

  return history