in tensorflow_decision_forests/keras/core.py [0:0]
def fit(self,
        x=None,
        y=None,
        callbacks=None,
        verbose: Optional[int] = None,
        **kwargs) -> tf.keras.callbacks.History:
"""Trains the model.
The following dataset formats are supported:
1. "x" is a tf.data.Dataset containing a tuple "(features, labels)".
"features" can be a dictionary a tensor, a list of tensors or a
dictionary of tensors (recommended). "labels" is a tensor.
2. "x" is a tensor, list of tensors or dictionary of tensors containing
the input features. "y" is a tensor.
3. "x" is a numpy-array, list of numpy-arrays or dictionary of
numpy-arrays containing the input features. "y" is a numpy-array.
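
  For example, format 1 could look as follows (the feature names and values
  are purely illustrative):

    # Illustrative feature names and values.
    features = {"f1": [1.0, 2.0, 3.0], "f2": ["a", "b", "c"]}
    labels = [0, 1, 0]
    train_ds = tf.data.Dataset.from_tensor_slices((features, labels))
    model.fit(train_ds.batch(64))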

  Unlike classical neural networks, the learning algorithm needs to scan the
  training dataset exactly once. Therefore, the dataset should not be
  repeated. The algorithm also does not benefit from shuffling the dataset.

  Input features generally do not need to be normalized (numerical features)
  or indexed (categorical features stored as strings). Missing values are
  also supported natively (i.e. there is no need to impute them); see the
  sketch below.
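
  For example, a raw string categorical feature and a numerical feature with
  a missing value (assumed here to be encoded as NaN) can be fed directly:

    # NaN is assumed here to encode a missing numerical value.
    features = {"num": [1.0, float("nan"), 3.0],
                "cat": ["red", "blue", "red"]}
    dataset = tf.data.Dataset.from_tensor_slices((features, [0, 1, 0]))
    model.fit(dataset.batch(3))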

  pandas DataFrames can be prepared with "pd_dataframe_to_tf_dataset":

    dataframe = pandas.DataFrame(...)
    model.fit(pd_dataframe_to_tf_dataset(dataframe, label="my_label"))

  Some of the learning algorithms support distributed training with the
  ParameterServerStrategy, e.g.:

    with tf.distribute.experimental.ParameterServerStrategy(...).scope():
      model = DistributedGradientBoostedTreesModel()
      model.fit(...)

  Args:
    x: Training dataset (see details above for the supported formats).
    y: Labels of the training dataset. Only used if "x" does not contain the
      labels.
    callbacks: Callbacks triggered during the training.
    verbose: Verbosity mode. 0 = silent, 1 = small details, 2 = full details.
    **kwargs: Arguments passed to the core Keras model's fit.

  Returns:
    A `History` object. Its `History.history` attribute is not yet
    implemented for decision forests algorithms, and will return empty. All
    other fields are filled as usual for `keras.Model.fit()`.
  """
  if verbose is not None:
    self._verbose = verbose
    self._clear_function_cache()

  # Detect a pandas DataFrame without importing pandas as a dependency.
  if str(type(x)) == "<class 'pandas.core.frame.DataFrame'>":
    raise ValueError(
        "`fit` cannot consume pandas DataFrames directly. Instead, use the "
        "`pd_dataframe_to_tf_dataset` utility function. For example: "
        "`model.fit(tfdf.keras.pd_dataframe_to_tf_dataset(train_dataframe, "
        "label=\"label_column\"))`")

  # If the dataset was created with "pd_dataframe_to_tf_dataset", ensure that
  # the task is correctly set.
  if hasattr(x, "_tfdf_task"):
    dataset_task = getattr(x, "_tfdf_task")
    if dataset_task != self._task:
      raise ValueError(
          f"The model's `task` attribute ({Task.Name(self._task)}) does "
          "not match the `task` attribute passed to "
          f"`pd_dataframe_to_tf_dataset` ({Task.Name(dataset_task)}).")

  # Check the dataset.
  if self._check_dataset and isinstance(x, tf.data.Dataset):
    _check_dataset(x)

  # Call "compile" if the user forgot to do so.
  if not self._is_compiled:
    self.compile()
if "epochs" in kwargs:
if kwargs["epochs"] != 1:
raise ValueError("all decision forests algorithms train with only 1 " +
"epoch, epochs={} given".format(kwargs["epochs"]))
del kwargs["epochs"] # Not needed since we force it to 1 below.

  # This callback will trigger the training at the end of the first epoch.
  callbacks = [_TrainerCallBack(self)] + (callbacks if callbacks else [])

  # We want the model trained before any evaluation is done at the end of the
  # epoch. This may fail if any of the `on_train_batch_*` callbacks calls
  # `evaluate()` before the end of the first epoch.
  self._train_on_evaluate = True

  # Reset the training status.
  self._is_trained.assign(False)

  # Keras' verbose mode cannot be 1 during distributed training (for
  # performance reasons), so map 1 to "auto".
  keras_verbose = "auto" if self._verbose == 1 else self._verbose

  try:
    history = super(CoreModel, self).fit(
        x=x,
        y=y,
        epochs=1,
        callbacks=callbacks,
        verbose=keras_verbose,
        **kwargs)
  finally:
    self._train_on_evaluate = False

  self._build(x)

  return history