in tensorflow_decision_forests/keras/core.py [0:0]
def __init__(self,
             task: Optional[TaskType] = Task.CLASSIFICATION,
             learner: Optional[str] = "RANDOM_FOREST",
             learner_params: Optional[HyperParameters] = None,
             features: Optional[List[FeatureUsage]] = None,
             exclude_non_specified_features: Optional[bool] = False,
             preprocessing: Optional["models.Functional"] = None,
             postprocessing: Optional["models.Functional"] = None,
             ranking_group: Optional[str] = None,
             uplift_treatment: Optional[str] = None,
             temp_directory: Optional[str] = None,
             verbose: int = 1,
             advanced_arguments: Optional[AdvancedArguments] = None,
             num_threads: Optional[int] = None,
             name: Optional[str] = None,
             max_vocab_count: Optional[int] = 2000,
             try_resume_training: Optional[bool] = True,
             check_dataset: Optional[bool] = True) -> None:
  """Records the model configuration and sets up the untrained state.

  The constructor only stores and validates its arguments; no training
  happens here. Arguments described as "stored as-is" are not interpreted
  by this method.

  Args:
    task: Task solved by the model. Must be `Task.RANKING` if and only if
      `ranking_group` is provided.
    learner: Name of the learning algorithm. Stored as-is.
    learner_params: Hyper-parameters of the learner. Stored as-is.
    features: Explicit feature definitions. `None` is treated as an empty
      list.
    exclude_non_specified_features: If true, `features` must be non-empty.
    preprocessing: Stored as-is.
    postprocessing: Stored as-is.
    ranking_group: Must be set if and only if `task` is `Task.RANKING`.
    uplift_treatment: Stored as-is.
    temp_directory: Working directory used for training. If `None`, a
      `tempfile.TemporaryDirectory` is created and kept alive by this object.
    verbose: Logging verbosity of the constructor. `>= 1` enables the
      warnings about the thread count and the temporary-directory message;
      `>= 2` additionally logs the selected number of threads.
    advanced_arguments: Advanced configuration. The object is deep-copied;
      if `None`, a default `AdvancedArguments()` is used. Its
      `metadata_framework` / `metadata_owner` values are copied into
      `yggdrasil_training_config.metadata` when the corresponding metadata
      field is not already set.
    num_threads: Number of threads. If `None`, `os.cpu_count()` is used,
      capped at 32; if the CPU count cannot be determined, 6 is used.
    name: Forwarded to the parent class constructor.
    max_vocab_count: Stored as-is.
    try_resume_training: Stored as-is.
    check_dataset: Stored as-is.

  Raises:
    ValueError: If `exclude_non_specified_features` is true while no
      features are given, or if `ranking_group` is set for a non-ranking
      task (or missing for a ranking task).
  """
  super(CoreModel, self).__init__(name=name)

  self._task = task
  self._learner = learner
  self._learner_params = learner_params
  self._features = features or []
  self._exclude_non_specified = exclude_non_specified_features
  self._preprocessing = preprocessing
  self._postprocessing = postprocessing
  self._ranking_group = ranking_group
  self._uplift_treatment = uplift_treatment
  self._temp_directory = temp_directory
  self._verbose = verbose
  self._num_threads = num_threads
  self._max_vocab_count = max_vocab_count
  self._try_resume_training = try_resume_training
  self._check_dataset = check_dataset

  # Validate the arguments before creating any resource (e.g. the temporary
  # directory below), so that an invalid configuration leaves nothing behind.
  if not self._features and exclude_non_specified_features:
    raise ValueError(
        "The model does not have any input features: "
        "exclude_non_specified_features is True and no features are "
        "provided as input.")

  if (self._task == Task.RANKING) != (ranking_group is not None):
    raise ValueError(
        "ranking_group must be set if and only if the task is RANKING.")

  # Internal, indicates whether the first evaluation during training,
  # triggered by providing validation data, should trigger the training
  # itself.
  self._train_on_evaluate: bool = False

  # Determine the optimal number of threads.
  if self._num_threads is None:
    detected_cpus = os.cpu_count()
    if detected_cpus is None:
      if self._verbose >= 1:
        tf_logging.warning(
            "Cannot determine the number of CPUs. Set num_threads=6")
      self._num_threads = 6
    elif detected_cpus > 32:
      # Only cap (and warn) when there really are more than 32 CPUs; with
      # exactly 32 the cap would be a no-op and the warning would be wrong.
      if self._verbose >= 1:
        tf_logging.warning(
            "The `num_threads` constructor argument is not set and the "
            "number of CPU is os.cpu_count()=%d > 32. Setting num_threads "
            "to 32. Set num_threads manually to use more than 32 cpus.",
            detected_cpus)
      self._num_threads = 32
    else:
      self._num_threads = detected_cpus
      if self._verbose >= 2:
        tf_logging.info("Use %d thread(s) for training", self._num_threads)

  # Work on a private copy so that the caller's object is never modified.
  if advanced_arguments is None:
    self._advanced_arguments = AdvancedArguments()
  else:
    self._advanced_arguments = copy.deepcopy(advanced_arguments)

  # Copy the metadata, without overriding values already present in the
  # training configuration.
  metadata = self._advanced_arguments.yggdrasil_training_config.metadata
  if (not metadata.HasField("framework") and
      self._advanced_arguments.metadata_framework):
    metadata.framework = self._advanced_arguments.metadata_framework
  if (not metadata.HasField("owner") and
      self._advanced_arguments.metadata_owner):
    metadata.owner = self._advanced_arguments.metadata_owner

  if self._temp_directory is None:
    # The handle is stored on the object so that it stays referenced for as
    # long as the model does.
    self._temp_directory_handle = tempfile.TemporaryDirectory()
    self._temp_directory = self._temp_directory_handle.name
    if self._verbose >= 1:
      tf_logging.info("Use %s as temporary training directory",
                      self._temp_directory)

  # True iff the model is trained.
  self._is_trained = tf.Variable(False, trainable=False, name="is_trained")

  # Unique ID to identify the model during training.
  self._training_model_id = str(uuid.uuid4())

  # The following fields contain the trained model. They are set during the
  # graph construction and training process.

  # The compiled Yggdrasil model.
  self._model: Optional[tf_op.ModelV2] = None

  # Compiled Yggdrasil model specialized for returning the active leaves.
  # This model is initialized at the first call to "call_get_leaves" or
  # "predict_get_leaves".
  self._model_get_leaves: Optional[tf_op.ModelV2] = None

  # Semantic of the input features.
  # Also defines what are the input features of the model.
  self._semantics: Optional[Dict[Text, FeatureSemantic]] = None

  # List of Yggdrasil feature identifiers i.e. feature seen by the Yggdrasil
  # learner. Those are computed after the preprocessing, unfolding and
  # casting.
  self._normalized_input_keys: Optional[List[Text]] = None

  # Textual description of the model.
  self._description: Optional[Text] = None

  # If the model is trained with weights.
  self._weighted_training = False

  # True if the user provides a validation dataset to `fit`.
  self._has_validation_dataset = False

  # Timestamps of the data feeding and training phases; unset (None) at
  # construction time.
  self._time_begin_data_feed: Optional[datetime] = None
  self._time_end_data_feed: Optional[datetime] = None
  self._time_begin_training: Optional[datetime] = None
  self._time_end_training: Optional[datetime] = None