in tensorflow_decision_forests/component/builder/builder.py [0:0]
def _finalize_dataspec(self):
    """Fills in missing numerical means in the dataspec.

    Every tree contributes its "higher than" condition thresholds together
    with the default evaluation of each condition. For each NUMERICAL column
    that has no mean yet, a mean is picked between the largest threshold
    whose condition defaults to true and the smallest threshold whose
    condition defaults to false, so that the model looks like it was trained
    with global imputation.

    Columns with no recorded thresholds, or whose thresholds do not leave a
    strictly increasing (lower, upper) interval, are left unchanged.
    """
    collected = py_tree.node.ConditionValueAndDefaultEvaluation()
    for tree in self._trees:
        tree.root.collect_condition_parameter_and_default_evaluation(collected)

    for column in self._dataspec.columns:
        # Only numerical columns that still lack a mean are candidates.
        if column.type != ColumnType.NUMERICAL:
            continue
        if column.numerical.HasField("mean"):
            continue

        splits = collected.numerical_higher_than[column.name]
        if not splits:
            continue

        # Partition the thresholds by the default outcome of their condition.
        default_true = []
        default_false = []
        for threshold, default_eval in splits:
            bucket = default_true if default_eval else default_false
            bucket.append(threshold)

        # Lower bound: largest default-true threshold.
        # Upper bound: smallest default-false threshold.
        lower = max(default_true) if default_true else None
        upper = min(default_false) if default_false else None

        if lower is None and upper is None:
            # No condition refers to this feature.
            continue

        if lower is None:
            # Only default-false conditions exist: invent a lower bound just
            # below the upper one. Fall back to the adjacent float when the
            # subtraction overflows or is absorbed by rounding.
            lower = upper - 1.0
            if math.isinf(lower) or lower == upper:
                lower = np.nextafter(upper, -np.inf)
        elif upper is None:
            # Only default-true conditions exist: invent an upper bound just
            # above the lower one, with the same adjacent-float fallback.
            upper = lower + 1.0
            if math.isinf(upper) or lower == upper:
                upper = np.nextafter(lower, np.inf)

        # The thresholds may be contradictory; only a proper interval
        # yields a usable mean.
        if lower < upper:
            column.numerical.mean = (lower + upper) / 2