in src/python/tensorflow_cloud/tuner/tuner.py [0:0]
def _get_remote_training_metrics(
    self,
    log_reader,
    partial_epoch_metrics: Dict[Text, float],
    is_validation: Optional[bool] = False,
) -> _TrainingMetrics:
    """Retrieves delta epoch metrics from tensorboard logs since last run.

    This method reports any complete epoch metrics that are available since
    last run. When this method is called while training is running, all
    metrics for the last epoch may not be available. Any incomplete metrics
    for the last epoch are returned in partial_epoch_metrics to be used
    as a starting point for the next round of _get_remote_training_metrics.
    All complete epochs metrics (including the last epoch if applicable) are
    returned as training_metrics.

    Only summary values whose tag starts with "epoch_" are considered; all
    other values are skipped. In training mode the "epoch_" prefix is
    removed from the tag to form the metric name; in validation mode it is
    replaced with "val_".

    Note that the dict passed in as `partial_epoch_metrics` is updated in
    place until a new epoch is detected (training mode), and is always
    updated in place in validation mode. In validation mode that same dict
    object is appended to the completed list once for every epoch metric
    value that is read.

    Args:
        log_reader: An instance of tensorboard DirectoryWatcher that is
            pointing to the tensorboard logs directory.
        partial_epoch_metrics: Any incomplete epoch metrics from previous
            runs that should be used as a starting point.
        is_validation: If True, get validation metrics.

    Returns:
        An instance of _TrainingMetrics a Namedtuple with
        - 'completed_epoch_metrics'- a list of epoch metrics for completed
            epochs.
        - 'partial_epoch_metrics' - Any incomplete epoch metrics for the
            last epoch. Once training completes, the final epoch metrics
            will be stored here, this is not included in
            completed_epoch_metrics.
    """
    epoch_prefix = "epoch_"
    completed_epoch_metrics = []
    for event in log_reader.Load():
        for value in event.summary.value:
            # Note: tf.keras.callbacks.TensorBoard.on_epoch_end() logs the
            # epoch related metrics with a "epoch_" prefix. Please refer to
            # https://github.com/tensorflow/tensorflow/blob/fcc4b966f1265f466e82617020af93670141b009/tensorflow/python/keras/callbacks.py#L2179 # pylint: disable=line-too-long
            if not value.tag.startswith(epoch_prefix):
                continue

            # Strip only the leading prefix: a plain str.replace() would
            # also rewrite any later "epoch_" inside the metric name
            # (e.g. "epoch_per_epoch_loss" must map to "per_epoch_loss").
            metric_name = value.tag[len(epoch_prefix):]
            # Read the tensor of the value being iterated, not of the
            # first value in the event, so that events carrying several
            # summary values report the right number for each tag.
            metric_value = tf.make_ndarray(value.tensor)

            if is_validation:
                # Validation metrics are calculated on trial end.
                partial_epoch_metrics["val_" + metric_name] = metric_value
                completed_epoch_metrics.append(partial_epoch_metrics)
                continue

            # If this metric has been seen, this is a new epoch.
            if metric_name in partial_epoch_metrics:
                completed_epoch_metrics.append(partial_epoch_metrics)
                partial_epoch_metrics = {}
            # Note this method captures all metrics even if they
            # are not part of the oracle objectives. We rely on
            # oracle to ignore the unrelated Objectives.
            partial_epoch_metrics[metric_name] = metric_value
    return _TrainingMetrics(completed_epoch_metrics, partial_epoch_metrics)