in adanet/core/iteration.py
def _create_hooks(self, base_global_step, subnetwork_specs, candidates,
num_subnetworks, rebuilding, train_manager_dir, is_chief):
"""Returns the hooks to monitor and train this iteration.
Args:
base_global_step: Integer global step at the beginning of this iteration.
subnetwork_specs: List of `_SubnetworkSpec` instances.
candidates: List of `_Candidate` instances to compare.
num_subnetworks: Integer number of subnetwork builders generated for the
current iteration.
rebuilding: Boolean whether the iteration is being rebuilt only to restore
the previous best subnetworks and ensembles.
train_manager_dir: Directory for the TrainManager to store spec metadata.
is_chief: Whether the current worker is chief.
Returns:
A 3-tuple of a _TrainManager for monitoring training, a list of
`SessionRunHooks` to run on chief, and a list of `SessionRunHooks` to run
on all workers.
"""
training_chief_hooks, training_hooks = [], []
ensemble_specs = [c.ensemble_spec for c in candidates]
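# The _TrainManager tracks which subnetwork and ensemble specs are still
# training, persisting per-spec state under train_manager_dir; is_chief
# presumably gates the filesystem writes.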
train_manager = _TrainManager(subnetwork_specs, ensemble_specs,
train_manager_dir, is_chief)
if not self._use_tpu:
# On TPU, the global step gets incremented in an op rather than a hook,
# since TPU training doesn't have the per-run hook granularity of CPU and
# GPU training.
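# _GlobalStepSetterHook keeps the TensorFlow global step in sync with the
# per-subnetwork steps by combining them with self._global_step_combiner_fn
# (an average by default).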
training_chief_hooks.append(
_GlobalStepSetterHook(train_manager, subnetwork_specs,
base_global_step,
self._global_step_combiner_fn))
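# The placement strategy decides whether this worker trains subnetworks at
# all; e.g. adanet's RoundRobinStrategy assigns subnetworks to only a subset
# of workers.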
should_train_subnetworks = (
self._placement_strategy.should_train_subnetworks(num_subnetworks))
for spec in subnetwork_specs:
if not self._use_tpu:
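# _NanLossHook likely stops training just this spec when its loss becomes
# NaN, rather than raising the way tf.train.NanTensorHook would.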
training_hooks.append(_NanLossHook(train_manager, spec))
# On TPU, we increment the step along with the global step as part of the
# train op, whereas on CPU and GPU we use hooks for fine-grained control.
if self._use_tpu or not should_train_subnetworks or spec.train_op is None:
increment_step_op = None
else:
with tf.control_dependencies([spec.train_op.train_op]):
increment_step_op = spec.step.assign_add(1)
# TPU also supports training specs for unequal numbers of steps, but only
# at the granularity of num_iterations_per_loop.
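# _TrainingLimitHook stops training the spec once its step reaches
# self._max_steps, running increment_step_op after each step when one is
# provided.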
training_hooks.append(
_TrainingLimitHook(
train_manager,
spec,
self._max_steps,
increment_step_op=increment_step_op))
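# Workers that the placement strategy excludes from subnetwork training
# skip the remaining per-spec hooks, unless the iteration is only being
# rebuilt to restore previous results.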
if not should_train_subnetworks and not rebuilding:
continue
self._add_hooks(spec, train_manager, training_chief_hooks, training_hooks)
for spec in ensemble_specs:
if not self._use_tpu:
training_hooks.append(_NanLossHook(train_manager, spec))
# See above comment about incrementing the step on CPU vs. TPU.
if self._use_tpu or spec.train_op is None:
increment_step_op = None
else:
with tf.control_dependencies([spec.train_op.train_op]):
increment_step_op = spec.step.assign_add(1)
training_hooks.append(
_TrainingLimitHook(
train_manager,
spec,
self._max_steps,
increment_step_op=increment_step_op))
self._add_hooks(spec, train_manager, training_chief_hooks, training_hooks)
return train_manager, training_chief_hooks, training_hooks
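
Below is a minimal, self-contained sketch of the hook pattern the CPU/GPU
path relies on: a `SessionRunHook` that piggybacks a step-increment op onto
every `session.run` call and requests a stop once the counter hits a limit,
loosely analogous to `_TrainingLimitHook` above. `ToyLimitHook` and the toy
graph are hypothetical illustrations, not adanet API; only the
`tf.compat.v1.train` calls are standard TensorFlow.

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()


class ToyLimitHook(tf.train.SessionRunHook):
  """Requests a stop once the step counter reaches max_steps."""

  def __init__(self, increment_step_op, max_steps):
    self._increment_step_op = increment_step_op
    self._max_steps = max_steps

  def before_run(self, run_context):
    # Piggyback the increment op onto every session.run; its fetched result
    # is the new value of the step counter.
    return tf.train.SessionRunArgs(self._increment_step_op)

  def after_run(self, run_context, run_values):
    if run_values.results >= self._max_steps:
      run_context.request_stop()


step = tf.Variable(0, dtype=tf.int64, name="step")
train_op = tf.no_op()  # Stand-in for a real training op.
with tf.control_dependencies([train_op]):
  # Only count steps on which the train op actually ran.
  increment_step_op = step.assign_add(1)

with tf.train.MonitoredTrainingSession(
    hooks=[ToyLimitHook(increment_step_op, max_steps=5)]) as sess:
  while not sess.should_stop():
    sess.run(train_op)  # The hook stops the loop after 5 steps.

Chaining increment_step_op to train_op with tf.control_dependencies mirrors
the pattern in _create_hooks: the per-spec counter only advances when a real
training step ran.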