tensor2tensor/models/transformer.py [1084:1115]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    if top_beams == 1:
      decoded_ids = decoded_ids[:, 0, 1:]
      scores = scores[:, 0]
    else:
      decoded_ids = decoded_ids[:, :top_beams, 1:]
      scores = scores[:, :top_beams]
  else:  # Greedy

    def inner_loop(i, hit_eos, next_id, decoded_ids, cache, log_prob):
      """One step of greedy decoding."""
      logits, cache = symbols_to_logits_fn(next_id, i, cache)
      log_probs = common_layers.log_prob_from_logits(logits)
      temperature = sampling_temperature
      if hparams.sampling_method == "random_per_example":
        next_id = common_layers.sample_temperature_per_example(
            logits, temperature, top_k)
      else:
        if hparams.sampling_method == "argmax":
          temperature = 0.0
        next_id = common_layers.sample_with_temperature(logits, temperature,
                                                        top_k)

      log_prob_indices = tf.stack([tf.range(tf.to_int64(batch_size)), next_id],
                                  axis=1)
      log_prob += tf.gather_nd(
          log_probs, log_prob_indices) * (1 - tf.to_float(hit_eos))
      # Note(thangluong): we purposely update hit_eos after aggregating
      # log_prob. The subtle detail is that we want to include log_probs up to
      # and including the first eos generated, but not any subsequent tokens.
      hit_eos |= tf.equal(next_id, eos_id)

      next_id = tf.expand_dims(next_id, axis=1)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


tensor2tensor/models/transformer.py [1236:1267]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    if top_beams == 1:
      decoded_ids = decoded_ids[:, 0, 1:]
      scores = scores[:, 0]
    else:
      decoded_ids = decoded_ids[:, :top_beams, 1:]
      scores = scores[:, :top_beams]
  else:  # Greedy

    def inner_loop(i, hit_eos, next_id, decoded_ids, cache, log_prob):
      """One step of greedy decoding."""
      logits, cache = symbols_to_logits_fn(next_id, i, cache)
      log_probs = common_layers.log_prob_from_logits(logits)
      temperature = sampling_temperature
      if hparams.sampling_method == "random_per_example":
        next_id = common_layers.sample_temperature_per_example(
            logits, temperature, top_k)
      else:
        if hparams.sampling_method == "argmax":
          temperature = 0.0
        next_id = common_layers.sample_with_temperature(logits, temperature,
                                                        top_k)

      log_prob_indices = tf.stack([tf.range(tf.to_int64(batch_size)), next_id],
                                  axis=1)
      log_prob += tf.gather_nd(
          log_probs, log_prob_indices) * (1 - tf.to_float(hit_eos))
      # Note(thangluong): we purposely update hit_eos after aggregating
      # log_prob. The subtle detail is that we want to include log_probs up to
      # and including the first eos generated, but not any subsequent tokens.
      hit_eos |= tf.equal(next_id, eos_id)

      next_id = tf.expand_dims(next_id, axis=1)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -