# extract_sequence_probabilities()
#
# from ludwig/decoders/sequence_decoders.py

def extract_sequence_probabilities(decoder_output, beam_width, sequence_id=0):
    """Recover per-step probabilities for the predicted sequence from a
    beam search decoder output.

    :param decoder_output: beam search decoder output exposing
        ``predicted_ids`` and ``beam_search_decoder_output`` (which in turn
        provides ``scores``, ``predicted_ids`` and ``parent_ids``).
    :param beam_width: number of beams used during decoding.
    :param sequence_id: index of the returned sequence to extract
        probabilities for (defaults to 0).
    :return: tensor of probabilities obtained by exponentiating the
        gathered log-probabilities.
    """
    # Pull the tensors we need out of the decoder output.
    pred_ids = decoder_output.predicted_ids[:, :, sequence_id]
    scores = decoder_output.beam_search_decoder_output.scores
    beam_token_ids = decoder_output.beam_search_decoder_output.predicted_ids
    beam_parent_ids = decoder_output.beam_search_decoder_output.parent_ids

    # Broadcast the predictions to the same [batch, steps, beam_width]
    # shape as the per-beam token ids so they can be compared element-wise.
    tiled_preds = tf.tile(
        tf.expand_dims(pred_ids, -1), [1, 1, beam_width]
    )
    # Locate, among the top-k ids, the positions that match the final
    # predictions: the boolean mask marks matches, and tf.where turns it
    # into a [n, rank] tensor of match coordinates. Because of how beam
    # search reorders hypotheses, a match is not always in beam 0.
    match_mask = tf.equal(tiled_preds, beam_token_ids)
    match_coords = tf.where(match_mask)
    # A predicted id may occur in several beams at the same step, so keep
    # only the first occurrence (the highest-probability one). Contiguous
    # segment ids are built from the (batch, step) coordinates: with max
    # step length 12, location [2, 3] maps to segment 2 * 12 + 3 = 27.
    max_steps = tf.cast(tf.shape(pred_ids)[-1], tf.int64)
    segment_ids = match_coords[:, 0] * max_steps + match_coords[:, 1]
    # segment_min keeps the lowest beam index (first occurrence) of the
    # predicted element within each (batch, step) segment.
    first_beam_idx = tf.math.segment_min(match_coords[:, 2], segment_ids)
    # Build full (batch, step, beam) coordinates to gather from the parent
    # ids: first_beam_idx supplies the beam axis. Since at least one beam
    # contains the predicted id at every step, there are exactly
    # batch * steps entries, so the batch/step index vectors below line up
    # with it one-to-one.
    num_rows = tf.shape(beam_parent_ids)[0]
    num_steps = tf.shape(beam_parent_ids)[1]
    batch_idx = tf.repeat(
        tf.range(num_rows, dtype=tf.int64),
        tf.repeat(num_steps, num_rows),
    )
    step_idx = tf.tile(
        tf.range(num_steps, dtype=tf.int64),
        tf.shape(beam_parent_ids)[0:1],
    )
    beam_gather_idx = tf.concat(
        [
            batch_idx[:, tf.newaxis],
            step_idx[:, tf.newaxis],
            first_beam_idx[:, tf.newaxis],
        ],
        axis=-1,
    )
    # With the [batch * steps, 3] coordinate tensor we can look up which
    # parent beam each selected position came from.
    selected_parent_rows = tf.gather_nd(beam_parent_ids, beam_gather_idx)
    # Reuse the batch/step indices, now paired with the parent rows, to
    # build the coordinates for gathering from the scores tensor.
    score_gather_idx = tf.concat(
        [
            batch_idx[:, tf.newaxis],
            step_idx[:, tf.newaxis],
            tf.cast(selected_parent_rows[:, tf.newaxis], dtype=tf.int64),
        ],
        axis=-1,
    )
    # Finally gather the log-probabilities.
    flat_log_probs = tf.gather_nd(scores, score_gather_idx)
    # Reshape back to [batch, steps, vocab_size].
    # NOTE(review): this indexes tf.shape(scores)[3], so scores is assumed
    # to be rank 4 here — confirm against the decoder's actual output shape.
    log_probs = tf.reshape(
        flat_log_probs,
        tf.stack(
            [tf.shape(scores)[0], tf.shape(scores)[1], tf.shape(scores)[3]],
            axis=0,
        ),
    )
    # These are log-probabilities, so exponentiating yields probabilities.
    return tf.exp(log_probs)