def add_blank_grams()

in scripts/build_transitions.py [0:0]


import itertools

# START_IDX and END_IDX are sentinel token ids defined elsewhere in
# scripts/build_transitions.py; they mark the start and end of a sequence.


def add_blank_grams(pruned_ngrams, num_tokens, blank):
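    """Augment pruned n-grams with versions that include the blank token.

    `pruned_ngrams` is a list indexed by n-gram order (index 0 holds unigrams,
    index 1 bigrams, ...), `num_tokens` is the integer id used for the blank
    token, and `blank` selects the insertion policy ("optional" or "forced").
    Returns the updated list of n-gram lists.
    """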
    all_grams = [gram for grams in pruned_ngrams for gram in grams]
    maxorder = len(pruned_ngrams)
    blank_grams = {}
    if blank == "forced":
        pruned_ngrams = [pruned_ngrams[0] if i == 0 else [] for i in range(maxorder)]
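    # The blank token itself is always added as a unigram.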
    pruned_ngrams[0].append(tuple([num_tokens]))
    blank_grams[tuple([num_tokens])] = True
    for gram in all_grams:
        # Iterate over all possibilities by using a vector of 0s and 1s to
        # denote whether a blank is used at each position.
        if blank == "optional":
            # Given a gram ab.. of order n, there are n + 1 positions at which
            # a blank may or may not be inserted.
            onehot_vectors = itertools.product([0, 1], repeat=len(gram) + 1)
        elif blank == "forced":
            # A blank must be inserted at every position around and between tokens.
            onehot_vectors = [[1] * (len(gram) + 1)]
        else:
            raise ValueError(
                "Invalid value specificed for blank. Must be in |optional|forced|none|"
            )
        for j in onehot_vectors:
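            # Interleave blanks into the gram according to the 0/1 vector,
            # never placing a blank before START_IDX or after END_IDX.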
            new_array = []
            for idx, oz in enumerate(j[:-1]):
                if oz == 1 and gram[idx] != START_IDX:
                    new_array.append(num_tokens)
                new_array.append(gram[idx])
            if j[-1] == 1 and gram[-1] != END_IDX:
                new_array.append(num_tokens)
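            # Collect every sub-gram of the interleaved sequence (up to the
            # maximum order) that contains a blank, skipping duplicates.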
            for n in range(maxorder):
                for e in range(n, len(new_array)):
                    cur_gram = tuple(new_array[e - n : e + 1])
                    if num_tokens in cur_gram and cur_gram not in blank_grams:
                        pruned_ngrams[n].append(cur_gram)
                        blank_grams[cur_gram] = True
    return pruned_ngrams
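
A minimal sketch of how the function might be called. The token ids, the
example n-gram lists, and the use of num_tokens as the blank id are
assumptions made for illustration, not values taken from the script:

# Hypothetical example: 3 real tokens (ids 0-2), so the blank gets id 3.
# pruned_ngrams is indexed by n-gram order: index 0 holds unigrams,
# index 1 holds bigrams.
pruned_ngrams = [
    [(0,), (1,), (2,)],  # unigrams
    [(0, 1), (1, 2)],    # bigrams
]

with_blanks = add_blank_grams(pruned_ngrams, num_tokens=3, blank="optional")

# with_blanks[0] now also contains the blank unigram (3,), and with_blanks[1]
# gains blank-containing bigrams such as (3, 0), (0, 3), (1, 3), ...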