def get_bert_token_mapping()

in empchat/datasets/tokens.py [0:0]


def get_bert_token_mapping(label_set=None):
    """Build the ordered mapping from special tokens to BERT vocabulary tokens.

    The mapping always starts with six fixed entries (unknown, pad, ParlAI
    pad, empty-persona, start-of-comment and end-of-comment), in that order.
    When ``label_set`` is given, one extra entry is appended for every label
    in ``SETS_TO_BROKEN_LABELS[label_set]``, in iteration order; each label
    is assigned the next ``"[unusedN]"`` token, with ``N`` starting at 4.

    Args:
        label_set: Key into ``SETS_TO_BROKEN_LABELS``, or ``None`` to return
            only the fixed entries.

    Returns:
        An ``OrderedDict`` of the fixed pairs followed by the label pairs.

    Raises:
        KeyError: If ``label_set`` is not ``None`` and is missing from
            ``SETS_TO_BROKEN_LABELS``.
    """
    # Label slots are numbered from 4 upward.
    # NOTE(review): presumably [unused1]..[unused3] are the values behind
    # UNUSED_BERT_TOKEN_1..3 used below -- confirm against their definitions.
    if label_set is None:
        label_pairs = []
    else:
        label_pairs = [
            (label, f"[unused{slot:d}]")
            for slot, label in enumerate(SETS_TO_BROKEN_LABELS[label_set], start=4)
        ]

    fixed_pairs = [
        (UNK_TOKEN, "[UNK]"),
        (PAD_TOKEN, "[PAD]"),
        (PARLAI_PAD_TOKEN, UNUSED_BERT_TOKEN_2),
        (EMPTYPERSONA_TOKEN, UNUSED_BERT_TOKEN_3),
        (START_OF_COMMENT, UNUSED_BERT_TOKEN_1),
        (END_OF_COMMENT, "[SEP]"),
    ]
    return OrderedDict(fixed_pairs + label_pairs)