in empchat/datasets/tokens.py [0:0]
def get_bert_token_mapping(label_set=None):
    """Build the ordered mapping from special tokens to BERT vocabulary entries.

    The mapping always starts with the six fixed special tokens (unknown,
    padding, ParlAI padding, empty persona, start of comment, end of
    comment). When ``label_set`` is given, every label listed under
    ``SETS_TO_BROKEN_LABELS[label_set]`` is appended afterwards, each paired
    with its own ``"[unusedN]"`` placeholder.

    Args:
        label_set: Key into ``SETS_TO_BROKEN_LABELS``, or ``None`` to return
            only the fixed special tokens.

    Returns:
        An ``OrderedDict`` with the fixed tokens first, followed by the label
        tokens in the order they appear in the selected label set.
    """
    # Label placeholders are numbered from 4 upward.
    # NOTE(review): presumably because slots 1-3 are taken by
    # UNUSED_BERT_TOKEN_1..3 -- confirm against their definitions.
    if label_set is None:
        label_entries = []
    else:
        label_entries = [
            (label, f"[unused{slot:d}]")
            for slot, label in enumerate(SETS_TO_BROKEN_LABELS[label_set], start=4)
        ]

    special_entries = [
        (UNK_TOKEN, "[UNK]"),
        (PAD_TOKEN, "[PAD]"),
        (PARLAI_PAD_TOKEN, UNUSED_BERT_TOKEN_2),
        (EMPTYPERSONA_TOKEN, UNUSED_BERT_TOKEN_3),
        (START_OF_COMMENT, UNUSED_BERT_TOKEN_1),
        (END_OF_COMMENT, "[SEP]"),
    ]
    return OrderedDict(special_entries + label_entries)