in src/tokenizer.py [0:0]
def add_special_tokens(self, special_tokens):
    """Register *special_tokens* in the vocabulary, one fresh id per new token.

    Extends ``self._tok2id`` and ``self._id2tok`` with any token not already
    in the vocabulary, assigning consecutive ids starting at the current
    vocabulary size, and records every requested token (new or pre-existing)
    in ``self.special_tokens``.

    Fixes vs. the previous version:
    - duplicates are removed with ``dict.fromkeys`` instead of ``set`` so the
      id assignment order is deterministic across interpreter runs (set
      iteration order varies with string hash randomization);
    - a token already present in ``_tok2id`` keeps its existing id instead of
      being remapped to a new one (which left a stale ``_id2tok`` entry and
      broke the tok<->id bijection).

    Args:
        special_tokens: iterable of token strings; duplicates are ignored.
    """
    # Lazy %-formatting so the message is only built if INFO is enabled.
    logger.info("Using special tokens: %s", special_tokens)
    next_id = len(self._tok2id)
    new_tok2id = {}
    registered = {}
    # dict.fromkeys dedupes while preserving first-seen order (unlike set()).
    for tok in dict.fromkeys(special_tokens):
        if tok in self._tok2id:
            # Already in the vocabulary: reuse its id rather than minting a
            # second one and leaving a dangling _id2tok entry.
            registered[tok] = self._tok2id[tok]
        else:
            registered[tok] = next_id
            new_tok2id[tok] = next_id
            next_id += 1
    new_id2tok = {val: key for key, val in new_tok2id.items()}
    self._tok2id = {**self._tok2id, **new_tok2id}
    self._id2tok = {**self._id2tok, **new_id2tok}
    self.special_tokens = {**self.special_tokens, **registered}
    self.print_special_token_ids()