def __init__()

in modules/SwissArmyTransformer/sat/tokenization/glm/tokenization.py
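
Initializes the combined GLM tokenizer: it wraps a plain text tokenizer, registers the command (special) tokens, and builds the merged token list and vocabulary that span both.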


    def __init__(self, text_tokenizer, command_tokens=None):
        # set text tokenizer
        self.text_tokenizer = text_tokenizer
        if not hasattr(self, 'num_text_tokens'):
            self.num_text_tokens = len(self.text_tokenizer)
        self._command_tokens = command_tokens
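        # lookup maps keyed by command-token name, surface form, and Id
        # (self.command_tokens is presumably a property over _command_tokens defined elsewhere in the class)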
        self.command_name_map = {tok.name: tok for tok in self.command_tokens}
        self.command_token_map = {tok.token: tok for tok in self.command_tokens}
        self.command_id_map = {tok.Id: tok for tok in self.command_tokens}

        # build the merged token list and vocabulary covering both text and command tokens
        max_token_id = max(len(self.text_tokenizer.tokens) - 1, max(self.command_id_map.keys()))
        self._tokens = [self.text_tokenizer.tokens[i] if i < len(self.text_tokenizer.tokens) else f'[UNUSED{i}]'
                        for i in range(max_token_id + 1)]
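        # overwrite the placeholder slots with each command token's surface form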
        for idx, token in self.command_id_map.items():
            self._tokens[idx] = token.token
        self._vocab = {t.token: Id for Id, t in self.command_id_map.items()}
        self._vocab.update(self.text_tokenizer.vocab)

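        # derive token counts unless a subclass already set them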
        if not hasattr(self, 'num_command_tokens'):
            self.num_command_tokens = len(self.command_tokens)
        if not hasattr(self, 'num_tokens'):
            self.num_tokens = len(self.tokens)

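        # views over the plain text tokenizer's tokens and vocabulary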
        self._text_tokens = list(self.text_tokenizer.tokens)
        self._text_token_vocab = {t: Id for t, Id in self.text_tokenizer.vocab.items()}

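        # token-string views restricted to the command tokens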
        self._command_token_tokens = list(self.command_token_map.keys())
        self._command_token_vocab = {t.token: Id for Id, t in self.command_id_map.items()}

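        # add spaces between special tokens when decoding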
        self.spaces_between_special_tokens = True
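
A minimal usage sketch follows. It assumes the enclosing class is Tokenizer and that CommandToken(name, token, Id) is the companion class defined in this module; TinyTextTokenizer is a hypothetical stub that only illustrates the duck-typed interface the constructor relies on (__len__, .tokens, .vocab). Note that although command_tokens defaults to None, the map-building lines above would raise on None, so callers are expected to pass a real sequence.

    from sat.tokenization.glm.tokenization import CommandToken, Tokenizer

    class TinyTextTokenizer:
        # hypothetical stub exposing the interface the constructor expects
        def __init__(self):
            self.tokens = ['hello', 'world']
            self.vocab = {'hello': 0, 'world': 1}

        def __len__(self):
            return len(self.tokens)

    commands = [
        CommandToken('pad', '[PAD]', 2),  # name, surface form, Id
        CommandToken('eos', '[EOS]', 3),
    ]
    tokenizer = Tokenizer(TinyTextTokenizer(), commands)
    # tokenizer._tokens -> ['hello', 'world', '[PAD]', '[EOS]']
    # tokenizer._vocab  -> {'[PAD]': 2, '[EOS]': 3, 'hello': 0, 'world': 1}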