bindings/python/scripts/convert.py [119:139]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def vocab(self, proto):
        return [
            (piece.piece, piece.score) if check_number_comma(piece.piece) else (piece.piece, piece.score - 100)
            for piece in proto.pieces
        ]

    def normalizer(self, proto):
        normalizers = [Replace("``", '"'), Replace("''", '"')]
        if not self.original_tokenizer.keep_accents:
            normalizers.append(NFKD())
            normalizers.append(StripAccents())
        if self.original_tokenizer.do_lower_case:
            normalizers.append(Lowercase())

        precompiled_charsmap = proto.normalizer_spec.precompiled_charsmap
        normalizers.append(Precompiled(precompiled_charsmap))
        normalizers.append(Replace(Regex(" {2,}"), " "))
        return Sequence(normalizers)

    def post_processor(self, tokenizer):
        return TemplateProcessing(
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



bindings/python/scripts/convert.py [254:274]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def vocab(self, proto):
        return [
            (piece.piece, piece.score) if check_number_comma(piece.piece) else (piece.piece, piece.score - 100)
            for piece in proto.pieces
        ]

    def normalizer(self, proto):
        normalizers = [Replace("``", '"'), Replace("''", '"')]
        if not self.original_tokenizer.keep_accents:
            normalizers.append(NFKD())
            normalizers.append(StripAccents())
        if self.original_tokenizer.do_lower_case:
            normalizers.append(Lowercase())

        precompiled_charsmap = proto.normalizer_spec.precompiled_charsmap
        normalizers.append(Precompiled(precompiled_charsmap))
        normalizers.append(Replace(Regex(" {2,}"), " "))
        return Sequence(normalizers)

    def post_processor(self, tokenizer):
        return TemplateProcessing(
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



