in preprocessing/src/javalang_tokenizer.py [0:0]
    def tokenize(self, keep_comments=False):
        self.reset()

        # Convert unicode escapes
        self.pre_tokenize()
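        # (Per JLS §3.3, \uXXXX escapes are decoded before lexing; e.g. the
        # raw characters "\u0041" reach the tokenizer below as the
        # identifier "A".)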
        while self.i < self.length:
            token_type = None

            c = self.data[self.i]
            c_next = None
            startswith = c

            if self.i + 1 < self.length:
                c_next = self.data[self.i + 1]
                startswith = c + c_next

            if c.isspace():
                self.consume_whitespace()
                continue

            elif startswith in ("//", "/*"):
                comment = self.read_comment()
                if comment.startswith("/**"):
                    self.javadoc = comment
                if keep_comments:
                    # Comment tokens are yielded eagerly, without position info
                    token_type = Comment
                    token = token_type(comment)
                    yield token
                continue

            elif startswith == '..' and self.try_operator():
                # Ensure we don't mistake a '...' operator for a sequence of
                # three '.' separators. This is done as an optimization instead
                # of moving try_operator higher in the chain because operators
                # aren't as common and try_operator is expensive.
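                # For example, the varargs declaration "void f(String... args)"
                # must lex '...' as a single Operator token, not three
                # Separator tokens.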
                token_type = Operator

            elif c == '@':
                token_type = Annotation
                self.j = self.i + 1

            elif c == '.' and c_next and c_next.isdigit():
                token_type = self.read_decimal_float_or_integer()

            elif self.try_separator():
                token_type = Separator

            elif c in ("'", '"'):
                token_type = String
                self.read_string()

            elif c in '0123456789':
                token_type = self.read_integer_or_float(c, c_next)

            elif self.is_java_identifier_start(c):
                token_type = self.read_identifier()

            elif self.try_operator():
                token_type = Operator

            else:
                # Unrecognized character: report it, skip it, and keep lexing
                self.error('Could not process token', c)
                self.i = self.i + 1
                continue

            position = Position(self.current_line, self.i - self.start_of_line)
            token = token_type(
                self.data[self.i:self.j], position, self.javadoc)
            yield token

            if self.javadoc:
                self.javadoc = None

            self.i = self.j
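
    # Usage sketch (not part of this file): assuming this method lives on a
    # javalang-style Tokenizer class constructed from the raw source string,
    # iterating the generator yields typed tokens in source order, e.g.:
    #
    #     for tok in Tokenizer('int x = 42; // init').tokenize(keep_comments=True):
    #         print(type(tok).__name__, repr(tok.value))
    #
    # which would emit the BasicType, Identifier, Operator, DecimalInteger,
    # and Separator tokens, followed by the trailing Comment.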