func()

in syntax/scan.go [456:825]


// nextToken scans the next token from the input and returns its kind.
//
// Each returned token is bracketed by calls to sc.startToken(val) and
// sc.endToken(val), which are expected to fill in *val (for example
// val.pos and val.raw, which this function reads back when recording
// comments). NEWLINE tokens set val.raw to "\n" directly instead of
// calling endToken.
//
// In addition to ordinary tokens, nextToken synthesizes INDENT and
// OUTDENT tokens from the leading whitespace of lines that start
// outside any bracketed expression (sc.depth == 0):
//
//   - sc.indentstk is the stack of enclosing indentation columns;
//   - sc.dents counts tokens still owed to the caller: positive for
//     pending INDENTs, negative for pending OUTDENTs. One is returned
//     per call until the count reaches zero.
//
// Newlines inside brackets, blank lines (outside the REPL), comments,
// and backslash line continuations produce no token; the function
// loops back to the start label and keeps scanning.
//
// Errors are reported through sc.error and sc.errorf. The code that
// follows those calls is written as if they do not return normally
// (NOTE(review): presumably they panic; confirm against their
// definitions).
func (sc *scanner) nextToken(val *tokenValue) Token {

	// The following distribution of tokens guides case ordering:
	//
	//      COMMA          27   %
	//      STRING         23   %
	//      IDENT          15   %
	//      EQL            11   %
	//      LBRACK          5.5 %
	//      RBRACK          5.5 %
	//      NEWLINE         3   %
	//      LPAREN          2.9 %
	//      RPAREN          2.9 %
	//      INT             2   %
	//      others        < 1   %
	//
	// Although NEWLINE tokens are infrequent, and lineStart is
	// usually (~97%) false on entry, skipped newlines account for
	// about 50% of all iterations of the 'start' loop.

	// start is re-entered by goto whenever input was consumed without
	// yielding a token (skipped newline, line continuation), or when
	// sc.dents has just been set and the pending OUTDENTs must be
	// returned before anything else.
start:
	var c rune

	// Deal with leading spaces and indentation.
	blank := false
	// Remember whether this pass began at the start of a line; the
	// end-of-file case uses it to decide whether a NEWLINE is still owed.
	savedLineStart := sc.lineStart
	if sc.lineStart {
		sc.lineStart = false
		// col accumulates the indentation width of the leading whitespace.
		col := 0
		for {
			c = sc.peekRune()
			if c == ' ' {
				col++
				sc.readRune()
			} else if c == '\t' {
				// A tab adds the distance to the next multiple of 8,
				// measured from sc.pos.Col-1.
				// NOTE(review): the offset is taken from sc.pos.Col rather
				// than from col. If Col advances by one per rune, these
				// diverge after the first tab (two leading tabs would give
				// 15, not 16); confirm whether this is intended.
				const tab = 8
				col += int(tab - (sc.pos.Col-1)%tab)
				sc.readRune()
			} else {
				break
			}
		}

		// A line holding only whitespace, a comment, or nothing at all
		// is blank and does not affect indentation.
		// The third clause matches EOF.
		if c == '#' || c == '\n' || c == 0 {
			blank = true
		}

		// Compute indentation level for non-blank lines not
		// inside an expression.  This is not the common case.
		if !blank && sc.depth == 0 {
			cur := sc.indentstk[len(sc.indentstk)-1]
			if col > cur {
				// indent
				sc.dents++
				sc.indentstk = append(sc.indentstk, col)
			} else if col < cur {
				// outdent(s): pop every enclosing level deeper than col,
				// owing one OUTDENT for each level popped.
				for len(sc.indentstk) > 0 && col < sc.indentstk[len(sc.indentstk)-1] {
					sc.dents--
					sc.indentstk = sc.indentstk[:len(sc.indentstk)-1] // pop
				}
				// The new column must coincide exactly with some
				// enclosing level.
				if col != sc.indentstk[len(sc.indentstk)-1] {
					sc.error(sc.pos, "unindent does not match any outer indentation level")
				}
			}
		}
	}

	// Return saved indentation tokens, one per call, as zero-width
	// tokens (startToken immediately followed by endToken).
	if sc.dents != 0 {
		sc.startToken(val)
		sc.endToken(val)
		if sc.dents < 0 {
			sc.dents++
			return OUTDENT
		} else {
			sc.dents--
			return INDENT
		}
	}

	// start of line proper
	c = sc.peekRune()

	// Skip spaces.
	for c == ' ' || c == '\t' {
		sc.readRune()
		c = sc.peekRune()
	}

	// comment
	if c == '#' {
		if sc.keepComments {
			sc.startToken(val)
		}
		// Consume up to, but not including, the newline (or EOF).
		// The newline itself is handled by the newline case below.
		for c != 0 && c != '\n' {
			sc.readRune()
			c = sc.peekRune()
		}
		if sc.keepComments {
			sc.endToken(val)
			// A comment alone on its line is a line comment;
			// one that follows other tokens is a suffix comment.
			if blank {
				sc.lineComments = append(sc.lineComments, Comment{val.pos, val.raw})
			} else {
				sc.suffixComments = append(sc.suffixComments, Comment{val.pos, val.raw})
			}
		}
	}

	// newline
	if c == '\n' {
		// The next pass must process indentation again.
		sc.lineStart = true

		// Ignore newlines within expressions (common case).
		if sc.depth > 0 {
			sc.readRune()
			goto start
		}

		// Ignore blank lines, except in the REPL,
		// where they emit OUTDENTs and NEWLINE.
		if blank {
			if sc.readline == nil {
				sc.readRune()
				goto start
			} else if len(sc.indentstk) > 1 {
				// Close every open block. The newline is not consumed
				// here, so it is seen again once the OUTDENTs have
				// been returned.
				sc.dents = 1 - len(sc.indentstk)
				sc.indentstk = sc.indentstk[:1]
				goto start
			}
		}

		// At top-level (not in an expression).
		// The raw text is set directly; endToken is not called.
		sc.startToken(val)
		sc.readRune()
		val.raw = "\n"
		return NEWLINE
	}

	// end of file
	if c == 0 {
		// Emit OUTDENTs for unfinished indentation,
		// preceded by a NEWLINE if we haven't just emitted one.
		if len(sc.indentstk) > 1 {
			if savedLineStart {
				// Already at the start of a line: owe one OUTDENT per
				// open block and loop back to return them.
				sc.dents = 1 - len(sc.indentstk)
				sc.indentstk = sc.indentstk[:1]
				goto start
			} else {
				// The last line had no trailing newline: synthesize a
				// NEWLINE without consuming input, and set lineStart so
				// that the next call takes the branch above.
				sc.lineStart = true
				sc.startToken(val)
				val.raw = "\n"
				return NEWLINE
			}
		}

		sc.startToken(val)
		sc.endToken(val)
		return EOF
	}

	// line continuation: a backslash must be immediately followed by
	// a newline; both are discarded and scanning resumes.
	if c == '\\' {
		sc.readRune()
		if sc.peekRune() != '\n' {
			sc.errorf(sc.pos, "stray backslash in program")
		}
		sc.readRune()
		goto start
	}

	// start of the next token
	sc.startToken(val)

	// comma (common case)
	if c == ',' {
		sc.readRune()
		sc.endToken(val)
		return COMMA
	}

	// string literal
	if c == '"' || c == '\'' {
		return sc.scanString(val, c)
	}

	// identifier or keyword
	if isIdentStart(c) {
		// A string prefix (r, b, or rb) directly followed by a quote
		// introduces a string literal, not an identifier. The prefix is
		// consumed here and scanString is handed the quote character.
		// NOTE(review): this indexes sc.rest as if sc.rest[0] were c,
		// i.e. as the unread input; confirm against the scanner type.
		if (c == 'r' || c == 'b') && len(sc.rest) > 1 && (sc.rest[1] == '"' || sc.rest[1] == '\'') {
			//  r"..."
			//  b"..."
			sc.readRune()
			c = sc.peekRune()
			return sc.scanString(val, c)
		} else if c == 'r' && len(sc.rest) > 2 && sc.rest[1] == 'b' && (sc.rest[2] == '"' || sc.rest[2] == '\'') {
			// rb"..."
			sc.readRune()
			sc.readRune()
			c = sc.peekRune()
			return sc.scanString(val, c)
		}

		for isIdent(c) {
			sc.readRune()
			c = sc.peekRune()
		}
		sc.endToken(val)
		// Keywords are identifiers found in the keywordToken table.
		if k, ok := keywordToken[val.raw]; ok {
			return k
		}

		return IDENT
	}

	// brackets
	switch c {
	case '[', '(', '{':
		// Track nesting so that newlines and indentation inside
		// brackets are ignored.
		sc.depth++
		sc.readRune()
		sc.endToken(val)
		switch c {
		case '[':
			return LBRACK
		case '(':
			return LPAREN
		case '{':
			return LBRACE
		}
		panic("unreachable")

	case ']', ')', '}':
		// A closing bracket with no open bracket is an error. The kind
		// of bracket is not matched against the opener here.
		if sc.depth == 0 {
			sc.errorf(sc.pos, "unexpected %q", c)
		} else {
			sc.depth--
		}
		sc.readRune()
		sc.endToken(val)
		switch c {
		case ']':
			return RBRACK
		case ')':
			return RPAREN
		case '}':
			return RBRACE
		}
		panic("unreachable")
	}

	// int or float literal, or period
	if isdigit(c) || c == '.' {
		return sc.scanNumber(val, c)
	}

	// other punctuation
	// Every path below ends the token only after consuming all of its
	// runes, so endToken is deferred rather than repeated before each
	// return.
	defer sc.endToken(val)
	switch c {
	case '=', '<', '>', '!', '+', '-', '%', '/', '&', '|', '^': // possibly followed by '='
		// start is a local variable holding the operator's position;
		// it does not clash with the start label, because labels have
		// their own namespace in Go.
		start := sc.pos
		sc.readRune()
		// Two-character forms ending in '='. Every rune listed in the
		// enclosing case has an entry here, so this switch always
		// returns when it is reached.
		if sc.peekRune() == '=' {
			sc.readRune()
			switch c {
			case '<':
				return LE
			case '>':
				return GE
			case '=':
				return EQL
			case '!':
				return NEQ
			case '+':
				return PLUS_EQ
			case '-':
				return MINUS_EQ
			case '/':
				return SLASH_EQ
			case '%':
				return PERCENT_EQ
			case '&':
				return AMP_EQ
			case '|':
				return PIPE_EQ
			case '^':
				return CIRCUMFLEX_EQ
			}
		}
		// Not followed by '=': a single-character operator, or one of
		// the doubled forms <<, >>, // with an optional trailing '='.
		switch c {
		case '=':
			return EQ
		case '<':
			if sc.peekRune() == '<' {
				sc.readRune()
				if sc.peekRune() == '=' {
					sc.readRune()
					return LTLT_EQ
				} else {
					return LTLT
				}
			}
			return LT
		case '>':
			if sc.peekRune() == '>' {
				sc.readRune()
				if sc.peekRune() == '=' {
					sc.readRune()
					return GTGT_EQ
				} else {
					return GTGT
				}
			}
			return GT
		case '!':
			// '!' is valid only as part of "!=", handled above.
			sc.error(start, "unexpected input character '!'")
		case '+':
			return PLUS
		case '-':
			return MINUS
		case '/':
			if sc.peekRune() == '/' {
				sc.readRune()
				if sc.peekRune() == '=' {
					sc.readRune()
					return SLASHSLASH_EQ
				} else {
					return SLASHSLASH
				}
			}
			return SLASH
		case '%':
			return PERCENT
		case '&':
			return AMP
		case '|':
			return PIPE
		case '^':
			return CIRCUMFLEX
		}
		panic("unreachable")

	case ':', ';', '~': // single-char tokens (except comma)
		sc.readRune()
		switch c {
		case ':':
			return COLON
		case ';':
			return SEMI
		case '~':
			return TILDE
		}
		panic("unreachable")

	case '*': // possibly followed by '*' or '='
		sc.readRune()
		switch sc.peekRune() {
		case '*':
			sc.readRune()
			return STARSTAR
		case '=':
			sc.readRune()
			return STAR_EQ
		}
		return STAR
	}

	// No rule matched the rune c.
	sc.errorf(sc.pos, "unexpected input character %#q", c)
	panic("unreachable")
}