in build/lex.go [334:645]
// Lex scans and returns the next token from the input, filling val with the
// token's position and (via startToken/endToken) its text. It is the entry
// point called by the generated parser. The return value is either a literal
// rune for single-character punctuation or one of the _XXX token constants.
// As side effects it maintains the indentation stack (emitting _INDENT and
// _UNINDENT tokens) and collects comments for later attachment to the
// syntax tree.
func (in *input) Lex(val *yySymType) int {
	// Skip past spaces, stopping at non-space or EOF.
	countNL := 0 // number of newlines we've skipped past
	for !in.eof() {
		// Skip over spaces. Count newlines so we can give the parser
		// information about where top-level blank lines are,
		// for top-level comment assignment.
		c := in.peekRune()
		if c == ' ' || c == '\t' || c == '\r' || c == '\n' {
			if c == '\n' {
				in.indent = 0
				in.cleanLine = true
				if in.depth == 0 {
					// Not in a statement. Tell parser about top-level blank line.
					in.startToken(val)
					in.readRune()
					in.endToken(val)
					return '\n'
				}
				countNL++
			} else if c == ' ' && in.cleanLine {
				// Only spaces at the start of a line (before any
				// non-space rune) count toward the indentation level.
				in.indent++
			}
			in.readRune()
			continue
		}

		// Comment runs to end of line.
		if c == '#' {
			// If a line contains just a comment its indentation level doesn't matter.
			// Reset it to zero.
			in.indent = 0
			isLineComment := in.cleanLine
			in.cleanLine = true

			// Is this comment the only thing on its line?
			// Find the last \n before this # and see if it's all
			// spaces from there to here.
			// If it's a suffix comment but the last non-space symbol before
			// it is one of (, [, or {, or it's a suffix comment to "):"
			// (e.g. trailing closing bracket or a function definition),
			// treat it as a line comment that should be
			// put inside the corresponding block.
			i := bytes.LastIndex(in.complete[:in.pos.Byte], []byte("\n"))
			prefix := bytes.TrimSpace(in.complete[i+1 : in.pos.Byte])
			// Remove interior spaces too, so e.g. ") :" is recognized
			// the same as "):" below.
			prefix = bytes.Replace(prefix, []byte{' '}, []byte{}, -1)
			isSuffix := true
			if len(prefix) == 0 ||
				(len(prefix) == 2 && prefix[0] == ')' && prefix[1] == ':') ||
				prefix[len(prefix)-1] == '[' ||
				prefix[len(prefix)-1] == '(' ||
				prefix[len(prefix)-1] == '{' {
				isSuffix = false
			}

			// Consume comment without the \n it ends with.
			in.startToken(val)
			for len(in.remaining) > 0 && in.peekRune() != '\n' {
				in.readRune()
			}

			in.endToken(val)

			val.tok = strings.TrimRight(val.tok, "\n")
			in.lastToken = "comment"

			// If we are at top level (not in a rule), hand the comment to
			// the parser as a _COMMENT token. The grammar is written
			// to handle top-level comments itself.
			if in.depth == 0 && isLineComment {
				// Not in a statement. Tell parser about top-level comment.
				return _COMMENT
			}

			// Otherwise, save comment for later attachment to syntax tree.
			// An empty-text Comment records that one or more blank lines
			// preceded this comment, so the gap can be preserved on output.
			if countNL > 1 {
				in.lineComments = append(in.lineComments, Comment{val.pos, ""})
			}
			if isSuffix {
				in.suffixComments = append(in.suffixComments, Comment{val.pos, val.tok})
			} else {
				in.lineComments = append(in.lineComments, Comment{val.pos, val.tok})
			}
			countNL = 0
			continue
		}

		if c == '\\' && len(in.remaining) >= 2 && in.remaining[1] == '\n' {
			// We can ignore a trailing \ at end of line together with the \n.
			in.readRune()
			in.readRune()
			continue
		}

		// Found non-space non-comment.
		break
	}

	// Check for changes in indentation
	// Skip if we're inside a statement, or if there were non-space
	// characters before in the current line.
	if in.depth == 0 && in.cleanLine {
		if in.indent > in.currentIndent() {
			// A new indentation block starts
			in.indents = append(in.indents, in.indent)
			in.lastToken = "indent"
			in.cleanLine = false
			return _INDENT
		} else if in.indent < in.currentIndent() {
			// An indentation block ends
			in.indents = in.indents[:len(in.indents)-1]

			// It's a syntax error if the current line indentation level is now greater than
			// currentIndent(), should be either equal (a parent block continues) or still less
			// (need to unindent more).
			if in.indent > in.currentIndent() {
				in.pos = val.pos
				in.Error("unexpected indentation")
			}

			in.lastToken = "unindent"
			return _UNINDENT
		}
	}

	in.cleanLine = false

	// If the file ends with an indented block, return the corresponding amounts of unindents.
	// (One _UNINDENT per call; the parser keeps calling Lex until the stack drains.)
	if in.eof() && in.currentIndent() > 0 {
		in.indents = in.indents[:len(in.indents)-1]
		in.lastToken = "unindent"
		return _UNINDENT
	}

	// Found the beginning of the next token.
	in.startToken(val)
	defer in.endToken(val)

	// End of file.
	if in.eof() {
		in.lastToken = "EOF"
		return _EOF
	}

	// Punctuation tokens.
	switch c := in.peekRune(); c {
	case '[', '(', '{':
		// Entering brackets suppresses newline/indent handling (see depth checks above).
		in.depth++
		in.readRune()
		return c

	case ']', ')', '}':
		in.depth--
		in.readRune()
		return c

	case '.', ':', ';', ',': // single-char tokens
		in.readRune()
		return c

	case '<', '>', '=', '!', '+', '-', '*', '/', '%', '|', '&', '~', '^': // possibly followed by =
		in.readRune()

		if c == '~' {
			// unary bitwise not, shouldn't be followed by anything
			return c
		}

		if c == '*' && in.peekRune() == '*' {
			// double asterisk
			in.readRune()
			return _STAR_STAR
		}

		if c == '-' && in.peekRune() == '>' {
			// functions type annotation
			in.readRune()
			return _ARROW
		}

		// Doubled operator: //, <<, >>. Note c is replaced by the
		// two-character token code so a trailing '=' below yields _AUGM
		// (e.g. //=, <<=, >>=).
		if c == in.peekRune() {
			switch c {
			case '/':
				// integer division
				in.readRune()
				c = _INT_DIV
			case '<':
				// left shift
				in.readRune()
				c = _BIT_LSH
			case '>':
				// right shift
				in.readRune()
				c = _BIT_RSH
			}
		}

		if in.peekRune() == '=' {
			in.readRune()
			switch c {
			case '<':
				return _LE
			case '>':
				return _GE
			case '=':
				return _EQ
			case '!':
				return _NE
			default:
				// Any other operator followed by '=' is an augmented
				// assignment (+=, -=, //=, <<=, ...).
				return _AUGM
			}
		}
		return c

	case 'r': // possible beginning of raw quoted string
		if len(in.remaining) < 2 || in.remaining[1] != '"' && in.remaining[1] != '\'' {
			break
		}
		// Consume the 'r' prefix; c becomes the quote rune so the
		// string case below scans the rest of the literal.
		in.readRune()
		c = in.peekRune()
		fallthrough

	case '"', '\'': // quoted string
		quote := c
		if len(in.remaining) >= 3 && in.remaining[0] == byte(quote) && in.remaining[1] == byte(quote) && in.remaining[2] == byte(quote) {
			// Triple-quoted string.
			in.readRune()
			in.readRune()
			in.readRune()
			// Sliding window of the last three runes read; the literal
			// ends when all three are the quote character.
			var c1, c2, c3 int
			for {
				if in.eof() {
					in.pos = val.pos
					in.Error("unexpected EOF in string")
				}
				c1, c2, c3 = c2, c3, in.readRune()
				if c1 == quote && c2 == quote && c3 == quote {
					break
				}
				if c3 == '\\' {
					if in.eof() {
						in.pos = val.pos
						in.Error("unexpected EOF in string")
					}
					// Consume the escaped rune so an escaped quote
					// cannot terminate the string.
					in.readRune()
				}
			}
		} else {
			in.readRune()
			for {
				if in.eof() {
					in.pos = val.pos
					in.Error("unexpected EOF in string")
				}
				if in.peekRune() == '\n' {
					in.Error("unexpected newline in string")
				}
				c := in.readRune()
				if c == quote {
					break
				}
				if c == '\\' {
					if in.eof() {
						in.pos = val.pos
						in.Error("unexpected EOF in string")
					}
					// Skip the escaped rune (e.g. \" or \\).
					in.readRune()
				}
			}
		}
		in.endToken(val)
		s, triple, err := Unquote(val.tok)
		if err != nil {
			in.Error(fmt.Sprint(err))
		}
		val.str = s
		val.triple = triple
		return _STRING
	}

	// Checked all punctuation. Must be identifier token.
	if c := in.peekRune(); !isIdent(c) {
		in.Error(fmt.Sprintf("unexpected input character %#q", c))
	}

	// Scan over alphanumeric identifier.
	for {
		c := in.peekRune()
		if !isIdent(c) {
			break
		}
		in.readRune()
	}

	// Call endToken to set val.tok to identifier we just scanned,
	// so we can look to see if val.tok is a keyword.
	in.endToken(val)
	if k := keywordToken[val.tok]; k != 0 {
		return k
	}
	switch val.tok {
	case "pass":
		return _PASS
	case "break":
		return _BREAK
	case "continue":
		return _CONTINUE
	}
	// An identifier-shaped token starting with a digit is a number
	// literal (isIdent accepts digits, so numbers are scanned here).
	if len(val.tok) > 0 && val.tok[0] >= '0' && val.tok[0] <= '9' {
		return _INT
	}
	return _IDENT
}