in build/lex.go [334:645]
// Lex scans and returns the next token from the input, filling val with the
// token's position and (via startToken/endToken) its text. It is the entry
// point called by the generated parser. The return value is either a literal
// rune for single-character punctuation or one of the _XXX token constants.
// As side effects it maintains the indentation stack (emitting _INDENT and
// _UNINDENT tokens) and collects comments for later attachment to the
// syntax tree.
func (in *input) Lex(val *yySymType) int {
	// Skip past spaces, stopping at non-space or EOF.
	countNL := 0 // number of newlines we've skipped past
	for !in.eof() {
		// Skip over spaces. Count newlines so we can give the parser
		// information about where top-level blank lines are,
		// for top-level comment assignment.
		c := in.peekRune()
		if c == ' ' || c == '\t' || c == '\r' || c == '\n' {
			if c == '\n' {
				in.indent = 0
				in.cleanLine = true
				if in.depth == 0 {
					// Not in a statement. Tell parser about top-level blank line.
					in.startToken(val)
					in.readRune()
					in.endToken(val)
					return '\n'
				}
				countNL++
			} else if c == ' ' && in.cleanLine {
				// Only spaces at the start of a line (before any
				// non-space rune) count toward the indentation level.
				in.indent++
			}
			in.readRune()
			continue
		}

		// Comment runs to end of line.
		if c == '#' {
			// If a line contains just a comment its indentation level doesn't matter.
			// Reset it to zero.
			in.indent = 0
			isLineComment := in.cleanLine
			in.cleanLine = true

			// Is this comment the only thing on its line?
			// Find the last \n before this # and see if it's all
			// spaces from there to here.
			// If it's a suffix comment but the last non-space symbol before
			// it is one of (, [, or {, or it's a suffix comment to "):"
			// (e.g. trailing closing bracket or a function definition),
			// treat it as a line comment that should be
			// put inside the corresponding block.
			i := bytes.LastIndex(in.complete[:in.pos.Byte], []byte("\n"))
			prefix := bytes.TrimSpace(in.complete[i+1 : in.pos.Byte])
			// Remove interior spaces too, so e.g. ") :" is recognized
			// the same as "):" below.
			prefix = bytes.Replace(prefix, []byte{' '}, []byte{}, -1)
			isSuffix := true
			if len(prefix) == 0 ||
				(len(prefix) == 2 && prefix[0] == ')' && prefix[1] == ':') ||
				prefix[len(prefix)-1] == '[' ||
				prefix[len(prefix)-1] == '(' ||
				prefix[len(prefix)-1] == '{' {
				isSuffix = false
			}

			// Consume comment without the \n it ends with.
			in.startToken(val)
			for len(in.remaining) > 0 && in.peekRune() != '\n' {
				in.readRune()
			}

			in.endToken(val)

			val.tok = strings.TrimRight(val.tok, "\n")
			in.lastToken = "comment"

			// If we are at top level (not in a rule), hand the comment to
			// the parser as a _COMMENT token. The grammar is written
			// to handle top-level comments itself.
			if in.depth == 0 && isLineComment {
				// Not in a statement. Tell parser about top-level comment.
				return _COMMENT
			}

			// Otherwise, save comment for later attachment to syntax tree.
			// An empty-text Comment records that one or more blank lines
			// preceded this comment, so the gap can be preserved on output.
			if countNL > 1 {
				in.lineComments = append(in.lineComments, Comment{val.pos, ""})
			}
			if isSuffix {
				in.suffixComments = append(in.suffixComments, Comment{val.pos, val.tok})
			} else {
				in.lineComments = append(in.lineComments, Comment{val.pos, val.tok})
			}
			countNL = 0
			continue
		}

		if c == '\\' && len(in.remaining) >= 2 && in.remaining[1] == '\n' {
			// We can ignore a trailing \ at end of line together with the \n.
			in.readRune()
			in.readRune()
			continue
		}

		// Found non-space non-comment.
		break
	}

	// Check for changes in indentation
	// Skip if we're inside a statement, or if there were non-space
	// characters before in the current line.
	if in.depth == 0 && in.cleanLine {
		if in.indent > in.currentIndent() {
			// A new indentation block starts
			in.indents = append(in.indents, in.indent)
			in.lastToken = "indent"
			in.cleanLine = false
			return _INDENT
		} else if in.indent < in.currentIndent() {
			// An indentation block ends
			in.indents = in.indents[:len(in.indents)-1]

			// It's a syntax error if the current line indentation level is now greater than
			// currentIndent(), should be either equal (a parent block continues) or still less
			// (need to unindent more).
			if in.indent > in.currentIndent() {
				in.pos = val.pos
				in.Error("unexpected indentation")
			}

			in.lastToken = "unindent"
			return _UNINDENT
		}
	}

	in.cleanLine = false

	// If the file ends with an indented block, return the corresponding amounts of unindents.
	// (One _UNINDENT per call; the parser keeps calling Lex until the stack drains.)
	if in.eof() && in.currentIndent() > 0 {
		in.indents = in.indents[:len(in.indents)-1]
		in.lastToken = "unindent"
		return _UNINDENT
	}

	// Found the beginning of the next token.
	in.startToken(val)
	defer in.endToken(val)

	// End of file.
	if in.eof() {
		in.lastToken = "EOF"
		return _EOF
	}

	// Punctuation tokens.
	switch c := in.peekRune(); c {
	case '[', '(', '{':
		// Entering brackets suppresses newline/indent handling (see depth checks above).
		in.depth++
		in.readRune()
		return c

	case ']', ')', '}':
		in.depth--
		in.readRune()
		return c

	case '.', ':', ';', ',': // single-char tokens
		in.readRune()
		return c

	case '<', '>', '=', '!', '+', '-', '*', '/', '%', '|', '&', '~', '^': // possibly followed by =
		in.readRune()

		if c == '~' {
			// unary bitwise not, shouldn't be followed by anything
			return c
		}

		if c == '*' && in.peekRune() == '*' {
			// double asterisk
			in.readRune()
			return _STAR_STAR
		}

		if c == '-' && in.peekRune() == '>' {
			// functions type annotation
			in.readRune()
			return _ARROW
		}

		// Doubled operator: //, <<, >>. Note c is replaced by the
		// two-character token code so a trailing '=' below yields _AUGM
		// (e.g. //=, <<=, >>=).
		if c == in.peekRune() {
			switch c {
			case '/':
				// integer division
				in.readRune()
				c = _INT_DIV
			case '<':
				// left shift
				in.readRune()
				c = _BIT_LSH
			case '>':
				// right shift
				in.readRune()
				c = _BIT_RSH
			}
		}

		if in.peekRune() == '=' {
			in.readRune()
			switch c {
			case '<':
				return _LE
			case '>':
				return _GE
			case '=':
				return _EQ
			case '!':
				return _NE
			default:
				// Any other operator followed by '=' is an augmented
				// assignment (+=, -=, //=, <<=, ...).
				return _AUGM
			}
		}
		return c

	case 'r': // possible beginning of raw quoted string
		if len(in.remaining) < 2 || in.remaining[1] != '"' && in.remaining[1] != '\'' {
			break
		}
		// Consume the 'r' prefix; c becomes the quote rune so the
		// string case below scans the rest of the literal.
		in.readRune()
		c = in.peekRune()
		fallthrough

	case '"', '\'': // quoted string
		quote := c
		if len(in.remaining) >= 3 && in.remaining[0] == byte(quote) && in.remaining[1] == byte(quote) && in.remaining[2] == byte(quote) {
			// Triple-quoted string.
			in.readRune()
			in.readRune()
			in.readRune()
			// Sliding window of the last three runes read; the literal
			// ends when all three are the quote character.
			var c1, c2, c3 int
			for {
				if in.eof() {
					in.pos = val.pos
					in.Error("unexpected EOF in string")
				}
				c1, c2, c3 = c2, c3, in.readRune()
				if c1 == quote && c2 == quote && c3 == quote {
					break
				}
				if c3 == '\\' {
					if in.eof() {
						in.pos = val.pos
						in.Error("unexpected EOF in string")
					}
					// Consume the escaped rune so an escaped quote
					// cannot terminate the string.
					in.readRune()
				}
			}
		} else {
			in.readRune()
			for {
				if in.eof() {
					in.pos = val.pos
					in.Error("unexpected EOF in string")
				}
				if in.peekRune() == '\n' {
					in.Error("unexpected newline in string")
				}
				c := in.readRune()
				if c == quote {
					break
				}
				if c == '\\' {
					if in.eof() {
						in.pos = val.pos
						in.Error("unexpected EOF in string")
					}
					// Skip the escaped rune (e.g. \" or \\).
					in.readRune()
				}
			}
		}
		in.endToken(val)
		s, triple, err := Unquote(val.tok)
		if err != nil {
			in.Error(fmt.Sprint(err))
		}
		val.str = s
		val.triple = triple
		return _STRING
	}

	// Checked all punctuation. Must be identifier token.
	if c := in.peekRune(); !isIdent(c) {
		in.Error(fmt.Sprintf("unexpected input character %#q", c))
	}

	// Scan over alphanumeric identifier.
	for {
		c := in.peekRune()
		if !isIdent(c) {
			break
		}
		in.readRune()
	}

	// Call endToken to set val.tok to identifier we just scanned,
	// so we can look to see if val.tok is a keyword.
	in.endToken(val)
	if k := keywordToken[val.tok]; k != 0 {
		return k
	}
	switch val.tok {
	case "pass":
		return _PASS
	case "break":
		return _BREAK
	case "continue":
		return _CONTINUE
	}
	// An identifier-shaped token starting with a digit is a number
	// literal (isIdent accepts digits, so numbers are scanned here).
	if len(val.tok) > 0 && val.tok[0] >= '0' && val.tok[0] <= '9' {
		return _INT
	}
	return _IDENT
}