query/expr/parser.go (470 lines of code) (raw):

// Modifications Copyright (c) 2017-2018 Uber Technologies, Inc. // Copyright (c) 2013-2016 Errplane Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy of // this software and associated documentation files (the "Software"), to deal in // the Software without restriction, including without limitation the rights to // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of // the Software, and to permit persons to whom the Software is furnished to do so, // subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. package expr import ( "bytes" "fmt" "io" "strconv" "strings" ) // Parser represents an InfluxQL parser. type Parser struct { s *bufScanner } // NewParser returns a new instance of Parser. func NewParser(r io.Reader) *Parser { return &Parser{s: newBufScanner(r)} } // ParseExpr parses an expression string and returns its AST representation. func ParseExpr(s string) (Expr, error) { return NewParser(strings.NewReader(s)).ParseExpr(0) } // parseInt parses a string and returns an integer literal. func (p *Parser) parseInt(min, max int) (int, error) { tok, pos, lit := p.scanIgnoreWhitespace() if tok != NUMBER { return 0, newParseError(tokstr(tok, lit), []string{"number"}, pos) } // Return an error if the number has a fractional part. if strings.Contains(lit, ".") { return 0, &ParseError{Message: "number must be an integer", Pos: pos} } // Convert string to int. n, err := strconv.Atoi(lit) if err != nil { return 0, &ParseError{Message: err.Error(), Pos: pos} } else if min > n || n > max { return 0, &ParseError{ Message: fmt.Sprintf("invalid value %d: must be %d <= n <= %d", n, min, max), Pos: pos, } } return n, nil } // parseUInt32 parses a string and returns a 32-bit unsigned integer literal. func (p *Parser) parseUInt32() (uint32, error) { tok, pos, lit := p.scanIgnoreWhitespace() if tok != NUMBER { return 0, newParseError(tokstr(tok, lit), []string{"number"}, pos) } // Convert string to unsigned 32-bit integer n, err := strconv.ParseUint(lit, 10, 32) if err != nil { return 0, &ParseError{Message: err.Error(), Pos: pos} } return uint32(n), nil } // parseUInt64 parses a string and returns a 64-bit unsigned integer literal. func (p *Parser) parseUInt64() (uint64, error) { tok, pos, lit := p.scanIgnoreWhitespace() if tok != NUMBER { return 0, newParseError(tokstr(tok, lit), []string{"number"}, pos) } // Convert string to unsigned 64-bit integer n, err := strconv.ParseUint(lit, 10, 64) if err != nil { return 0, &ParseError{Message: err.Error(), Pos: pos} } return uint64(n), nil } // parseIdent parses an identifier. func (p *Parser) parseIdent() (string, error) { tok, pos, lit := p.scanIgnoreWhitespace() if tok != IDENT { return "", newParseError(tokstr(tok, lit), []string{"identifier"}, pos) } return lit, nil } // parseIdentList parses a comma delimited list of identifiers. func (p *Parser) parseIdentList() ([]string, error) { // Parse first (required) identifier. ident, err := p.parseIdent() if err != nil { return nil, err } idents := []string{ident} // Parse remaining (optional) identifiers. for { if tok, _, _ := p.scanIgnoreWhitespace(); tok != COMMA { p.unscan() return idents, nil } if ident, err = p.parseIdent(); err != nil { return nil, err } idents = append(idents, ident) } } // parseSegmentedIdents parses a segmented identifiers. // e.g., "db"."rp".measurement or "db"..measurement func (p *Parser) parseSegmentedIdents() ([]string, error) { ident, err := p.parseIdent() if err != nil { return nil, err } idents := []string{ident} // Parse remaining (optional) identifiers. for { if tok, _, _ := p.scan(); tok != DOT { // No more segments so we're done. p.unscan() break } if ch := p.peekRune(); ch == '/' { // Next segment is a regex so we're done. break } else if ch == '.' { // Add an empty identifier. idents = append(idents, "") continue } // Parse the next identifier. if ident, err = p.parseIdent(); err != nil { return nil, err } idents = append(idents, ident) } if len(idents) > 3 { msg := fmt.Sprintf("too many segments in %s", QuoteIdent(idents...)) return nil, &ParseError{Message: msg} } return idents, nil } // parserString parses a string. func (p *Parser) parseString() (string, error) { tok, pos, lit := p.scanIgnoreWhitespace() if tok != STRING { return "", newParseError(tokstr(tok, lit), []string{"string"}, pos) } return lit, nil } // peekRune returns the next rune that would be read by the scanner. func (p *Parser) peekRune() rune { r, _, _ := p.s.s.r.ReadRune() if r != eof { _ = p.s.s.r.UnreadRune() } return r } // parseOptionalTokenAndInt parses the specified token followed // by an int, if it exists. func (p *Parser) parseOptionalTokenAndInt(t Token) (int, error) { // Check if the token exists. if tok, _, _ := p.scanIgnoreWhitespace(); tok != t { p.unscan() return 0, nil } // Scan the number. tok, pos, lit := p.scanIgnoreWhitespace() if tok != NUMBER { return 0, newParseError(tokstr(tok, lit), []string{"number"}, pos) } // Return an error if the number has a fractional part. if strings.Contains(lit, ".") { msg := fmt.Sprintf("fractional parts not allowed in %s", t.String()) return 0, &ParseError{Message: msg, Pos: pos} } // Parse number. n, _ := strconv.ParseInt(lit, 10, 64) if n < 0 { msg := fmt.Sprintf("%s must be >= 0", t.String()) return 0, &ParseError{Message: msg, Pos: pos} } return int(n), nil } // parseVarRef parses a reference to a measurement or field. func (p *Parser) parseVarRef() (*VarRef, error) { // Parse the segments of the variable ref. segments, err := p.parseSegmentedIdents() if err != nil { return nil, err } vr := &VarRef{Val: strings.Join(segments, ".")} return vr, nil } func rewriteIsOp(expr Expr) (Token, error) { affirmative := true if unary, ok := expr.(*UnaryExpr); ok { if unary.Op == NOT { affirmative = false expr = unary.Expr } else { return IS, fmt.Errorf("bad literal %s following IS", expr.String()) } } switch e := expr.(type) { case *NullLiteral: if affirmative { return IS_NULL, nil } return IS_NOT_NULL, nil case *UnknownLiteral: if affirmative { return IS_NULL, nil } return IS_NOT_NULL, nil case *BooleanLiteral: if affirmative == e.Val { return IS_TRUE, nil } return IS_FALSE, nil } return IS, fmt.Errorf("bad literal %s following IS (NOT)", expr.String()) } func rewriteIsExpr(expr Expr) (Expr, error) { e, ok := expr.(*BinaryExpr) if !ok { return expr, nil } if e.Op == IS { op, err := rewriteIsOp(e.RHS) if err != nil { return nil, err } expr, err := rewriteIsExpr(e.LHS) if err != nil { return nil, err } return &UnaryExpr{Op: op, Expr: expr}, nil } var err error e.LHS, err = rewriteIsExpr(e.LHS) if err != nil { return nil, err } e.RHS, err = rewriteIsExpr(e.RHS) if err != nil { return nil, err } return expr, nil } // ParseExpr parses an expression. // binOpPrcdncLb: binary operator precedence lower bound. // Any binary operator with a lower precedence than that will cause ParseExpr to stop. // This is used for parsing binary operators following a unary operator. func (p *Parser) ParseExpr(binOpPrcdncLb int) (Expr, error) { var err error // Dummy root node. root := &BinaryExpr{} // Parse a non-binary expression type to start. // This variable will always be the root of the expression tree. root.RHS, err = p.parseUnaryExpr(false) if err != nil { return nil, err } // Loop over operations and unary exprs and build a tree based on precendence. for { // If the next token is NOT an operator then return the expression. op, pos, lit := p.scanIgnoreWhitespace() if op == NOT { op, pos, lit = p.scanIgnoreWhitespace() if op == IN { op = NOT_IN } else { return nil, newParseError(tokstr(op, lit), []string{"IN"}, pos) } } if !op.isBinaryOperator() || op.Precedence() < binOpPrcdncLb { p.unscan() return rewriteIsExpr(root.RHS) } // Otherwise parse the next expression. var rhs Expr if rhs, err = p.parseUnaryExpr(op == IN || op == NOT_IN); err != nil { return nil, err } // Find the right spot in the tree to add the new expression by // descending the RHS of the expression tree until we reach the last // BinaryExpr or a BinaryExpr whose RHS has an operator with // precedence >= the operator being added. for node := root; ; { r, ok := node.RHS.(*BinaryExpr) if !ok || r.Op.Precedence() >= op.Precedence() { // Add the new expression here and break. node.RHS = &BinaryExpr{LHS: node.RHS, RHS: rhs, Op: op} break } node = r } } } // parseUnaryExpr parses an non-binary expression. // TODO: shz@ revisit inclusion parameter when open sourcing func (p *Parser) parseUnaryExpr(inclusion bool) (Expr, error) { // If the first token is a LPAREN then parse it as its own grouped expression. if tok, _, _ := p.scanIgnoreWhitespace(); tok == LPAREN { expr, err := p.ParseExpr(0) if err != nil { return nil, err } tok, pos, lit := p.scanIgnoreWhitespace() if tok == RPAREN { // Expect an RPAREN at the end. if inclusion { return &Call{Args: []Expr{expr}}, nil } return &ParenExpr{Expr: expr}, nil } else if tok == COMMA { // Parse a tuple as a function call with empty name. var args []Expr args = append(args, expr) for { // Parse an expression argument. arg, err := p.ParseExpr(0) if err != nil { return nil, err } args = append(args, arg) // If there's not a comma next then stop parsing arguments. if tok, _, _ := p.scan(); tok != COMMA { p.unscan() break } } // There should be a right parentheses at the end. if tok, pos, lit := p.scan(); tok != RPAREN { return nil, newParseError(tokstr(tok, lit), []string{")"}, pos) } return &Call{Args: args}, nil } else { return nil, newParseError(tokstr(tok, lit), []string{")"}, pos) } } p.unscan() // Read next token. tok, pos, lit := p.scanIgnoreWhitespace() if tok.isUnaryOperator() { expr, err := p.ParseExpr(tok.Precedence()) if err != nil { return nil, err } return &UnaryExpr{Op: tok, Expr: expr}, nil } switch tok { case CASE: return p.parseCase() case IDENT: // If the next immediate token is a left parentheses, parse as function call. // Otherwise parse as a variable reference. if tok0, _, _ := p.scan(); tok0 == LPAREN { return p.parseCall(lit) } p.unscan() // unscan the last token (wasn't an LPAREN) p.unscan() // unscan the IDENT token // Parse it as a VarRef. return p.parseVarRef() case DISTINCT: // If the next immediate token is a left parentheses, parse as function call. // Otherwise parse as a Distinct expression. tok0, pos, lit := p.scan() if tok0 == LPAREN { return p.parseCall("distinct") } else if tok0 == WS { tok1, pos, lit := p.scanIgnoreWhitespace() if tok1 != IDENT { return nil, newParseError(tokstr(tok1, lit), []string{"identifier"}, pos) } return &Distinct{Val: lit}, nil } return nil, newParseError(tokstr(tok0, lit), []string{"(", "identifier"}, pos) case STRING: return &StringLiteral{Val: lit}, nil case NUMBER: v, _ := strconv.ParseFloat(lit, 64) e := &NumberLiteral{Val: v, Expr: lit} var err error e.Int, err = strconv.Atoi(e.Expr) if err != nil { e.ExprType = Float e.Int = int(v) } else if e.Int >= 0 { e.ExprType = Unsigned } else { e.ExprType = Signed } return e, nil case NULL: return &NullLiteral{}, nil case UNKNOWN: return &UnknownLiteral{}, nil case TRUE, FALSE: return &BooleanLiteral{Val: (tok == TRUE)}, nil case MUL: return &Wildcard{}, nil default: return nil, newParseError(tokstr(tok, lit), []string{"identifier", "string", "number", "bool"}, pos) } } // Assumes CASE token has been scanned. func (p *Parser) parseCase() (*Case, error) { var kase Case var err error tok, pos, lit := p.scanIgnoreWhitespace() for tok == WHEN { var cond WhenThen cond.When, err = p.ParseExpr(0) if err != nil { return nil, err } tok, pos, lit = p.scanIgnoreWhitespace() if tok != THEN { return nil, newParseError(tokstr(tok, lit), []string{"THEN"}, pos) } cond.Then, err = p.ParseExpr(0) if err != nil { return nil, err } kase.WhenThens = append(kase.WhenThens, cond) tok, pos, lit = p.scanIgnoreWhitespace() } if len(kase.WhenThens) == 0 { return nil, newParseError(tokstr(tok, lit), []string{"WHEN"}, pos) } if tok == ELSE { kase.Else, err = p.ParseExpr(0) if err != nil { return nil, err } tok, pos, lit = p.scanIgnoreWhitespace() } if tok != END { return nil, newParseError(tokstr(tok, lit), []string{"END"}, pos) } return &kase, nil } // parseCall parses a function call. // This function assumes the function name and LPAREN have been consumed. func (p *Parser) parseCall(name string) (*Call, error) { name = strings.ToLower(name) // If there's a right paren then just return immediately. if tok, _, _ := p.scan(); tok == RPAREN { return &Call{Name: name}, nil } p.unscan() // Otherwise parse function call arguments. var args []Expr for { // Parse an expression argument. arg, err := p.ParseExpr(0) if err != nil { return nil, err } args = append(args, arg) // If there's not a comma next then stop parsing arguments. if tok, _, _ := p.scan(); tok != COMMA { p.unscan() break } } // There should be a right parentheses at the end. if tok, pos, lit := p.scan(); tok != RPAREN { return nil, newParseError(tokstr(tok, lit), []string{")"}, pos) } return &Call{Name: name, Args: args}, nil } // scan returns the next token from the underlying scanner. func (p *Parser) scan() (tok Token, pos Pos, lit string) { return p.s.Scan() } // scanIgnoreWhitespace scans the next non-whitespace token. func (p *Parser) scanIgnoreWhitespace() (tok Token, pos Pos, lit string) { tok, pos, lit = p.scan() if tok == WS { tok, pos, lit = p.scan() } return } // consumeWhitespace scans the next token if it's whitespace. func (p *Parser) consumeWhitespace() { if tok, _, _ := p.scan(); tok != WS { p.unscan() } } // unscan pushes the previously read token back onto the buffer. func (p *Parser) unscan() { p.s.Unscan() } // QuoteString returns a quoted string. func QuoteString(s string) string { return `'` + strings.NewReplacer("\n", `\n`, `\`, `\\`, `'`, `\'`).Replace(s) + `'` } // QuoteIdent returns a quoted identifier from multiple bare identifiers. func QuoteIdent(segments ...string) string { r := strings.NewReplacer("\n", `\n`, `\`, `\\`, `"`, `\"`) var buf bytes.Buffer for i, segment := range segments { needQuote := IdentNeedsQuotes(segment) || ((i < len(segments)-1) && segment != "") // not last segment && not "" if needQuote { _ = buf.WriteByte('"') } _, _ = buf.WriteString(r.Replace(segment)) if needQuote { _ = buf.WriteByte('"') } if i < len(segments)-1 { _ = buf.WriteByte('.') } } return buf.String() } // IdentNeedsQuotes returns true if the ident string given would require quotes. func IdentNeedsQuotes(ident string) bool { // check if this identifier is a keyword tok := Lookup(ident) if tok != IDENT { return true } for i, r := range ident { if i == 0 && !isIdentFirstChar(r) { return true } else if i > 0 && !isIdentChar(r) { return true } } return false } // split splits a string into a slice of runes. func split(s string) (a []rune) { for _, ch := range s { a = append(a, ch) } return } // ParseError represents an error that occurred during parsing. type ParseError struct { Message string Found string Expected []string Pos Pos } // newParseError returns a new instance of ParseError. func newParseError(found string, expected []string, pos Pos) *ParseError { return &ParseError{Found: found, Expected: expected, Pos: pos} } // Error returns the string representation of the error. func (e *ParseError) Error() string { if e.Message != "" { return fmt.Sprintf("%s at line %d, char %d", e.Message, e.Pos.Line+1, e.Pos.Char+1) } return fmt.Sprintf("found %s, expected %s at line %d, char %d", e.Found, strings.Join(e.Expected, ", "), e.Pos.Line+1, e.Pos.Char+1) }