query/sql/SqlBase.g4 (519 lines of code) (raw):

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Combined ANTLR4 grammar (parser rules first, lexer rules after) for a
// read-only SQL dialect: the only statement form is a query.
//
// Maintenance notes:
//  * Rule names, element labels (`name=...`) and alternative labels (`#name`)
//    determine the names in the generated parser/listener/visitor code, so
//    renaming any of them is an API change for the code consuming the parser.
//  * In left-recursive rules ANTLR4 gives higher precedence to alternatives
//    listed earlier; do not reorder alternatives without intending to change
//    operator precedence.
grammar SqlBase;

// Token type declared without a lexer rule; nothing in this grammar produces
// or references it.
// NOTE(review): presumably emitted by a separate statement-splitting lexer
// (see the DelimiterLexer mention at UNRECOGNIZED) - confirm.
tokens {
    DELIMITER
}

// ---------------------------------------------------------------------------
// Parser rules
// ---------------------------------------------------------------------------

// A statement is exactly one query.
statement
    : query                                                            #statementDefault
    ;

// Optional WITH clause followed by the query body.
query
    : with? queryNoWith
    ;

// WITH [RECURSIVE] name AS (query) [, ...]
with
    : WITH RECURSIVE? namedQuery (',' namedQuery)*
    ;

// Query body with optional ORDER BY and LIMIT. LIMIT accepts an integer
// literal or the keyword ALL.
queryNoWith:
      queryTerm
      (ORDER BY sortItem (',' sortItem)*)?
      (LIMIT limit=(INTEGER_VALUE | ALL))?
    ;

// Set operations. INTERSECT is listed before UNION/EXCEPT, so it binds
// tighter than they do. Both set-operation alternatives share the
// #setOperation label and therefore the same generated context type.
queryTerm
    : queryPrimary                                                             #queryTermDefault
    | left=queryTerm operator=INTERSECT setQuantifier? right=queryTerm         #setOperation
    | left=queryTerm operator=(UNION | EXCEPT) setQuantifier? right=queryTerm  #setOperation
    ;

// Leaf of a set-operation tree: SELECT, TABLE name, VALUES list, or a
// parenthesized query body (without its own WITH clause).
queryPrimary
    : querySpecification                   #queryPrimaryDefault
    | TABLE qualifiedName                  #table
    | VALUES expression (',' expression)*  #inlineTable
    | '(' queryNoWith  ')'                 #subquery
    ;

// One ORDER BY key with optional direction and NULLS FIRST/LAST placement.
sortItem
    : expression ordering=(ASC | DESC)? (NULLS nullOrdering=(FIRST | LAST))?
    ;

// SELECT ... [FROM ...] [WHERE ...] [GROUP BY ...] [HAVING ...]
// FROM is optional; multiple comma-separated relations are allowed.
querySpecification
    : SELECT setQuantifier? selectItem (',' selectItem)*
      (FROM relation (',' relation)*)?
      (WHERE where=booleanExpression)?
      (GROUP BY groupBy)?
      (HAVING having=booleanExpression)?
    ;

// GROUP BY list, optionally prefixed with DISTINCT/ALL.
groupBy
    : setQuantifier? groupingElement (',' groupingElement)*
    ;

// Only plain grouping sets are supported; the ROLLUP token is defined in the
// lexer but no parser rule uses it.
groupingElement
    : groupingExpressions         #singleGroupingSet
    ;

// Either a (possibly empty) parenthesized expression list or one expression.
groupingExpressions
    : '(' (expression (',' expression)*)? ')'
    | expression
    ;

// A single WITH entry: name [(col, ...)] AS (query)
namedQuery
    : name=identifier (columnAliases)? AS '(' query ')'
    ;

setQuantifier
    : DISTINCT
    | ALL
    ;

// Select-list entry: an optionally aliased expression, `qualified.name.*`,
// or a bare `*`. The two star forms share the #selectAll label.
selectItem
    : expression (AS? identifier)?  #selectSingle
    | qualifiedName '.' ASTERISK    #selectAll
    | ASTERISK                      #selectAll
    ;

// Joins (left-recursive, so chains of joins nest to the left).
// Note the asymmetry in labels and operand types: CROSS and NATURAL joins take
// `right=sampledRelation`, while the qualified join takes
// `rightRelation=relation` followed by its join criteria.
relation
    : left=relation
      ( CROSS JOIN right=sampledRelation
      | joinType JOIN rightRelation=relation joinCriteria
      | NATURAL joinType JOIN right=sampledRelation
      )                                           #joinRelation
    | sampledRelation                             #relationDefault
    ;

// The first alternative is `INNER?`, so this rule can match no tokens at all
// (a bare JOIN).
joinType
    : INNER?
    | LEFT OUTER?
    | RIGHT OUTER?
    | FULL OUTER?
    ;

joinCriteria
    : ON booleanExpression
    | USING '(' identifier (',' identifier)* ')'
    ;

// Relation with an optional TABLESAMPLE <type> (<percentage>) suffix.
sampledRelation
    : aliasedRelation (
        TABLESAMPLE sampleType '(' percentage=expression ')'
      )?
    ;

sampleType
    : BERNOULLI
    | SYSTEM
    ;

// Relation with an optional alias and optional column alias list.
aliasedRelation
    : relationPrimary (AS? identifier columnAliases?)?
    ;

columnAliases
    : '(' identifier (',' identifier)* ')'
    ;

// Table reference, parenthesized subquery, or parenthesized relation.
relationPrimary
    : qualifiedName                                                   #tableName
    | '(' query ')'                                                   #subqueryRelation
    | '(' relation ')'                                                #parenthesizedRelation
    ;

// General expression; currently just an alias for booleanExpression.
expression
    : booleanExpression
    ;

// Boolean operators. Alternative order gives NOT the highest precedence,
// then AND, then OR. AND and OR share the #logicalBinary label.
booleanExpression
    : predicated                                                   #booleanDefault
    | NOT booleanExpression                                        #logicalNot
    | left=booleanExpression operator=AND right=booleanExpression  #logicalBinary
    | left=booleanExpression operator=OR right=booleanExpression   #logicalBinary
    ;

// workaround for:
//  https://github.com/antlr/antlr4/issues/780
//  https://github.com/antlr/antlr4/issues/781
// The left-hand valueExpression is parsed here and its context is handed to
// `predicate` as a rule argument.
predicated
    : valueExpression predicate[$valueExpression.ctx]?
    ;

// fix golang compiler error: no package name
// The parameter is declared with the package-qualified Go runtime type
// `antlr.ParserRuleContext`; `value` receives the left operand context passed
// in by `predicated`. Both IN alternatives and the optional NOT are handled
// per alternative, not by a shared prefix.
predicate[antlr.ParserRuleContext value]
    : comparisonOperator right=valueExpression                            #comparison
    | comparisonOperator comparisonQuantifier '(' query ')'               #quantifiedComparison
    | NOT? BETWEEN lower=valueExpression AND upper=valueExpression        #between
    | NOT? IN '(' expression (',' expression)* ')'                        #inList
    | NOT? IN '(' query ')'                                               #inSubquery
    ;

// Value-level operators. Alternatives are listed from tightest to loosest
// binding: AT TIME ZONE, unary +/-, multiplicative, additive, then `||`.
// The two binary arithmetic alternatives share the #arithmeticBinary label.
valueExpression
    : primaryExpression                                                                 #valueExpressionDefault
    | valueExpression AT timeZoneSpecifier                                              #atTimeZone
    | operator=(MINUS | PLUS) valueExpression                                           #arithmeticUnary
    | left=valueExpression operator=(ASTERISK | SLASH | PERCENT) right=valueExpression  #arithmeticBinary
    | left=valueExpression operator=(PLUS | MINUS) right=valueExpression                #arithmeticBinary
    | left=valueExpression CONCAT right=valueExpression                                 #concatenation
    ;

// Literals, constructors, function calls, subqueries, column references and
// the postfix subscript / dereference forms.
// Several labels are reused across alternatives (#typeConstructor,
// #rowConstructor, #functionCall, #specialDateTimeFunction); each reused
// label maps to a single generated context type.
primaryExpression
    : NULL                                                                                #nullLiteral
    | interval                                                                            #intervalLiteral
    | identifier sql_string                                                               #typeConstructor
    | DOUBLE_PRECISION sql_string                                                         #typeConstructor
    | number                                                                              #numericLiteral
    | booleanValue                                                                        #booleanLiteral
    | sql_string                                                                          #stringLiteral
    | BINARY_LITERAL                                                                      #binaryLiteral
    // Parenthesized row needs at least two elements ('+'); a single
    // parenthesized expression is #parenthesizedExpression further below.
    | '(' expression (',' expression)+ ')'                                                #rowConstructor
    | ROW '(' expression (',' expression)* ')'                                            #rowConstructor
    | qualifiedName '(' ASTERISK ')' filter?                                              #functionCall
    | qualifiedName '(' (setQuantifier? expression (',' expression)*)?
        (ORDER BY sortItem (',' sortItem)*)? ')' filter?                                  #functionCall
    | '(' query ')'                                                                       #subqueryExpression
    // NOTE(review): the comment originally at this position read "This is an
    // extension to ANSI SQL, which considers EXISTS to be a <boolean expression>".
    // No EXISTS alternative is present in this rule (the EXISTS token is still
    // defined in the lexer but unused by the parser), so that remark no longer
    // describes anything here.
    | ARRAY '[' (expression (',' expression)*)? ']'                                       #arrayConstructor
    | value=primaryExpression '[' index=valueExpression ']'                               #subscript
    | identifier                                                                          #columnReference
    | base=primaryExpression '.' fieldName=identifier                                     #dereference
    | name=CURRENT_DATE                                                                   #specialDateTimeFunction
    | name=CURRENT_TIME ('(' precision=INTEGER_VALUE ')')?                                #specialDateTimeFunction
    | name=CURRENT_TIMESTAMP ('(' precision=INTEGER_VALUE ')')?                           #specialDateTimeFunction
    | name=LOCALTIME ('(' precision=INTEGER_VALUE ')')?                                   #specialDateTimeFunction
    | name=LOCALTIMESTAMP ('(' precision=INTEGER_VALUE ')')?                              #specialDateTimeFunction
    | name=CURRENT_USER                                                                   #currentUser
    | '(' expression ')'                                                                  #parenthesizedExpression
    | GROUPING '(' (qualifiedName (',' qualifiedName)*)? ')'                              #groupingOperation
    ;

// String literal: plain, or U&'...' with an optional UESCAPE '<string>' suffix.
sql_string
    : STRING                                #basicStringLiteral
    | UNICODE_STRING (UESCAPE STRING)?      #unicodeStringLiteral
    ;

// Operand of AT: TIME ZONE followed by an interval or a string.
timeZoneSpecifier
    : TIME ZONE interval  #timeZoneInterval
    | TIME ZONE sql_string    #timeZoneString
    ;

comparisonOperator
    : EQ | NEQ | LT | LTE | GT | GTE
    ;

comparisonQuantifier
    : ALL | SOME | ANY
    ;

booleanValue
    : TRUE | FALSE
    ;

// INTERVAL [+|-] '<string>' <field> [TO <field>]
interval
    : INTERVAL sign=(PLUS | MINUS)? sql_string from=intervalField (TO to=intervalField)?
    ;

intervalField
    : YEAR | MONTH | DAY | HOUR | MINUTE | SECOND
    ;

// NOTE(review): not referenced by any other rule in the visible grammar.
normalForm
    : NFD | NFC | NFKD | NFKC
    ;

// fix golang fmt error
// Type syntax: postfix ARRAY, ARRAY<...>, MAP<...,...>, ROW(name type, ...),
// a base type with optional parameters, or an INTERVAL field range.
// NOTE(review): `sqltype`, `typeParameter` and `baseType` only reference each
// other; none is reachable from `statement` in the visible grammar.
sqltype
    : sqltype ARRAY
    | ARRAY '<' sqltype '>'
    | MAP '<' sqltype ',' sqltype '>'
    | ROW '(' identifier sqltype (',' identifier sqltype)* ')'
    | baseType ('(' typeParameter (',' typeParameter)* ')')?
    | INTERVAL from=intervalField TO to=intervalField
    ;

typeParameter
    : INTEGER_VALUE | sqltype
    ;

baseType
    : TIME_WITH_TIME_ZONE
    | TIMESTAMP_WITH_TIME_ZONE
    | DOUBLE_PRECISION
    | identifier
    ;

// NOTE(review): not referenced by any other rule in the visible grammar
// (there is no CASE expression alternative in primaryExpression).
whenClause
    : WHEN condition=expression THEN result=expression
    ;

// Aggregate filter suffix used by #functionCall: FILTER (WHERE <condition>)
filter
    : FILTER '(' WHERE booleanExpression ')'
    ;

// Dot-separated identifier path.
qualifiedName
    : identifier ('.' identifier)*
    ;

// Any identifier form. Keywords listed in `nonReserved` are accepted as
// identifiers and share the #unquotedIdentifier label with plain IDENTIFIER.
identifier
    : IDENTIFIER             #unquotedIdentifier
    | QUOTED_IDENTIFIER      #quotedIdentifier
    | nonReserved            #unquotedIdentifier
    | BACKQUOTED_IDENTIFIER  #backQuotedIdentifier
    | DIGIT_IDENTIFIER       #digitIdentifier
    ;

number
    : DECIMAL_VALUE  #decimalLiteral
    | DOUBLE_VALUE   #doubleLiteral
    | INTEGER_VALUE  #integerLiteral
    ;

// Keyword tokens that may also be used as identifiers (via `identifier`).
nonReserved
    // IMPORTANT: this rule must only contain tokens. Nested rules are not supported. See SqlParser.exitNonReserved
    : ADD | ALL | ANALYZE | ANY | ARRAY | ASC | AT
    | BERNOULLI
    | CALL | CASCADE | CATALOGS | COALESCE | COLUMN | COLUMNS | COMMENT | COMMITTED | CURRENT
    | DATA | DATE | DAY | DESC | DISTRIBUTED
    | EXCLUDING | EXPLAIN
    | FILTER | FIRST | FOLLOWING | FORMAT | FUNCTIONS
    | HOUR
    | IF | INCLUDING | INPUT | INTEGER | INTERVAL
    | LAST | LATERAL | LEVEL | LIMIT | LOGICAL
    | MAP | MINUTE | MONTH
    | NFC | NFD | NFKC | NFKD | NO | NULLIF | NULLS
    | ONLY | OPTION | ORDINALITY | OUTPUT | OVER
    | PARTITION | PARTITIONS | POSITION | PRECEDING | PRIVILEGES | PROPERTIES | PUBLIC
    | RANGE | READ | RENAME | REPEATABLE | REPLACE | RESET | RESTRICT | REVOKE | ROLLBACK | ROW | ROWS
    | SCHEMA | SCHEMAS | SECOND | SERIALIZABLE | SESSION | SET | SETS
    | SHOW | SMALLINT | SOME | START | STATS | SUBSTRING | SYSTEM
    | TABLES | TABLESAMPLE | TEXT | TIME | TIMESTAMP | TINYINT | TO | TRY_CAST | TYPE
    | UNBOUNDED | UNCOMMITTED | USE
    | VALIDATE | VERBOSE | VIEW
    | WORK | WRITE
    | YEAR
    | ZONE
    ;

// ---------------------------------------------------------------------------
// Lexer rules
// ---------------------------------------------------------------------------

// Keyword tokens. They are declared before IDENTIFIER on purpose: when two
// lexer rules match the same longest text, ANTLR picks the rule declared
// first, so a keyword spelling yields the keyword token, not IDENTIFIER.
// All literals are upper-case.
// NOTE(review): presumably the input is upper-cased (or fed through a
// case-insensitive char stream) before lexing - confirm against the caller.
// Many of these tokens are not used by any parser rule in this grammar.
ADD: 'ADD';
ALL: 'ALL';
ALTER: 'ALTER';
ANALYZE: 'ANALYZE';
AND: 'AND';
ANY: 'ANY';
ARRAY: 'ARRAY';
AS: 'AS';
ASC: 'ASC';
AT: 'AT';
BERNOULLI: 'BERNOULLI';
BETWEEN: 'BETWEEN';
BY: 'BY';
CALL: 'CALL';
CASCADE: 'CASCADE';
CASE: 'CASE';
CAST: 'CAST';
CATALOGS: 'CATALOGS';
COALESCE: 'COALESCE';
COLUMN: 'COLUMN';
COLUMNS: 'COLUMNS';
COMMENT: 'COMMENT';
COMMITTED: 'COMMITTED';
CONSTRAINT: 'CONSTRAINT';
CREATE: 'CREATE';
CROSS: 'CROSS';
CURRENT: 'CURRENT';
CURRENT_DATE: 'CURRENT_DATE';
CURRENT_TIME: 'CURRENT_TIME';
CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP';
CURRENT_USER: 'CURRENT_USER';
DATA: 'DATA';
DATE: 'DATE';
DAY: 'DAY';
DEALLOCATE: 'DEALLOCATE';
DELETE: 'DELETE';
DESC: 'DESC';
DESCRIBE: 'DESCRIBE';
DISTINCT: 'DISTINCT';
DISTRIBUTED: 'DISTRIBUTED';
DROP: 'DROP';
ELSE: 'ELSE';
END: 'END';
ESCAPE: 'ESCAPE';
EXCEPT: 'EXCEPT';
EXCLUDING: 'EXCLUDING';
EXECUTE: 'EXECUTE';
EXISTS: 'EXISTS';
EXPLAIN: 'EXPLAIN';
EXTRACT: 'EXTRACT';
FALSE: 'FALSE';
FILTER: 'FILTER';
FIRST: 'FIRST';
FOLLOWING: 'FOLLOWING';
FOR: 'FOR';
FORMAT: 'FORMAT';
FROM: 'FROM';
FULL: 'FULL';
FUNCTIONS: 'FUNCTIONS';
GRANT: 'GRANT';
GRANTS: 'GRANTS';
GRAPHVIZ: 'GRAPHVIZ';
GROUP: 'GROUP';
GROUPING: 'GROUPING';
HAVING: 'HAVING';
HOUR: 'HOUR';
IF: 'IF';
IN: 'IN';
INCLUDING: 'INCLUDING';
INNER: 'INNER';
INPUT: 'INPUT';
INSERT: 'INSERT';
INTEGER: 'INTEGER';
INTERSECT: 'INTERSECT';
INTERVAL: 'INTERVAL';
INTO: 'INTO';
IS: 'IS';
JOIN: 'JOIN';
LAST: 'LAST';
LATERAL: 'LATERAL';
LEFT: 'LEFT';
LEVEL: 'LEVEL';
LIKE: 'LIKE';
LIMIT: 'LIMIT';
LOCALTIME: 'LOCALTIME';
LOCALTIMESTAMP: 'LOCALTIMESTAMP';
LOGICAL: 'LOGICAL';
MAP: 'MAP';
MINUTE: 'MINUTE';
MONTH: 'MONTH';
NATURAL: 'NATURAL';
NFC : 'NFC';
NFD : 'NFD';
NFKC : 'NFKC';
NFKD : 'NFKD';
NO: 'NO';
NORMALIZE: 'NORMALIZE';
NOT: 'NOT';
NULL: 'NULL';
NULLIF: 'NULLIF';
NULLS: 'NULLS';
ON: 'ON';
ONLY: 'ONLY';
OPTION: 'OPTION';
OR: 'OR';
ORDER: 'ORDER';
ORDINALITY: 'ORDINALITY';
OUTER: 'OUTER';
OUTPUT: 'OUTPUT';
OVER: 'OVER';
PARTITION: 'PARTITION';
PARTITIONS: 'PARTITIONS';
POSITION: 'POSITION';
PRECEDING: 'PRECEDING';
PREPARE: 'PREPARE';
PRIVILEGES: 'PRIVILEGES';
PROPERTIES: 'PROPERTIES';
PUBLIC: 'PUBLIC';
RANGE: 'RANGE';
READ: 'READ';
RECURSIVE: 'RECURSIVE';
RENAME: 'RENAME';
REPEATABLE: 'REPEATABLE';
REPLACE: 'REPLACE';
RESET: 'RESET';
RESTRICT: 'RESTRICT';
REVOKE: 'REVOKE';
RIGHT: 'RIGHT';
ROLLBACK: 'ROLLBACK';
ROLLUP: 'ROLLUP';
ROW: 'ROW';
ROWS: 'ROWS';
SCHEMA: 'SCHEMA';
SCHEMAS: 'SCHEMAS';
SECOND: 'SECOND';
SELECT: 'SELECT';
SERIALIZABLE: 'SERIALIZABLE';
SESSION: 'SESSION';
SET: 'SET';
SETS: 'SETS';
SHOW: 'SHOW';
SMALLINT: 'SMALLINT';
SOME: 'SOME';
START: 'START';
STATS: 'STATS';
SUBSTRING: 'SUBSTRING';
SYSTEM: 'SYSTEM';
TABLE: 'TABLE';
TABLES: 'TABLES';
TABLESAMPLE: 'TABLESAMPLE';
TEXT: 'TEXT';
THEN: 'THEN';
TIME: 'TIME';
TIMESTAMP: 'TIMESTAMP';
TINYINT: 'TINYINT';
TO: 'TO';
TRUE: 'TRUE';
TRY_CAST: 'TRY_CAST';
TYPE: 'TYPE';
UESCAPE: 'UESCAPE';
UNBOUNDED: 'UNBOUNDED';
UNCOMMITTED: 'UNCOMMITTED';
UNION: 'UNION';
UNNEST: 'UNNEST';
USE: 'USE';
USING: 'USING';
VALIDATE: 'VALIDATE';
VALUES: 'VALUES';
VERBOSE: 'VERBOSE';
VIEW: 'VIEW';
WHEN: 'WHEN';
WHERE: 'WHERE';
WITH: 'WITH';
WORK: 'WORK';
WRITE: 'WRITE';
YEAR: 'YEAR';
ZONE: 'ZONE';

// Operators. NEQ accepts both the `<>` and `!=` spellings.
EQ  : '=';
NEQ : '<>' | '!=';
LT  : '<';
LTE : '<=';
GT  : '>';
GTE : '>=';

PLUS: '+';
MINUS: '-';
ASTERISK: '*';
SLASH: '/';
PERCENT: '%';
CONCAT: '||';

// Single-quoted string; an embedded quote is written as two quotes ('').
STRING
    : '\'' ( ~'\'' | '\'\'' )* '\''
    ;

// Same body as STRING with a U& prefix.
UNICODE_STRING
    : 'U&\'' ( ~'\'' | '\'\'' )* '\''
    ;

// Note: we allow any character inside the binary literal and validate
// it's a correct literal when the AST is being constructed. This
// allows us to provide more meaningful error messages to the user
BINARY_LITERAL
    :  'X\'' (~'\'')* '\''
    ;

INTEGER_VALUE
    : DIGIT+
    ;

// Digits with a decimal point; either the integer part or the fraction part
// may be omitted, but not both (`1.`, `.5`, `1.5`).
DECIMAL_VALUE
    : DIGIT+ '.' DIGIT*
    | '.' DIGIT+
    ;

// Number with a mandatory exponent (`1E5`, `1.5E-3`, `.5E2`).
DOUBLE_VALUE
    : DIGIT+ ('.' DIGIT*)? EXPONENT
    | '.' DIGIT+ EXPONENT
    ;

// Starts with a letter or underscore; later characters may also be digits,
// '@' or ':'.
IDENTIFIER
    : (LETTER | '_') (LETTER | DIGIT | '_' | '@' | ':')*
    ;

// Identifier that starts with a digit; needs at least one further character.
// A pure digit run of equal length also matches INTEGER_VALUE, which is
// declared earlier and therefore wins.
DIGIT_IDENTIFIER
    : DIGIT (LETTER | DIGIT | '_' | '@' | ':')+
    ;

// Double-quoted identifier; an embedded quote is written as "".
QUOTED_IDENTIFIER
    : '"' ( ~'"' | '""' )* '"'
    ;

// Backquoted identifier; an embedded backquote is written as ``.
BACKQUOTED_IDENTIFIER
    : '`' ( ~'`' | '``' )* '`'
    ;

// Multi-word type names lexed as a single token, with whitespace (WS) between
// the words. Because the lexer prefers the longest match, these win over the
// shorter TIME / TIMESTAMP keyword tokens when the full phrase is present.
TIME_WITH_TIME_ZONE
    : 'TIME' WS 'WITH' WS 'TIME' WS 'ZONE'
    ;

TIMESTAMP_WITH_TIME_ZONE
    : 'TIMESTAMP' WS 'WITH' WS 'TIME' WS 'ZONE'
    ;

DOUBLE_PRECISION
    : 'DOUBLE' WS 'PRECISION'
    ;

// Fragments: building blocks for other lexer rules; they never produce
// tokens on their own.
fragment EXPONENT
    : 'E' [+-]? DIGIT+
    ;

fragment DIGIT
    : [0-9]
    ;

// Upper-case ASCII letters only.
fragment LETTER
    : [A-Z]
    ;

// `--` comment up to (and including, if present) the line terminator; sent to
// the hidden channel so the parser never sees it.
SIMPLE_COMMENT
    : '--' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN)
    ;

// Non-greedy block comment; sent to the hidden channel.
BRACKETED_COMMENT
    : '/*' .*? '*/' -> channel(HIDDEN)
    ;

// Whitespace; sent to the hidden channel.
WS
    : [ \r\n\t]+ -> channel(HIDDEN)
    ;

// Catch-all for anything we can't recognize.
// We use this to be able to ignore and recover all the text
// when splitting statements with DelimiterLexer
// Must remain the last lexer rule so every other single-character match
// takes priority over it.
UNRECOGNIZED
    : .
    ;