query/sql/SqlBase.g4 (519 lines of code) (raw):
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
grammar SqlBase;
tokens {
DELIMITER
}
statement
: query #statementDefault
;
query
: with? queryNoWith
;
with
: WITH RECURSIVE? namedQuery (',' namedQuery)*
;
queryNoWith:
queryTerm
(ORDER BY sortItem (',' sortItem)*)?
(LIMIT limit=(INTEGER_VALUE | ALL))?
;
queryTerm
: queryPrimary #queryTermDefault
| left=queryTerm operator=INTERSECT setQuantifier? right=queryTerm #setOperation
| left=queryTerm operator=(UNION | EXCEPT) setQuantifier? right=queryTerm #setOperation
;
queryPrimary
: querySpecification #queryPrimaryDefault
| TABLE qualifiedName #table
| VALUES expression (',' expression)* #inlineTable
| '(' queryNoWith ')' #subquery
;
sortItem
: expression ordering=(ASC | DESC)? (NULLS nullOrdering=(FIRST | LAST))?
;
querySpecification
: SELECT setQuantifier? selectItem (',' selectItem)*
(FROM relation (',' relation)*)?
(WHERE where=booleanExpression)?
(GROUP BY groupBy)?
(HAVING having=booleanExpression)?
;
groupBy
: setQuantifier? groupingElement (',' groupingElement)*
;
groupingElement
: groupingExpressions #singleGroupingSet
;
groupingExpressions
: '(' (expression (',' expression)*)? ')'
| expression
;
namedQuery
: name=identifier (columnAliases)? AS '(' query ')'
;
setQuantifier
: DISTINCT
| ALL
;
selectItem
: expression (AS? identifier)? #selectSingle
| qualifiedName '.' ASTERISK #selectAll
| ASTERISK #selectAll
;
relation
: left=relation
( CROSS JOIN right=sampledRelation
| joinType JOIN rightRelation=relation joinCriteria
| NATURAL joinType JOIN right=sampledRelation
) #joinRelation
| sampledRelation #relationDefault
;
joinType
: INNER?
| LEFT OUTER?
| RIGHT OUTER?
| FULL OUTER?
;
joinCriteria
: ON booleanExpression
| USING '(' identifier (',' identifier)* ')'
;
sampledRelation
: aliasedRelation (
TABLESAMPLE sampleType '(' percentage=expression ')'
)?
;
sampleType
: BERNOULLI
| SYSTEM
;
aliasedRelation
: relationPrimary (AS? identifier columnAliases?)?
;
columnAliases
: '(' identifier (',' identifier)* ')'
;
relationPrimary
: qualifiedName #tableName
| '(' query ')' #subqueryRelation
| '(' relation ')' #parenthesizedRelation
;
expression
: booleanExpression
;
booleanExpression
: predicated #booleanDefault
| NOT booleanExpression #logicalNot
| left=booleanExpression operator=AND right=booleanExpression #logicalBinary
| left=booleanExpression operator=OR right=booleanExpression #logicalBinary
;
// workaround for:
// https://github.com/antlr/antlr4/issues/780
// https://github.com/antlr/antlr4/issues/781
predicated
: valueExpression predicate[$valueExpression.ctx]?
;
// fix golang compiler error: no package name
predicate[antlr.ParserRuleContext value]
: comparisonOperator right=valueExpression #comparison
| comparisonOperator comparisonQuantifier '(' query ')' #quantifiedComparison
| NOT? BETWEEN lower=valueExpression AND upper=valueExpression #between
| NOT? IN '(' expression (',' expression)* ')' #inList
| NOT? IN '(' query ')' #inSubquery
;
valueExpression
: primaryExpression #valueExpressionDefault
| valueExpression AT timeZoneSpecifier #atTimeZone
| operator=(MINUS | PLUS) valueExpression #arithmeticUnary
| left=valueExpression operator=(ASTERISK | SLASH | PERCENT) right=valueExpression #arithmeticBinary
| left=valueExpression operator=(PLUS | MINUS) right=valueExpression #arithmeticBinary
| left=valueExpression CONCAT right=valueExpression #concatenation
;
primaryExpression
: NULL #nullLiteral
| interval #intervalLiteral
| identifier sql_string #typeConstructor
| DOUBLE_PRECISION sql_string #typeConstructor
| number #numericLiteral
| booleanValue #booleanLiteral
| sql_string #stringLiteral
| BINARY_LITERAL #binaryLiteral
| '(' expression (',' expression)+ ')' #rowConstructor
| ROW '(' expression (',' expression)* ')' #rowConstructor
| qualifiedName '(' ASTERISK ')' filter? #functionCall
| qualifiedName '(' (setQuantifier? expression (',' expression)*)?
(ORDER BY sortItem (',' sortItem)*)? ')' filter? #functionCall
| '(' query ')' #subqueryExpression
// This is an extension to ANSI SQL, which considers EXISTS to be a <boolean expression>
| ARRAY '[' (expression (',' expression)*)? ']' #arrayConstructor
| value=primaryExpression '[' index=valueExpression ']' #subscript
| identifier #columnReference
| base=primaryExpression '.' fieldName=identifier #dereference
| name=CURRENT_DATE #specialDateTimeFunction
| name=CURRENT_TIME ('(' precision=INTEGER_VALUE ')')? #specialDateTimeFunction
| name=CURRENT_TIMESTAMP ('(' precision=INTEGER_VALUE ')')? #specialDateTimeFunction
| name=LOCALTIME ('(' precision=INTEGER_VALUE ')')? #specialDateTimeFunction
| name=LOCALTIMESTAMP ('(' precision=INTEGER_VALUE ')')? #specialDateTimeFunction
| name=CURRENT_USER #currentUser
| '(' expression ')' #parenthesizedExpression
| GROUPING '(' (qualifiedName (',' qualifiedName)*)? ')' #groupingOperation
;
sql_string
: STRING #basicStringLiteral
| UNICODE_STRING (UESCAPE STRING)? #unicodeStringLiteral
;
timeZoneSpecifier
: TIME ZONE interval #timeZoneInterval
| TIME ZONE sql_string #timeZoneString
;
comparisonOperator
: EQ | NEQ | LT | LTE | GT | GTE
;
comparisonQuantifier
: ALL | SOME | ANY
;
booleanValue
: TRUE | FALSE
;
interval
: INTERVAL sign=(PLUS | MINUS)? sql_string from=intervalField (TO to=intervalField)?
;
intervalField
: YEAR | MONTH | DAY | HOUR | MINUTE | SECOND
;
normalForm
: NFD | NFC | NFKD | NFKC
;
// fix golang fmt error
sqltype
: sqltype ARRAY
| ARRAY '<' sqltype '>'
| MAP '<' sqltype ',' sqltype '>'
| ROW '(' identifier sqltype (',' identifier sqltype)* ')'
| baseType ('(' typeParameter (',' typeParameter)* ')')?
| INTERVAL from=intervalField TO to=intervalField
;
typeParameter
: INTEGER_VALUE | sqltype
;
baseType
: TIME_WITH_TIME_ZONE
| TIMESTAMP_WITH_TIME_ZONE
| DOUBLE_PRECISION
| identifier
;
whenClause
: WHEN condition=expression THEN result=expression
;
filter
: FILTER '(' WHERE booleanExpression ')'
;
qualifiedName
: identifier ('.' identifier)*
;
identifier
: IDENTIFIER #unquotedIdentifier
| QUOTED_IDENTIFIER #quotedIdentifier
| nonReserved #unquotedIdentifier
| BACKQUOTED_IDENTIFIER #backQuotedIdentifier
| DIGIT_IDENTIFIER #digitIdentifier
;
number
: DECIMAL_VALUE #decimalLiteral
| DOUBLE_VALUE #doubleLiteral
| INTEGER_VALUE #integerLiteral
;
nonReserved
// IMPORTANT: this rule must only contain tokens. Nested rules are not supported. See SqlParser.exitNonReserved
: ADD | ALL | ANALYZE | ANY | ARRAY | ASC | AT
| BERNOULLI
| CALL | CASCADE | CATALOGS | COALESCE | COLUMN | COLUMNS | COMMENT | COMMITTED | CURRENT
| DATA | DATE | DAY | DESC | DISTRIBUTED
| EXCLUDING | EXPLAIN
| FILTER | FIRST | FOLLOWING | FORMAT | FUNCTIONS
| HOUR
| IF | INCLUDING | INPUT | INTEGER | INTERVAL
| LAST | LATERAL | LEVEL | LIMIT | LOGICAL
| MAP | MINUTE | MONTH
| NFC | NFD | NFKC | NFKD | NO | NULLIF | NULLS
| ONLY | OPTION | ORDINALITY | OUTPUT | OVER
| PARTITION | PARTITIONS | POSITION | PRECEDING | PRIVILEGES | PROPERTIES | PUBLIC
| RANGE | READ | RENAME | REPEATABLE | REPLACE | RESET | RESTRICT | REVOKE | ROLLBACK | ROW | ROWS
| SCHEMA | SCHEMAS | SECOND | SERIALIZABLE | SESSION | SET | SETS
| SHOW | SMALLINT | SOME | START | STATS | SUBSTRING | SYSTEM
| TABLES | TABLESAMPLE | TEXT | TIME | TIMESTAMP | TINYINT | TO | TRY_CAST | TYPE
| UNBOUNDED | UNCOMMITTED | USE
| VALIDATE | VERBOSE | VIEW
| WORK | WRITE
| YEAR
| ZONE
;
ADD: 'ADD';
ALL: 'ALL';
ALTER: 'ALTER';
ANALYZE: 'ANALYZE';
AND: 'AND';
ANY: 'ANY';
ARRAY: 'ARRAY';
AS: 'AS';
ASC: 'ASC';
AT: 'AT';
BERNOULLI: 'BERNOULLI';
BETWEEN: 'BETWEEN';
BY: 'BY';
CALL: 'CALL';
CASCADE: 'CASCADE';
CASE: 'CASE';
CAST: 'CAST';
CATALOGS: 'CATALOGS';
COALESCE: 'COALESCE';
COLUMN: 'COLUMN';
COLUMNS: 'COLUMNS';
COMMENT: 'COMMENT';
COMMITTED: 'COMMITTED';
CONSTRAINT: 'CONSTRAINT';
CREATE: 'CREATE';
CROSS: 'CROSS';
CURRENT: 'CURRENT';
CURRENT_DATE: 'CURRENT_DATE';
CURRENT_TIME: 'CURRENT_TIME';
CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP';
CURRENT_USER: 'CURRENT_USER';
DATA: 'DATA';
DATE: 'DATE';
DAY: 'DAY';
DEALLOCATE: 'DEALLOCATE';
DELETE: 'DELETE';
DESC: 'DESC';
DESCRIBE: 'DESCRIBE';
DISTINCT: 'DISTINCT';
DISTRIBUTED: 'DISTRIBUTED';
DROP: 'DROP';
ELSE: 'ELSE';
END: 'END';
ESCAPE: 'ESCAPE';
EXCEPT: 'EXCEPT';
EXCLUDING: 'EXCLUDING';
EXECUTE: 'EXECUTE';
EXISTS: 'EXISTS';
EXPLAIN: 'EXPLAIN';
EXTRACT: 'EXTRACT';
FALSE: 'FALSE';
FILTER: 'FILTER';
FIRST: 'FIRST';
FOLLOWING: 'FOLLOWING';
FOR: 'FOR';
FORMAT: 'FORMAT';
FROM: 'FROM';
FULL: 'FULL';
FUNCTIONS: 'FUNCTIONS';
GRANT: 'GRANT';
GRANTS: 'GRANTS';
GRAPHVIZ: 'GRAPHVIZ';
GROUP: 'GROUP';
GROUPING: 'GROUPING';
HAVING: 'HAVING';
HOUR: 'HOUR';
IF: 'IF';
IN: 'IN';
INCLUDING: 'INCLUDING';
INNER: 'INNER';
INPUT: 'INPUT';
INSERT: 'INSERT';
INTEGER: 'INTEGER';
INTERSECT: 'INTERSECT';
INTERVAL: 'INTERVAL';
INTO: 'INTO';
IS: 'IS';
JOIN: 'JOIN';
LAST: 'LAST';
LATERAL: 'LATERAL';
LEFT: 'LEFT';
LEVEL: 'LEVEL';
LIKE: 'LIKE';
LIMIT: 'LIMIT';
LOCALTIME: 'LOCALTIME';
LOCALTIMESTAMP: 'LOCALTIMESTAMP';
LOGICAL: 'LOGICAL';
MAP: 'MAP';
MINUTE: 'MINUTE';
MONTH: 'MONTH';
NATURAL: 'NATURAL';
NFC : 'NFC';
NFD : 'NFD';
NFKC : 'NFKC';
NFKD : 'NFKD';
NO: 'NO';
NORMALIZE: 'NORMALIZE';
NOT: 'NOT';
NULL: 'NULL';
NULLIF: 'NULLIF';
NULLS: 'NULLS';
ON: 'ON';
ONLY: 'ONLY';
OPTION: 'OPTION';
OR: 'OR';
ORDER: 'ORDER';
ORDINALITY: 'ORDINALITY';
OUTER: 'OUTER';
OUTPUT: 'OUTPUT';
OVER: 'OVER';
PARTITION: 'PARTITION';
PARTITIONS: 'PARTITIONS';
POSITION: 'POSITION';
PRECEDING: 'PRECEDING';
PREPARE: 'PREPARE';
PRIVILEGES: 'PRIVILEGES';
PROPERTIES: 'PROPERTIES';
PUBLIC: 'PUBLIC';
RANGE: 'RANGE';
READ: 'READ';
RECURSIVE: 'RECURSIVE';
RENAME: 'RENAME';
REPEATABLE: 'REPEATABLE';
REPLACE: 'REPLACE';
RESET: 'RESET';
RESTRICT: 'RESTRICT';
REVOKE: 'REVOKE';
RIGHT: 'RIGHT';
ROLLBACK: 'ROLLBACK';
ROLLUP: 'ROLLUP';
ROW: 'ROW';
ROWS: 'ROWS';
SCHEMA: 'SCHEMA';
SCHEMAS: 'SCHEMAS';
SECOND: 'SECOND';
SELECT: 'SELECT';
SERIALIZABLE: 'SERIALIZABLE';
SESSION: 'SESSION';
SET: 'SET';
SETS: 'SETS';
SHOW: 'SHOW';
SMALLINT: 'SMALLINT';
SOME: 'SOME';
START: 'START';
STATS: 'STATS';
SUBSTRING: 'SUBSTRING';
SYSTEM: 'SYSTEM';
TABLE: 'TABLE';
TABLES: 'TABLES';
TABLESAMPLE: 'TABLESAMPLE';
TEXT: 'TEXT';
THEN: 'THEN';
TIME: 'TIME';
TIMESTAMP: 'TIMESTAMP';
TINYINT: 'TINYINT';
TO: 'TO';
TRUE: 'TRUE';
TRY_CAST: 'TRY_CAST';
TYPE: 'TYPE';
UESCAPE: 'UESCAPE';
UNBOUNDED: 'UNBOUNDED';
UNCOMMITTED: 'UNCOMMITTED';
UNION: 'UNION';
UNNEST: 'UNNEST';
USE: 'USE';
USING: 'USING';
VALIDATE: 'VALIDATE';
VALUES: 'VALUES';
VERBOSE: 'VERBOSE';
VIEW: 'VIEW';
WHEN: 'WHEN';
WHERE: 'WHERE';
WITH: 'WITH';
WORK: 'WORK';
WRITE: 'WRITE';
YEAR: 'YEAR';
ZONE: 'ZONE';
EQ : '=';
NEQ : '<>' | '!=';
LT : '<';
LTE : '<=';
GT : '>';
GTE : '>=';
PLUS: '+';
MINUS: '-';
ASTERISK: '*';
SLASH: '/';
PERCENT: '%';
CONCAT: '||';
STRING
: '\'' ( ~'\'' | '\'\'' )* '\''
;
UNICODE_STRING
: 'U&\'' ( ~'\'' | '\'\'' )* '\''
;
// Note: we allow any character inside the binary literal and validate
// its a correct literal when the AST is being constructed. This
// allows us to provide more meaningful error messages to the user
BINARY_LITERAL
: 'X\'' (~'\'')* '\''
;
INTEGER_VALUE
: DIGIT+
;
DECIMAL_VALUE
: DIGIT+ '.' DIGIT*
| '.' DIGIT+
;
DOUBLE_VALUE
: DIGIT+ ('.' DIGIT*)? EXPONENT
| '.' DIGIT+ EXPONENT
;
IDENTIFIER
: (LETTER | '_') (LETTER | DIGIT | '_' | '@' | ':')*
;
DIGIT_IDENTIFIER
: DIGIT (LETTER | DIGIT | '_' | '@' | ':')+
;
QUOTED_IDENTIFIER
: '"' ( ~'"' | '""' )* '"'
;
BACKQUOTED_IDENTIFIER
: '`' ( ~'`' | '``' )* '`'
;
TIME_WITH_TIME_ZONE
: 'TIME' WS 'WITH' WS 'TIME' WS 'ZONE'
;
TIMESTAMP_WITH_TIME_ZONE
: 'TIMESTAMP' WS 'WITH' WS 'TIME' WS 'ZONE'
;
DOUBLE_PRECISION
: 'DOUBLE' WS 'PRECISION'
;
fragment EXPONENT
: 'E' [+-]? DIGIT+
;
fragment DIGIT
: [0-9]
;
fragment LETTER
: [A-Z]
;
SIMPLE_COMMENT
: '--' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN)
;
BRACKETED_COMMENT
: '/*' .*? '*/' -> channel(HIDDEN)
;
WS
: [ \r\n\t]+ -> channel(HIDDEN)
;
// Catch-all for anything we can't recognize.
// We use this to be able to ignore and recover all the text
// when splitting statements with DelimiterLexer
UNRECOGNIZED
: .
;