diff --git a/src/grammar/spark/SparkSql.g4 b/src/grammar/spark/SparkSql.g4
deleted file mode 100644
index 9397bdb..0000000
--- a/src/grammar/spark/SparkSql.g4
+++ /dev/null
@@ -1,1839 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * This file is an adaptation of Presto's presto-parser/src/main/antlr4/com/facebook/presto/sql/parser/SqlBase.g4 grammar.
- */
-
-grammar SparkSql;
-
-@parser::members {
-/**
-* When false, INTERSECT is given the greater precedence over the other set
-* operations (UNION, EXCEPT and MINUS) as per the SQL standard.
-*/
-public legacy_setops_precedence_enbled = false;
-/**
-* When false, a literal with an exponent would be converted into
-* double type rather than decimal type.
-*/
-public legacy_exponent_literal_as_decimal_enabled = false;
-/**
-* When true, the behavior of keywords follows ANSI SQL standard.
-*/
-public SQL_standard_keyword_behavior = false;
-}
-
-@lexer::members {
-/**
-* Verify whether current token is a valid decimal token (which contains dot).
-* Returns true if the character that follows the token is not a digit or letter or underscore.
-* Letters are matched in either case; end of input counts as a valid terminator.
-* For example:
-* For char stream "2.3", "2." is not a valid decimal token, because it is followed by digit '3'.
-* For char stream "2.3_", "2.3" is not a valid decimal token, because it is followed by '_'.
-* For char stream "2.3W", "2.3" is not a valid decimal token, because it is followed by 'W'.
-* For char stream "12.0D 34.E2+0.12 " 12.0D is a valid decimal token because it is followed
-* by a space. 34.E2 is a valid decimal token because it is followed by symbol '+'
-* which is not a digit or letter or underscore.
-*/
-isValidDecimal() {
-  let nextChar = this.fromCodePoint(this._input.LA(1));
-  return !(nextChar >= 'A' && nextChar <= 'Z' || nextChar >= 'a' && nextChar <= 'z' || nextChar >= '0' && nextChar <= '9' || nextChar == '_')
-}
-
-/**
-* This method will be called when we see '/*' and try to match it as a bracketed comment.
-* If the next character is '+', it should be parsed as hint later, and we cannot match
-* it as a bracketed comment.
-*
-* Returns true if the next character is '+' (false at end of input).
-*/
-isHint() {
-  let nextChar = this.fromCodePoint(this._input.LA(1));
-  return nextChar == '+'
-}
-
-fromCodePoint(codePoint) { // negative input (the EOF marker from LA) maps to an empty string; String.fromCodePoint would throw RangeError on it
-  return codePoint < 0 ? '' : String.fromCodePoint(codePoint);
-}
-}
-
-program
-    : singleStatement EOF
-    ;
-
-singleStatement
-    : (statement SEMICOLON? | emptyStatement)*
-    ;
-
-emptyStatement
-    : SEMICOLON
-    ;
-
-singleExpression
-    : namedExpression EOF
-    ;
-
-singleTableIdentifier
-    : tableIdentifier EOF
-    ;
-
-singleMultipartIdentifier
-    : multipartIdentifier EOF
-    ;
-
-//singleFunctionIdentifier
-//    : functionIdentifier EOF
-//    ;
-
-singleDataType
-    : dataType EOF
-    ;
-
-singleTableSchema
-    : colTypeList EOF
-    ;
-
-statement
-    : query    #statementDefault
-    | ctes? dmlStatementNoWith    #dmlStatement
-    | USE NAMESPACE? multipartIdentifier    #use
-    | CREATE namespace (IF NOT EXISTS)? multipartIdentifier
-        (commentSpec |
-         locationSpec |
-         (WITH (DBPROPERTIES | PROPERTIES) tablePropertyList))*    #createNamespace
-    | ALTER namespace multipartIdentifier
-        SET (DBPROPERTIES | PROPERTIES) tablePropertyList    #setNamespaceProperties
-    | ALTER namespace multipartIdentifier
-        SET locationSpec    #setNamespaceLocation
-    | DROP namespace (IF EXISTS)? multipartIdentifier
-        (RESTRICT | CASCADE)?    #dropNamespace
-    | SHOW (DATABASES | NAMESPACES) ((FROM | IN) multipartIdentifier)?
-        (LIKE? pattern=STRING)?    #showNamespaces
-    | createTableHeader ('(' colTypeList ')')? tableProvider
-        createTableClauses
-        (AS? query)?    #createTable
-    | createTableHeader ('(' columns=colTypeList ')')?
-        (commentSpec |
-        (PARTITIONED BY '(' partitionColumns=colTypeList ')' |
-        PARTITIONED BY partitionColumnNames=identifierList) |
-        bucketSpec |
-        skewSpec |
-        rowFormat |
-        createFileFormat |
-        locationSpec |
-        (TBLPROPERTIES tableProps=tablePropertyList))*
-        (AS? query)?    #createHiveTable
-    | CREATE TABLE (IF NOT EXISTS)? target=tableIdentifier
-        LIKE source=tableIdentifier
-        (tableProvider |
-        rowFormat |
-        createFileFormat |
-        locationSpec |
-        (TBLPROPERTIES tableProps=tablePropertyList))*    #createTableLike
-    | replaceTableHeader ('(' colTypeList ')')? tableProvider
-        createTableClauses
-        (AS? query)?    #replaceTable
-    | ANALYZE TABLE multipartIdentifier partitionSpec? COMPUTE STATISTICS
-        (identifier | FOR COLUMNS identifierSeq | FOR ALL COLUMNS)?    #analyze
-    | ALTER TABLE multipartIdentifier
-        ADD (COLUMN | COLUMNS)
-        columns=qualifiedColTypeWithPositionList    #addTableColumns
-    | ALTER TABLE multipartIdentifier
-        ADD (COLUMN | COLUMNS)
-        '(' columns=qualifiedColTypeWithPositionList ')'    #addTableColumns
-    | ALTER TABLE table=multipartIdentifier
-        RENAME COLUMN
-        from=multipartIdentifier TO to=errorCapturingIdentifier    #renameTableColumn
-    | ALTER TABLE multipartIdentifier
-        DROP (COLUMN | COLUMNS)
-        '(' columns=multipartIdentifierList ')'    #dropTableColumns
-    | ALTER TABLE multipartIdentifier
-        DROP (COLUMN | COLUMNS) columns=multipartIdentifierList    #dropTableColumns
-    | ALTER (TABLE | VIEW) from=multipartIdentifier
-        RENAME TO to=multipartIdentifier    #renameTable
-    | ALTER (TABLE | VIEW) multipartIdentifier
-        SET TBLPROPERTIES tablePropertyList    #setTableProperties
-    | ALTER (TABLE | VIEW) multipartIdentifier
-        UNSET TBLPROPERTIES (IF EXISTS)? tablePropertyList    #unsetTableProperties
-    | ALTER TABLE table=multipartIdentifier
-        (ALTER | CHANGE) COLUMN? column=multipartIdentifier
-        alterColumnAction?    #alterTableAlterColumn
-    | ALTER TABLE table=multipartIdentifier partitionSpec?
-        CHANGE COLUMN?
-        colName=multipartIdentifier colType colPosition?    #hiveChangeColumn
-    | ALTER TABLE table=multipartIdentifier partitionSpec?
-        REPLACE COLUMNS
-        '(' columns=qualifiedColTypeWithPositionList ')'    #hiveReplaceColumns
-    | ALTER TABLE multipartIdentifier (partitionSpec)?
-        SET SERDE STRING (WITH SERDEPROPERTIES tablePropertyList)?    #setTableSerDe
-    | ALTER TABLE multipartIdentifier (partitionSpec)?
-        SET SERDEPROPERTIES tablePropertyList    #setTableSerDe
-    | ALTER (TABLE | VIEW) multipartIdentifier ADD (IF NOT EXISTS)?
-        partitionSpecLocation+    #addTablePartition
-    | ALTER TABLE multipartIdentifier
-        from=partitionSpec RENAME TO to=partitionSpec    #renameTablePartition
-    | ALTER (TABLE | VIEW) multipartIdentifier
-        DROP (IF EXISTS)? partitionSpec (',' partitionSpec)* PURGE?    #dropTablePartitions
-    | ALTER TABLE multipartIdentifier
-        (partitionSpec)? SET locationSpec    #setTableLocation
-    | ALTER TABLE multipartIdentifier RECOVER PARTITIONS    #recoverPartitions
-    | DROP TABLE (IF EXISTS)? multipartIdentifier PURGE?    #dropTable
-    | DROP VIEW (IF EXISTS)? multipartIdentifier    #dropView
-    | CREATE (OR REPLACE)? (GLOBAL? TEMPORARY)?
-        VIEW (IF NOT EXISTS)? multipartIdentifier
-        identifierCommentList?
-        (commentSpec |
-         (PARTITIONED ON identifierList) |
-         (TBLPROPERTIES tablePropertyList))*
-        AS query    #createView
-    | CREATE (OR REPLACE)? GLOBAL? TEMPORARY VIEW
-        tableIdentifier ('(' colTypeList ')')? tableProvider
-        (OPTIONS tablePropertyList)?    #createTempViewUsing
-    | ALTER VIEW multipartIdentifier AS? query    #alterViewQuery
-    | CREATE (OR REPLACE)? TEMPORARY? FUNCTION (IF NOT EXISTS)?
-        multipartIdentifier AS className=STRING
-        (USING resource (',' resource)*)?    #createFunction
-    | DROP TEMPORARY? FUNCTION (IF EXISTS)? multipartIdentifier    #dropFunction
-    | EXPLAIN (LOGICAL | FORMATTED | EXTENDED | CODEGEN | COST)?
-        statement    #explain
-    | SHOW TABLES ((FROM | IN) multipartIdentifier)?
-        (LIKE? pattern=STRING)?    #showTables
-    | SHOW TABLE EXTENDED ((FROM | IN) ns=multipartIdentifier)?
-        LIKE pattern=STRING partitionSpec?    #showTable
-    | SHOW TBLPROPERTIES table=multipartIdentifier
-        ('(' key=tablePropertyKey ')')?    #showTblProperties
-    | SHOW COLUMNS (FROM | IN) table=multipartIdentifier
-        ((FROM | IN) ns=multipartIdentifier)?    #showColumns
-    | SHOW VIEWS ((FROM | IN) multipartIdentifier)?
-        (LIKE? pattern=STRING)?    #showViews
-    | SHOW PARTITIONS multipartIdentifier partitionSpec?    #showPartitions
-    | SHOW identifier? FUNCTIONS
-        (LIKE? (multipartIdentifier | pattern=STRING))?    #showFunctions
-    | SHOW CREATE TABLE multipartIdentifier (AS SERDE)?    #showCreateTable
-    | SHOW CURRENT NAMESPACE    #showCurrentNamespace
-    | (DESC | DESCRIBE) FUNCTION EXTENDED? describeFuncName    #describeFunction
-    | (DESC | DESCRIBE) namespace EXTENDED?
-        multipartIdentifier    #describeNamespace
-    | (DESC | DESCRIBE) TABLE? option=(EXTENDED | FORMATTED)?
-        multipartIdentifier partitionSpec? describeColName?    #describeRelation
-    | (DESC | DESCRIBE) QUERY? query    #describeQuery
-    | COMMENT ON namespace multipartIdentifier IS
-        comment=(STRING | NULL)    #commentNamespace
-    | COMMENT ON TABLE multipartIdentifier IS comment=(STRING | NULL)    #commentTable
-    | REFRESH TABLE multipartIdentifier    #refreshTable
-    | REFRESH FUNCTION multipartIdentifier    #refreshFunction
-    | REFRESH (STRING | .*?)    #refreshResource
-    | CACHE LAZY? TABLE multipartIdentifier
-        (OPTIONS tablePropertyList)? (AS? query)?    #cacheTable
-    | UNCACHE TABLE (IF EXISTS)? multipartIdentifier    #uncacheTable
-    | CLEAR CACHE    #clearCache
-    | LOAD DATA LOCAL? INPATH path=STRING OVERWRITE? INTO TABLE
-        multipartIdentifier partitionSpec?    #loadData
-    | TRUNCATE TABLE multipartIdentifier partitionSpec?    #truncateTable
-    | MSCK REPAIR TABLE multipartIdentifier    #repairTable
-    | op=(ADD | LIST) identifier (STRING | .*?)    #manageResource
-    | SET ROLE .*?    #failNativeCommand
-    | SET TIME ZONE interval    #setTimeZone
-    | SET TIME ZONE timezone=(STRING | LOCAL)    #setTimeZone
-    | SET TIME ZONE .*?    #setTimeZone
-    | SET configKey (EQ .*?)?    #setQuotedConfiguration
-    | SET .*?    #setConfiguration
-    | RESET configKey    #resetQuotedConfiguration
-    | RESET .*?    #resetConfiguration
-    | unsupportedHiveNativeCommands .*?    #failNativeCommand
-    ;
-
-configKey
-    : quotedIdentifier
-    ;
-
-unsupportedHiveNativeCommands
-    : kw1=CREATE kw2=ROLE
-    | kw1=DROP kw2=ROLE
-    | kw1=GRANT kw2=ROLE?
-    | kw1=REVOKE kw2=ROLE?
-    | kw1=SHOW kw2=GRANT
-    | kw1=SHOW kw2=ROLE kw3=GRANT?
-    | kw1=SHOW kw2=PRINCIPALS
-    | kw1=SHOW kw2=ROLES
-    | kw1=SHOW kw2=CURRENT kw3=ROLES
-    | kw1=EXPORT kw2=TABLE
-    | kw1=IMPORT kw2=TABLE
-    | kw1=SHOW kw2=COMPACTIONS
-    | kw1=SHOW kw2=CREATE kw3=TABLE
-    | kw1=SHOW kw2=TRANSACTIONS
-    | kw1=SHOW kw2=INDEXES
-    | kw1=SHOW kw2=LOCKS
-    | kw1=CREATE kw2=INDEX
-    | kw1=DROP kw2=INDEX
-    | kw1=ALTER kw2=INDEX
-    | kw1=LOCK kw2=TABLE
-    | kw1=LOCK kw2=DATABASE
-    | kw1=UNLOCK kw2=TABLE
-    | kw1=UNLOCK kw2=DATABASE
-    | kw1=CREATE kw2=TEMPORARY kw3=MACRO
-    | kw1=DROP kw2=TEMPORARY kw3=MACRO
-    | kw1=ALTER kw2=TABLE tableIdentifier kw3=NOT kw4=CLUSTERED
-    | kw1=ALTER kw2=TABLE tableIdentifier kw3=CLUSTERED kw4=BY
-    | kw1=ALTER kw2=TABLE tableIdentifier kw3=NOT kw4=SORTED
-    | kw1=ALTER kw2=TABLE tableIdentifier kw3=SKEWED kw4=BY
-    | kw1=ALTER kw2=TABLE tableIdentifier kw3=NOT kw4=SKEWED
-    | kw1=ALTER kw2=TABLE tableIdentifier kw3=NOT kw4=STORED kw5=AS kw6=DIRECTORIES
-    | kw1=ALTER kw2=TABLE tableIdentifier kw3=SET kw4=SKEWED kw5=LOCATION
-    | kw1=ALTER kw2=TABLE tableIdentifier kw3=EXCHANGE kw4=PARTITION
-    | kw1=ALTER kw2=TABLE tableIdentifier kw3=ARCHIVE kw4=PARTITION
-    | kw1=ALTER kw2=TABLE tableIdentifier kw3=UNARCHIVE kw4=PARTITION
-    | kw1=ALTER kw2=TABLE tableIdentifier kw3=TOUCH
-    | kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=COMPACT
-    | kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=CONCATENATE
-    | kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=SET kw4=FILEFORMAT
-    | kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=REPLACE kw4=COLUMNS
-    | kw1=START kw2=TRANSACTION
-    | kw1=COMMIT
-    | kw1=ROLLBACK
-    | kw1=DFS
-    ;
-
-createTableHeader
-    : CREATE TEMPORARY? EXTERNAL? TABLE (IF NOT EXISTS)? multipartIdentifier
-    ;
-
-replaceTableHeader
-    : (CREATE OR)? REPLACE TABLE multipartIdentifier
-    ;
-
-bucketSpec
-    : CLUSTERED BY identifierList
-      (SORTED BY orderedIdentifierList)?
-      INTO INTEGER_VALUE BUCKETS
-    ;
-
-skewSpec
-    : SKEWED BY identifierList
-      ON (constantList | nestedConstantList)
-      (STORED AS DIRECTORIES)?
-    ;
-
-locationSpec
-    : LOCATION STRING
-    ;
-
-commentSpec
-    : COMMENT STRING
-    ;
-
-query
-    : ctes? queryTerm queryOrganization
-    ;
-
-insertInto
-    : INSERT OVERWRITE TABLE? multipartIdentifier (partitionSpec (IF NOT EXISTS)?)?    #insertOverwriteTable
-    | INSERT INTO TABLE? multipartIdentifier partitionSpec? (IF NOT EXISTS)?    #insertIntoTable
-    | INSERT OVERWRITE LOCAL? DIRECTORY path=STRING rowFormat? createFileFormat?    #insertOverwriteHiveDir
-    | INSERT OVERWRITE LOCAL? DIRECTORY (path=STRING)? tableProvider (OPTIONS tablePropertyList)?    #insertOverwriteDir
-    ;
-
-partitionSpecLocation
-    : partitionSpec locationSpec?
-    ;
-
-partitionSpec
-    : PARTITION '(' partitionVal (',' partitionVal)* ')'
-    ;
-
-partitionVal
-    : identifier (EQ constant)?
-    ;
-
-namespace
-    : NAMESPACE
-    | DATABASE
-    | SCHEMA
-    ;
-
-describeFuncName
-    : qualifiedName
-    | STRING
-    | comparisonOperator
-    | arithmeticOperator
-    | predicateOperator
-    ;
-
-describeColName
-    : nameParts+=identifier ('.' nameParts+=identifier)*
-    ;
-
-ctes
-    : WITH namedQuery (',' namedQuery)*
-    ;
-
-namedQuery
-    : name=errorCapturingIdentifier (columnAliases=identifierList)? AS? '(' query ')'
-    ;
-
-tableProvider
-    : USING multipartIdentifier
-    ;
-
-createTableClauses
-    :((OPTIONS tablePropertyList) |
-     (PARTITIONED BY partitioning=transformList) |
-     bucketSpec |
-     locationSpec |
-     commentSpec |
-     (TBLPROPERTIES tableProps=tablePropertyList))*
-    ;
-
-tablePropertyList
-    : '(' tableProperty (',' tableProperty)* ')'
-    ;
-
-tableProperty
-    : key=tablePropertyKey (EQ? value=tablePropertyValue)?
-    ;
-
-tablePropertyKey
-    : identifier ('.' identifier)*
-    | STRING
-    ;
-
-tablePropertyValue
-    : INTEGER_VALUE
-    | DECIMAL_VALUE
-    | booleanValue
-    | STRING
-    ;
-
-constantList
-    : '(' constant (',' constant)* ')'
-    ;
-
-nestedConstantList
-    : '(' constantList (',' constantList)* ')'
-    ;
-
-createFileFormat
-    : STORED AS fileFormat
-    | STORED BY storageHandler
-    ;
-
-fileFormat
-    : INPUTFORMAT inFmt=STRING OUTPUTFORMAT outFmt=STRING    #tableFileFormat
-    | identifier    #genericFileFormat
-    ;
-
-storageHandler
-    : STRING (WITH SERDEPROPERTIES tablePropertyList)?
-    ;
-
-resource
-    : identifier STRING
-    ;
-
-dmlStatementNoWith
-    : insertInto queryTerm queryOrganization    #singleInsertQuery
-    | fromClause multiInsertQueryBody+    #multiInsertQuery
-    | DELETE FROM multipartIdentifier tableAlias whereClause?    #deleteFromTable
-    | UPDATE multipartIdentifier tableAlias setClause whereClause?    #updateTable
-    | MERGE INTO target=multipartIdentifier targetAlias=tableAlias
-        USING (source=multipartIdentifier |
-          '(' sourceQuery=query')') sourceAlias=tableAlias
-        ON mergeCondition=booleanExpression
-        matchedClause*
-        notMatchedClause*    #mergeIntoTable
-    ;
-
-queryOrganization
-    : (ORDER BY order+=sortItem (',' order+=sortItem)*)?
-      (CLUSTER BY clusterBy+=expression (',' clusterBy+=expression)*)?
-      (DISTRIBUTE BY distributeBy+=expression (',' distributeBy+=expression)*)?
-      (SORT BY sort+=sortItem (',' sort+=sortItem)*)?
-      windowClause?
-      (LIMIT (ALL | limit=expression))?
-    ;
-
-multiInsertQueryBody
-    : insertInto fromStatementBody
-    ;
-
-queryTerm
-    : queryPrimary    #queryTermDefault
-    | left=queryTerm {this.legacy_setops_precedence_enbled}?
-        operator=(INTERSECT | UNION | EXCEPT | SETMINUS) setQuantifier? right=queryTerm    #setOperation
-    | left=queryTerm {!this.legacy_setops_precedence_enbled}?
-        operator=INTERSECT setQuantifier? right=queryTerm    #setOperation
-    | left=queryTerm {!this.legacy_setops_precedence_enbled}?
-        operator=(UNION | EXCEPT | SETMINUS) setQuantifier? right=queryTerm    #setOperation
-    ;
-
-queryPrimary
-    : querySpecification    #queryPrimaryDefault
-    | fromStatement    #fromStmt
-    | TABLE multipartIdentifier    #table
-    | inlineTable    #inlineTableDefault1
-    | '(' query ')'    #subquery
-    ;
-
-sortItem
-    : expression ordering=(ASC | DESC)? (NULLS nullOrder=(LAST | FIRST))?
-    ;
-
-fromStatement
-    : fromClause fromStatementBody+
-    ;
-
-fromStatementBody
-    : transformClause
-      whereClause?
-      queryOrganization
-    | selectClause
-      lateralView*
-      whereClause?
-      aggregationClause?
-      havingClause?
-      windowClause?
-      queryOrganization
-    ;
-
-querySpecification
-    : transformClause
-      fromClause?
-      whereClause?    #transformQuerySpecification
-    | selectClause
-      fromClause?
-      lateralView*
-      whereClause?
-      aggregationClause?
-      havingClause?
-      windowClause?    #regularQuerySpecification
-    ;
-
-transformClause
-    : (SELECT kind=TRANSFORM '(' namedExpressionSeq ')'
-            | kind=MAP namedExpressionSeq
-            | kind=REDUCE namedExpressionSeq)
-      inRowFormat=rowFormat?
-      (RECORDWRITER recordWriter=STRING)?
-      USING script=STRING
-      (AS (identifierSeq | colTypeList | ('(' (identifierSeq | colTypeList) ')')))?
-      outRowFormat=rowFormat?
-      (RECORDREADER recordReader=STRING)?
-    ;
-
-selectClause
-    : SELECT (hints+=hint)* setQuantifier? namedExpressionSeq
-    ;
-
-setClause
-    : SET assignmentList
-    ;
-
-matchedClause
-    : WHEN MATCHED (AND matchedCond=booleanExpression)? THEN matchedAction
-    ;
-notMatchedClause
-    : WHEN NOT MATCHED (AND notMatchedCond=booleanExpression)? THEN notMatchedAction
-    ;
-
-matchedAction
-    : DELETE
-    | UPDATE SET ASTERISK
-    | UPDATE SET assignmentList
-    ;
-
-notMatchedAction
-    : INSERT ASTERISK
-    | INSERT '(' columns=multipartIdentifierList ')'
-        VALUES '(' expression (',' expression)* ')'
-    ;
-
-assignmentList
-    : assignment (',' assignment)*
-    ;
-
-assignment
-    : key=multipartIdentifier EQ value=expression
-    ;
-
-whereClause
-    : WHERE booleanExpression
-    ;
-
-havingClause
-    : HAVING booleanExpression
-    ;
-
-hint
-    : '/*+' hintStatements+=hintStatement (','? hintStatements+=hintStatement)* '*/'
-    ;
-
-hintStatement
-    : hintName=identifier
-    | hintName=identifier '(' parameters+=primaryExpression (',' parameters+=primaryExpression)* ')'
-    ;
-
-fromClause
-    : FROM relation (',' relation)* lateralView* pivotClause?
-    ;
-
-aggregationClause
-    : GROUP BY groupingExpressions+=expression (',' groupingExpressions+=expression)* (
-      WITH kind=ROLLUP
-    | WITH kind=CUBE
-    | kind=GROUPING SETS '(' groupingSet (',' groupingSet)* ')')?
-    | GROUP BY kind=GROUPING SETS '(' groupingSet (',' groupingSet)* ')'
-    ;
-
-groupingSet
-    : '(' (expression (',' expression)*)? ')'
-    | expression
-    ;
-
-pivotClause
-    : PIVOT '(' aggregates=namedExpressionSeq FOR pivotColumn IN '(' pivotValues+=pivotValue (',' pivotValues+=pivotValue)* ')' ')'
-    ;
-
-pivotColumn
-    : identifiers+=identifier
-    | '(' identifiers+=identifier (',' identifiers+=identifier)* ')'
-    ;
-
-pivotValue
-    : expression (AS? identifier)?
-    ;
-
-lateralView
-    : LATERAL VIEW (OUTER)? qualifiedName '(' (expression (',' expression)*)? ')' tblName=identifier (AS? colName+=identifier (',' colName+=identifier)*)?
-    ;
-
-setQuantifier
-    : DISTINCT
-    | ALL
-    ;
-
-relation
-    : relationPrimary joinRelation*
-    ;
-
-joinRelation
-    : (joinType) JOIN right=relationPrimary joinCriteria?
-    | NATURAL joinType JOIN right=relationPrimary
-    ;
-
-joinType
-    : INNER?
-    | CROSS
-    | LEFT OUTER?
-    | LEFT? SEMI
-    | RIGHT OUTER?
-    | FULL OUTER?
-    | LEFT? ANTI
-    ;
-
-joinCriteria
-    : ON booleanExpression
-    | USING identifierList
-    ;
-
-sample
-    : TABLESAMPLE '(' sampleMethod? ')'
-    ;
-
-sampleMethod
-    : negativeSign=MINUS? percentage=(INTEGER_VALUE | DECIMAL_VALUE) PERCENTLIT    #sampleByPercentile
-    | expression ROWS    #sampleByRows
-    | sampleType=BUCKET numerator=INTEGER_VALUE OUT OF denominator=INTEGER_VALUE
-        (ON (identifier | qualifiedName '(' ')'))?    #sampleByBucket
-    | bytes=expression    #sampleByBytes
-    ;
-
-identifierList
-    : '(' identifierSeq ')'
-    ;
-
-identifierSeq
-    : ident+=errorCapturingIdentifier (',' ident+=errorCapturingIdentifier)*
-    ;
-
-orderedIdentifierList
-    : '(' orderedIdentifier (',' orderedIdentifier)* ')'
-    ;
-
-orderedIdentifier
-    : ident=errorCapturingIdentifier ordering=(ASC | DESC)?
-    ;
-
-identifierCommentList
-    : '(' identifierComment (',' identifierComment)* ')'
-    ;
-
-identifierComment
-    : identifier commentSpec?
-    ;
-
-relationPrimary
-    : multipartIdentifier sample? tableAlias    #tableName
-    | '(' query ')' sample? tableAlias    #aliasedQuery
-    | '(' relation ')' sample? tableAlias    #aliasedRelation
-    | inlineTable    #inlineTableDefault2
-    | functionTable    #tableValuedFunction
-    ;
-
-inlineTable
-    : VALUES expression (',' expression)* tableAlias
-    ;
-
-functionTable
-    : funcName=errorCapturingIdentifier '(' (expression (',' expression)*)? ')' tableAlias
-    ;
-
-tableAlias
-    : (AS? strictIdentifier identifierList?)?
-    ;
-
-rowFormat
-    : ROW FORMAT SERDE name=STRING (WITH SERDEPROPERTIES props=tablePropertyList)?    #rowFormatSerde
-    | ROW FORMAT DELIMITED
-      (FIELDS TERMINATED BY fieldsTerminatedBy=STRING (ESCAPED BY escapedBy=STRING)?)?
-      (COLLECTION ITEMS TERMINATED BY collectionItemsTerminatedBy=STRING)?
-      (MAP KEYS TERMINATED BY keysTerminatedBy=STRING)?
-      (LINES TERMINATED BY linesSeparatedBy=STRING)?
-      (NULL DEFINED AS nullDefinedAs=STRING)?    #rowFormatDelimited
-    ;
-
-multipartIdentifierList
-    : multipartIdentifier (',' multipartIdentifier)*
-    ;
-
-multipartIdentifier
-    : parts+=errorCapturingIdentifier ('.' parts+=errorCapturingIdentifier)*
-    ;
-
-tableIdentifier
-    : (db=errorCapturingIdentifier '.')? table=errorCapturingIdentifier
-    ;
-
-//functionIdentifier
-//    : (db=errorCapturingIdentifier '.')? function=errorCapturingIdentifier
-//    ;
-
-namedExpression
-    : expression (AS? (name=errorCapturingIdentifier | identifierList))?
-    ;
-
-namedExpressionSeq
-    : namedExpression (',' namedExpression)*
-    ;
-
-transformList
-    : '(' transforms+=transform (',' transforms+=transform)* ')'
-    ;
-
-transform
-    : qualifiedName    #identityTransform
-    | transformName=identifier
-      '(' argument+=transformArgument (',' argument+=transformArgument)* ')'    #applyTransform
-    ;
-
-transformArgument
-    : qualifiedName
-    | constant
-    ;
-
-expression
-    : booleanExpression
-    ;
-
-booleanExpression
-    : NOT booleanExpression    #logicalNot
-    | EXISTS '(' query ')'    #exists
-    | valueExpression predicate?    #predicated
-    | left=booleanExpression operator=AND right=booleanExpression    #logicalBinary
-    | left=booleanExpression operator=OR right=booleanExpression    #logicalBinary
-    ;
-
-predicate
-    : NOT? kind=BETWEEN lower=valueExpression AND upper=valueExpression
-    | NOT? kind=IN '(' expression (',' expression)* ')'
-    | NOT? kind=IN '(' query ')'
-    | NOT? kind=RLIKE pattern=valueExpression
-    | NOT? kind=LIKE quantifier=(ANY | SOME | ALL) ('('')' | '(' expression (',' expression)* ')')
-    | NOT? kind=LIKE pattern=valueExpression (ESCAPE escapeChar=STRING)?
-    | IS NOT? kind=NULL
-    | IS NOT? kind=(TRUE | FALSE | UNKNOWN)
-    | IS NOT? kind=DISTINCT FROM right=valueExpression
-    ;
-
-valueExpression
-    : primaryExpression    #valueExpressionDefault
-    | operator=(MINUS | PLUS | TILDE) valueExpression    #arithmeticUnary
-    | left=valueExpression operator=(ASTERISK | SLASH | PERCENT | DIV) right=valueExpression    #arithmeticBinary
-    | left=valueExpression operator=(PLUS | MINUS | CONCAT_PIPE) right=valueExpression    #arithmeticBinary
-    | left=valueExpression operator=AMPERSAND right=valueExpression    #arithmeticBinary
-    | left=valueExpression operator=HAT right=valueExpression    #arithmeticBinary
-    | left=valueExpression operator=PIPE right=valueExpression    #arithmeticBinary
-    | left=valueExpression comparisonOperator right=valueExpression    #comparison
-    ;
-
-primaryExpression
-    : name=(CURRENT_DATE | CURRENT_TIMESTAMP)    #currentDatetime
-    | CASE whenClause+ (ELSE elseExpression=expression)? END    #searchedCase
-    | CASE value=expression whenClause+ (ELSE elseExpression=expression)? END    #simpleCase
-    | CAST '(' expression AS dataType ')'    #cast
-    | STRUCT '(' (argument+=namedExpression (',' argument+=namedExpression)*)? ')'    #struct
-    | FIRST '(' expression (IGNORE NULLS)? ')'    #first
-    | LAST '(' expression (IGNORE NULLS)? ')'    #last
-    | POSITION '(' substr=valueExpression IN str=valueExpression ')'    #position
-    | constant    #constantDefault
-    | ASTERISK    #star
-    | qualifiedName '.' ASTERISK    #star
-    | '(' namedExpression (',' namedExpression)+ ')'    #rowConstructor
-    | '(' query ')'    #subqueryExpression
-    | functionName '(' (setQuantifier? argument+=expression (',' argument+=expression)*)? ')'
-       (FILTER '(' WHERE where=booleanExpression ')')? (OVER windowSpec)?    #functionCall
-    | identifier '->' expression    #lambda
-    | '(' identifier (',' identifier)+ ')' '->' expression    #lambda
-    | value=primaryExpression '[' index=valueExpression ']'    #subscript
-    | identifier    #columnReference
-    | base=primaryExpression '.' fieldName=identifier    #dereference
-    | '(' expression ')'    #parenthesizedExpression
-    | EXTRACT '(' field=identifier FROM source=valueExpression ')'    #extract
-    | (SUBSTR | SUBSTRING) '(' str=valueExpression (FROM | ',') pos=valueExpression
-      ((FOR | ',') len=valueExpression)? ')'    #substring
-    | TRIM '(' trimOption=(BOTH | LEADING | TRAILING)? (trimStr=valueExpression)?
-       FROM srcStr=valueExpression ')'    #trim
-    | OVERLAY '(' input=valueExpression PLACING replace=valueExpression
-      FROM position=valueExpression (FOR length=valueExpression)? ')'    #overlay
-    ;
-
-constant
-    : NULL    #nullLiteral
-    | interval    #intervalLiteral
-    | identifier STRING    #typeConstructor
-    | number    #numericLiteral
-    | booleanValue    #booleanLiteral
-    | STRING+    #stringLiteral
-    ;
-
-comparisonOperator
-    : EQ | NEQ | NEQJ | LT | LTE | GT | GTE | NSEQ
-    ;
-
-arithmeticOperator
-    : PLUS | MINUS | ASTERISK | SLASH | PERCENT | DIV | TILDE | AMPERSAND | PIPE | CONCAT_PIPE | HAT
-    ;
-
-predicateOperator
-    : OR | AND | IN | NOT
-    ;
-
-booleanValue
-    : TRUE | FALSE
-    ;
-
-interval
-    : INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)?
-    ;
-
-errorCapturingMultiUnitsInterval
-    : multiUnitsInterval unitToUnitInterval?
-    ;
-
-multiUnitsInterval
-    : (intervalValue unit+=identifier)+
-    ;
-
-errorCapturingUnitToUnitInterval
-    : body=unitToUnitInterval (error1=multiUnitsInterval | error2=unitToUnitInterval)?
-    ;
-
-unitToUnitInterval
-    : value=intervalValue from=identifier TO to=identifier
-    ;
-
-intervalValue
-    : (PLUS | MINUS)? (INTEGER_VALUE | DECIMAL_VALUE)
-    | STRING
-    ;
-
-colPosition
-    : position=FIRST | position=AFTER afterCol=errorCapturingIdentifier
-    ;
-
-dataType
-    : complex=ARRAY '<' dataType '>'    #complexDataType
-    | complex=MAP '<' dataType ',' dataType '>'    #complexDataType
-    | complex=STRUCT ('<' complexColTypeList? '>' | NEQ)    #complexDataType
-    | identifier ('(' INTEGER_VALUE (',' INTEGER_VALUE)* ')')?    #primitiveDataType
-    ;
-
-qualifiedColTypeWithPositionList
-    : qualifiedColTypeWithPosition (',' qualifiedColTypeWithPosition)*
-    ;
-
-qualifiedColTypeWithPosition
-    : name=multipartIdentifier dataType (NOT NULL)? commentSpec? colPosition?
-    ;
-
-colTypeList
-    : colType (',' colType)*
-    ;
-
-colType
-    : colName=errorCapturingIdentifier dataType (NOT NULL)? commentSpec?
-    ;
-
-complexColTypeList
-    : complexColType (',' complexColType)*
-    ;
-
-complexColType
-    : identifier ':' dataType (NOT NULL)? commentSpec?
-    ;
-
-whenClause
-    : WHEN condition=expression THEN result=expression
-    ;
-
-windowClause
-    : WINDOW namedWindow (',' namedWindow)*
-    ;
-
-namedWindow
-    : name=errorCapturingIdentifier AS windowSpec
-    ;
-
-windowSpec
-    : name=errorCapturingIdentifier    #windowRef
-    | '('name=errorCapturingIdentifier')'    #windowRef
-    | '('
-      ( CLUSTER BY partition+=expression (',' partition+=expression)*
-      | ((PARTITION | DISTRIBUTE) BY partition+=expression (',' partition+=expression)*)?
-        ((ORDER | SORT) BY sortItem (',' sortItem)*)?)
-      windowFrame?
-      ')'    #windowDef
-    ;
-
-windowFrame
-    : frameType=RANGE frameStart=frameBound
-    | frameType=ROWS frameStart=frameBound
-    | frameType=RANGE BETWEEN frameStart=frameBound AND end=frameBound
-    | frameType=ROWS BETWEEN frameStart=frameBound AND end=frameBound
-    ;
-
-frameBound
-    : UNBOUNDED boundType=(PRECEDING | FOLLOWING)
-    | boundType=CURRENT ROW
-    | expression boundType=(PRECEDING | FOLLOWING)
-    ;
-
-qualifiedNameList
-    : qualifiedName (',' qualifiedName)*
-    ;
-
-functionName
-    : qualifiedName
-    | FILTER
-    | LEFT
-    | RIGHT
-    ;
-
-qualifiedName
-    : identifier ('.' identifier)*
-    ;
-
-// this rule is used for explicitly capturing wrong identifiers such as test-table, which should actually be `test-table`
-// replace identifier with errorCapturingIdentifier where the immediate follow symbol is not an expression, otherwise
-// valid expressions such as "a-b" can be recognized as an identifier
-errorCapturingIdentifier
-    : identifier errorCapturingIdentifierExtra
-    ;
-
-// extra left-factoring grammar
-errorCapturingIdentifierExtra
-    : (MINUS identifier)+    #errorIdent
-    |    #realIdent
-    ;
-
-identifier
-    : strictIdentifier
-    | {!this.SQL_standard_keyword_behavior}? strictNonReserved
-    ;
-
-strictIdentifier
-    : IDENTIFIER    #unquotedIdentifier
-    | quotedIdentifier    #quotedIdentifierAlternative
-    | {this.SQL_standard_keyword_behavior}? ansiNonReserved    #unquotedIdentifier
-    | {!this.SQL_standard_keyword_behavior}? nonReserved    #unquotedIdentifier
-    ;
-
-quotedIdentifier
-    : BACKQUOTED_IDENTIFIER
-    ;
-
-number
-    : {!this.legacy_exponent_literal_as_decimal_enabled}? MINUS? EXPONENT_VALUE    #exponentLiteral
-    | {!this.legacy_exponent_literal_as_decimal_enabled}? MINUS? DECIMAL_VALUE    #decimalLiteral
-    | {this.legacy_exponent_literal_as_decimal_enabled}? MINUS? (EXPONENT_VALUE | DECIMAL_VALUE)    #legacyDecimalLiteral
-    | MINUS? INTEGER_VALUE    #integerLiteral
-    | MINUS? BIGINT_LITERAL    #bigIntLiteral
-    | MINUS? SMALLINT_LITERAL    #smallIntLiteral
-    | MINUS? TINYINT_LITERAL    #tinyIntLiteral
-    | MINUS? DOUBLE_LITERAL    #doubleLiteral
-    | MINUS? FLOAT_LITERAL    #floatLiteral
-    | MINUS? BIGDECIMAL_LITERAL    #bigDecimalLiteral
-    ;
-
-alterColumnAction
-    : TYPE dataType
-    | commentSpec
-    | colPosition
-    | setOrDrop=(SET | DROP) NOT NULL
-    ;
-
-// When `this.SQL_standard_keyword_behavior=true`, there are 2 kinds of keywords in Spark SQL.
-// - Reserved keywords:
-//     Keywords that are reserved and can't be used as identifiers for table, view, column,
-//     function, alias, etc.
-// - Non-reserved keywords: -// Keywords that have a special meaning only in particular contexts and can be used as -// identifiers in other contexts. For example, `EXPLAIN SELECT ...` is a command, but EXPLAIN -// can be used as identifiers in other places. -// You can find the full keywords list by searching "Start of the keywords list" in this file. -// The non-reserved keywords are listed below. Keywords not in this list are reserved keywords. -ansiNonReserved -//--ANSI-NON-RESERVED-START - : ADD - | AFTER - | ALTER - | ANALYZE - | ANTI - | ARCHIVE - | ARRAY - | ASC - | AT - | BETWEEN - | BUCKET - | BUCKETS - | BY - | CACHE - | CASCADE - | CHANGE - | CLEAR - | CLUSTER - | CLUSTERED - | CODEGEN - | COLLECTION - | COLUMNS - | COMMENT - | COMMIT - | COMPACT - | COMPACTIONS - | COMPUTE - | CONCATENATE - | COST - | CUBE - | CURRENT - | DATA - | DATABASE - | DATABASES - | DBPROPERTIES - | DEFINED - | DELETE - | DELIMITED - | DESC - | DESCRIBE - | DFS - | DIRECTORIES - | DIRECTORY - | DISTRIBUTE - | DIV - | DROP - | ESCAPED - | EXCHANGE - | EXISTS - | EXPLAIN - | EXPORT - | EXTENDED - | EXTERNAL - | EXTRACT - | FIELDS - | FILEFORMAT - | FIRST - | FOLLOWING - | FORMAT - | FORMATTED - | FUNCTION - | FUNCTIONS - | GLOBAL - | GROUPING - | IF - | IGNORE - | IMPORT - | INDEX - | INDEXES - | INPATH - | INPUTFORMAT - | INSERT - | INTERVAL - | ITEMS - | KEYS - | LAST - | LATERAL - | LAZY - | LIKE - | LIMIT - | LINES - | LIST - | LOAD - | LOCAL - | LOCATION - | LOCK - | LOCKS - | LOGICAL - | MACRO - | MAP - | MATCHED - | MERGE - | MSCK - | NAMESPACE - | NAMESPACES - | NO - | NULLS - | OF - | OPTION - | OPTIONS - | OUT - | OUTPUTFORMAT - | OVER - | OVERLAY - | OVERWRITE - | PARTITION - | PARTITIONED - | PARTITIONS - | PERCENTLIT - | PIVOT - | PLACING - | POSITION - | PRECEDING - | PRINCIPALS - | PROPERTIES - | PURGE - | QUERY - | RANGE - | RECORDREADER - | RECORDWRITER - | RECOVER - | REDUCE - | REFRESH - | RENAME - | REPAIR - | REPLACE - | RESET - | RESTRICT - | REVOKE - | 
RLIKE - | ROLE - | ROLES - | ROLLBACK - | ROLLUP - | ROW - | ROWS - | SCHEMA - | SEMI - | SEPARATED - | SERDE - | SERDEPROPERTIES - | SET - | SETMINUS - | SETS - | SHOW - | SKEWED - | SORT - | SORTED - | START - | STATISTICS - | STORED - | STRATIFY - | STRUCT - | SUBSTR - | SUBSTRING - | TABLES - | TABLESAMPLE - | TBLPROPERTIES - | TEMPORARY - | TERMINATED - | TOUCH - | TRANSACTION - | TRANSACTIONS - | TRANSFORM - | TRIM - | TRUE - | TRUNCATE - | TYPE - | UNARCHIVE - | UNBOUNDED - | UNCACHE - | UNLOCK - | UNSET - | UPDATE - | USE - | VALUES - | VIEW - | VIEWS - | WINDOW - | ZONE -//--ANSI-NON-RESERVED-END - ; - -// When `SQL_standard_keyword_behavior=false`, there are 2 kinds of keywords in Spark SQL. -// - Non-reserved keywords: -// Same definition as the one when `SQL_standard_keyword_behavior=true`. -// - Strict-non-reserved keywords: -// A strict version of non-reserved keywords, which can not be used as table alias. -// You can find the full keywords list by searching "Start of the keywords list" in this file. -// The strict-non-reserved keywords are listed in `strictNonReserved`. -// The non-reserved keywords are listed in `nonReserved`. -// These 2 together contain all the keywords. 
-strictNonReserved - : ANTI - | CROSS - | EXCEPT - | FULL - | INNER - | INTERSECT - | JOIN - | LEFT - | NATURAL - | ON - | RIGHT - | SEMI - | SETMINUS - | UNION - | USING - ; - -nonReserved -//--DEFAULT-NON-RESERVED-START - : ADD - | AFTER - | ALL - | ALTER - | ANALYZE - | AND - | ANY - | ARCHIVE - | ARRAY - | AS - | ASC - | AT - | AUTHORIZATION - | BETWEEN - | BOTH - | BUCKET - | BUCKETS - | BY - | CACHE - | CASCADE - | CASE - | CAST - | CHANGE - | CHECK - | CLEAR - | CLUSTER - | CLUSTERED - | CODEGEN - | COLLATE - | COLLECTION - | COLUMN - | COLUMNS - | COMMENT - | COMMIT - | COMPACT - | COMPACTIONS - | COMPUTE - | CONCATENATE - | CONSTRAINT - | COST - | CREATE - | CUBE - | CURRENT - | CURRENT_DATE - | CURRENT_TIME - | CURRENT_TIMESTAMP - | CURRENT_USER - | DATA - | DATABASE - | DATABASES - | DBPROPERTIES - | DEFINED - | DELETE - | DELIMITED - | DESC - | DESCRIBE - | DFS - | DIRECTORIES - | DIRECTORY - | DISTINCT - | DISTRIBUTE - | DIV - | DROP - | ELSE - | END - | ESCAPE - | ESCAPED - | EXCHANGE - | EXISTS - | EXPLAIN - | EXPORT - | EXTENDED - | EXTERNAL - | EXTRACT - | FALSE - | FETCH - | FILTER - | FIELDS - | FILEFORMAT - | FIRST - | FOLLOWING - | FOR - | FOREIGN - | FORMAT - | FORMATTED - | FROM - | FUNCTION - | FUNCTIONS - | GLOBAL - | GRANT - | GROUP - | GROUPING - | HAVING - | IF - | IGNORE - | IMPORT - | IN - | INDEX - | INDEXES - | INPATH - | INPUTFORMAT - | INSERT - | INTERVAL - | INTO - | IS - | ITEMS - | KEYS - | LAST - | LATERAL - | LAZY - | LEADING - | LIKE - | LIMIT - | LINES - | LIST - | LOAD - | LOCAL - | LOCATION - | LOCK - | LOCKS - | LOGICAL - | MACRO - | MAP - | MATCHED - | MERGE - | MSCK - | NAMESPACE - | NAMESPACES - | NO - | NOT - | NULL - | NULLS - | OF - | ONLY - | OPTION - | OPTIONS - | OR - | ORDER - | OUT - | OUTER - | OUTPUTFORMAT - | OVER - | OVERLAPS - | OVERLAY - | OVERWRITE - | PARTITION - | PARTITIONED - | PARTITIONS - | PERCENTLIT - | PIVOT - | PLACING - | POSITION - | PRECEDING - | PRIMARY - | PRINCIPALS - | PROPERTIES - | 
PURGE - | QUERY - | RANGE - | RECORDREADER - | RECORDWRITER - | RECOVER - | REDUCE - | REFERENCES - | REFRESH - | RENAME - | REPAIR - | REPLACE - | RESET - | RESTRICT - | REVOKE - | RLIKE - | ROLE - | ROLES - | ROLLBACK - | ROLLUP - | ROW - | ROWS - | SCHEMA - | SELECT - | SEPARATED - | SERDE - | SERDEPROPERTIES - | SESSION_USER - | SET - | SETS - | SHOW - | SKEWED - | SOME - | SORT - | SORTED - | START - | STATISTICS - | STORED - | STRATIFY - | STRUCT - | SUBSTR - | SUBSTRING - | TABLE - | TABLES - | TABLESAMPLE - | TBLPROPERTIES - | TEMPORARY - | TERMINATED - | THEN - | TIME - | TO - | TOUCH - | TRAILING - | TRANSACTION - | TRANSACTIONS - | TRANSFORM - | TRIM - | TRUE - | TRUNCATE - | TYPE - | UNARCHIVE - | UNBOUNDED - | UNCACHE - | UNIQUE - | UNKNOWN - | UNLOCK - | UNSET - | UPDATE - | USE - | USER - | VALUES - | VIEW - | VIEWS - | WHEN - | WHERE - | WINDOW - | WITH - | ZONE -//--DEFAULT-NON-RESERVED-END - ; - -// NOTE: If you add a new token in the list below, you should update the list of keywords -// and reserved tag in `docs/sql-ref-ansi-compliance.md#sql-keywords`. 
- -//============================ -// Start of the keywords list -//============================ -//--SPARK-KEYWORD-LIST-START -ADD: 'ADD'; -AFTER: 'AFTER'; -ALL: 'ALL'; -ALTER: 'ALTER'; -ANALYZE: 'ANALYZE'; -AND: 'AND'; -ANTI: 'ANTI'; -ANY: 'ANY'; -ARCHIVE: 'ARCHIVE'; -ARRAY: 'ARRAY'; -AS: 'AS'; -ASC: 'ASC'; -AT: 'AT'; -AUTHORIZATION: 'AUTHORIZATION'; -BETWEEN: 'BETWEEN'; -BOTH: 'BOTH'; -BUCKET: 'BUCKET'; -BUCKETS: 'BUCKETS'; -BY: 'BY'; -CACHE: 'CACHE'; -CASCADE: 'CASCADE'; -CASE: 'CASE'; -CAST: 'CAST'; -CHANGE: 'CHANGE'; -CHECK: 'CHECK'; -CLEAR: 'CLEAR'; -CLUSTER: 'CLUSTER'; -CLUSTERED: 'CLUSTERED'; -CODEGEN: 'CODEGEN'; -COLLATE: 'COLLATE'; -COLLECTION: 'COLLECTION'; -COLUMN: 'COLUMN'; -COLUMNS: 'COLUMNS'; -COMMENT: 'COMMENT'; -COMMIT: 'COMMIT'; -COMPACT: 'COMPACT'; -COMPACTIONS: 'COMPACTIONS'; -COMPUTE: 'COMPUTE'; -CONCATENATE: 'CONCATENATE'; -CONSTRAINT: 'CONSTRAINT'; -COST: 'COST'; -CREATE: 'CREATE'; -CROSS: 'CROSS'; -CUBE: 'CUBE'; -CURRENT: 'CURRENT'; -CURRENT_DATE: 'CURRENT_DATE'; -CURRENT_TIME: 'CURRENT_TIME'; -CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'; -CURRENT_USER: 'CURRENT_USER'; -DATA: 'DATA'; -DATABASE: 'DATABASE'; -DATABASES: 'DATABASES' | 'SCHEMAS'; -DBPROPERTIES: 'DBPROPERTIES'; -DEFINED: 'DEFINED'; -DELETE: 'DELETE'; -DELIMITED: 'DELIMITED'; -DESC: 'DESC'; -DESCRIBE: 'DESCRIBE'; -DFS: 'DFS'; -DIRECTORIES: 'DIRECTORIES'; -DIRECTORY: 'DIRECTORY'; -DISTINCT: 'DISTINCT'; -DISTRIBUTE: 'DISTRIBUTE'; -DIV: 'DIV'; -DROP: 'DROP'; -ELSE: 'ELSE'; -END: 'END'; -ESCAPE: 'ESCAPE'; -ESCAPED: 'ESCAPED'; -EXCEPT: 'EXCEPT'; -EXCHANGE: 'EXCHANGE'; -EXISTS: 'EXISTS'; -EXPLAIN: 'EXPLAIN'; -EXPORT: 'EXPORT'; -EXTENDED: 'EXTENDED'; -EXTERNAL: 'EXTERNAL'; -EXTRACT: 'EXTRACT'; -FALSE: 'FALSE'; -FETCH: 'FETCH'; -FIELDS: 'FIELDS'; -FILTER: 'FILTER'; -FILEFORMAT: 'FILEFORMAT'; -FIRST: 'FIRST'; -FOLLOWING: 'FOLLOWING'; -FOR: 'FOR'; -FOREIGN: 'FOREIGN'; -FORMAT: 'FORMAT'; -FORMATTED: 'FORMATTED'; -FROM: 'FROM'; -FULL: 'FULL'; -FUNCTION: 'FUNCTION'; -FUNCTIONS: 'FUNCTIONS'; 
-GLOBAL: 'GLOBAL'; -GRANT: 'GRANT'; -GROUP: 'GROUP'; -GROUPING: 'GROUPING'; -HAVING: 'HAVING'; -IF: 'IF'; -IGNORE: 'IGNORE'; -IMPORT: 'IMPORT'; -IN: 'IN'; -INDEX: 'INDEX'; -INDEXES: 'INDEXES'; -INNER: 'INNER'; -INPATH: 'INPATH'; -INPUTFORMAT: 'INPUTFORMAT'; -INSERT: 'INSERT'; -INTERSECT: 'INTERSECT'; -INTERVAL: 'INTERVAL'; -INTO: 'INTO'; -IS: 'IS'; -ITEMS: 'ITEMS'; -JOIN: 'JOIN'; -KEYS: 'KEYS'; -LAST: 'LAST'; -LATERAL: 'LATERAL'; -LAZY: 'LAZY'; -LEADING: 'LEADING'; -LEFT: 'LEFT'; -LIKE: 'LIKE'; -LIMIT: 'LIMIT'; -LINES: 'LINES'; -LIST: 'LIST'; -LOAD: 'LOAD'; -LOCAL: 'LOCAL'; -LOCATION: 'LOCATION'; -LOCK: 'LOCK'; -LOCKS: 'LOCKS'; -LOGICAL: 'LOGICAL'; -MACRO: 'MACRO'; -MAP: 'MAP'; -MATCHED: 'MATCHED'; -MERGE: 'MERGE'; -MSCK: 'MSCK'; -NAMESPACE: 'NAMESPACE'; -NAMESPACES: 'NAMESPACES'; -NATURAL: 'NATURAL'; -NO: 'NO'; -NOT: 'NOT' | '!'; -NULL: 'NULL'; -NULLS: 'NULLS'; -OF: 'OF'; -ON: 'ON'; -ONLY: 'ONLY'; -OPTION: 'OPTION'; -OPTIONS: 'OPTIONS'; -OR: 'OR'; -ORDER: 'ORDER'; -OUT: 'OUT'; -OUTER: 'OUTER'; -OUTPUTFORMAT: 'OUTPUTFORMAT'; -OVER: 'OVER'; -OVERLAPS: 'OVERLAPS'; -OVERLAY: 'OVERLAY'; -OVERWRITE: 'OVERWRITE'; -PARTITION: 'PARTITION'; -PARTITIONED: 'PARTITIONED'; -PARTITIONS: 'PARTITIONS'; -PERCENTLIT: 'PERCENT'; -PIVOT: 'PIVOT'; -PLACING: 'PLACING'; -POSITION: 'POSITION'; -PRECEDING: 'PRECEDING'; -PRIMARY: 'PRIMARY'; -PRINCIPALS: 'PRINCIPALS'; -PROPERTIES: 'PROPERTIES'; -PURGE: 'PURGE'; -QUERY: 'QUERY'; -RANGE: 'RANGE'; -RECORDREADER: 'RECORDREADER'; -RECORDWRITER: 'RECORDWRITER'; -RECOVER: 'RECOVER'; -REDUCE: 'REDUCE'; -REFERENCES: 'REFERENCES'; -REFRESH: 'REFRESH'; -RENAME: 'RENAME'; -REPAIR: 'REPAIR'; -REPLACE: 'REPLACE'; -RESET: 'RESET'; -RESTRICT: 'RESTRICT'; -REVOKE: 'REVOKE'; -RIGHT: 'RIGHT'; -RLIKE: 'RLIKE' | 'REGEXP'; -ROLE: 'ROLE'; -ROLES: 'ROLES'; -ROLLBACK: 'ROLLBACK'; -ROLLUP: 'ROLLUP'; -ROW: 'ROW'; -ROWS: 'ROWS'; -SCHEMA: 'SCHEMA'; -SELECT: 'SELECT'; -SEMI: 'SEMI'; -SEPARATED: 'SEPARATED'; -SERDE: 'SERDE'; -SERDEPROPERTIES: 'SERDEPROPERTIES'; 
-SESSION_USER: 'SESSION_USER'; -SET: 'SET'; -SETMINUS: 'MINUS'; -SETS: 'SETS'; -SHOW: 'SHOW'; -SKEWED: 'SKEWED'; -SOME: 'SOME'; -SORT: 'SORT'; -SORTED: 'SORTED'; -START: 'START'; -STATISTICS: 'STATISTICS'; -STORED: 'STORED'; -STRATIFY: 'STRATIFY'; -STRUCT: 'STRUCT'; -SUBSTR: 'SUBSTR'; -SUBSTRING: 'SUBSTRING'; -TABLE: 'TABLE'; -TABLES: 'TABLES'; -TABLESAMPLE: 'TABLESAMPLE'; -TBLPROPERTIES: 'TBLPROPERTIES'; -TEMPORARY: 'TEMPORARY' | 'TEMP'; -TERMINATED: 'TERMINATED'; -THEN: 'THEN'; -TIME: 'TIME'; -TO: 'TO'; -TOUCH: 'TOUCH'; -TRAILING: 'TRAILING'; -TRANSACTION: 'TRANSACTION'; -TRANSACTIONS: 'TRANSACTIONS'; -TRANSFORM: 'TRANSFORM'; -TRIM: 'TRIM'; -TRUE: 'TRUE'; -TRUNCATE: 'TRUNCATE'; -TYPE: 'TYPE'; -UNARCHIVE: 'UNARCHIVE'; -UNBOUNDED: 'UNBOUNDED'; -UNCACHE: 'UNCACHE'; -UNION: 'UNION'; -UNIQUE: 'UNIQUE'; -UNKNOWN: 'UNKNOWN'; -UNLOCK: 'UNLOCK'; -UNSET: 'UNSET'; -UPDATE: 'UPDATE'; -USE: 'USE'; -USER: 'USER'; -USING: 'USING'; -VALUES: 'VALUES'; -VIEW: 'VIEW'; -VIEWS: 'VIEWS'; -WHEN: 'WHEN'; -WHERE: 'WHERE'; -WINDOW: 'WINDOW'; -WITH: 'WITH'; -ZONE: 'ZONE'; -//--SPARK-KEYWORD-LIST-END -//============================ -// End of the keywords list -//============================ - -EQ : '=' | '=='; -NSEQ: '<=>'; -NEQ : '<>'; -NEQJ: '!='; -LT : '<'; -LTE : '<=' | '!>'; -GT : '>'; -GTE : '>=' | '!<'; - -PLUS: '+'; -MINUS: '-'; -ASTERISK: '*'; -SLASH: '/'; -PERCENT: '%'; -TILDE: '~'; -AMPERSAND: '&'; -PIPE: '|'; -CONCAT_PIPE: '||'; -HAT: '^'; -SEMICOLON: ';'; - -STRING - : '\'' ( ~('\''|'\\') | ('\\' .) )* '\'' - | '"' ( ~('"'|'\\') | ('\\' .) )* '"' - ; - -BIGINT_LITERAL - : DIGIT+ 'L' - ; - -SMALLINT_LITERAL - : DIGIT+ 'S' - ; - -TINYINT_LITERAL - : DIGIT+ 'Y' - ; - -INTEGER_VALUE - : DIGIT+ - ; - -EXPONENT_VALUE - : DIGIT+ EXPONENT - | DECIMAL_DIGITS EXPONENT {this.isValidDecimal()}? - ; - -DECIMAL_VALUE - : DECIMAL_DIGITS {this.isValidDecimal()}? - ; - -FLOAT_LITERAL - : DIGIT+ EXPONENT? 'F' - | DECIMAL_DIGITS EXPONENT? 'F' {this.isValidDecimal()}? 
- ; - -DOUBLE_LITERAL - : DIGIT+ EXPONENT? 'D' - | DECIMAL_DIGITS EXPONENT? 'D' {this.isValidDecimal()}? - ; - -BIGDECIMAL_LITERAL - : DIGIT+ EXPONENT? 'BD' - | DECIMAL_DIGITS EXPONENT? 'BD' {this.isValidDecimal()}? - ; - -IDENTIFIER - : (LETTER | DIGIT | '_' | CUSTOM_VARS)+ - ; - -BACKQUOTED_IDENTIFIER - : '`' ( ~'`' | '``' )* '`' - ; - -CUSTOM_VARS - : '${'(IDENTIFIER)'}' - ; - -fragment DECIMAL_DIGITS - : DIGIT+ '.' DIGIT* - | '.' DIGIT+ - ; - -fragment EXPONENT - : 'E' [+-]? DIGIT+ - ; - -fragment DIGIT - : [0-9] - ; - -fragment LETTER - : [A-Z] - ; - -SIMPLE_COMMENT - : '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN) - ; - -BRACKETED_COMMENT - : '/*' {!this.isHint()}? (BRACKETED_COMMENT|.)*? '*/' -> channel(HIDDEN) - ; - -WS - : [ \r\n\t]+ -> channel(HIDDEN) - ; - -// Catch-all for anything we can't recognize. -// We use this to be able to ignore and recover all the text -// when splitting statements with DelimiterLexer -UNRECOGNIZED - : . - ; \ No newline at end of file diff --git a/src/grammar/spark/SparkSqlLexer.g4 b/src/grammar/spark/SparkSqlLexer.g4 new file mode 100644 index 0000000..8793c10 --- /dev/null +++ b/src/grammar/spark/SparkSqlLexer.g4 @@ -0,0 +1,553 @@ +// Grammar file from: https://github.com/apache/spark/blob/master/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 + +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * This file is an adaptation of Presto's presto-parser/src/main/antlr4/com/facebook/presto/sql/parser/SqlBase.g4 grammar. + */ + +lexer grammar SparkSqlLexer; + +@members { + /** + * When true, parser should throw ParseException for unclosed bracketed comment. + */ + public has_unclosed_bracketed_comment = false; + + /** + * Verify whether current token is a valid decimal token (which contains dot). + * Returns true if the token is at end of input, or the character that follows it is not a digit or letter or underscore. + * + * For example: + * For char stream "2.3", "2." is not a valid decimal token, because it is followed by digit '3'. + * For char stream "2.3_", "2.3" is not a valid decimal token, because it is followed by '_'. + * For char stream "2.3W", "2.3" is not a valid decimal token, because it is followed by 'W'. + * For char stream "12.0D 34.E2+0.12 " 12.0D is a valid decimal token because it is followed + * by a space. 34.E2 is a valid decimal token because it is followed by symbol '+' + * which is not a digit or letter or underscore. + */ + public isValidDecimal() { + const la = this._input.LA(1); // LA() yields a numeric code point, negative at end of input + if (la < 0) { + return true; + } + const nextChar = String.fromCodePoint(la); + return !(nextChar >= 'A' && nextChar <= 'Z' || nextChar >= '0' && nextChar <= '9' || + nextChar == '_'); + } + + /** + * This method will be called when we see '/*' and try to match it as a bracketed comment. + * If the next character is '+', it should be parsed as hint later, and we cannot match + * it as a bracketed comment. + * + * Returns true if the next character is '+' (false at end of input). + */ + public isHint() { + const la = this._input.LA(1); // LA() yields a numeric code point, negative at end of input + if (la < 0) { + return false; + } + const nextChar = String.fromCodePoint(la); + return nextChar == '+'; + } + + /** + * This method will be called when the character stream ends while a bracketed comment + * is still open. + * If the method is called, it means the end of the entire character stream was matched, + * and we set the flag and fail later.
+ */ + public markUnclosedComment() { + this.has_unclosed_bracketed_comment = true; + } +} + +SEMICOLON: ';'; + +LEFT_PAREN: '('; +RIGHT_PAREN: ')'; +COMMA: ','; +DOT: '.'; +LEFT_BRACKET: '['; +RIGHT_BRACKET: ']'; + +// NOTE: If you add a new token in the list below, you should update the list of keywords +// and reserved tag in `docs/sql-ref-ansi-compliance.md#sql-keywords`, and +// modify `ParserUtils.toExprAlias()` which assumes all keywords are between `ADD` and `ZONE`. + +//============================ +// Start of the keywords list +//============================ +//--SPARK-KEYWORD-LIST-START +KW_ADD: 'ADD'; +KW_AFTER: 'AFTER'; +KW_ALL: 'ALL'; +KW_ALTER: 'ALTER'; +KW_ALWAYS: 'ALWAYS'; +KW_ANALYZE: 'ANALYZE'; +KW_AND: 'AND'; +KW_ANTI: 'ANTI'; +KW_ANY: 'ANY'; +KW_ANY_VALUE: 'ANY_VALUE'; +KW_ARCHIVE: 'ARCHIVE'; +KW_ARRAY: 'ARRAY'; +KW_AS: 'AS'; +KW_ASC: 'ASC'; +KW_AT: 'AT'; +KW_AUTHORIZATION: 'AUTHORIZATION'; +KW_BETWEEN: 'BETWEEN'; +KW_BIGINT: 'BIGINT'; +KW_BINARY: 'BINARY'; +KW_BOOLEAN: 'BOOLEAN'; +KW_BOTH: 'BOTH'; +KW_BUCKET: 'BUCKET'; +KW_BUCKETS: 'BUCKETS'; +KW_BY: 'BY'; +KW_BYTE: 'BYTE'; +KW_CACHE: 'CACHE'; +KW_CASCADE: 'CASCADE'; +KW_CASE: 'CASE'; +KW_CAST: 'CAST'; +KW_CATALOG: 'CATALOG'; +KW_CATALOGS: 'CATALOGS'; +KW_CHANGE: 'CHANGE'; +KW_CHAR: 'CHAR'; +KW_CHARACTER: 'CHARACTER'; +KW_CHECK: 'CHECK'; +KW_CLEAR: 'CLEAR'; +KW_CLUSTER: 'CLUSTER'; +KW_CLUSTERED: 'CLUSTERED'; +KW_CODEGEN: 'CODEGEN'; +KW_COLLATE: 'COLLATE'; +KW_COLLECTION: 'COLLECTION'; +KW_COLUMN: 'COLUMN'; +KW_COLUMNS: 'COLUMNS'; +KW_COMMENT: 'COMMENT'; +KW_COMMIT: 'COMMIT'; +KW_COMPACT: 'COMPACT'; +KW_COMPACTIONS: 'COMPACTIONS'; +KW_COMPUTE: 'COMPUTE'; +KW_CONCATENATE: 'CONCATENATE'; +KW_CONSTRAINT: 'CONSTRAINT'; +KW_COST: 'COST'; +KW_CREATE: 'CREATE'; +KW_CROSS: 'CROSS'; +KW_CUBE: 'CUBE'; +KW_CURRENT: 'CURRENT'; +KW_CURRENT_DATE: 'CURRENT_DATE'; +KW_CURRENT_TIME: 'CURRENT_TIME'; +KW_CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'; +KW_CURRENT_USER: 'CURRENT_USER'; +KW_DAY: 'DAY'; +KW_DAYS: 'DAYS';
+KW_DAYOFYEAR: 'DAYOFYEAR'; +KW_DATA: 'DATA'; +KW_DATE: 'DATE'; +KW_DATABASE: 'DATABASE'; +KW_DATABASES: 'DATABASES'; +KW_DATEADD: 'DATEADD'; +KW_DATE_ADD: 'DATE_ADD'; +KW_DATEDIFF: 'DATEDIFF'; +KW_DATE_DIFF: 'DATE_DIFF'; +KW_DBPROPERTIES: 'DBPROPERTIES'; +KW_DEC: 'DEC'; +KW_DECIMAL: 'DECIMAL'; +KW_DECLARE: 'DECLARE'; +KW_DEFAULT: 'DEFAULT'; +KW_DEFINED: 'DEFINED'; +KW_DELETE: 'DELETE'; +KW_DELIMITED: 'DELIMITED'; +KW_DESC: 'DESC'; +KW_DESCRIBE: 'DESCRIBE'; +KW_DFS: 'DFS'; +KW_DIRECTORIES: 'DIRECTORIES'; +KW_DIRECTORY: 'DIRECTORY'; +KW_DISTINCT: 'DISTINCT'; +KW_DISTRIBUTE: 'DISTRIBUTE'; +KW_DIV: 'DIV'; +KW_DOUBLE: 'DOUBLE'; +KW_DROP: 'DROP'; +KW_ELSE: 'ELSE'; +KW_END: 'END'; +KW_ESCAPE: 'ESCAPE'; +KW_ESCAPED: 'ESCAPED'; +KW_EXCEPT: 'EXCEPT'; +KW_EXCHANGE: 'EXCHANGE'; +KW_EXCLUDE: 'EXCLUDE'; +KW_EXISTS: 'EXISTS'; +KW_EXPLAIN: 'EXPLAIN'; +KW_EXPORT: 'EXPORT'; +KW_EXTENDED: 'EXTENDED'; +KW_EXTERNAL: 'EXTERNAL'; +KW_EXTRACT: 'EXTRACT'; +KW_FALSE: 'FALSE'; +KW_FETCH: 'FETCH'; +KW_FIELDS: 'FIELDS'; +KW_FILTER: 'FILTER'; +KW_FILEFORMAT: 'FILEFORMAT'; +KW_FIRST: 'FIRST'; +KW_FLOAT: 'FLOAT'; +KW_FOLLOWING: 'FOLLOWING'; +KW_FOR: 'FOR'; +KW_FOREIGN: 'FOREIGN'; +KW_FORMAT: 'FORMAT'; +KW_FORMATTED: 'FORMATTED'; +KW_FROM: 'FROM'; +KW_FULL: 'FULL'; +KW_FUNCTION: 'FUNCTION'; +KW_FUNCTIONS: 'FUNCTIONS'; +KW_GENERATED: 'GENERATED'; +KW_GLOBAL: 'GLOBAL'; +KW_GRANT: 'GRANT'; +KW_GROUP: 'GROUP'; +KW_GROUPING: 'GROUPING'; +KW_HAVING: 'HAVING'; +KW_BINARY_HEX: 'X'; +KW_HOUR: 'HOUR'; +KW_HOURS: 'HOURS'; +KW_IDENTIFIER_KW: 'IDENTIFIER'; +KW_IF: 'IF'; +KW_IGNORE: 'IGNORE'; +KW_IMPORT: 'IMPORT'; +KW_IN: 'IN'; +KW_INCLUDE: 'INCLUDE'; +KW_INDEX: 'INDEX'; +KW_INDEXES: 'INDEXES'; +KW_INNER: 'INNER'; +KW_INPATH: 'INPATH'; +KW_INPUTFORMAT: 'INPUTFORMAT'; +KW_INSERT: 'INSERT'; +KW_INTERSECT: 'INTERSECT'; +KW_INTERVAL: 'INTERVAL'; +KW_INT: 'INT'; +KW_INTEGER: 'INTEGER'; +KW_INTO: 'INTO'; +KW_IS: 'IS'; +KW_ITEMS: 'ITEMS'; +KW_JOIN: 'JOIN'; +KW_KEYS: 'KEYS'; +KW_LAST: 'LAST'; +KW_LATERAL: 'LATERAL';
+KW_LAZY: 'LAZY'; +KW_LEADING: 'LEADING'; +KW_LEFT: 'LEFT'; +KW_LIKE: 'LIKE'; +KW_ILIKE: 'ILIKE'; +KW_LIMIT: 'LIMIT'; +KW_LINES: 'LINES'; +KW_LIST: 'LIST'; +KW_LOAD: 'LOAD'; +KW_LOCAL: 'LOCAL'; +KW_LOCATION: 'LOCATION'; +KW_LOCK: 'LOCK'; +KW_LOCKS: 'LOCKS'; +KW_LOGICAL: 'LOGICAL'; +KW_LONG: 'LONG'; +KW_MACRO: 'MACRO'; +KW_MAP: 'MAP'; +KW_MATCHED: 'MATCHED'; +KW_MERGE: 'MERGE'; +KW_MICROSECOND: 'MICROSECOND'; +KW_MICROSECONDS: 'MICROSECONDS'; +KW_MILLISECOND: 'MILLISECOND'; +KW_MILLISECONDS: 'MILLISECONDS'; +KW_MINUTE: 'MINUTE'; +KW_MINUTES: 'MINUTES'; +KW_MONTH: 'MONTH'; +KW_MONTHS: 'MONTHS'; +KW_MSCK: 'MSCK'; +KW_NAME: 'NAME'; +KW_NAMESPACE: 'NAMESPACE'; +KW_NAMESPACES: 'NAMESPACES'; +KW_NANOSECOND: 'NANOSECOND'; +KW_NANOSECONDS: 'NANOSECONDS'; +KW_NATURAL: 'NATURAL'; +KW_NO: 'NO'; +KW_NOT: 'NOT' | '!'; +KW_NULL: 'NULL'; +KW_NULLS: 'NULLS'; +KW_NUMERIC: 'NUMERIC'; +KW_OF: 'OF'; +KW_OFFSET: 'OFFSET'; +KW_ON: 'ON'; +KW_ONLY: 'ONLY'; +KW_OPTION: 'OPTION'; +KW_OPTIONS: 'OPTIONS'; +KW_OR: 'OR'; +KW_ORDER: 'ORDER'; +KW_OUT: 'OUT'; +KW_OUTER: 'OUTER'; +KW_OUTPUTFORMAT: 'OUTPUTFORMAT'; +KW_OVER: 'OVER'; +KW_OVERLAPS: 'OVERLAPS'; +KW_OVERLAY: 'OVERLAY'; +KW_OVERWRITE: 'OVERWRITE'; +KW_PARTITION: 'PARTITION'; +KW_PARTITIONED: 'PARTITIONED'; +KW_PARTITIONS: 'PARTITIONS'; +KW_PERCENTILE_CONT: 'PERCENTILE_CONT'; +KW_PERCENTILE_DISC: 'PERCENTILE_DISC'; +KW_PERCENTLIT: 'PERCENT'; +KW_PIVOT: 'PIVOT'; +KW_PLACING: 'PLACING'; +KW_POSITION: 'POSITION'; +KW_PRECEDING: 'PRECEDING'; +KW_PRIMARY: 'PRIMARY'; +KW_PRINCIPALS: 'PRINCIPALS'; +KW_PROPERTIES: 'PROPERTIES'; +KW_PURGE: 'PURGE'; +KW_QUARTER: 'QUARTER'; +KW_QUERY: 'QUERY'; +KW_RANGE: 'RANGE'; +KW_REAL: 'REAL'; +KW_RECORDREADER: 'RECORDREADER'; +KW_RECORDWRITER: 'RECORDWRITER'; +KW_RECOVER: 'RECOVER'; +KW_REDUCE: 'REDUCE'; +KW_REFERENCES: 'REFERENCES'; +KW_REFRESH: 'REFRESH'; +KW_RENAME: 'RENAME'; +KW_REPAIR: 'REPAIR'; +KW_REPEATABLE: 'REPEATABLE'; +KW_REPLACE: 'REPLACE'; +KW_RESET: 'RESET'; +KW_RESPECT: 'RESPECT'; +KW_RESTRICT:
'RESTRICT'; +KW_REVOKE: 'REVOKE'; +KW_RIGHT: 'RIGHT'; +KW_RLIKE: 'RLIKE' | 'REGEXP'; +KW_ROLE: 'ROLE'; +KW_ROLES: 'ROLES'; +KW_ROLLBACK: 'ROLLBACK'; +KW_ROLLUP: 'ROLLUP'; +KW_ROW: 'ROW'; +KW_ROWS: 'ROWS'; +KW_SECOND: 'SECOND'; +KW_SECONDS: 'SECONDS'; +KW_SCHEMA: 'SCHEMA'; +KW_SCHEMAS: 'SCHEMAS'; +KW_SELECT: 'SELECT'; +KW_SEMI: 'SEMI'; +KW_SEPARATED: 'SEPARATED'; +KW_SERDE: 'SERDE'; +KW_SERDEPROPERTIES: 'SERDEPROPERTIES'; +KW_SESSION_USER: 'SESSION_USER'; +KW_SET: 'SET'; +KW_SETMINUS: 'MINUS'; +KW_SETS: 'SETS'; +KW_SHORT: 'SHORT'; +KW_SHOW: 'SHOW'; +KW_SINGLE: 'SINGLE'; +KW_SKEWED: 'SKEWED'; +KW_SMALLINT: 'SMALLINT'; +KW_SOME: 'SOME'; +KW_SORT: 'SORT'; +KW_SORTED: 'SORTED'; +KW_SOURCE: 'SOURCE'; +KW_START: 'START'; +KW_STATISTICS: 'STATISTICS'; +KW_STORED: 'STORED'; +KW_STRATIFY: 'STRATIFY'; +KW_STRING: 'STRING'; +KW_STRUCT: 'STRUCT'; +KW_SUBSTR: 'SUBSTR'; +KW_SUBSTRING: 'SUBSTRING'; +KW_SYNC: 'SYNC'; +KW_SYSTEM_TIME: 'SYSTEM_TIME'; +KW_SYSTEM_VERSION: 'SYSTEM_VERSION'; +KW_TABLE: 'TABLE'; +KW_TABLES: 'TABLES'; +KW_TABLESAMPLE: 'TABLESAMPLE'; +KW_TARGET: 'TARGET'; +KW_TBLPROPERTIES: 'TBLPROPERTIES'; +KW_TEMPORARY: 'TEMPORARY' | 'TEMP'; +KW_TERMINATED: 'TERMINATED'; +KW_THEN: 'THEN'; +KW_TIME: 'TIME'; +KW_TIMEDIFF: 'TIMEDIFF'; +KW_TIMESTAMP: 'TIMESTAMP'; +KW_TIMESTAMP_LTZ: 'TIMESTAMP_LTZ'; +KW_TIMESTAMP_NTZ: 'TIMESTAMP_NTZ'; +KW_TIMESTAMPADD: 'TIMESTAMPADD'; +KW_TIMESTAMPDIFF: 'TIMESTAMPDIFF'; +KW_TINYINT: 'TINYINT'; +KW_TO: 'TO'; +KW_TOUCH: 'TOUCH'; +KW_TRAILING: 'TRAILING'; +KW_TRANSACTION: 'TRANSACTION'; +KW_TRANSACTIONS: 'TRANSACTIONS'; +KW_TRANSFORM: 'TRANSFORM'; +KW_TRIM: 'TRIM'; +KW_TRUE: 'TRUE'; +KW_TRUNCATE: 'TRUNCATE'; +KW_TRY_CAST: 'TRY_CAST'; +KW_TYPE: 'TYPE'; +KW_UNARCHIVE: 'UNARCHIVE'; +KW_UNBOUNDED: 'UNBOUNDED'; +KW_UNCACHE: 'UNCACHE'; +KW_UNION: 'UNION'; +KW_UNIQUE: 'UNIQUE'; +KW_UNKNOWN: 'UNKNOWN'; +KW_UNLOCK: 'UNLOCK'; +KW_UNPIVOT: 'UNPIVOT'; +KW_UNSET: 'UNSET'; +KW_UPDATE: 'UPDATE'; +KW_USE: 'USE'; +KW_USER: 'USER'; +KW_USING: 'USING'; +KW_VALUES:
'VALUES'; +KW_VARCHAR: 'VARCHAR'; +KW_VAR: 'VAR'; +KW_VARIABLE: 'VARIABLE'; +KW_VERSION: 'VERSION'; +KW_VIEW: 'VIEW'; +KW_VIEWS: 'VIEWS'; +KW_VOID: 'VOID'; +KW_WEEK: 'WEEK'; +KW_WEEKS: 'WEEKS'; +KW_WHEN: 'WHEN'; +KW_WHERE: 'WHERE'; +KW_WINDOW: 'WINDOW'; +KW_WITH: 'WITH'; +KW_WITHIN: 'WITHIN'; +KW_YEAR: 'YEAR'; +KW_YEARS: 'YEARS'; +KW_ZONE: 'ZONE'; +//--SPARK-KEYWORD-LIST-END +//============================ +// End of the keywords list +//============================ + +EQ : '=' | '=='; +NSEQ: '<=>'; +NEQ : '<>'; +NEQJ: '!='; +LT : '<'; +LTE : '<=' | '!>'; +GT : '>'; +GTE : '>=' | '!<'; + +PLUS: '+'; +MINUS: '-'; +ASTERISK: '*'; +SLASH: '/'; +PERCENT: '%'; +TILDE: '~'; +AMPERSAND: '&'; +PIPE: '|'; +CONCAT_PIPE: '||'; +HAT: '^'; +COLON: ':'; +ARROW: '->'; +FAT_ARROW : '=>'; +HENT_START: '/*+'; +HENT_END: '*/'; +QUESTION: '?'; + +STRING_LITERAL + : '\'' ( ~('\''|'\\') | ('\\' .) )* '\'' + | 'R\'' (~'\'')* '\'' + | 'R"'(~'"')* '"' + ; + +DOUBLEQUOTED_STRING + :'"' ( ~('"'|'\\') | ('\\' .) )* '"' + ; + +// NOTE: If you move a numeric literal, you should modify `ParserUtils.toExprAlias()` +// which assumes all numeric literals are between `BIGINT_LITERAL` and `BIGDECIMAL_LITERAL`. + +BIGINT_LITERAL + : DIGIT+ 'L' + ; + +SMALLINT_LITERAL + : DIGIT+ 'S' + ; + +TINYINT_LITERAL + : DIGIT+ 'Y' + ; + +INTEGER_VALUE + : DIGIT+ + ; + +EXPONENT_VALUE + : DIGIT+ EXPONENT + | DECIMAL_DIGITS EXPONENT {this.isValidDecimal()}? + ; + +DECIMAL_VALUE + : DECIMAL_DIGITS {this.isValidDecimal()}? + ; + +FLOAT_LITERAL + : DIGIT+ EXPONENT? 'F' + | DECIMAL_DIGITS EXPONENT? 'F' {this.isValidDecimal()}? + ; + +DOUBLE_LITERAL + : DIGIT+ EXPONENT? 'D' + | DECIMAL_DIGITS EXPONENT? 'D' {this.isValidDecimal()}? + ; + +BIGDECIMAL_LITERAL + : DIGIT+ EXPONENT? 'BD' + | DECIMAL_DIGITS EXPONENT? 'BD' {this.isValidDecimal()}? + ; + +IDENTIFIER + : (LETTER | DIGIT | '_')+ + ; + +BACKQUOTED_IDENTIFIER + : '`' ( ~'`' | '``' )* '`' + ; + +fragment DECIMAL_DIGITS + : DIGIT+ '.' DIGIT* + | '.'
DIGIT+ + ; + +fragment EXPONENT + : 'E' [+-]? DIGIT+ + ; + +fragment DIGIT + : [0-9] + ; + +fragment LETTER + : [A-Za-z] + ; + +SIMPLE_COMMENT + : '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN) + ; + +BRACKETED_COMMENT + : '/*' {!this.isHint()}? ( BRACKETED_COMMENT | . )*? ('*/' | {this.markUnclosedComment();} EOF) -> channel(HIDDEN) + ; + +WS + : [ \r\n\t]+ -> channel(HIDDEN) + ; + +// Catch-all for anything we can't recognize. +// We use this to be able to ignore and recover all the text +// when splitting statements with DelimiterLexer +UNRECOGNIZED + : . + ; diff --git a/src/grammar/spark/SparkSqlParser.g4 b/src/grammar/spark/SparkSqlParser.g4 new file mode 100644 index 0000000..5542bca --- /dev/null +++ b/src/grammar/spark/SparkSqlParser.g4 @@ -0,0 +1,1892 @@ +// Grammar file from: https://github.com/apache/spark/blob/master/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 + +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * This file is an adaptation of Presto's presto-parser/src/main/antlr4/com/facebook/presto/sql/parser/SqlBase.g4 grammar. + */ + +parser grammar SparkSqlParser; + +options { tokenVocab = SparkSqlLexer; } + +@members { + /** + * When false, KW_INTERSECT is given the greater precedence over the other set + * operations (KW_UNION, KW_EXCEPT and MINUS) as per the SQL standard.
+ */ + public legacy_setops_precedence_enabled = false; + + /** + * When false, a literal with an exponent would be converted into + * double type rather than decimal type. + */ + public legacy_exponent_literal_as_decimal_enabled = false; + + /** + * When true, the behavior of keywords follows ANSI SQL standard. + */ + public SQL_standard_keyword_behavior = false; + + /** + * When true, double quoted literals are identifiers rather than STRINGs. + */ + public double_quoted_identifiers = false; +} + +program + : singleStatement* EOF + ; + +singleStatement + : statement SEMICOLON ? + ; + +tableIdentifierReference: identifierReference; +viewIdentifierReference: identifierReference; +functionIdentifierReference: identifierReference; +namespaceIdentifierReference: identifierReference; + +statement + : query + | ctes? dmlStatementNoWith + | KW_USE identifierReference + | KW_USE namespace namespaceIdentifierReference + | KW_SET KW_CATALOG (identifier | stringLit) + | KW_CREATE namespace (KW_IF KW_NOT KW_EXISTS)? namespaceIdentifierReference + (commentSpec | + locationSpec | + (KW_WITH (KW_DBPROPERTIES | KW_PROPERTIES) propertyList))* + | KW_ALTER namespace namespaceIdentifierReference + KW_SET (KW_DBPROPERTIES | KW_PROPERTIES) propertyList + | KW_ALTER namespace namespaceIdentifierReference + KW_SET locationSpec + | KW_DROP namespace (KW_IF KW_EXISTS)? namespaceIdentifierReference + (KW_RESTRICT | KW_CASCADE)? + | KW_SHOW namespaces ((KW_FROM | KW_IN) multipartIdentifier)? + (KW_LIKE? pattern=stringLit)? + | createTableHeader (LEFT_PAREN createOrReplaceTableColTypeList RIGHT_PAREN)? tableProvider? + createTableClauses + (KW_AS? query)? + | KW_CREATE KW_TABLE (KW_IF KW_NOT KW_EXISTS)? target=tableIdentifier + KW_LIKE source=tableIdentifier + (tableProvider | + rowFormat | + createFileFormat | + locationSpec | + (KW_TBLPROPERTIES tableProps=propertyList))* + | replaceTableHeader (LEFT_PAREN createOrReplaceTableColTypeList RIGHT_PAREN)? tableProvider? 
+ createTableClauses + (KW_AS? query)? + | KW_ANALYZE KW_TABLE tableIdentifierReference partitionSpec? KW_COMPUTE KW_STATISTICS + (identifier | KW_FOR KW_COLUMNS identifierSeq | KW_FOR KW_ALL KW_COLUMNS)? + | KW_ANALYZE KW_TABLES ((KW_FROM | KW_IN) namespaceIdentifierReference)? KW_COMPUTE KW_STATISTICS + (identifier)? + | KW_ALTER KW_TABLE tableIdentifierReference + KW_ADD (KW_COLUMN | KW_COLUMNS) + qualifiedColTypeWithPositionList + | KW_ALTER KW_TABLE tableIdentifierReference + KW_ADD (KW_COLUMN | KW_COLUMNS) + LEFT_PAREN qualifiedColTypeWithPositionList RIGHT_PAREN + | KW_ALTER KW_TABLE table=tableIdentifierReference + KW_RENAME KW_COLUMN + multipartIdentifier KW_TO errorCapturingIdentifier + | KW_ALTER KW_TABLE tableIdentifierReference + KW_DROP (KW_COLUMN | KW_COLUMNS) (KW_IF KW_EXISTS)? + LEFT_PAREN multipartIdentifierList RIGHT_PAREN + | KW_ALTER KW_TABLE tableIdentifierReference + KW_DROP (KW_COLUMN | KW_COLUMNS) (KW_IF KW_EXISTS)? + multipartIdentifierList + | KW_ALTER (KW_TABLE tableIdentifierReference | KW_VIEW viewIdentifierReference) + KW_RENAME KW_TO multipartIdentifier + | KW_ALTER (KW_TABLE tableIdentifierReference | KW_VIEW viewIdentifierReference) + KW_SET KW_TBLPROPERTIES propertyList + | KW_ALTER (KW_TABLE tableIdentifierReference | KW_VIEW viewIdentifierReference) + KW_UNSET KW_TBLPROPERTIES (KW_IF KW_EXISTS)? propertyList + | KW_ALTER KW_TABLE table=tableIdentifierReference + (KW_ALTER | KW_CHANGE) KW_COLUMN? column=multipartIdentifier + alterColumnAction? + | KW_ALTER KW_TABLE table=tableIdentifierReference partitionSpec? + KW_CHANGE KW_COLUMN? + colName=multipartIdentifier colType colPosition? + | KW_ALTER KW_TABLE table=tableIdentifierReference partitionSpec? + KW_REPLACE KW_COLUMNS + LEFT_PAREN qualifiedColTypeWithPositionList + RIGHT_PAREN + | KW_ALTER KW_TABLE tableIdentifierReference (partitionSpec)? + KW_SET KW_SERDE stringLit (KW_WITH KW_SERDEPROPERTIES propertyList)? 
+ | KW_ALTER KW_TABLE tableIdentifierReference (partitionSpec)? + KW_SET KW_SERDEPROPERTIES propertyList + | KW_ALTER (KW_TABLE tableIdentifierReference | KW_VIEW viewIdentifierReference) KW_ADD (KW_IF KW_NOT KW_EXISTS)? + partitionSpecLocation+ + | KW_ALTER KW_TABLE tableIdentifierReference + partitionSpec KW_RENAME KW_TO partitionSpec + | KW_ALTER (KW_TABLE tableIdentifierReference | KW_VIEW viewIdentifierReference) + KW_DROP (KW_IF KW_EXISTS)? partitionSpec (COMMA partitionSpec)* KW_PURGE? + | KW_ALTER KW_TABLE tableIdentifierReference + (partitionSpec)? KW_SET locationSpec + | KW_ALTER KW_TABLE tableIdentifierReference KW_RECOVER KW_PARTITIONS + | KW_DROP KW_TABLE (KW_IF KW_EXISTS)? tableIdentifierReference KW_PURGE? + | KW_DROP KW_VIEW (KW_IF KW_EXISTS)? viewIdentifierReference + | KW_CREATE (KW_OR KW_REPLACE)? (KW_GLOBAL? KW_TEMPORARY)? + KW_VIEW (KW_IF KW_NOT KW_EXISTS)? viewIdentifierReference + identifierCommentList? + (commentSpec | + (KW_PARTITIONED KW_ON identifierList) | + (KW_TBLPROPERTIES propertyList))* + KW_AS query + | KW_CREATE (KW_OR KW_REPLACE)? KW_GLOBAL? KW_TEMPORARY KW_VIEW + tableIdentifier (LEFT_PAREN colTypeList RIGHT_PAREN)? tableProvider + (KW_OPTIONS propertyList)? + | KW_ALTER KW_VIEW viewIdentifierReference KW_AS? query + | KW_CREATE (KW_OR KW_REPLACE)? KW_TEMPORARY? KW_FUNCTION (KW_IF KW_NOT KW_EXISTS)? + functionIdentifierReference KW_AS className=stringLit + (KW_USING resource (COMMA resource)*)? + | KW_DROP KW_TEMPORARY? KW_FUNCTION (KW_IF KW_EXISTS)? functionIdentifierReference + | KW_DECLARE (KW_OR KW_REPLACE)? KW_VARIABLE? + identifierReference dataType? variableDefaultExpression? + | KW_DROP KW_TEMPORARY KW_VARIABLE (KW_IF KW_EXISTS)? identifierReference + | KW_EXPLAIN (KW_LOGICAL | KW_FORMATTED | KW_EXTENDED | KW_CODEGEN | KW_COST)? + statement + | KW_SHOW KW_TABLES ((KW_FROM | KW_IN) namespaceIdentifierReference)? + (KW_LIKE? pattern=stringLit)? 
+ | KW_SHOW KW_TABLE KW_EXTENDED ((KW_FROM | KW_IN) ns=namespaceIdentifierReference)? + KW_LIKE pattern=stringLit partitionSpec? + | KW_SHOW KW_TBLPROPERTIES table=tableIdentifierReference + (LEFT_PAREN key=propertyKey RIGHT_PAREN)? + | KW_SHOW KW_COLUMNS (KW_FROM | KW_IN) table=tableIdentifierReference + ((KW_FROM | KW_IN) multipartIdentifier)? + | KW_SHOW KW_VIEWS ((KW_FROM | KW_IN) namespaceIdentifierReference)? + (KW_LIKE? pattern=stringLit)? + | KW_SHOW KW_PARTITIONS identifierReference partitionSpec? + | KW_SHOW identifier? KW_FUNCTIONS ((KW_FROM | KW_IN) ns=namespaceIdentifierReference)? + (KW_LIKE? (legacy=multipartIdentifier | pattern=stringLit))? + | KW_SHOW KW_CREATE KW_TABLE tableIdentifierReference (KW_AS KW_SERDE)? + | KW_SHOW KW_CURRENT namespace + | KW_SHOW KW_CATALOGS (KW_LIKE? pattern=stringLit)? + | (KW_DESC | KW_DESCRIBE) KW_FUNCTION KW_EXTENDED? describeFuncName + | (KW_DESC | KW_DESCRIBE) namespace KW_EXTENDED? + namespaceIdentifierReference + | (KW_DESC | KW_DESCRIBE) KW_TABLE? option=(KW_EXTENDED | KW_FORMATTED)? + tableIdentifierReference partitionSpec? describeColName? + | (KW_DESC | KW_DESCRIBE) KW_QUERY? query + | KW_COMMENT KW_ON namespace namespaceIdentifierReference KW_IS + comment + | KW_COMMENT KW_ON KW_TABLE tableIdentifierReference KW_IS comment + | KW_REFRESH KW_TABLE tableIdentifierReference + | KW_REFRESH KW_FUNCTION functionIdentifierReference + | KW_REFRESH (stringLit | .*?) + | KW_CACHE KW_LAZY? KW_TABLE tableIdentifierReference + (KW_OPTIONS options=propertyList)? (KW_AS? query)? + | KW_UNCACHE KW_TABLE (KW_IF KW_EXISTS)? tableIdentifierReference + | KW_CLEAR KW_CACHE + | KW_LOAD KW_DATA KW_LOCAL? KW_INPATH path=stringLit KW_OVERWRITE? KW_INTO KW_TABLE + tableIdentifierReference partitionSpec? + | KW_TRUNCATE KW_TABLE tableIdentifierReference partitionSpec? + | (KW_MSCK)? KW_REPAIR KW_TABLE tableIdentifierReference + (option=(KW_ADD|KW_DROP|KW_SYNC) KW_PARTITIONS)? + | op=(KW_ADD | KW_LIST) identifier .*? + | KW_SET KW_ROLE .*? 
+ | KW_SET KW_TIME KW_ZONE interval + | KW_SET KW_TIME KW_ZONE timezone + | KW_SET KW_TIME KW_ZONE .*? + | KW_SET (KW_VARIABLE | KW_VAR) assignmentList + | KW_SET (KW_VARIABLE | KW_VAR) LEFT_PAREN multipartIdentifierList RIGHT_PAREN EQ + LEFT_PAREN query RIGHT_PAREN + | KW_SET configKey EQ configValue + | KW_SET configKey (EQ .*?)? + | KW_SET .*? EQ configValue + | KW_SET .*? + | KW_RESET configKey + | KW_RESET .*? + | KW_CREATE KW_INDEX (KW_IF KW_NOT KW_EXISTS)? identifier KW_ON KW_TABLE? + tableIdentifierReference (KW_USING indexType=identifier)? + LEFT_PAREN multipartIdentifierPropertyList RIGHT_PAREN + (KW_OPTIONS options=propertyList)? + | KW_DROP KW_INDEX (KW_IF KW_EXISTS)? identifier KW_ON KW_TABLE? tableIdentifierReference + | unsupportedHiveNativeCommands .*? + ; + +timezone + : stringLit + | KW_LOCAL + ; + +configKey + : quotedIdentifier + ; + +configValue + : backQuotedIdentifier + ; + +unsupportedHiveNativeCommands + : kw1=KW_CREATE kw2=KW_ROLE + | kw1=KW_DROP kw2=KW_ROLE + | kw1=KW_GRANT kw2=KW_ROLE? + | kw1=KW_REVOKE kw2=KW_ROLE? + | kw1=KW_SHOW kw2=KW_GRANT + | kw1=KW_SHOW kw2=KW_ROLE kw3=KW_GRANT? 
+ | kw1=KW_SHOW kw2=KW_PRINCIPALS + | kw1=KW_SHOW kw2=KW_ROLES + | kw1=KW_SHOW kw2=KW_CURRENT kw3=KW_ROLES + | kw1=KW_EXPORT kw2=KW_TABLE + | kw1=KW_IMPORT kw2=KW_TABLE + | kw1=KW_SHOW kw2=KW_COMPACTIONS + | kw1=KW_SHOW kw2=KW_CREATE kw3=KW_TABLE + | kw1=KW_SHOW kw2=KW_TRANSACTIONS + | kw1=KW_SHOW kw2=KW_INDEXES + | kw1=KW_SHOW kw2=KW_LOCKS + | kw1=KW_CREATE kw2=KW_INDEX + | kw1=KW_DROP kw2=KW_INDEX + | kw1=KW_ALTER kw2=KW_INDEX + | kw1=KW_LOCK kw2=KW_TABLE + | kw1=KW_LOCK kw2=KW_DATABASE + | kw1=KW_UNLOCK kw2=KW_TABLE + | kw1=KW_UNLOCK kw2=KW_DATABASE + | kw1=KW_CREATE kw2=KW_TEMPORARY kw3=KW_MACRO + | kw1=KW_DROP kw2=KW_TEMPORARY kw3=KW_MACRO + | kw1=KW_ALTER kw2=KW_TABLE tableIdentifier kw3=KW_NOT kw4=KW_CLUSTERED + | kw1=KW_ALTER kw2=KW_TABLE tableIdentifier kw3=KW_CLUSTERED kw4=KW_BY + | kw1=KW_ALTER kw2=KW_TABLE tableIdentifier kw3=KW_NOT kw4=KW_SORTED + | kw1=KW_ALTER kw2=KW_TABLE tableIdentifier kw3=KW_SKEWED kw4=KW_BY + | kw1=KW_ALTER kw2=KW_TABLE tableIdentifier kw3=KW_NOT kw4=KW_SKEWED + | kw1=KW_ALTER kw2=KW_TABLE tableIdentifier kw3=KW_NOT kw4=KW_STORED kw5=KW_AS kw6=KW_DIRECTORIES + | kw1=KW_ALTER kw2=KW_TABLE tableIdentifier kw3=KW_SET kw4=KW_SKEWED kw5=KW_LOCATION + | kw1=KW_ALTER kw2=KW_TABLE tableIdentifier kw3=KW_EXCHANGE kw4=KW_PARTITION + | kw1=KW_ALTER kw2=KW_TABLE tableIdentifier kw3=KW_ARCHIVE kw4=KW_PARTITION + | kw1=KW_ALTER kw2=KW_TABLE tableIdentifier kw3=KW_UNARCHIVE kw4=KW_PARTITION + | kw1=KW_ALTER kw2=KW_TABLE tableIdentifier kw3=KW_TOUCH + | kw1=KW_ALTER kw2=KW_TABLE tableIdentifier partitionSpec? kw3=KW_COMPACT + | kw1=KW_ALTER kw2=KW_TABLE tableIdentifier partitionSpec? kw3=KW_CONCATENATE + | kw1=KW_ALTER kw2=KW_TABLE tableIdentifier partitionSpec? kw3=KW_SET kw4=KW_FILEFORMAT + | kw1=KW_ALTER kw2=KW_TABLE tableIdentifier partitionSpec? kw3=KW_REPLACE kw4=KW_COLUMNS + | kw1=KW_START kw2=KW_TRANSACTION + | kw1=KW_COMMIT + | kw1=KW_ROLLBACK + | kw1=KW_DFS + ; + +createTableHeader + : KW_CREATE KW_TEMPORARY? KW_EXTERNAL? 
KW_TABLE (KW_IF KW_NOT KW_EXISTS)? tableIdentifierReference + ; + +replaceTableHeader + : (KW_CREATE KW_OR)? KW_REPLACE KW_TABLE tableIdentifierReference + ; + +bucketSpec + : KW_CLUSTERED KW_BY identifierList + (KW_SORTED KW_BY orderedIdentifierList)? + KW_INTO INTEGER_VALUE KW_BUCKETS + ; + +skewSpec + : KW_SKEWED KW_BY identifierList + KW_ON (constantList | nestedConstantList) + (KW_STORED KW_AS KW_DIRECTORIES)? + ; + +locationSpec + : KW_LOCATION stringLit + ; + +commentSpec + : KW_COMMENT stringLit + ; + +query + : ctes? queryTerm queryOrganization + ; + +insertInto + : KW_INSERT KW_OVERWRITE KW_TABLE? tableIdentifierReference (partitionSpec (KW_IF KW_NOT KW_EXISTS)?)? ((KW_BY KW_NAME) | identifierList)? + | KW_INSERT KW_INTO KW_TABLE? tableIdentifierReference partitionSpec? (KW_IF KW_NOT KW_EXISTS)? ((KW_BY KW_NAME) | identifierList)? + | KW_INSERT KW_INTO KW_TABLE? tableIdentifierReference KW_REPLACE whereClause + | KW_INSERT KW_OVERWRITE KW_LOCAL? KW_DIRECTORY path=stringLit rowFormat? createFileFormat? + | KW_INSERT KW_OVERWRITE KW_LOCAL? KW_DIRECTORY (path=stringLit)? tableProvider (KW_OPTIONS options=propertyList)? + ; + +partitionSpecLocation + : partitionSpec locationSpec? + ; + +partitionSpec + : KW_PARTITION LEFT_PAREN partitionVal (COMMA partitionVal)* RIGHT_PAREN + ; + +partitionVal + : identifier (EQ constant)? + | identifier EQ KW_DEFAULT + ; + +namespace + : KW_NAMESPACE + | KW_DATABASE + | KW_SCHEMA + ; + +namespaces + : KW_NAMESPACES + | KW_DATABASES + | KW_SCHEMAS + ; + +describeFuncName + : identifierReference + | stringLit + | comparisonOperator + | arithmeticOperator + | predicateOperator + ; + +describeColName + : nameParts+=identifier (DOT nameParts+=identifier)* + ; + +ctes + : KW_WITH namedQuery (COMMA namedQuery)* + ; + +namedQuery + : name=errorCapturingIdentifier (columnAliases=identifierList)? KW_AS? 
LEFT_PAREN query RIGHT_PAREN + ; + +tableProvider + : KW_USING multipartIdentifier + ; + +createTableClauses + :((KW_OPTIONS options=expressionPropertyList) | + (KW_PARTITIONED KW_BY partitioning=partitionFieldList) | + skewSpec | + bucketSpec | + rowFormat | + createFileFormat | + locationSpec | + commentSpec | + (KW_TBLPROPERTIES tableProps=propertyList))* + ; + +propertyList + : LEFT_PAREN property (COMMA property)* RIGHT_PAREN + ; + +property + : key=propertyKey (EQ? value=propertyValue)? + ; + +propertyKey + : identifier (DOT identifier)* + | stringLit + ; + +propertyValue + : INTEGER_VALUE + | DECIMAL_VALUE + | booleanValue + | stringLit + ; + +expressionPropertyList + : LEFT_PAREN expressionProperty (COMMA expressionProperty)* RIGHT_PAREN + ; + +expressionProperty + : key=propertyKey (EQ? value=expression)? + ; + +constantList + : LEFT_PAREN constant (COMMA constant)* RIGHT_PAREN + ; + +nestedConstantList + : LEFT_PAREN constantList (COMMA constantList)* RIGHT_PAREN + ; + +createFileFormat + : KW_STORED KW_AS fileFormat + | KW_STORED KW_BY storageHandler + ; + +fileFormat + : KW_INPUTFORMAT inFmt=stringLit KW_OUTPUTFORMAT outFmt=stringLit + | identifier + ; + +storageHandler + : stringLit (KW_WITH KW_SERDEPROPERTIES propertyList)? + ; + +resource + : identifier stringLit + ; + +dmlStatementNoWith + : insertInto query + | fromClause multiInsertQueryBody+ + | KW_DELETE KW_FROM identifierReference tableAlias whereClause? + | KW_UPDATE identifierReference tableAlias setClause whereClause? + | KW_MERGE KW_INTO target=identifierReference targetAlias=tableAlias + KW_USING (source=identifierReference | + LEFT_PAREN sourceQuery=query RIGHT_PAREN) sourceAlias=tableAlias + KW_ON mergeCondition=booleanExpression + matchedClause* + notMatchedClause* + notMatchedBySourceClause* + ; + +identifierReference + : KW_IDENTIFIER_KW LEFT_PAREN expression RIGHT_PAREN + | multipartIdentifier + ; + +queryOrganization + : (KW_ORDER KW_BY order+=sortItem (COMMA order+=sortItem)*)? 
+ (KW_CLUSTER KW_BY clusterBy+=expression (COMMA clusterBy+=expression)*)? + (KW_DISTRIBUTE KW_BY distributeBy+=expression (COMMA distributeBy+=expression)*)? + (KW_SORT KW_BY sort+=sortItem (COMMA sort+=sortItem)*)? + windowClause? + (KW_LIMIT (KW_ALL | limit=expression))? + (KW_OFFSET offset=expression)? + ; + +multiInsertQueryBody + : insertInto fromStatementBody + ; + +queryTerm + : queryPrimary + | left=queryTerm {this.legacy_setops_precedence_enabled}? + operator=(KW_INTERSECT | KW_UNION | KW_EXCEPT | KW_SETMINUS) setQuantifier? right=queryTerm + | left=queryTerm {!this.legacy_setops_precedence_enabled}? + operator=KW_INTERSECT setQuantifier? right=queryTerm + | left=queryTerm {!this.legacy_setops_precedence_enabled}? + operator=(KW_UNION | KW_EXCEPT | KW_SETMINUS) setQuantifier? right=queryTerm + ; + +queryPrimary + : querySpecification + | fromStatement + | KW_TABLE tableIdentifierReference + | inlineTable + | LEFT_PAREN query RIGHT_PAREN + ; + +sortItem + : expression ordering=(KW_ASC | KW_DESC)? (KW_NULLS nullOrder=(KW_LAST | KW_FIRST))? + ; + +fromStatement + : fromClause fromStatementBody+ + ; + +fromStatementBody + : transformClause + whereClause? + queryOrganization + | selectClause + lateralView* + whereClause? + aggregationClause? + havingClause? + windowClause? + queryOrganization + ; + +querySpecification + : transformClause + fromClause? + lateralView* + whereClause? + aggregationClause? + havingClause? + windowClause? + | selectClause + fromClause? + lateralView* + whereClause? + aggregationClause? + havingClause? + windowClause? + ; + +transformClause + : (KW_SELECT kind=KW_TRANSFORM LEFT_PAREN setQuantifier? expressionSeq RIGHT_PAREN + | kind=KW_MAP setQuantifier? expressionSeq + | kind=KW_REDUCE setQuantifier? expressionSeq) + inRowFormat=rowFormat? + (KW_RECORDWRITER recordWriter=stringLit)? + KW_USING script=stringLit + (KW_AS (identifierSeq | colTypeList | (LEFT_PAREN (identifierSeq | colTypeList) RIGHT_PAREN)))? + outRowFormat=rowFormat? 
+ (KW_RECORDREADER recordReader=stringLit)? + ; + +selectClause + : KW_SELECT (hints+=hint)* setQuantifier? namedExpressionSeq + ; + +setClause + : KW_SET assignmentList + ; + +matchedClause + : KW_WHEN KW_MATCHED (KW_AND matchedCond=booleanExpression)? KW_THEN matchedAction + ; +notMatchedClause + : KW_WHEN KW_NOT KW_MATCHED (KW_BY KW_TARGET)? (KW_AND notMatchedCond=booleanExpression)? KW_THEN notMatchedAction + ; + +notMatchedBySourceClause + : KW_WHEN KW_NOT KW_MATCHED KW_BY KW_SOURCE (KW_AND notMatchedBySourceCond=booleanExpression)? KW_THEN notMatchedBySourceAction + ; + +matchedAction + : KW_DELETE + | KW_UPDATE KW_SET ASTERISK + | KW_UPDATE KW_SET assignmentList + ; + +notMatchedAction + : KW_INSERT ASTERISK + | KW_INSERT LEFT_PAREN multipartIdentifierList RIGHT_PAREN + KW_VALUES LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN + ; + +notMatchedBySourceAction + : KW_DELETE + | KW_UPDATE KW_SET assignmentList + ; + +assignmentList + : assignment (COMMA assignment)* + ; + +assignment + : key=multipartIdentifier EQ value=expression + ; + +whereClause + : KW_WHERE booleanExpression + ; + +havingClause + : KW_HAVING booleanExpression + ; + +hint + : HENT_START hintStatements+=hintStatement (COMMA? hintStatements+=hintStatement)* HENT_END + ; + +hintStatement + : hintName=identifier + | hintName=identifier LEFT_PAREN parameters+=primaryExpression (COMMA parameters+=primaryExpression)* RIGHT_PAREN + ; + +fromClause + : KW_FROM relation (COMMA relation)* lateralView* pivotClause? unpivotClause? + ; + +temporalClause + : KW_FOR? (KW_SYSTEM_VERSION | KW_VERSION) KW_AS KW_OF version + | KW_FOR? 
(KW_SYSTEM_TIME | KW_TIMESTAMP) KW_AS KW_OF timestamp=valueExpression + ; + +aggregationClause + : KW_GROUP KW_BY groupingExpressionsWithGroupingAnalytics+=groupByClause + (COMMA groupingExpressionsWithGroupingAnalytics+=groupByClause)* + | KW_GROUP KW_BY groupingExpressions+=expression (COMMA groupingExpressions+=expression)* ( + KW_WITH kind=KW_ROLLUP + | KW_WITH kind=KW_CUBE + | kind=KW_GROUPING KW_SETS LEFT_PAREN groupingSet (COMMA groupingSet)* RIGHT_PAREN)? + ; + +groupByClause + : groupingAnalytics + | expression + ; + +groupingAnalytics + : (KW_ROLLUP | KW_CUBE) LEFT_PAREN groupingSet (COMMA groupingSet)* RIGHT_PAREN + | KW_GROUPING KW_SETS LEFT_PAREN groupingElement (COMMA groupingElement)* RIGHT_PAREN + ; + +groupingElement + : groupingAnalytics + | groupingSet + ; + +groupingSet + : LEFT_PAREN (expression (COMMA expression)*)? RIGHT_PAREN + | expression + ; + +pivotClause + : KW_PIVOT LEFT_PAREN aggregates=namedExpressionSeq KW_FOR pivotColumn KW_IN LEFT_PAREN pivotValues+=pivotValue (COMMA pivotValues+=pivotValue)* RIGHT_PAREN RIGHT_PAREN + ; + +pivotColumn + : identifiers+=identifier + | LEFT_PAREN identifiers+=identifier (COMMA identifiers+=identifier)* RIGHT_PAREN + ; + +pivotValue + : expression (KW_AS? identifier)? + ; + +unpivotClause + : KW_UNPIVOT nullOperator=unpivotNullClause? LEFT_PAREN + operator=unpivotOperator + RIGHT_PAREN (KW_AS? identifier)? 
+ ; + +unpivotNullClause + : (KW_INCLUDE | KW_EXCLUDE) KW_NULLS + ; + +unpivotOperator + : (unpivotSingleValueColumnClause | unpivotMultiValueColumnClause) + ; + +unpivotSingleValueColumnClause + : unpivotValueColumn KW_FOR unpivotNameColumn KW_IN LEFT_PAREN unpivotColumns+=unpivotColumnAndAlias (COMMA unpivotColumns+=unpivotColumnAndAlias)* RIGHT_PAREN + ; + +unpivotMultiValueColumnClause + : LEFT_PAREN unpivotValueColumns+=unpivotValueColumn (COMMA unpivotValueColumns+=unpivotValueColumn)* RIGHT_PAREN + KW_FOR unpivotNameColumn + KW_IN LEFT_PAREN unpivotColumnSets+=unpivotColumnSet (COMMA unpivotColumnSets+=unpivotColumnSet)* RIGHT_PAREN + ; + +unpivotColumnSet + : LEFT_PAREN unpivotColumns+=unpivotColumn (COMMA unpivotColumns+=unpivotColumn)* RIGHT_PAREN unpivotAlias? + ; + +unpivotValueColumn + : identifier + ; + +unpivotNameColumn + : identifier + ; + +unpivotColumnAndAlias + : unpivotColumn unpivotAlias? + ; + +unpivotColumn + : multipartIdentifier + ; + +unpivotAlias + : KW_AS? identifier + ; + +lateralView + : KW_LATERAL KW_VIEW (KW_OUTER)? qualifiedName LEFT_PAREN (expression (COMMA expression)*)? RIGHT_PAREN tblName=identifier (KW_AS? colName+=identifier (COMMA colName+=identifier)*)? + ; + +setQuantifier + : KW_DISTINCT + | KW_ALL + ; + +relation + : KW_LATERAL? relationPrimary relationExtension* + ; + +relationExtension + : joinRelation + | pivotClause + | unpivotClause + ; + +joinRelation + : (joinType) KW_JOIN KW_LATERAL? right=relationPrimary joinCriteria? + | KW_NATURAL joinType KW_JOIN KW_LATERAL? right=relationPrimary + ; + +joinType + : KW_INNER? + | KW_CROSS + | KW_LEFT KW_OUTER? + | KW_LEFT? KW_SEMI + | KW_RIGHT KW_OUTER? + | KW_FULL KW_OUTER? + | KW_LEFT? KW_ANTI + ; + +joinCriteria + : KW_ON booleanExpression + | KW_USING identifierList + ; + +sample + : KW_TABLESAMPLE LEFT_PAREN sampleMethod? RIGHT_PAREN (KW_REPEATABLE LEFT_PAREN seed=INTEGER_VALUE RIGHT_PAREN)? + ; + +sampleMethod + : negativeSign=MINUS? 
percentage=(INTEGER_VALUE | DECIMAL_VALUE) KW_PERCENTLIT + | expression KW_ROWS + | sampleType=KW_BUCKET numerator=INTEGER_VALUE KW_OUT KW_OF denominator=INTEGER_VALUE + (KW_ON (identifier | qualifiedName LEFT_PAREN RIGHT_PAREN))? + | bytes=expression + ; + +identifierList + : LEFT_PAREN identifierSeq RIGHT_PAREN + ; + +identifierSeq + : ident+=errorCapturingIdentifier (COMMA ident+=errorCapturingIdentifier)* + ; + +orderedIdentifierList + : LEFT_PAREN orderedIdentifier (COMMA orderedIdentifier)* RIGHT_PAREN + ; + +orderedIdentifier + : ident=errorCapturingIdentifier ordering=(KW_ASC | KW_DESC)? + ; + +identifierCommentList + : LEFT_PAREN identifierComment (COMMA identifierComment)* RIGHT_PAREN + ; + +identifierComment + : identifier commentSpec? + ; + +relationPrimary + : identifierReference temporalClause? + sample? tableAlias + | LEFT_PAREN query RIGHT_PAREN sample? tableAlias + | LEFT_PAREN relation RIGHT_PAREN sample? tableAlias + | inlineTable + | functionTable + ; + +inlineTable + : KW_VALUES expression (COMMA expression)* tableAlias + ; + +functionTableSubqueryArgument + : KW_TABLE tableIdentifierReference tableArgumentPartitioning? + | KW_TABLE LEFT_PAREN tableIdentifierReference RIGHT_PAREN tableArgumentPartitioning? + | KW_TABLE LEFT_PAREN query RIGHT_PAREN tableArgumentPartitioning? + ; + +tableArgumentPartitioning + : ((KW_WITH KW_SINGLE KW_PARTITION) + | ((KW_PARTITION | KW_DISTRIBUTE) KW_BY + (((LEFT_PAREN partition+=expression (COMMA partition+=expression)* RIGHT_PAREN)) + | partition+=expression))) + ((KW_ORDER | KW_SORT) KW_BY + (((LEFT_PAREN sortItem (COMMA sortItem)* RIGHT_PAREN) + | sortItem)))? 
+ ; + +functionTableNamedArgumentExpression + : key=identifier FAT_ARROW table=functionTableSubqueryArgument + ; + +functionTableReferenceArgument + : functionTableSubqueryArgument + | functionTableNamedArgumentExpression + ; + +functionTableArgument + : functionTableReferenceArgument + | functionArgument + ; + +functionTable + : funcName=functionName LEFT_PAREN + (functionTableArgument (COMMA functionTableArgument)*)? + RIGHT_PAREN tableAlias + ; + +tableAlias + : (KW_AS? strictIdentifier identifierList?)? + ; + +rowFormat + : KW_ROW KW_FORMAT KW_SERDE name=stringLit (KW_WITH KW_SERDEPROPERTIES props=propertyList)? + | KW_ROW KW_FORMAT KW_DELIMITED + (KW_FIELDS KW_TERMINATED KW_BY fieldsTerminatedBy=stringLit (KW_ESCAPED KW_BY escapedBy=stringLit)?)? + (KW_COLLECTION KW_ITEMS KW_TERMINATED KW_BY collectionItemsTerminatedBy=stringLit)? + (KW_MAP KW_KEYS KW_TERMINATED KW_BY keysTerminatedBy=stringLit)? + (KW_LINES KW_TERMINATED KW_BY linesSeparatedBy=stringLit)? + (KW_NULL KW_DEFINED KW_AS nullDefinedAs=stringLit)? + ; + +multipartIdentifierList + : multipartIdentifier (COMMA multipartIdentifier)* + ; + +multipartIdentifier + : parts+=errorCapturingIdentifier (DOT parts+=errorCapturingIdentifier)* + ; + +multipartIdentifierPropertyList + : multipartIdentifierProperty (COMMA multipartIdentifierProperty)* + ; + +multipartIdentifierProperty + : multipartIdentifier (KW_OPTIONS options=propertyList)? + ; + +tableIdentifier + : (db=errorCapturingIdentifier DOT)? table=errorCapturingIdentifier + ; + +functionIdentifier + : (db=errorCapturingIdentifier DOT)? function=errorCapturingIdentifier + ; + +namedExpression + : expression (KW_AS? (name=errorCapturingIdentifier | identifierList))? 
+ ; + +namedExpressionSeq + : namedExpression (COMMA namedExpression)* + ; + +partitionFieldList + : LEFT_PAREN fields+=partitionField (COMMA fields+=partitionField)* RIGHT_PAREN + ; + +partitionField + : transform + | colType + ; + +transform + : qualifiedName + | transformName=identifier + LEFT_PAREN transformArgument (COMMA transformArgument)* RIGHT_PAREN + ; + +transformArgument + : qualifiedName + | constant + ; + +expression + : booleanExpression + ; + +namedArgumentExpression + : key=identifier FAT_ARROW value=expression + ; + +functionArgument + : expression + | namedArgumentExpression + ; + +expressionSeq + : expression (COMMA expression)* + ; + +booleanExpression + : KW_NOT booleanExpression + | KW_EXISTS LEFT_PAREN query RIGHT_PAREN + | valueExpression predicate? + | left=booleanExpression operator=KW_AND right=booleanExpression + | left=booleanExpression operator=KW_OR right=booleanExpression + ; + +predicate + : KW_NOT? kind=KW_BETWEEN lower=valueExpression KW_AND upper=valueExpression + | KW_NOT? kind=KW_IN LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN + | KW_NOT? kind=KW_IN LEFT_PAREN query RIGHT_PAREN + | KW_NOT? kind=KW_RLIKE pattern=valueExpression + | KW_NOT? kind=(KW_LIKE | KW_ILIKE) quantifier=(KW_ANY | KW_SOME | KW_ALL) (LEFT_PAREN RIGHT_PAREN | LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN) + | KW_NOT? kind=(KW_LIKE | KW_ILIKE) pattern=valueExpression (KW_ESCAPE escapeChar=stringLit)? + | KW_IS KW_NOT? kind=KW_NULL + | KW_IS KW_NOT? kind=(KW_TRUE | KW_FALSE | KW_UNKNOWN) + | KW_IS KW_NOT? 
kind=KW_DISTINCT KW_FROM right=valueExpression + ; + +valueExpression + : primaryExpression + | operator=(MINUS | PLUS | TILDE) valueExpression + | left=valueExpression operator=(ASTERISK | SLASH | PERCENT | KW_DIV) right=valueExpression + | left=valueExpression operator=(PLUS | MINUS | CONCAT_PIPE) right=valueExpression + | left=valueExpression operator=AMPERSAND right=valueExpression + | left=valueExpression operator=HAT right=valueExpression + | left=valueExpression operator=PIPE right=valueExpression + | left=valueExpression comparisonOperator right=valueExpression + ; + +datetimeUnit + : KW_YEAR | KW_QUARTER | KW_MONTH + | KW_WEEK | KW_DAY | KW_DAYOFYEAR + | KW_HOUR | KW_MINUTE | KW_SECOND | KW_MILLISECOND | KW_MICROSECOND + ; + +primaryExpression + : name=(KW_CURRENT_DATE | KW_CURRENT_TIMESTAMP | KW_CURRENT_USER | KW_USER | KW_SESSION_USER) + | name=(KW_TIMESTAMPADD | KW_DATEADD | KW_DATE_ADD) LEFT_PAREN (unit=datetimeUnit | invalidUnit=stringLit) COMMA unitsAmount=valueExpression COMMA timestamp=valueExpression RIGHT_PAREN + | name=(KW_TIMESTAMPDIFF | KW_DATEDIFF | KW_DATE_DIFF | KW_TIMEDIFF) LEFT_PAREN (unit=datetimeUnit | invalidUnit=stringLit) COMMA startTimestamp=valueExpression COMMA endTimestamp=valueExpression RIGHT_PAREN + | KW_CASE whenClause+ (KW_ELSE elseExpression=expression)? KW_END + | KW_CASE expression whenClause+ (KW_ELSE elseExpression=expression)? KW_END + | name=(KW_CAST | KW_TRY_CAST) LEFT_PAREN expression KW_AS dataType RIGHT_PAREN + | KW_STRUCT LEFT_PAREN (namedExpression (COMMA namedExpression)*)? RIGHT_PAREN + | KW_FIRST LEFT_PAREN expression (KW_IGNORE KW_NULLS)? RIGHT_PAREN + | KW_ANY_VALUE LEFT_PAREN expression (KW_IGNORE KW_NULLS)? RIGHT_PAREN + | KW_LAST LEFT_PAREN expression (KW_IGNORE KW_NULLS)? 
RIGHT_PAREN + | KW_POSITION LEFT_PAREN substr=valueExpression KW_IN str=valueExpression RIGHT_PAREN + | constant + | ASTERISK + | qualifiedName DOT ASTERISK + | LEFT_PAREN namedExpression (COMMA namedExpression)+ RIGHT_PAREN + | LEFT_PAREN query RIGHT_PAREN + | KW_IDENTIFIER_KW LEFT_PAREN expression RIGHT_PAREN + | functionName LEFT_PAREN (setQuantifier? functionArgument + (COMMA functionArgument)*)? RIGHT_PAREN + (KW_FILTER LEFT_PAREN KW_WHERE where=booleanExpression RIGHT_PAREN)? + (nullsOption=(KW_IGNORE | KW_RESPECT) KW_NULLS)? ( KW_OVER windowSpec)? + | identifier ARROW expression + | LEFT_PAREN identifier (COMMA identifier)+ RIGHT_PAREN ARROW expression + | value=primaryExpression LEFT_BRACKET index=valueExpression RIGHT_BRACKET + | identifier + | base=primaryExpression DOT fieldName=identifier + | LEFT_PAREN expression RIGHT_PAREN + | KW_EXTRACT LEFT_PAREN field=identifier KW_FROM source=valueExpression RIGHT_PAREN + | (KW_SUBSTR | KW_SUBSTRING) LEFT_PAREN str=valueExpression (KW_FROM | COMMA) pos=valueExpression + ((KW_FOR | COMMA) len=valueExpression)? RIGHT_PAREN + | KW_TRIM LEFT_PAREN trimOption=(KW_BOTH | KW_LEADING | KW_TRAILING)? (trimStr=valueExpression)? + KW_FROM srcStr=valueExpression RIGHT_PAREN + | KW_OVERLAY LEFT_PAREN input=valueExpression KW_PLACING replace=valueExpression + KW_FROM position=valueExpression (KW_FOR length=valueExpression)? RIGHT_PAREN + | name=(KW_PERCENTILE_CONT | KW_PERCENTILE_DISC) LEFT_PAREN percentage=valueExpression RIGHT_PAREN + KW_WITHIN KW_GROUP LEFT_PAREN KW_ORDER KW_BY sortItem RIGHT_PAREN + (KW_FILTER LEFT_PAREN KW_WHERE where=booleanExpression RIGHT_PAREN)? ( KW_OVER windowSpec)? 
+ ; + +literalType + : KW_DATE + | KW_TIMESTAMP | KW_TIMESTAMP_LTZ | KW_TIMESTAMP_NTZ + | KW_INTERVAL + | KW_BINARY_HEX + | unsupportedType=identifier + ; + +constant + : KW_NULL + | QUESTION + | COLON identifier + | interval + | literalType stringLit + | number + | booleanValue + | stringLit+ + ; + +comparisonOperator + : EQ | NEQ | NEQJ | LT | LTE | GT | GTE | NSEQ + ; + +arithmeticOperator + : PLUS | MINUS | ASTERISK | SLASH | PERCENT | KW_DIV | TILDE | AMPERSAND | PIPE | CONCAT_PIPE | HAT + ; + +predicateOperator + : KW_OR | KW_AND | KW_IN | KW_NOT + ; + +booleanValue + : KW_TRUE | KW_FALSE + ; + +interval + : KW_INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval) + ; + +errorCapturingMultiUnitsInterval + : body=multiUnitsInterval unitToUnitInterval? + ; + +multiUnitsInterval + : (intervalValue unit+=unitInMultiUnits)+ + ; + +errorCapturingUnitToUnitInterval + : body=unitToUnitInterval (error1=multiUnitsInterval | error2=unitToUnitInterval)? + ; + +unitToUnitInterval + : value=intervalValue unitInUnitToUnit KW_TO unitInUnitToUnit + ; + +intervalValue + : (PLUS | MINUS)? 
+    (INTEGER_VALUE | DECIMAL_VALUE | stringLit)
+    ;
+
+// Time-unit keyword tokens, singular and plural, from KW_NANOSECOND(S) up to KW_YEAR(S).
+unitInMultiUnits
+    : KW_NANOSECOND | KW_NANOSECONDS | KW_MICROSECOND | KW_MICROSECONDS | KW_MILLISECOND | KW_MILLISECONDS
+    | KW_SECOND | KW_SECONDS | KW_MINUTE | KW_MINUTES | KW_HOUR | KW_HOURS | KW_DAY | KW_DAYS | KW_WEEK | KW_WEEKS
+    | KW_MONTH | KW_MONTHS | KW_YEAR | KW_YEARS
+    ;
+
+// Singular-only unit tokens: a strict subset of unitInMultiUnits (no sub-second units, no week, no plurals).
+unitInUnitToUnit
+    : KW_SECOND | KW_MINUTE | KW_HOUR | KW_DAY | KW_MONTH | KW_YEAR
+    ;
+
+// Column placement: KW_FIRST, or KW_AFTER followed by the reference column (label afterCol).
+// The matched keyword token is exposed through the `position` label in both alternatives.
+colPosition
+    : position=KW_FIRST | position=KW_AFTER afterCol=errorCapturingIdentifier
+    ;
+
+// Type-name keywords. The last alternative accepts any identifier and labels it
+// `unsupportedType`, so a type name that is not in this list still parses.
+// NOTE(review): presumably such names are rejected later by the consumer of the parse tree - confirm.
+type
+    : KW_BOOLEAN
+    | KW_TINYINT | KW_BYTE
+    | KW_SMALLINT | KW_SHORT
+    | KW_INT | KW_INTEGER
+    | KW_BIGINT | KW_LONG
+    | KW_FLOAT | KW_REAL
+    | KW_DOUBLE
+    | KW_DATE
+    | KW_TIMESTAMP | KW_TIMESTAMP_NTZ | KW_TIMESTAMP_LTZ
+    | KW_STRING
+    | KW_CHARACTER | KW_CHAR
+    | KW_VARCHAR
+    | KW_BINARY
+    | KW_DECIMAL | KW_DEC | KW_NUMERIC
+    | KW_VOID
+    | KW_INTERVAL
+    | KW_ARRAY | KW_STRUCT | KW_MAP
+    | unsupportedType=identifier
+    ;
+
+// A full data type: a parameterised complex type (array / map / struct), an interval type with an
+// optional "TO" end field, or a `type` optionally followed by a parenthesised, comma-separated
+// list of INTEGER_VALUE arguments.
+dataType
+    : complex=KW_ARRAY LT dataType GT
+    | complex=KW_MAP LT dataType COMMA dataType GT
+    // NOTE(review): the NEQ branch presumably covers an empty `struct<>` whose `<>` is lexed as a
+    // single NEQ token - confirm against the lexer grammar.
+    | complex=KW_STRUCT (LT complexColTypeList? GT | NEQ)
+    | KW_INTERVAL (KW_YEAR | KW_MONTH) (KW_TO KW_MONTH)?
+    | KW_INTERVAL (KW_DAY | KW_HOUR | KW_MINUTE | KW_SECOND)
+      (KW_TO (KW_HOUR | KW_MINUTE | KW_SECOND))?
+    | type (LEFT_PAREN INTEGER_VALUE
+      (COMMA INTEGER_VALUE)* RIGHT_PAREN)?
+    ;
+
+// One or more comma-separated qualifiedColTypeWithPosition entries.
+qualifiedColTypeWithPositionList
+    : qualifiedColTypeWithPosition (COMMA qualifiedColTypeWithPosition)*
+    ;
+
+// A multipart column name, its data type, then any number of descriptors in any order.
+qualifiedColTypeWithPosition
+    : name=multipartIdentifier dataType colDefinitionDescriptorWithPosition*
+    ;
+
+// Descriptors allowed after a positioned column: NOT NULL, default value, comment, or position.
+colDefinitionDescriptorWithPosition
+    : KW_NOT KW_NULL
+    | defaultExpression
+    | commentSpec
+    | colPosition
+    ;
+
+// KW_DEFAULT followed by an expression.
+defaultExpression
+    : KW_DEFAULT expression
+    ;
+
+// Same shape as defaultExpression, but EQ is also accepted as the introducer.
+variableDefaultExpression
+    : (KW_DEFAULT | EQ) expression
+    ;
+
+// One or more comma-separated colType entries.
+colTypeList
+    : colType (COMMA colType)*
+    ;
+
+// Column name, data type, then optional NOT NULL and optional comment, in that fixed order.
+colType
+    : colName=errorCapturingIdentifier dataType (KW_NOT KW_NULL)? commentSpec?
+    ;
+
+// One or more comma-separated createOrReplaceTableColType entries.
+createOrReplaceTableColTypeList
+    : createOrReplaceTableColType (COMMA createOrReplaceTableColType)*
+    ;
+
+// Column name, data type, then any number of colDefinitionOption entries in any order.
+createOrReplaceTableColType
+    : colName=errorCapturingIdentifier dataType colDefinitionOption*
+    ;
+
+// Like colDefinitionDescriptorWithPosition, but with generationExpression instead of colPosition.
+colDefinitionOption
+    : KW_NOT KW_NULL
+    | defaultExpression
+    | generationExpression
+    | commentSpec
+    ;
+
+// GENERATED ALWAYS AS ( expression ); the parentheses are mandatory.
+generationExpression
+    : KW_GENERATED KW_ALWAYS KW_AS LEFT_PAREN expression RIGHT_PAREN
+    ;
+
+// One or more comma-separated complexColType entries (the field list of a struct in dataType).
+complexColTypeList
+    : complexColType (COMMA complexColType)*
+    ;
+
+// A field: name, optional COLON separator, data type, optional NOT NULL, optional comment.
+complexColType
+    : identifier COLON? dataType (KW_NOT KW_NULL)? commentSpec?
+    ;
+
+// WHEN <condition> THEN <result>; both expressions are labelled for the generated context.
+whenClause
+    : KW_WHEN condition=expression KW_THEN result=expression
+    ;
+
+// WINDOW followed by one or more comma-separated named windows.
+windowClause
+    : KW_WINDOW namedWindow (COMMA namedWindow)*
+    ;
+
+// <name> AS <windowSpec>.
+namedWindow
+    : name=errorCapturingIdentifier KW_AS windowSpec
+    ;
+
+// A window specification: a bare window name, a parenthesised window name, or an inline
+// definition. The inline form takes either CLUSTER BY, or an optional PARTITION/DISTRIBUTE BY
+// followed by an optional ORDER/SORT BY, and then an optional frame.
+windowSpec
+    : name=errorCapturingIdentifier
+    | LEFT_PAREN name=errorCapturingIdentifier RIGHT_PAREN
+    | LEFT_PAREN
+      ( KW_CLUSTER KW_BY partition+=expression (COMMA partition+=expression)*
+      | ((KW_PARTITION | KW_DISTRIBUTE) KW_BY partition+=expression (COMMA partition+=expression)*)?
+        ((KW_ORDER | KW_SORT) KW_BY sortItem (COMMA sortItem)*)?)
+      windowFrame?
+      RIGHT_PAREN
+    ;
+
+// RANGE or ROWS frame, given either as a single start bound or as BETWEEN start AND end.
+windowFrame
+    : frameType=KW_RANGE start=frameBound
+    | frameType=KW_ROWS start=frameBound
+    | frameType=KW_RANGE KW_BETWEEN start=frameBound KW_AND end=frameBound
+    | frameType=KW_ROWS KW_BETWEEN start=frameBound KW_AND end=frameBound
+    ;
+
+// A frame boundary: UNBOUNDED PRECEDING/FOLLOWING, CURRENT ROW, or <expression> PRECEDING/FOLLOWING.
+frameBound
+    : KW_UNBOUNDED boundType=(KW_PRECEDING | KW_FOLLOWING)
+    | boundType=KW_CURRENT KW_ROW
+    | expression boundType=(KW_PRECEDING | KW_FOLLOWING)
+    ;
+
+// One or more comma-separated qualified names.
+qualifiedNameList
+    : qualifiedName (COMMA qualifiedName)*
+    ;
+
+// A function name. KW_FILTER, KW_LEFT and KW_RIGHT are listed explicitly: none of them is in
+// ansiNonReserved, so when SQL_standard_keyword_behavior is true they are not reachable
+// through qualifiedName -> identifier.
+functionName
+    : KW_IDENTIFIER_KW LEFT_PAREN expression RIGHT_PAREN
+    | qualifiedName
+    | KW_FILTER
+    | KW_LEFT
+    | KW_RIGHT
+    ;
+
+// One or more identifiers separated by DOT.
+qualifiedName
+    : identifier (DOT identifier)*
+    ;
+
+// this rule is used for explicitly capturing wrong identifiers such as test-table, which should actually be `test-table`
+// replace identifier with errorCapturingIdentifier where the immediate follow symbol is not an expression, otherwise
+// valid expressions such as "a-b" can be recognized as an identifier
+errorCapturingIdentifier
+    : identifier errorCapturingIdentifierExtra
+    ;
+
+// extra left-factoring grammar
+// First alternative: one or more "MINUS identifier" suffixes (the erroneous unquoted form).
+// Second alternative: empty, i.e. a plain identifier with nothing following.
+errorCapturingIdentifierExtra
+    : (MINUS identifier)+
+    |
+    ;
+
+// strictIdentifier, plus the strictNonReserved keywords when SQL_standard_keyword_behavior is false.
+identifier
+    : strictIdentifier
+    | {!this.SQL_standard_keyword_behavior}? strictNonReserved
+    ;
+
+// A plain or quoted identifier, or a non-reserved keyword. Which keyword set applies is chosen
+// by the two complementary predicates on SQL_standard_keyword_behavior.
+strictIdentifier
+    : IDENTIFIER
+    | quotedIdentifier
+    | {this.SQL_standard_keyword_behavior}? ansiNonReserved
+    | {!this.SQL_standard_keyword_behavior}? nonReserved
+    ;
+
+// A back-quoted identifier, or a DOUBLEQUOTED_STRING when double_quoted_identifiers is true.
+// stringLit carries the negated predicate, so a given DOUBLEQUOTED_STRING matches only one of
+// the two rules. NOTE(review): double_quoted_identifiers is declared outside this chunk.
+quotedIdentifier
+    : BACKQUOTED_IDENTIFIER
+    | {this.double_quoted_identifiers}? DOUBLEQUOTED_STRING
+    ;
+
+// Back-quoted identifiers only; no predicate and no double-quoted form.
+backQuotedIdentifier
+    : BACKQUOTED_IDENTIFIER
+    ;
+
+// A numeric literal with optional leading MINUS. EXPONENT_VALUE and DECIMAL_VALUE get separate
+// alternatives when legacy_exponent_literal_as_decimal_enabled is false and share a single
+// alternative when it is true; the remaining alternatives are unconditional.
+number
+    : {!this.legacy_exponent_literal_as_decimal_enabled}? MINUS? EXPONENT_VALUE
+    | {!this.legacy_exponent_literal_as_decimal_enabled}? MINUS? DECIMAL_VALUE
+    | {this.legacy_exponent_literal_as_decimal_enabled}? MINUS? (EXPONENT_VALUE | DECIMAL_VALUE)
+    | MINUS? INTEGER_VALUE
+    | MINUS? BIGINT_LITERAL
+    | MINUS? SMALLINT_LITERAL
+    | MINUS? TINYINT_LITERAL
+    | MINUS? DOUBLE_LITERAL
+    | MINUS? FLOAT_LITERAL
+    | MINUS? BIGDECIMAL_LITERAL
+    ;
+
+// The change applied to a column: new type, comment, position, SET/DROP NOT NULL,
+// SET DEFAULT <expression>, or DROP DEFAULT.
+alterColumnAction
+    : KW_TYPE dataType
+    | commentSpec
+    | colPosition
+    | setOrDrop=(KW_SET | KW_DROP) KW_NOT KW_NULL
+    | KW_SET defaultExpression
+    | dropDefault=KW_DROP KW_DEFAULT
+    ;
+
+// A string literal. DOUBLEQUOTED_STRING counts as a string only when double_quoted_identifiers
+// is false (quotedIdentifier takes it otherwise).
+stringLit
+    : STRING_LITERAL
+    | {!this.double_quoted_identifiers}? DOUBLEQUOTED_STRING
+    ;
+
+// Comment text: a string literal or KW_NULL.
+comment
+    : stringLit
+    | KW_NULL
+    ;
+
+// A version given as an INTEGER_VALUE or as a string literal.
+version
+    : INTEGER_VALUE
+    | stringLit
+    ;
+
+// When `SQL_standard_keyword_behavior=true`, there are 2 kinds of keywords in Spark SQL.
+// - Reserved keywords:
+//     Keywords that are reserved and can't be used as identifiers for table, view, column,
+//     function, alias, etc.
+// - Non-reserved keywords:
+//     Keywords that have a special meaning only in particular contexts and can be used as
+//     identifiers in other contexts. For example, `EXPLAIN SELECT ...` is a command, but EXPLAIN
+//     can be used as identifiers in other places.
+// You can find the full keywords list by searching "Start of the keywords list" in this file.
+// The non-reserved keywords are listed below. Keywords not in this list are reserved keywords.
+// Keywords usable as identifiers when SQL_standard_keyword_behavior is true
+// (referenced from strictIdentifier under that predicate).
+// The START/END marker comments below are kept byte-for-byte.
+ansiNonReserved
+//--ANSI-NON-RESERVED-START
+    : KW_ADD
+    | KW_AFTER
+    | KW_ALTER
+    | KW_ALWAYS
+    | KW_ANALYZE
+    | KW_ANTI
+    | KW_ANY_VALUE
+    | KW_ARCHIVE
+    | KW_ARRAY
+    | KW_ASC
+    | KW_AT
+    | KW_BETWEEN
+    | KW_BIGINT
+    | KW_BINARY
+    | KW_BINARY_HEX
+    | KW_BOOLEAN
+    | KW_BUCKET
+    | KW_BUCKETS
+    | KW_BY
+    | KW_BYTE
+    | KW_CACHE
+    | KW_CASCADE
+    | KW_CATALOG
+    | KW_CATALOGS
+    | KW_CHANGE
+    | KW_CHAR
+    | KW_CHARACTER
+    | KW_CLEAR
+    | KW_CLUSTER
+    | KW_CLUSTERED
+    | KW_CODEGEN
+    | KW_COLLECTION
+    | KW_COLUMNS
+    | KW_COMMENT
+    | KW_COMMIT
+    | KW_COMPACT
+    | KW_COMPACTIONS
+    | KW_COMPUTE
+    | KW_CONCATENATE
+    | KW_COST
+    | KW_CUBE
+    | KW_CURRENT
+    | KW_DATA
+    | KW_DATABASE
+    | KW_DATABASES
+    | KW_DATE
+    | KW_DATEADD
+    | KW_DATE_ADD
+    | KW_DATEDIFF
+    | KW_DATE_DIFF
+    | KW_DAY
+    | KW_DAYS
+    | KW_DAYOFYEAR
+    | KW_DBPROPERTIES
+    | KW_DEC
+    | KW_DECIMAL
+    | KW_DECLARE
+    | KW_DEFAULT
+    | KW_DEFINED
+    | KW_DELETE
+    | KW_DELIMITED
+    | KW_DESC
+    | KW_DESCRIBE
+    | KW_DFS
+    | KW_DIRECTORIES
+    | KW_DIRECTORY
+    | KW_DISTRIBUTE
+    | KW_DIV
+    | KW_DOUBLE
+    | KW_DROP
+    | KW_ESCAPED
+    | KW_EXCHANGE
+    | KW_EXCLUDE
+    | KW_EXISTS
+    | KW_EXPLAIN
+    | KW_EXPORT
+    | KW_EXTENDED
+    | KW_EXTERNAL
+    | KW_EXTRACT
+    | KW_FIELDS
+    | KW_FILEFORMAT
+    | KW_FIRST
+    | KW_FLOAT
+    | KW_FOLLOWING
+    | KW_FORMAT
+    | KW_FORMATTED
+    | KW_FUNCTION
+    | KW_FUNCTIONS
+    | KW_GENERATED
+    | KW_GLOBAL
+    | KW_GROUPING
+    | KW_HOUR
+    | KW_HOURS
+    | KW_IDENTIFIER_KW
+    | KW_IF
+    | KW_IGNORE
+    | KW_IMPORT
+    | KW_INCLUDE
+    | KW_INDEX
+    | KW_INDEXES
+    | KW_INPATH
+    | KW_INPUTFORMAT
+    | KW_INSERT
+    | KW_INT
+    | KW_INTEGER
+    | KW_INTERVAL
+    | KW_ITEMS
+    | KW_KEYS
+    | KW_LAST
+    | KW_LAZY
+    | KW_LIKE
+    | KW_ILIKE
+    | KW_LIMIT
+    | KW_LINES
+    | KW_LIST
+    | KW_LOAD
+    | KW_LOCAL
+    | KW_LOCATION
+    | KW_LOCK
+    | KW_LOCKS
+    | KW_LOGICAL
+    | KW_LONG
+    | KW_MACRO
+    | KW_MAP
+    | KW_MATCHED
+    | KW_MERGE
+    | KW_MICROSECOND
+    | KW_MICROSECONDS
+    | KW_MILLISECOND
+    | KW_MILLISECONDS
+    | KW_MINUTE
+    | KW_MINUTES
+    | KW_MONTH
+    | KW_MONTHS
+    | KW_MSCK
+    | KW_NAME
+    | KW_NAMESPACE
+    | KW_NAMESPACES
+    | KW_NANOSECOND
+    | KW_NANOSECONDS
+    | KW_NO
+    | KW_NULLS
+    | KW_NUMERIC
+    | KW_OF
+    | KW_OPTION
+    | KW_OPTIONS
+    | KW_OUT
+    | KW_OUTPUTFORMAT
+    | KW_OVER
+    | KW_OVERLAY
+    | KW_OVERWRITE
+    | KW_PARTITION
+    | KW_PARTITIONED
+    | KW_PARTITIONS
+    | KW_PERCENTLIT
+    | KW_PIVOT
+    | KW_PLACING
+    | KW_POSITION
+    | KW_PRECEDING
+    | KW_PRINCIPALS
+    | KW_PROPERTIES
+    | KW_PURGE
+    | KW_QUARTER
+    | KW_QUERY
+    | KW_RANGE
+    | KW_REAL
+    | KW_RECORDREADER
+    | KW_RECORDWRITER
+    | KW_RECOVER
+    | KW_REDUCE
+    | KW_REFRESH
+    | KW_RENAME
+    | KW_REPAIR
+    | KW_REPEATABLE
+    | KW_REPLACE
+    | KW_RESET
+    | KW_RESPECT
+    | KW_RESTRICT
+    | KW_REVOKE
+    | KW_RLIKE
+    | KW_ROLE
+    | KW_ROLES
+    | KW_ROLLBACK
+    | KW_ROLLUP
+    | KW_ROW
+    | KW_ROWS
+    | KW_SCHEMA
+    | KW_SCHEMAS
+    | KW_SECOND
+    | KW_SECONDS
+    | KW_SEMI
+    | KW_SEPARATED
+    | KW_SERDE
+    | KW_SERDEPROPERTIES
+    | KW_SET
+    | KW_SETMINUS
+    | KW_SETS
+    | KW_SHORT
+    | KW_SHOW
+    | KW_SINGLE
+    | KW_SKEWED
+    | KW_SMALLINT
+    | KW_SORT
+    | KW_SORTED
+    | KW_SOURCE
+    | KW_START
+    | KW_STATISTICS
+    | KW_STORED
+    | KW_STRATIFY
+    | KW_STRING
+    | KW_STRUCT
+    | KW_SUBSTR
+    | KW_SUBSTRING
+    | KW_SYNC
+    | KW_SYSTEM_TIME
+    | KW_SYSTEM_VERSION
+    | KW_TABLES
+    | KW_TABLESAMPLE
+    | KW_TARGET
+    | KW_TBLPROPERTIES
+    | KW_TEMPORARY
+    | KW_TERMINATED
+    | KW_TIMEDIFF
+    | KW_TIMESTAMP
+    | KW_TIMESTAMP_LTZ
+    | KW_TIMESTAMP_NTZ
+    | KW_TIMESTAMPADD
+    | KW_TIMESTAMPDIFF
+    | KW_TINYINT
+    | KW_TOUCH
+    | KW_TRANSACTION
+    | KW_TRANSACTIONS
+    | KW_TRANSFORM
+    | KW_TRIM
+    | KW_TRUE
+    | KW_TRUNCATE
+    | KW_TRY_CAST
+    | KW_TYPE
+    | KW_UNARCHIVE
+    | KW_UNBOUNDED
+    | KW_UNCACHE
+    | KW_UNLOCK
+    | KW_UNPIVOT
+    | KW_UNSET
+    | KW_UPDATE
+    | KW_USE
+    | KW_VALUES
+    | KW_VARCHAR
+    | KW_VAR
+    | KW_VARIABLE
+    | KW_VERSION
+    | KW_VIEW
+    | KW_VIEWS
+    | KW_VOID
+    | KW_WEEK
+    | KW_WEEKS
+    | KW_WINDOW
+    | KW_YEAR
+    | KW_YEARS
+    | KW_ZONE
+//--ANSI-NON-RESERVED-END
+    ;
+
+// When `SQL_standard_keyword_behavior=false`, there are 2 kinds of keywords in Spark SQL.
+// - Non-reserved keywords:
+//     Same definition as the one when `SQL_standard_keyword_behavior=true`.
+// - Strict-non-reserved keywords:
+//     A strict version of non-reserved keywords, which can not be used as table alias.
+// You can find the full keywords list by searching "Start of the keywords list" in this file.
+// The strict-non-reserved keywords are listed in `strictNonReserved`.
+// The non-reserved keywords are listed in `nonReserved`.
+// These 2 together contain all the keywords.
+//
+// strictNonReserved is reachable only through `identifier`, and only when
+// SQL_standard_keyword_behavior is false; `strictIdentifier` never references it.
+strictNonReserved
+    : KW_ANTI
+    | KW_CROSS
+    | KW_EXCEPT
+    | KW_FULL
+    | KW_INNER
+    | KW_INTERSECT
+    | KW_JOIN
+    | KW_LATERAL
+    | KW_LEFT
+    | KW_NATURAL
+    | KW_ON
+    | KW_RIGHT
+    | KW_SEMI
+    | KW_SETMINUS
+    | KW_UNION
+    | KW_USING
+    ;
+
+// Keywords usable as identifiers when SQL_standard_keyword_behavior is false
+// (referenced from strictIdentifier under the negated predicate).
+// Each keyword must appear exactly once: a repeated alternative makes the rule ambiguous for
+// that token. KW_LONG is listed once, after KW_LOGICAL, matching its place in ansiNonReserved.
+// The START/END marker comments below are kept byte-for-byte.
+nonReserved
+//--DEFAULT-NON-RESERVED-START
+    : KW_ADD
+    | KW_AFTER
+    | KW_ALL
+    | KW_ALTER
+    | KW_ALWAYS
+    | KW_ANALYZE
+    | KW_AND
+    | KW_ANY
+    | KW_ANY_VALUE
+    | KW_ARCHIVE
+    | KW_ARRAY
+    | KW_AS
+    | KW_ASC
+    | KW_AT
+    | KW_AUTHORIZATION
+    | KW_BETWEEN
+    | KW_BIGINT
+    | KW_BINARY
+    | KW_BINARY_HEX
+    | KW_BOOLEAN
+    | KW_BOTH
+    | KW_BUCKET
+    | KW_BUCKETS
+    | KW_BY
+    | KW_BYTE
+    | KW_CACHE
+    | KW_CASCADE
+    | KW_CASE
+    | KW_CAST
+    | KW_CATALOG
+    | KW_CATALOGS
+    | KW_CHANGE
+    | KW_CHAR
+    | KW_CHARACTER
+    | KW_CHECK
+    | KW_CLEAR
+    | KW_CLUSTER
+    | KW_CLUSTERED
+    | KW_CODEGEN
+    | KW_COLLATE
+    | KW_COLLECTION
+    | KW_COLUMN
+    | KW_COLUMNS
+    | KW_COMMENT
+    | KW_COMMIT
+    | KW_COMPACT
+    | KW_COMPACTIONS
+    | KW_COMPUTE
+    | KW_CONCATENATE
+    | KW_CONSTRAINT
+    | KW_COST
+    | KW_CREATE
+    | KW_CUBE
+    | KW_CURRENT
+    | KW_CURRENT_DATE
+    | KW_CURRENT_TIME
+    | KW_CURRENT_TIMESTAMP
+    | KW_CURRENT_USER
+    | KW_DATA
+    | KW_DATABASE
+    | KW_DATABASES
+    | KW_DATE
+    | KW_DATEADD
+    | KW_DATE_ADD
+    | KW_DATEDIFF
+    | KW_DATE_DIFF
+    | KW_DAY
+    | KW_DAYS
+    | KW_DAYOFYEAR
+    | KW_DBPROPERTIES
+    | KW_DEC
+    | KW_DECIMAL
+    | KW_DECLARE
+    | KW_DEFAULT
+    | KW_DEFINED
+    | KW_DELETE
+    | KW_DELIMITED
+    | KW_DESC
+    | KW_DESCRIBE
+    | KW_DFS
+    | KW_DIRECTORIES
+    | KW_DIRECTORY
+    | KW_DISTINCT
+    | KW_DISTRIBUTE
+    | KW_DIV
+    | KW_DOUBLE
+    | KW_DROP
+    | KW_ELSE
+    | KW_END
+    | KW_ESCAPE
+    | KW_ESCAPED
+    | KW_EXCHANGE
+    | KW_EXCLUDE
+    | KW_EXISTS
+    | KW_EXPLAIN
+    | KW_EXPORT
+    | KW_EXTENDED
+    | KW_EXTERNAL
+    | KW_EXTRACT
+    | KW_FALSE
+    | KW_FETCH
+    | KW_FILTER
+    | KW_FIELDS
+    | KW_FILEFORMAT
+    | KW_FIRST
+    | KW_FLOAT
+    | KW_FOLLOWING
+    | KW_FOR
+    | KW_FOREIGN
+    | KW_FORMAT
+    | KW_FORMATTED
+    | KW_FROM
+    | KW_FUNCTION
+    | KW_FUNCTIONS
+    | KW_GENERATED
+    | KW_GLOBAL
+    | KW_GRANT
+    | KW_GROUP
+    | KW_GROUPING
+    | KW_HAVING
+    | KW_HOUR
+    | KW_HOURS
+    | KW_IDENTIFIER_KW
+    | KW_IF
+    | KW_IGNORE
+    | KW_IMPORT
+    | KW_IN
+    | KW_INCLUDE
+    | KW_INDEX
+    | KW_INDEXES
+    | KW_INPATH
+    | KW_INPUTFORMAT
+    | KW_INSERT
+    | KW_INT
+    | KW_INTEGER
+    | KW_INTERVAL
+    | KW_INTO
+    | KW_IS
+    | KW_ITEMS
+    | KW_KEYS
+    | KW_LAST
+    | KW_LAZY
+    | KW_LEADING
+    | KW_LIKE
+    | KW_ILIKE
+    | KW_LIMIT
+    | KW_LINES
+    | KW_LIST
+    | KW_LOAD
+    | KW_LOCAL
+    | KW_LOCATION
+    | KW_LOCK
+    | KW_LOCKS
+    | KW_LOGICAL
+    | KW_LONG
+    | KW_MACRO
+    | KW_MAP
+    | KW_MATCHED
+    | KW_MERGE
+    | KW_MICROSECOND
+    | KW_MICROSECONDS
+    | KW_MILLISECOND
+    | KW_MILLISECONDS
+    | KW_MINUTE
+    | KW_MINUTES
+    | KW_MONTH
+    | KW_MONTHS
+    | KW_MSCK
+    | KW_NAME
+    | KW_NAMESPACE
+    | KW_NAMESPACES
+    | KW_NANOSECOND
+    | KW_NANOSECONDS
+    | KW_NO
+    | KW_NOT
+    | KW_NULL
+    | KW_NULLS
+    | KW_NUMERIC
+    | KW_OF
+    | KW_OFFSET
+    | KW_ONLY
+    | KW_OPTION
+    | KW_OPTIONS
+    | KW_OR
+    | KW_ORDER
+    | KW_OUT
+    | KW_OUTER
+    | KW_OUTPUTFORMAT
+    | KW_OVER
+    | KW_OVERLAPS
+    | KW_OVERLAY
+    | KW_OVERWRITE
+    | KW_PARTITION
+    | KW_PARTITIONED
+    | KW_PARTITIONS
+    | KW_PERCENTILE_CONT
+    | KW_PERCENTILE_DISC
+    | KW_PERCENTLIT
+    | KW_PIVOT
+    | KW_PLACING
+    | KW_POSITION
+    | KW_PRECEDING
+    | KW_PRIMARY
+    | KW_PRINCIPALS
+    | KW_PROPERTIES
+    | KW_PURGE
+    | KW_QUARTER
+    | KW_QUERY
+    | KW_RANGE
+    | KW_REAL
+    | KW_RECORDREADER
+    | KW_RECORDWRITER
+    | KW_RECOVER
+    | KW_REDUCE
+    | KW_REFERENCES
+    | KW_REFRESH
+    | KW_RENAME
+    | KW_REPAIR
+    | KW_REPEATABLE
+    | KW_REPLACE
+    | KW_RESET
+    | KW_RESPECT
+    | KW_RESTRICT
+    | KW_REVOKE
+    | KW_RLIKE
+    | KW_ROLE
+    | KW_ROLES
+    | KW_ROLLBACK
+    | KW_ROLLUP
+    | KW_ROW
+    | KW_ROWS
+    | KW_SCHEMA
+    | KW_SCHEMAS
+    | KW_SECOND
+    | KW_SECONDS
+    | KW_SELECT
+    | KW_SEPARATED
+    | KW_SERDE
+    | KW_SERDEPROPERTIES
+    | KW_SESSION_USER
+    | KW_SET
+    | KW_SETS
+    | KW_SHORT
+    | KW_SHOW
+    | KW_SINGLE
+    | KW_SKEWED
+    | KW_SMALLINT
+    | KW_SOME
+    | KW_SORT
+    | KW_SORTED
+    | KW_SOURCE
+    | KW_START
+    | KW_STATISTICS
+    | KW_STORED
+    | KW_STRATIFY
+    | KW_STRING
+    | KW_STRUCT
+    | KW_SUBSTR
+    | KW_SUBSTRING
+    | KW_SYNC
+    | KW_SYSTEM_TIME
+    | KW_SYSTEM_VERSION
+    | KW_TABLE
+    | KW_TABLES
+    | KW_TABLESAMPLE
+    | KW_TARGET
+    | KW_TBLPROPERTIES
+    | KW_TEMPORARY
+    | KW_TERMINATED
+    | KW_THEN
+    | KW_TIME
+    | KW_TIMEDIFF
+    | KW_TIMESTAMP
+    | KW_TIMESTAMP_LTZ
+    | KW_TIMESTAMP_NTZ
+    | KW_TIMESTAMPADD
+    | KW_TIMESTAMPDIFF
+    | KW_TINYINT
+    | KW_TO
+    | KW_TOUCH
+    | KW_TRAILING
+    | KW_TRANSACTION
+    | KW_TRANSACTIONS
+    | KW_TRANSFORM
+    | KW_TRIM
+    | KW_TRUE
+    | KW_TRUNCATE
+    | KW_TRY_CAST
+    | KW_TYPE
+    | KW_UNARCHIVE
+    | KW_UNBOUNDED
+    | KW_UNCACHE
+    | KW_UNIQUE
+    | KW_UNKNOWN
+    | KW_UNLOCK
+    | KW_UNPIVOT
+    | KW_UNSET
+    | KW_UPDATE
+    | KW_USE
+    | KW_USER
+    | KW_VALUES
+    | KW_VARCHAR
+    | KW_VAR
+    | KW_VARIABLE
+    | KW_VERSION
+    | KW_VIEW
+    | KW_VIEWS
+    | KW_VOID
+    | KW_WEEK
+    | KW_WEEKS
+    | KW_WHEN
+    | KW_WHERE
+    | KW_WINDOW
+    | KW_WITH
+    | KW_WITHIN
+    | KW_YEAR
+    | KW_YEARS
+    | KW_ZONE
+//--DEFAULT-NON-RESERVED-END
+    ;