lava-oushudb-dt-sql-parser/src/grammar/flinksql/FlinkSqlParser.g4
nankaNULL 0c9a831585
fix: fix flinksql create table grammar, add test (#65)
* fix: fix flinksql create table grammar, add test

* feat: add cross join, left outer join and time temporal join

* test: supplement flinksql join test

* fix: fix catalog table grammar, add test

* fix: fix flinksql data type, add test

* fix: delete console

* feat: add query with clause, add test
2022-12-28 14:20:33 +08:00

1131 lines
22 KiB
ANTLR
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

parser grammar FlinkSqlParser;
options { tokenVocab=FlinkSqlLexer; }
program: statement EOF;
statement
: sqlStatements EOF
;
sqlStatements
: (sqlStatement SEMICOLON? | emptyStatement)*
;
sqlStatement
: ddlStatement | dmlStatement | describeStatement | explainStatement | useStatement | showStatememt
;
emptyStatement
: SEMICOLON
;
ddlStatement
: createTable | createDatabase | createView | createFunction | createCatalog
| alterTable | alterDatabase | alterFunction
| dropTable | dropDatabase | dropView | dropFunction
;
dmlStatement
: queryStatement | insertStatement
;
describeStatement
: DESCRIBE uid
;
explainStatement
: EXPLAIN identifier FOR dmlStatement
;
useStatement
: USE CATALOG? uid
;
showStatememt
: SHOW (CATALOGS | DATABASES | TABLES | FUNCTIONS | VIEWS)
;
// Create statements
createTable
: CREATE TABLE ifNotExists? sourceTable
LR_BRACKET
columnOptionDefinition (COMMA columnOptionDefinition)*
(COMMA watermarkDefinition)?
(COMMA tableConstraint)?
(COMMA selfDefinitionClause)?
RR_BRACKET
commentSpec?
partitionDefinition?
withOption
likeDefinition?
;
columnOptionDefinition
: physicalColumnDefinition
| metadataColumnDefinition
| computedColumnDefinition
;
physicalColumnDefinition
: columnName columnType columnConstraint? commentSpec?
;
columnName
: plusUid | expression
;
columnNameList
: columnName (',' columnName)*
;
columnType
: typeName=(DATE | BOOLEAN | NULL)
| typeName=(CHAR | VARCHAR | STRING | BINARY | VARBINARY | BYTES
| TINYINT | SMALLINT | INT | INTEGER | BIGINT
| TIME | TIMESTAMP | TIMESTAMP_LTZ | DATETIME
) lengthOneDimension?
| typeName=(DECIMAL | DEC | NUMERIC | FLOAT | DOUBLE) lengthTwoOptionalDimension?
| type=(ARRAY | MULTISET) lengthOneTypeDimension?
| type=MAP mapTypeDimension?
| type=ROW rowTypeDimension?
| type=RAW lengthTwoStringDimension?
;
lengthOneDimension
: '(' decimalLiteral ')'
;
lengthTwoOptionalDimension
: '(' decimalLiteral (',' decimalLiteral)? ')'
;
lengthTwoStringDimension
: '(' stringLiteral (',' stringLiteral)? ')'
;
lengthOneTypeDimension
: LESS_SYMBOL columnType GREATER_SYMBOL
;
mapTypeDimension
: LESS_SYMBOL columnType (COMMA columnType) GREATER_SYMBOL
;
rowTypeDimension
: LESS_SYMBOL columnName columnType (COMMA columnName columnType)* GREATER_SYMBOL
;
columnConstraint
:(CONSTRAINT constraintName)? PRIMARY KEY (NOT ENFORCED)?
;
commentSpec
: COMMENT STRING_LITERAL
;
metadataColumnDefinition
: columnName columnType METADATA (FROM metadataKey)? VIRTUAL?
;
metadataKey
: STRING_LITERAL
;
computedColumnDefinition
: columnName AS computedColumnExpression commentSpec?
;
// 计算表达式
computedColumnExpression
: expression
;
watermarkDefinition
: WATERMARK FOR expression AS expression
;
tableConstraint
: (CONSTRAINT constraintName)? PRIMARY KEY '(' columnNameList ')' (NOT ENFORCED)?
;
constraintName
: identifier
;
selfDefinitionClause // 数栈自定义语句 PERIOD FOR SYSTEM_TIME
: PERIOD FOR SYSTEM_TIME
;
partitionDefinition
: PARTITIONED BY transformList
;
transformList
: '(' transform (',' transform)* ')'
;
transform
: qualifiedName #identityTransform
| transformName=identifier
'(' transformArgument (',' transformArgument)* ')' #applyTransform
;
transformArgument
: qualifiedName
| constant
;
likeDefinition
: LIKE sourceTable (LR_BRACKET likeOption* RR_BRACKET)?
;
sourceTable
: uid
;
likeOption
: (INCLUDING | EXCLUDING) (ALL | CONSTRAINTS | PARTITIONS)
| (INCLUDING | EXCLUDING | OVERWRITING) (GENERATED | OPTIONS | WATERMARKS)
;
createCatalog
: CREATE CATALOG uid withOption
;
createDatabase
: CREATE DATABASE ifNotExists? uid commentSpec? withOption
;
createView
: CREATE TEMPORARY? VIEW ifNotExists? uid columnNameList? commentSpec? AS queryStatement
;
createFunction
: CREATE (TEMPORARY|TEMPORARY SYSTEM) FUNCTION ifNotExists? uid AS identifier (LANGUAGE identifier)?
;
// Alter statements
alterTable
: ALTER TABLE uid (renameDefinition | setKeyValueDefinition)
;
renameDefinition
: RENAME TO uid
;
setKeyValueDefinition
: SET tablePropertyList
;
alterDatabase
: ALTER DATABASE uid setKeyValueDefinition
;
alterFunction
: ALTER (TEMPORARY|TEMPORARY SYSTEM) FUNCTION ifExists? uid AS identifier (LANGUAGE identifier)?
;
// Drop statements
dropTable
: DROP TABLE ifExists? uid
;
dropDatabase
: DROP DATABASE ifExists? uid dropType=(RESTRICT | CASCADE)?
;
dropView
: DROP TEMPORARY? VIEW ifExists? uid
;
dropFunction
: DROP (TEMPORARY|TEMPORARY SYSTEM)? FUNCTION ifExists? uid
;
// Insert statements
insertStatement
: INSERT (INTO | OVERWRITE) uid
(
insertPartitionDefinition? queryStatement
| valuesDefinition
)
;
insertPartitionDefinition
: PARTITION tablePropertyList
;
valuesDefinition
: VALUES valuesRowDefinition (COMMA valuesRowDefinition)*
;
valuesRowDefinition
: LR_BRACKET
constant (COMMA constant)*
RR_BRACKET
;
// Select statements
queryStatement
: valuesCaluse
| WITH withItem (COMMA withItem)* queryStatement
| '(' queryStatement ')'
| left=queryStatement operator=(INTERSECT | UNION | EXCEPT) ALL? right=queryStatement orderByCaluse? limitClause?
| selectClause orderByCaluse? limitClause?
| selectStatement orderByCaluse? limitClause?
;
valuesCaluse
: VALUES expression (COMMA expression )*
;
withItem
: withItemName (LR_BRACKET columnName (COMMA columnName)* RR_BRACKET)? AS LR_BRACKET queryStatement RR_BRACKET
;
withItemName
: identifier
;
selectStatement
: selectClause fromClause whereClause? groupByClause? havingClause? windowClause?
;
selectClause
: SELECT setQuantifier? (ASTERISK_SIGN | projectItemDefinition (COMMA projectItemDefinition)*)
;
projectItemDefinition
: expression (AS? expression)?
;
fromClause
: FROM tableExpression
;
tableExpression
: tableReference (COMMA tableReference)*
| tableExpression NATURAL? (LEFT | RIGHT | FULL | INNER)? OUTER? JOIN tableExpression joinCondition?
| tableExpression CROSS JOIN tableExpression
;
tableReference
: tablePrimary tableAlias?
;
tablePrimary
: TABLE? tablePath systemTimePeriod? (AS? correlationName)?
| LATERAL TABLE LR_BRACKET functionName LR_BRACKET expression (COMMA expression)* RR_BRACKET RR_BRACKET
| LATERAL? LR_BRACKET queryStatement RR_BRACKET
| UNNEST LR_BRACKET expression RR_BRACKET
;
tablePath
: uid
;
systemTimePeriod
: FOR SYSTEM_TIME AS OF dateTimeExpression
;
dateTimeExpression
: expression
;
joinCondition
: ON booleanExpression
| USING LR_BRACKET uid (COMMA uid)* RR_BRACKET
;
whereClause
: WHERE booleanExpression
;
groupByClause
: GROUP BY groupItemDefinition (COMMA groupItemDefinition)*
;
groupItemDefinition
: expression
| LR_BRACKET RR_BRACKET
| LR_BRACKET expression (COMMA expression)* RR_BRACKET
| CUBE LR_BRACKET expression (COMMA expression)* RR_BRACKET
| ROLLUP LR_BRACKET expression (COMMA expression)* RR_BRACKET
| GROUPING SETS LR_BRACKET groupItemDefinition (COMMA groupItemDefinition)* RR_BRACKET
;
havingClause
: HAVING booleanExpression
;
orderByCaluse
: ORDER BY orderItemDefition (COMMA orderItemDefition)*
;
orderItemDefition
: expression (ASC | DESC)?
;
limitClause
: LIMIT (ALL | limit=expression)
;
windowClause
: WINDOW namedWindow (',' namedWindow)*
;
namedWindow
: name=errorCapturingIdentifier AS windowSpec
;
windowSpec
: name=errorCapturingIdentifier?
'('
(ORDER BY sortItem (',' sortItem)*)?
(PARTITION BY expression (',' expression)*)?
windowFrame?
')'
;
sortItem
: expression ordering=(ASC | DESC)? (NULLS nullOrder=(LAST | FIRST))?
;
windowFrame
: RANGE frameBound
| ROWS frameBound
;
frameBound
: expression PRECEDING
;
// expression
expression
: booleanExpression
;
booleanExpression
: NOT booleanExpression #logicalNot
| EXISTS '(' queryStatement ')' #exists
| valueExpression predicate? #predicated
| left=booleanExpression operator=AND right=booleanExpression #logicalBinary
| left=booleanExpression operator=OR right=booleanExpression #logicalBinary
;
predicate
: NOT? kind=BETWEEN lower=valueExpression AND upper=valueExpression
| NOT? kind=IN '(' expression (',' expression)* ')'
| NOT? kind=IN '(' queryStatement ')'
| kind=EXISTS '(' queryStatement ')'
| NOT? kind=RLIKE pattern=valueExpression
| NOT? kind=LIKE quantifier=(ANY | ALL) ('('')' | '(' expression (',' expression)* ')')
| NOT? kind=LIKE pattern=valueExpression
| IS NOT? kind=NULL
| IS NOT? kind=(TRUE | FALSE)
| IS NOT? kind=DISTINCT FROM right=valueExpression
;
valueExpression
: primaryExpression #valueExpressionDefault
| operator=('-' | '+' | '~') valueExpression #arithmeticUnary
| left=valueExpression operator=('*' | '/' | '%' | DIV) right=valueExpression #arithmeticBinary
| left=valueExpression operator=('+' | '-' | DOUBLE_VERTICAL_SIGN) right=valueExpression #arithmeticBinary
| left=valueExpression operator='&' right=valueExpression #arithmeticBinary
| left=valueExpression operator='^' right=valueExpression #arithmeticBinary
| left=valueExpression operator='|' right=valueExpression #arithmeticBinary
| left=valueExpression comparisonOperator right=valueExpression #comparison
;
primaryExpression
: CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase
| CASE value=expression whenClause+ (ELSE elseExpression=expression)? END #simpleCase
| CAST '(' expression AS columnType ')' #cast
// | STRUCT '(' (argument+=namedExpression (',' argument+=namedExpression)*)? ')' #struct
| FIRST '(' expression (IGNORE NULLS)? ')' #first
| LAST '(' expression (IGNORE NULLS)? ')' #last
| POSITION '(' substr=valueExpression IN str=valueExpression ')' #position
| constant #constantDefault
| '*' #star
| uid '.' '*' #star
// | '(' namedExpression (',' namedExpression)+ ')' #rowConstructor
| '(' queryStatement ')' #subqueryExpression
| functionName '(' (setQuantifier? expression (',' expression)*)? ')' #functionCall
// | identifier '->' expression #lambda
// | '(' identifier (',' identifier)+ ')' '->' expression #lambda
| value=primaryExpression LS_BRACKET index=valueExpression RS_BRACKET #subscript
| identifier #columnReference
| dereferenceDefinition #dereference
| '(' expression ')' #parenthesizedExpression
// | EXTRACT '(' field=identifier FROM source=valueExpression ')' #extract
// | (SUBSTR | SUBSTRING) '(' str=valueExpression (FROM | ',') pos=valueExpression
// ((FOR | ',') len=valueExpression)? ')' #substring
// | TRIM '(' trimOption=(BOTH | LEADING | TRAILING)? (trimStr=valueExpression)?
// FROM srcStr=valueExpression ')' #trim
// | OVERLAY '(' input=valueExpression PLACING replace=valueExpression
// FROM position=valueExpression (FOR length=valueExpression)? ')' #overlay
;
functionName
: uid
;
dereferenceDefinition
: uid
;
// base common
correlationName
: identifier
;
qualifiedName
: identifier | dereferenceDefinition
;
interval
: INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)?
;
errorCapturingMultiUnitsInterval
: multiUnitsInterval unitToUnitInterval?
;
multiUnitsInterval
: (intervalValue identifier)+
;
errorCapturingUnitToUnitInterval
: body=unitToUnitInterval (error1=multiUnitsInterval | error2=unitToUnitInterval)?
;
unitToUnitInterval
: value=intervalValue from=identifier TO to=identifier
;
intervalValue
: ('+' | '-')? (DIG_LITERAL | REAL_LITERAL)
| STRING_LITERAL
;
columnAlias
: AS? strictIdentifier identifierList?
;
tableAlias
: AS? strictIdentifier identifierList?
;
errorCapturingIdentifier
: identifier errorCapturingIdentifierExtra
;
errorCapturingIdentifierExtra
: (MINUS identifier)+ #errorIdent
| #realIdent
;
identifierList
: '(' identifierSeq ')'
;
identifierSeq
: identifier (COMMA identifier)*
;
identifier
: strictIdentifier
| strictNonReserved
;
strictIdentifier
: unquotedIdentifier #unquotedIdentifierAlternative
| quotedIdentifier #quotedIdentifierAlternative
| ansiNonReserved #ansiNonReservedKeywords
| nonReserved #nonReservedKeywords
;
unquotedIdentifier
: DIG_LITERAL | ID_LITERAL
;
quotedIdentifier
: STRING_LITERAL
;
whenClause
: WHEN condition=expression THEN result=expression
;
uidList
: uid (',' uid)*
;
uid
: ID_LITERAL DOT_ID*?
;
plusUid // 匹配 xxx.$xx xx:xxxx 等字符
: (ID_LITERAL | PLUS_ID_LITERAL) (DOT_ID | PLUS_DOT_ID)*?
;
withOption
: WITH tablePropertyList
;
ifNotExists
: IF NOT EXISTS;
ifExists
: IF EXISTS;
tablePropertyList
: '(' tableProperty (',' tableProperty)* ')'
;
tableProperty
: key=tablePropertyKey (EQUAL_SYMBOL? value=tablePropertyValue)?
;
tablePropertyKey
: identifier | dereferenceDefinition
| STRING_LITERAL
;
tablePropertyValue
: DIG_LITERAL
| REAL_LITERAL
| booleanLiteral
| STRING_LITERAL
;
logicalOperator
: AND | '&' '&' | OR | '|' '|'
;
comparisonOperator
: '=' | '>' | '<' | '<' '=' | '>' '='
| '<' '>' | '!' '=' | '<' '=' '>'
;
bitOperator
: '<' '<' | '>' '>' | '&' | '^' | '|'
;
mathOperator
: '*' | SLASH_SIGN | PENCENT_SIGN | DIV | '+' | '-' | DOUBLE_HYPNEN_SIGN
;
unaryOperator
: '!' | '~' | ADD_SIGN | '-' | NOT
;
fullColumnName
: uid
;
constant
: stringLiteral // 引号包含的字符串
| decimalLiteral // 整数
| interval // INTERVAL keywords
| HYPNEN_SIGN decimalLiteral // 负整数
| booleanLiteral // 布尔值
| REAL_LITERAL // 小数
| BIT_STRING
| NOT? NULL // 空 | 非空
;
stringLiteral
: STRING_LITERAL
;
decimalLiteral
: DIG_LITERAL
;
booleanLiteral
: TRUE | FALSE;
setQuantifier
: DISTINCT
| ALL
;
ansiNonReserved
: ADD
| AFTER
| ALTER
| ANALYZE
| ANTI
| ARCHIVE
| ARRAY
| ASC
| AT
| BETWEEN
| BIGINT
| BINARY
| BOOLEAN
| BUCKET
| BUCKETS
| BY
| BYTES
| CACHE
| CASCADE
| CATALOG
| CATALOGS
| CHANGE
| CHAR
| CLEAR
| CLUSTER
| CLUSTERED
| CODEGEN
| COLLECTION
| COLUMNS
| COMMENT
| COMMIT
| COMPACT
| COMPACTIONS
| COMPUTE
| CONCATENATE
| CONSTRAINTS
| COST
| CUBE
| CURRENT
| DATA
| DATABASE
| DATABASES
| DATE
| DATETIME
| DBPROPERTIES
| DECIMAL
| DEFINED
| DELETE
| DELIMITED
| DESC
| DESCRIBE
| DFS
| DIRECTORIES
| DIRECTORY
| DISTRIBUTE
| DIV
| DOUBLE
| DROP
| ESCAPED
| EXCHANGE
| EXCLUDING
| EXISTS
| EXPLAIN
| EXPORT
| EXTENDED
| EXTERNAL
| EXTRACT
| FIELDS
| FILEFORMAT
| FIRST
| FLOAT
| FOLLOWING
| FORMAT
| FORMATTED
| FUNCTION
| FUNCTIONS
| GENERATED
| GLOBAL
| GROUPING
| IF
| IGNORE
| IMPORT
| INCLUDING
| INDEX
| INDEXES
| INPATH
| INPUTFORMAT
| INSERT
| INT
| INTERVAL
| ITEMS
| KEY
| KEYS
| LANGUAGE
| LAST
| LATERAL
| LAZY
| LIKE
| LIMIT
| LINES
| LIST
| LOAD
| LOCAL
| LOCATION
| LOCK
| LOCKS
| LOGICAL
| MACRO
| MAP
| MATCH
| MINUS
| MSCK
| MULTISET
| NEXT
| NO
| NULL
| NULLS
| OF
| OPTION
| OPTIONS
| OUT
| OUTPUTFORMAT
| OVER
| OVERWRITE
| PARTITION
| PARTITIONED
| PARTITIONS
| PERCENTLIT
| PERIOD
| PIVOT
| POSITION
| PRECEDING
| PRINCIPALS
| PURGE
| RANGE
| RAW
| RECORDREADER
| RECORDWRITER
| RECOVER
| REDUCE
| REFRESH
| RENAME
| REPAIR
| REPLACE
| RESET
| RESTRICT
| REVOKE
| RLIKE
| ROLE
| ROLES
| ROLLBACK
| ROLLUP
| ROW
| ROWS
| SEMI
| SEPARATED
| SERDE
| SERDEPROPERTIES
| SET
| SETMINUS
| SETS
| SHOW
| SKEWED
| SMALLINT
| SORT
| SORTED
| START
| STATISTICS
| STORED
| STRATIFY
| STRING
| STRUCT
| SYSTEM
| SYSTEM_TIME
| TABLES
| TABLESAMPLE
| TBLPROPERTIES
| TEMPORARY
| TERMINATED
| TIME
| TIMESTAMP
| TINYINT
| TOUCH
| TRANSACTION
| TRANSACTIONS
| TRANSFORM
| TRUE
| TRUNCATE
| UNARCHIVE
| UNBOUNDED
| UNCACHE
| UNLOCK
| UNSET
| UNNEST
| USE
| VALUES
| VARBINARY
| VARCHAR
| VIEW
| VIEWS
| WATERMARK
| WINDOW
| WITHIN
| WS
;
strictNonReserved
: ANTI
| CROSS
| EXCEPT
| FULL
| INNER
| INTERSECT
| JOIN
| LEFT
| NATURAL
| ON
| RIGHT
| SEMI
| SETMINUS
| UNION
| USING
;
nonReserved
: ADD
| AFTER
| ALL
| ALTER
| ANALYZE
| AND
| ANY
| COLUMNS
| ARRAY
| AS
| ASC
| AT
| BETWEEN
| BIGINT
| BINARY
| BOOLEAN
| BOTH
| BUCKET
| BUCKETS
| BY
| BYTES
| CACHE
| CASCADE
| CASE
| CAST
| CATALOG
| CATALOGS
| CHANGE
| CHAR
| CLEAR
| CLUSTER
| CLUSTERED
| CODEGEN
| COLLECTION
| COLUMN
| COLUMNS
| COMMENT
| COMMIT
| COMPACT
| COMPACTIONS
| COMPUTE
| CONCATENATE
| CONSTRAINT
| CONSTRAINTS
| COST
| CREATE
| CUBE
| CURRENT
| DATA
| DATABASE
| DATABASES
| DATE
| DATETIME
| DBPROPERTIES
| DECIMAL
| DEFINED
| DELETE
| DELIMITED
| DESC
| DESCRIBE
| DFS
| DIRECTORIES
| DIRECTORY
| DISTINCT
| DISTRIBUTE
| DIV
| DOUBLE
| DROP
| ELSE
| END
| ESCAPED
| EXCHANGE
| EXCLUDING
| EXISTS
| EXPLAIN
| EXPORT
| EXTENDED
| EXTERNAL
| EXTRACT
| FALSE
| FIELDS
| FILEFORMAT
| FIRST
| FLOAT
| FOLLOWING
| FOR
| FORMAT
| FORMATTED
| FROM
| FUNCTION
| FUNCTIONS
| GENERATED
| GLOBAL
| GRANT
| GROUP
| GROUPING
| HAVING
| IF
| IGNORE
| IMPORT
| IN
| INCLUDING
| INDEX
| INDEXES
| INPATH
| INPUTFORMAT
| INSERT
| INT
| INTERVAL
| INTO
| IS
| ITEMS
| KEY
| KEYS
| LANGUAGE
| LAST
| LATERAL
| LAZY
| LEADING
| LIKE
| LIMIT
| LINES
| LIST
| LOAD
| LOCAL
| LOCATION
| LOCK
| LOCKS
| LOGICAL
| MACRO
| MAP
| MATCH
| MINUS
| MSCK
| MULTISET
| NEXT
| NO
| NOT
| NULL
| NULLS
| OF
| OPTION
| OPTIONS
| OR
| ORDER
| OUT
| OUTER
| OUTPUTFORMAT
| OVER
| OVERWRITE
| PARTITION
| PARTITIONED
| PARTITIONS
| PERCENTLIT
| PERIOD
| PIVOT
| POSITION
| PRECEDING
| PRIMARY
| PRINCIPALS
| PURGE
| RANGE
| RAW
| RECORDREADER
| RECORDWRITER
| RECOVER
| REDUCE
| REFRESH
| RENAME
| REPAIR
| REPLACE
| RESET
| RESTRICT
| REVOKE
| RLIKE
| ROLE
| ROLES
| ROLLBACK
| ROLLUP
| ROW
| ROWS
| SELECT
| SEPARATED
| SERDE
| SERDEPROPERTIES
| SET
| SETS
| SHOW
| SKEWED
| SMALLINT
| SORT
| SORTED
| START
| STATISTICS
| STORED
| STRATIFY
| STRING
| STRUCT
| SYSTEM
| SYSTEM_TIME
| TABLE
| TABLES
| TABLESAMPLE
| TBLPROPERTIES
| TEMPORARY
| TERMINATED
| THEN
| TIME
| TIMESTAMP
| TINYINT
| TO
| TOUCH
| TRAILING
| TRANSACTION
| TRANSACTIONS
| TRANSFORM
| TRUE
| TRUNCATE
| UNARCHIVE
| UNBOUNDED
| UNCACHE
| UNLOCK
| UNSET
| UNNEST
| USE
| VALUES
| VARBINARY
| VARCHAR
| VIEW
| VIEWS
| WATERMARK
| WHEN
| WHERE
| WINDOW
| WITH
| WITHIN
| WS
;