feat: complete Query statements of FlinkSQL (#93)

* feat: add inlineDataValueClause rule

* test: update tests of select statements

* feat: support flinksql window TVF grammar

* test: flink sql window TVF statement test

* feat: support grouping sets grammar

* test: window TVF Aggregation and Group Window Aggregation tests

* test: supplement selectAggregation with additional test cases

* test: add Having statement test case

* feat: support flinkSql over aggregation grammar

* test: add over aggregation grammar test cases

* test: flink sql join statement test cases

* test: flink sql set Operations grammar test cases

* test: flink sql limit clause test case

* feat: remove allPlusUid and replace with uid

* feat: support flink sql pattern recognition grammar

* test: flink sql pattern recognition tests

* feat: add flink sql with clause rule

* test: flink sql with clause select tests

* feat: rebuild flink sql parser
This commit is contained in:
Hayden
2023-05-17 10:30:25 +08:00
committed by GitHub
parent fbee70cde5
commit a026ae0592
19 changed files with 8188 additions and 4486 deletions

View File

@ -273,6 +273,24 @@ ENFORCED: 'ENFORCED';
METADATA: 'METADATA';
VIRTUAL: 'VIRTUAL';
ZONE: 'ZONE';
// Keywords used by the window TVF and group-window parser rules
// (window function names, DESCRIPTOR(...), and named-argument names).
TUMBLE: 'TUMBLE';
HOP: 'HOP';
CUMULATE: 'CUMULATE';
DESCRIPTOR: 'DESCRIPTOR';
TIMECOL: 'TIMECOL';
SIZE: 'SIZE';
OFFSET: 'OFFSET';
STEP: 'STEP';
SLIDE: 'SLIDE';
SESSION: 'SESSION';
// Keywords used by the MATCH_RECOGNIZE (pattern recognition) parser rules.
MATCH_RECOGNIZE: 'MATCH_RECOGNIZE';
MEASURES: 'MEASURES';
PATTERN: 'PATTERN';
ONE: 'ONE';
PER: 'PER';
// The token is named KW_SKIP but matches the text 'SKIP'.
// NOTE(review): presumably prefixed to avoid a clash with ANTLR's `skip` lexer command or a target-language name - confirm.
KW_SKIP: 'SKIP';
PAST: 'PAST';
DEFINE: 'DEFINE';
// DATA TYPE Keywords
@ -330,6 +348,8 @@ LS_BRACKET: '[';
RS_BRACKET: ']';
LR_BRACKET: '(';
RR_BRACKET: ')';
LB_BRACKET: '{';
RB_BRACKET: '}';
COMMA: ',';
SEMICOLON: ';';
AT_SIGN: '@';
@ -345,8 +365,8 @@ PENCENT_SIGN: '%';
DOUBLE_VERTICAL_SIGN: '||';
DOUBLE_HYPNEN_SIGN: '--';
SLASH_SIGN: '/';
QUESTION_MARK_SIGN: '?';
DOT_ID: '.' ID_LITERAL_FRAG;
PLUS_DOT_ID: (':' | '.') PLUS_ID_LITERAL;
STRING_LITERAL: DQUOTA_STRING | SQUOTA_STRING | BQUOTA_STRING;
DIG_LITERAL: DEC_DIGIT+;
REAL_LITERAL: (DEC_DIGIT+)? '.' DEC_DIGIT+
@ -355,14 +375,13 @@ REAL_LITERAL: (DEC_DIGIT+)? '.' DEC_DIGIT+
| DEC_DIGIT+ EXPONENT_NUM_PART;
BIT_STRING: BIT_STRING_L;
ID_LITERAL: ID_LITERAL_FRAG;
PLUS_ID_LITERAL: PLUS_ID_LITERAL_FRAG;
FILE_PATH: FILE_PATH_STRING;
DOUBLE_ARROW: '=>';
fragment FILE_PATH_STRING: ([/\\] (~([/\\ ]))*)+;
fragment JAR_FILE_PARTTARN: '`' ( '\\'. | '``' | ~('`'|'\\'))* '`';
fragment EXPONENT_NUM_PART: 'E' [-+]? DEC_DIGIT+;
fragment ID_LITERAL_FRAG: [A-Z_0-9a-z]*?[A-Z_a-z]+?[A-Z_0-9a-z]*;
fragment PLUS_ID_LITERAL_FRAG: [A-Z_0-9a-z*@#^$%&{}]*?[A-Z_a-z*@#^$%&{}]+?[A-Z_0-9a-z*@#^$%&{}]*;
fragment DEC_DIGIT: [0-9];
fragment DEC_LETTER: [A-Za-z];
fragment DQUOTA_STRING: '"' ( '\\'. | '""' | ~('"'| '\\') )* '"';

View File

@ -134,7 +134,7 @@ physicalColumnDefinition
;
columnName
: plusUid | expression
: uid | expression
;
columnNameList
@ -380,7 +380,7 @@ insertMulStatement
queryStatement
: valuesCaluse
| WITH withItem (COMMA withItem)* queryStatement
| withClause queryStatement
| '(' queryStatement ')'
| left=queryStatement operator=(INTERSECT | UNION | EXCEPT) ALL? right=queryStatement orderByCaluse? limitClause?
| selectClause orderByCaluse? limitClause?
@ -391,6 +391,10 @@ valuesCaluse
: VALUES expression (COMMA expression )*
;
// WITH clause: the WITH keyword followed by one or more comma-separated withItem entries.
// Referenced from queryStatement, where it prefixes a nested queryStatement.
withClause
: WITH withItem (COMMA withItem)*
;
withItem
: withItemName (LR_BRACKET columnName (COMMA columnName)* RR_BRACKET)? AS LR_BRACKET queryStatement RR_BRACKET
;
@ -401,6 +405,7 @@ withItemName
selectStatement
: selectClause fromClause whereClause? groupByClause? havingClause? windowClause?
| selectClause fromClause matchRecognizeClause
;
selectClause
@ -408,7 +413,13 @@ selectClause
;
projectItemDefinition
: expression (AS? expression)?
: overWindowItem
| expression (AS? expression)?
;
// Projection item of the form `<primaryExpression> OVER <window> AS <alias>`.
// Alternative 1 takes an inline window specification (windowSpec).
// Alternative 2 takes a bare identifier.
// NOTE(review): the identifier presumably names a window declared in a WINDOW clause - confirm.
// The AS alias is mandatory in both alternatives.
overWindowItem
: primaryExpression OVER windowSpec AS strictIdentifier
| primaryExpression OVER errorCapturingIdentifier AS strictIdentifier
;
fromClause
@ -419,6 +430,8 @@ tableExpression
: tableReference (COMMA tableReference)*
| tableExpression NATURAL? (LEFT | RIGHT | FULL | INNER)? OUTER? JOIN tableExpression joinCondition?
| tableExpression CROSS JOIN tableExpression
| inlineDataValueClause
| windoTVFClause
;
tableReference
@ -444,6 +457,46 @@ dateTimeExpression
: expression
;
// Inline data used as a table expression: a parenthesised valuesDefinition
// followed by a mandatory tableAlias.
// NOTE(review): valuesDefinition is defined outside this view - presumably a VALUES list; confirm.
inlineDataValueClause
: LR_BRACKET valuesDefinition RR_BRACKET tableAlias
;
// Window table-valued function used as a table expression:
// TABLE ( <windowTVFExression> ).
windoTVFClause
: TABLE LR_BRACKET windowTVFExression RR_BRACKET
;
// Window TVF call: a window function name followed by a parenthesised,
// comma-separated list of at least one windowTVFParam.
windowTVFExression
: windoTVFName LR_BRACKET windowTVFParam (COMMA windowTVFParam)* RR_BRACKET
;
// Window TVF names accepted by windowTVFExression.
windoTVFName
: TUMBLE
| HOP
| CUMULATE
;
// A single argument of a window TVF call.
// The first three alternatives are positional forms:
//   TABLE <timeAttrColumn> | DESCRIPTOR(...) | INTERVAL ...
// The last three are named forms using `=>` (DOUBLE_ARROW):
//   DATA => TABLE ... | TIMECOL => DESCRIPTOR(...) | <name> => INTERVAL ...
// Note: timeIntervalParamName also contains DATA and TIMECOL, so the last
// alternative additionally accepts those names in front of an interval.
windowTVFParam
: TABLE timeAttrColumn
| columnDescriptor
| timeIntervalExpression
| DATA DOUBLE_ARROW TABLE timeAttrColumn
| TIMECOL DOUBLE_ARROW columnDescriptor
| timeIntervalParamName DOUBLE_ARROW timeIntervalExpression
;
// Argument names accepted on the left of `=>` when the value is a
// timeIntervalExpression (see the last alternative of windowTVFParam).
timeIntervalParamName
: DATA
| TIMECOL
| SIZE
| OFFSET
| STEP
| SLIDE
;
// Column descriptor: DESCRIPTOR ( <uid> ), wrapping exactly one uid.
columnDescriptor
: DESCRIPTOR LR_BRACKET uid RR_BRACKET
;
joinCondition
: ON booleanExpression
| USING LR_BRACKET uid (COMMA uid)* RR_BRACKET
@ -459,29 +512,40 @@ groupByClause
groupItemDefinition
: expression
| groupWindowFunction
| LR_BRACKET RR_BRACKET
| LR_BRACKET expression (COMMA expression)* RR_BRACKET
| CUBE LR_BRACKET expression (COMMA expression)* RR_BRACKET
| ROLLUP LR_BRACKET expression (COMMA expression)* RR_BRACKET
| GROUPING SETS LR_BRACKET groupItemDefinition (COMMA groupItemDefinition)* RR_BRACKET
| groupingSetsNotaionName LR_BRACKET expression (COMMA expression)* RR_BRACKET
| groupingSets LR_BRACKET groupItemDefinition (COMMA groupItemDefinition)* RR_BRACKET
;
// The two-keyword sequence GROUPING SETS, factored into its own rule for
// use by groupItemDefinition.
groupingSets
: GROUPING SETS
;
// Grouping shorthand keywords (CUBE or ROLLUP) that groupItemDefinition
// accepts in front of a parenthesised expression list.
groupingSetsNotaionName
: CUBE
| ROLLUP
;
groupWindowFunction
: groupWindowFunctionName LR_BRACKET timeAttrColumn COMMA timeIntervalExpression RR_BRACKET
;
groupWindowFunctionName
: TUMBLE
| HOP
| SESSION
;
timeAttrColumn
: uid
;
havingClause
: HAVING booleanExpression
;
orderByCaluse
: ORDER BY orderItemDefition (COMMA orderItemDefition)*
;
orderItemDefition
: expression (ASC | DESC)?
;
limitClause
: LIMIT (ALL | limit=expression)
;
windowClause
: WINDOW namedWindow (',' namedWindow)*
;
@ -492,26 +556,99 @@ namedWindow
windowSpec
: name=errorCapturingIdentifier?
'('
(ORDER BY sortItem (',' sortItem)*)?
(PARTITION BY expression (',' expression)*)?
LR_BRACKET
partitionByClause?
orderByCaluse?
windowFrame?
')'
RR_BRACKET
;
sortItem
// MATCH_RECOGNIZE ( ... ) clause; selectStatement allows it after fromClause.
// Sub-clauses must appear in the order listed below. Every sub-clause is
// optional except patternVariablesDefination (the DEFINE part).
// An optional alias, with optional AS, may follow the closing bracket.
matchRecognizeClause
: MATCH_RECOGNIZE
LR_BRACKET
partitionByClause?
orderByCaluse?
measuresClause?
outputMode?
afterMatchStrategy?
patternDefination?
patternVariablesDefination
RR_BRACKET ( AS? strictIdentifier )?
;
// ORDER BY followed by one or more comma-separated order items.
orderByCaluse
: ORDER BY orderItemDefition (COMMA orderItemDefition)*
;
// One ORDER BY item: an expression, an optional direction (labelled
// `ordering`: ASC or DESC), and an optional NULLS placement (labelled
// `nullOrder`: LAST or FIRST).
orderItemDefition
: expression ordering=(ASC | DESC)? (NULLS nullOrder=(LAST | FIRST))?
;
// LIMIT clause: either LIMIT ALL or LIMIT <expression>; the expression is
// labelled `limit`.
limitClause
: LIMIT (ALL | limit=expression)
;
// PARTITION BY followed by one or more comma-separated expressions.
// Shared by windowSpec and matchRecognizeClause.
partitionByClause
: PARTITION BY expression (COMMA expression)*
;
// Quantifier that may follow a pattern variable inside PATTERN ( ... ).
// Accepted forms:
//   *        +        ?
//   { n }    exactly n rows
//   { n, m } between n and m rows
//   { n, }   n or more rows
//   { , m }  at most m rows
// The exact-count form `{ n }` was previously missing, so patterns such as
// `A{3}` failed to parse. Only the syntactic shape is checked here; numeric
// constraints (e.g. n <= m) are not enforced by the grammar.
quantifiers
: (ASTERISK_SIGN)
| (ADD_SIGN)
| (QUESTION_MARK_SIGN)
| (LB_BRACKET DIG_LITERAL RB_BRACKET)
| (LB_BRACKET DIG_LITERAL COMMA DIG_LITERAL RB_BRACKET)
| (LB_BRACKET DIG_LITERAL COMMA RB_BRACKET)
| (LB_BRACKET COMMA DIG_LITERAL RB_BRACKET)
;
// MEASURES clause of MATCH_RECOGNIZE: one or more comma-separated
// projection items (same rule as used for SELECT projections).
measuresClause
: MEASURES projectItemDefinition (COMMA projectItemDefinition)*
;
// PATTERN clause of MATCH_RECOGNIZE: a parenthesised sequence of one or
// more pattern variables (written without separators), optionally followed
// by a WITHIN time constraint.
patternDefination
: PATTERN
LR_BRACKET
patternVariable+
RR_BRACKET
withinClause?
;
// A single pattern variable: an unquoted identifier with an optional
// quantifier suffix.
patternVariable
: unquotedIdentifier quantifiers?
;
// Output mode of MATCH_RECOGNIZE: `ALL ROWS PER MATCH` or `ONE ROW PER MATCH`.
outputMode
: ALL ROWS PER MATCH
| ONE ROW PER MATCH
;
// AFTER MATCH SKIP strategy of MATCH_RECOGNIZE. Accepted forms:
//   AFTER MATCH SKIP PAST LAST ROW
//   AFTER MATCH SKIP TO NEXT ROW
//   AFTER MATCH SKIP TO LAST <variable>
//   AFTER MATCH SKIP TO FIRST <variable>
// The SKIP keyword is the KW_SKIP token.
afterMatchStrategy
: AFTER MATCH KW_SKIP PAST LAST ROW
| AFTER MATCH KW_SKIP TO NEXT ROW
| AFTER MATCH KW_SKIP TO LAST unquotedIdentifier
| AFTER MATCH KW_SKIP TO FIRST unquotedIdentifier
;
// DEFINE clause of MATCH_RECOGNIZE: one or more comma-separated items.
// Each item reuses projectItemDefinition, so the `<variable> AS <condition>`
// shape is parsed as `expression AS expression`.
patternVariablesDefination
: DEFINE projectItemDefinition (COMMA projectItemDefinition)*
;
windowFrame
: RANGE frameBound
| ROWS frameBound
: RANGE BETWEEN timeIntervalExpression frameBound
| ROWS BETWEEN DIG_LITERAL frameBound
;
frameBound
: expression PRECEDING
: PRECEDING AND CURRENT ROW
;
// WITHIN time constraint that may follow PATTERN ( ... ): WITHIN <interval>.
withinClause
: WITHIN timeIntervalExpression
;
// Interval literal: INTERVAL, a string literal, then a single ID_LITERAL token.
// NOTE(review): the ID_LITERAL is presumably the time unit (e.g. MINUTE);
// confirm that unit words are not lexed as dedicated keyword tokens,
// which would not match ID_LITERAL here.
timeIntervalExpression
: INTERVAL STRING_LITERAL ID_LITERAL
;
// expression
@ -686,10 +823,6 @@ uid
: ID_LITERAL DOT_ID*?
;
plusUid // 匹配 xxx.$xx xx:xxxx 等字符
: (ID_LITERAL | PLUS_ID_LITERAL) (DOT_ID | PLUS_DOT_ID)*?
;
withOption
: WITH tablePropertyList
;