From d7d32a382404df8917282d835134f50b1f3a6eff Mon Sep 17 00:00:00 2001 From: Erindcl Date: Tue, 17 Nov 2020 19:57:12 +0800 Subject: [PATCH] feat(fink): refactoring selectStatement --- src/grammar/flinksql/FlinkSqlLexer.g4 | 5 + src/grammar/flinksql/FlinkSqlParser.g4 | 296 ++++++++++++------------- 2 files changed, 143 insertions(+), 158 deletions(-) diff --git a/src/grammar/flinksql/FlinkSqlLexer.g4 b/src/grammar/flinksql/FlinkSqlLexer.g4 index 7b5e027..9f5e5b1 100644 --- a/src/grammar/flinksql/FlinkSqlLexer.g4 +++ b/src/grammar/flinksql/FlinkSqlLexer.g4 @@ -320,6 +320,8 @@ BIT_XOR_OP: '^'; // Constructors symbols DOT: '.'; +LS_BRACKET: '['; +RS_BRACKET: ']'; LR_BRACKET: '('; RR_BRACKET: ')'; COMMA: ','; @@ -333,6 +335,7 @@ DOUBLE_QUOTE_SYMB: '"'; REVERSE_QUOTE_SYMB: '`'; COLON_SYMB: ':'; ASTERISK_SIGN: '*'; +UNDERLINE_SIGN: '_'; STRING_LITERAL: DQUOTA_STRING | SQUOTA_STRING | BQUOTA_STRING; DECIMAL_LITERAL: DEC_DIGIT+; REAL_LITERAL: (DEC_DIGIT+)? '.' DEC_DIGIT+ @@ -340,10 +343,12 @@ REAL_LITERAL: (DEC_DIGIT+)? '.' DEC_DIGIT+ | (DEC_DIGIT+)? '.' (DEC_DIGIT+ EXPONENT_NUM_PART) | DEC_DIGIT+ EXPONENT_NUM_PART; BIT_STRING: BIT_STRING_L; +IDENTIFIER_BASE: (DEC_LETTER | DEC_DIGIT | UNDERLINE_SIGN)+; fragment EXPONENT_NUM_PART: 'E' [-+]? DEC_DIGIT+; fragment ID_LITERAL: [A-Z_0-9a-z]*?[A-Z_a-z]+?[A-Z_0-9a-z]*; fragment DEC_DIGIT: [0-9]; +fragment DEC_LETTER: [A-Za-z]; fragment DQUOTA_STRING: '"' ( '\\'. | '""' | ~('"'| '\\') )* '"'; fragment SQUOTA_STRING: '\'' ('\\'. | '\'\'' | ~('\'' | '\\'))* '\''; fragment BIT_STRING_L: 'B' '\'' [01]+ '\''; diff --git a/src/grammar/flinksql/FlinkSqlParser.g4 b/src/grammar/flinksql/FlinkSqlParser.g4 index c7eb226..4738f76 100644 --- a/src/grammar/flinksql/FlinkSqlParser.g4 +++ b/src/grammar/flinksql/FlinkSqlParser.g4 @@ -126,122 +126,6 @@ dropFunction ; -// Select statements - -queryStatement - : valuesDefinition | selectStatements queryOrderByDefinition? queryLimitDefinition? queryOffsetDefinition? queryFetchDefinition?
- ; - -selectStatements - : selectStatement - | selectWithoutFromDefinition - // | queryStatement UNION ALL? queryStatement - // | queryStatement EXCEPT queryStatement - // | queryStatement INTERSECT queryStatement - ; - -selectStatement - : SELECT (ALL | DISTINCT)? - (ASTERISK_SIGN | projectItemDefinition (COMMA projectItemDefinition)*) - FROM tableExpression - (WHERE expression)? - (GROUP BY groupItemDefinition (COMMA groupItemDefinition)*) - (HAVING expression)? - // (WINDOW windowName AS windowSpec (COMMA windowName AS windowSpec)*)? - ; - -projectItemDefinition - : expression (AS? uid)? | uid '.' '*' - ; - -tableExpression - : tableReference (COMMA tableReference)* - | tableExpression NATURAL? (LEFT | RIGHT | FULL)? JOIN tableExpression joinCondition? - ; - -tableReference - : tablePrimary matchRecognize? (AS? uid (LR_BRACKET uid (COMMA uid)* RR_BRACKET)?)? - ; - -matchRecognize - : - ; - -tablePrimary - : TABLE? uid dynamicTableOptions? - | LATERAL TABLE LR_BRACKET uid LR_BRACKET expression (COMMA expression)* RR_BRACKET RR_BRACKET - | UNNEST LR_BRACKET expression RR_BRACKET - ; - -dynamicTableOptions - : - ; - -joinCondition - : ON booleanExpression | USING LR_BRACKET uid (COMMA uid)* RR_BRACKET - ; - -booleanExpression - : - ; - -groupItemDefinition - : expression - | LR_BRACKET RR_BRACKET - | LR_BRACKET expression (COMMA expression)* RR_BRACKET - | CUBE LR_BRACKET expression (COMMA expression)* RR_BRACKET - | ROLLUP LR_BRACKET expression (COMMA expression)* RR_BRACKET - | GROUPING SETS LR_BRACKET groupItemDefinition (COMMA groupItemDefinition)* RR_BRACKET - ; - -// windowRef -// : windowName | windowSpec -// ; - -// windowSpec -// : windowName -// LR_BRACKET -// (ORDER BY orderItem (COMMA orderItem)*)? -// (PARTITION BY expression (COMMA expression)*) -// ( -// RANGE numericOrIntervalExpression PRECEDING -// | ROWS numericExpression PRECEDING -// )? -// RR_BRACKET -// ; - -selectWithoutFromDefinition - : SELECT (ALL | DISTINCT)?
(ASTERISK_SIGN | projectItem (COMMA projectItem)*) - ; - -projectItem - : expression (AS? uid)? | uid '.' '*' - ; - -queryOrderByDefinition - : ORDER BY orderItemDefition (COMMA orderItemDefition)* - ; - -orderItemDefition - : expression (ASC | DESC) - ; - -queryLimitDefinition - : LIMIT (countDefinition | ALL) - ; - -countDefinition - : - ; - -queryOffsetDefinition // OFFSET start (ROW | ROWS) - : - ; - -queryFetchDefinition // FETCH (FIRST | NEXT) countDefinition? (ROW | ROWS) ONLY - : - ; - // Insert statements insertStatement @@ -272,8 +156,134 @@ allValueDifinition : stringLiteral | booleanLiteral | DEC_DIGIT | NULL ; + +// Select statements + +queryStatement + : + ; + +selectStatement + : SELECT setQuantifier? + (ASTERISK_SIGN | projectItemDefinition (COMMA projectItemDefinition)*) + FROM tableExpression + ; + +projectItemDefinition + : expression (AS? uid)? | uid '.' '*' + ; + +tableExpression + : tableReference (COMMA tableReference)* + ; + +tableReference + : tablePrimary tableAlias + ; + +tablePrimary + : TABLE? uid + ; + +// expression + +expression + : booleanExpression + ; + +booleanExpression + : NOT booleanExpression #logicalNot + // | EXISTS '(' query ')' #exists + | valueExpression predicate? #predicated + | left=booleanExpression operator=AND right=booleanExpression #logicalBinary + | left=booleanExpression operator=OR right=booleanExpression #logicalBinary + ; + +predicate + : NOT? kind=BETWEEN lower=valueExpression AND upper=valueExpression + | NOT? kind=IN '(' expression (',' expression)* ')' + // | NOT? kind=IN '(' query ')' + | NOT? kind=RLIKE pattern=valueExpression + | NOT? kind=LIKE quantifier=(ANY | ALL) ('('')' | '(' expression (',' expression)* ')') + | NOT? kind=LIKE pattern=valueExpression + | IS NOT? kind=NULL + | IS NOT? kind=(TRUE | FALSE) + | IS NOT?
kind=DISTINCT FROM right=valueExpression + ; + +valueExpression + : primaryExpression #valueExpressionDefault + | operator=(MINUS | PLUS | TILDE) valueExpression #arithmeticUnary + | left=valueExpression operator=(ASTERISK | SLASH | PERCENT | DIV) right=valueExpression #arithmeticBinary + | left=valueExpression operator=(PLUS | MINUS | CONCAT_PIPE) right=valueExpression #arithmeticBinary + | left=valueExpression operator=AMPERSAND right=valueExpression #arithmeticBinary + | left=valueExpression operator=HAT right=valueExpression #arithmeticBinary + | left=valueExpression operator=PIPE right=valueExpression #arithmeticBinary + | left=valueExpression comparisonOperator right=valueExpression #comparison + ; + +primaryExpression + : CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase + | CASE value=expression whenClause+ (ELSE elseExpression=expression)? END #simpleCase + // | CAST '(' expression AS dataType ')' #cast + // | STRUCT '(' (argument+=namedExpression (',' argument+=namedExpression)*)? ')' #struct + | FIRST '(' expression (IGNORE NULLS)? ')' #first + | LAST '(' expression (IGNORE NULLS)? ')' #last + | POSITION '(' substr=valueExpression IN str=valueExpression ')' #position + | constant #constantDefault + | ASTERISK #star + // | qualifiedName '.' ASTERISK #star + // | '(' namedExpression (',' namedExpression)+ ')' #rowConstructor + // | '(' query ')' #subqueryExpression + // | functionName '(' (setQuantifier? argument+=expression (',' argument+=expression)*)? ')' + // (FILTER '(' WHERE where=booleanExpression ')')? (OVER windowSpec)? #functionCall + // | identifier '->' expression #lambda + // | '(' identifier (',' identifier)+ ')' '->' expression #lambda + | value=primaryExpression LS_BRACKET index=valueExpression RS_BRACKET #subscript + // | identifier #columnReference + // | base=primaryExpression '.'
fieldName=identifier #dereference + | '(' expression ')' #parenthesizedExpression + // | EXTRACT '(' field=identifier FROM source=valueExpression ')' #extract + // | (SUBSTR | SUBSTRING) '(' str=valueExpression (FROM | ',') pos=valueExpression + // ((FOR | ',') len=valueExpression)? ')' #substring + // | TRIM '(' trimOption=(BOTH | LEADING | TRAILING)? (trimStr=valueExpression)? + // FROM srcStr=valueExpression ')' #trim + // | OVERLAY '(' input=valueExpression PLACING replace=valueExpression + // FROM position=valueExpression (FOR length=valueExpression)? ')' #overlay + ; + + // base common +tableAlias + : (AS? strictIdentifier identifierList?)? + ; + +identifierList + : '(' identifierSeq ')' + ; + +identifierSeq + : identifier (COMMA identifier)* + ; + +identifier + : strictIdentifier + ; + +strictIdentifier + : IDENTIFIER_BASE #unquotedIdentifier + | quotedIdentifier #quotedIdentifierAlternative + ; + +quotedIdentifier + : STRING_LITERAL + ; + +whenClause + : WHEN condition=expression THEN result=expression + ; + uidList : uid (',' uid)* ; @@ -298,43 +308,6 @@ keyValueDefinition : DOUBLE_QUOTE_ID EQUAL_SYMBOL DOUBLE_QUOTE_ID ; -expressions - : expression (',' expression)* - ; - -// Expressions, predicates - -// Simplified approach for expression -expression - : notOperator=(NOT | '!') expression #notExpression - | expression logicalOperator expression #logicalExpression - | predicate IS NOT? testValue=(TRUE | FALSE) #isExpression - | predicate #predicateExpression - ; - -predicate - : predicate NOT? IN '(' (selectStatement | expressions) ')' #inPredicate - | left=predicate comparisonOperator right=predicate #binaryComparasionPredicate - | predicate comparisonOperator - quantifier=(ALL | ANY) '(' selectStatement ')' #subqueryComparasionPredicate - | predicate NOT? BETWEEN predicate AND predicate #betweenPredicate - | predicate NOT?
LIKE predicate #likePredicate - | expressionAtom #expressionAtomPredicate - ; - -expressionAtom - : constant #constantExpressionAtom - | fullColumnName #fullColumnNameExpressionAtom - | unaryOperator expressionAtom #unaryExpressionAtom - | BINARY expressionAtom #binaryExpressionAtom - | '(' expression (',' expression)* ')' #nestedExpressionAtom - | ROW '(' expression (',' expression)+ ')' #nestedRowExpressionAtom - | EXISTS '(' selectStatement ')' #existsExpessionAtom - | '(' selectStatement ')' #subqueryExpessionAtom - | left=expressionAtom bitOperator right=expressionAtom #bitExpressionAtom - | left=expressionAtom mathOperator right=expressionAtom #mathExpressionAtom - ; - logicalOperator : AND | '&' '&' | OR | '|' '|' ; @@ -360,11 +333,13 @@ fullColumnName ; constant - : stringLiteral | decimalLiteral - | '-' decimalLiteral - | booleanLiteral - | REAL_LITERAL | BIT_STRING - | NOT? NULL + : stringLiteral // string enclosed in quotes + | decimalLiteral // integer + | '-' decimalLiteral // negative integer + | booleanLiteral // boolean value + | REAL_LITERAL // decimal number + | BIT_STRING + | NOT? NULL // NULL | NOT NULL ; stringLiteral @@ -377,3 +352,8 @@ decimalLiteral booleanLiteral : TRUE | FALSE; + +setQuantifier + : DISTINCT + | ALL + ;