feat: update the builtin funcs of flinksql (#102)

* style: put every branch on a single line

* feat: support ASYMMETRIC and SYMMETRIC for the BETWEEN ... AND predicate

* feat: support SIMILAR TO predicate

* feat: support nested boolean expression

* feat: support escape to like and similar in flink sql

* test: add where clause test cases

* feat: add reservedKeywordsUsedAsFunctionName rule

* feat: add timepointLiteral rule and improve function call grammar
This commit is contained in:
Hayden
2023-05-24 15:03:08 +08:00
committed by GitHub
parent c409b80ee1
commit 0924acf730
12 changed files with 5984 additions and 5237 deletions

View File

@ -46,6 +46,7 @@ KW_DIV: 'DIV';
KW_ENCODING: 'ENCODING';
KW_ENFORCED: 'ENFORCED';
KW_ENGINE: 'ENGINE';
KW_EPOCH: 'EPOCH';
KW_ERROR: 'ERROR';
KW_ESTIMATED_COST: 'ESTIMATED_COST';
KW_EXCEPTION: 'EXCEPTION';
@ -181,6 +182,7 @@ KW_VIRTUAL: 'VIRTUAL';
KW_WATERMARK: 'WATERMARK';
KW_WATERMARKS: 'WATERMARKS';
KW_WEEK: 'WEEK';
KW_WEEKS: 'WEEKS';
KW_WORK: 'WORK';
KW_WRAPPER: 'WRAPPER';
KW_YEARS: 'YEARS';
@ -194,13 +196,14 @@ KW_ZONE: 'ZONE';
KW_ABS: 'ABS';
KW_ALL: 'ALL';
ALLOW: 'ALLOW';
KW_ALLOW: 'ALLOW';
KW_ALTER : 'ALTER';
KW_AND: 'AND';
KW_ANY: 'ANY';
KW_ARE: 'ARE';
KW_ARRAY: 'ARRAY';
KW_AS: 'AS';
KW_ASYMMETRIC: 'ASYMMETRIC';
KW_AT: 'AT';
KW_AVG: 'AVG';
KW_BEGIN: 'BEGIN';
@ -256,6 +259,7 @@ KW_EACH: 'EACH';
KW_ELSE: 'ELSE';
KW_END: 'END';
KW_EQUALS: 'EQUALS';
KW_ESCAPE: 'ESCAPE';
KW_EXCEPT: 'EXCEPT';
KW_EXECUTE: 'EXECUTE';
KW_EXISTS: 'EXISTS';
@ -330,6 +334,7 @@ KW_PER: 'PER';
KW_PERCENT: 'PERCENT';
KW_PERIOD: 'PERIOD';
KW_POSITION: 'POSITION';
KW_POWER: 'POWER';
KW_PRIMARY: 'PRIMARY';
KW_RANGE: 'RANGE';
KW_RANK: 'RANK';
@ -345,12 +350,14 @@ KW_SECOND: 'SECOND';
KW_SELECT: 'SELECT';
KW_SET: 'SET';
KW_SHOW: 'SHOW';
KW_SIMILAR: 'SIMILAR';
KW_SKIP: 'SKIP';
KW_SMALLINT: 'SMALLINT';
KW_START: 'START';
KW_STATIC: 'STATIC';
KW_SUBSTRING: 'SUBSTRING';
KW_SUM: 'SUM';
KW_SYMMETRIC: 'SYMMETRIC';
KW_SYSTEM: 'SYSTEM';
KW_SYSTEM_TIME: 'SYSTEM_TIME';
KW_SYSTEM_USER: 'SYSTEM_USER';
@ -361,6 +368,7 @@ KW_TIME: 'TIME';
KW_TIMESTAMP: 'TIMESTAMP';
KW_TINYINT: 'TINYINT';
KW_TO: 'TO';
KW_TRAILING: 'TRAILING';
KW_TRUE: 'TRUE';
KW_TRUNCATE: 'TRUNCATE';
KW_UNION: 'UNION';
@ -425,7 +433,7 @@ DOUBLE_VERTICAL_SIGN: '||';
DOUBLE_HYPNEN_SIGN: '--';
SLASH_SIGN: '/';
QUESTION_MARK_SIGN: '?';
DOUBLE_RIGHT_ARROW: '=>';
DOUBLE_RIGHT_ARROW: '=>';
STRING_LITERAL: DQUOTA_STRING | SQUOTA_STRING | BQUOTA_STRING;
DIG_LITERAL: DEC_DIGIT+;
REAL_LITERAL: (DEC_DIGIT+)? '.' DEC_DIGIT+

View File

@ -445,7 +445,7 @@ tableReference
tablePrimary
: KW_TABLE? tablePath systemTimePeriod? (KW_AS? correlationName)?
| KW_LATERAL KW_TABLE LR_BRACKET functionName LR_BRACKET expression (COMMA expression)* RR_BRACKET RR_BRACKET
| KW_LATERAL KW_TABLE LR_BRACKET functionName LR_BRACKET functionParam (COMMA functionParam)* RR_BRACKET RR_BRACKET
| KW_LATERAL? LR_BRACKET queryStatement RR_BRACKET
| KW_UNNEST LR_BRACKET expression RR_BRACKET
;
@ -664,24 +664,27 @@ booleanExpression
| valueExpression predicate? #predicated
| left=booleanExpression operator=KW_AND right=booleanExpression #logicalBinary
| left=booleanExpression operator=KW_OR right=booleanExpression #logicalBinary
| booleanExpression KW_IS KW_NOT? kind=(KW_TRUE | KW_FALSE | KW_UNKNOWN | KW_NULL) #logicalNested
;
predicate
: KW_NOT? kind=KW_BETWEEN lower=valueExpression KW_AND upper=valueExpression
: KW_NOT?
kind=KW_BETWEEN (KW_ASYMMETRIC | KW_SYMMETRIC)?
lower=valueExpression KW_AND
upper=valueExpression
| KW_NOT? kind=KW_IN '(' expression (',' expression)* ')'
| KW_NOT? kind=KW_IN '(' queryStatement ')'
| kind=KW_EXISTS '(' queryStatement ')'
| KW_NOT? kind=KW_RLIKE pattern=valueExpression
| KW_NOT? kind=KW_LIKE quantifier=(KW_ANY | KW_ALL) ('('')' | '(' expression (',' expression)* ')')
| KW_NOT? kind=KW_LIKE pattern=valueExpression
| KW_IS KW_NOT? kind=KW_NULL
| KW_IS KW_NOT? kind=(KW_TRUE | KW_FALSE)
| likePredicate
| KW_IS KW_NOT? kind=(KW_TRUE | KW_FALSE | KW_UNKNOWN | KW_NULL)
| KW_IS KW_NOT? kind=KW_DISTINCT KW_FROM right=valueExpression
| KW_NOT? kind=KW_SIMILAR KW_TO right=valueExpression (KW_ESCAPE stringLiteral)?
;
likePredicate
: KW_NOT? kind=KW_LIKE quantifier=(KW_ANY | KW_ALL) ('('')' | '(' expression (',' expression)* ')')
| KW_NOT? kind=KW_LIKE pattern=valueExpression
| KW_NOT? kind=KW_LIKE pattern=valueExpression (KW_ESCAPE stringLiteral)?
;
valueExpression
@ -708,7 +711,7 @@ primaryExpression
| uid '.' '*' #star
// | '(' namedExpression (',' namedExpression)+ ')' #rowConstructor
| '(' queryStatement ')' #subqueryExpression
| functionName '(' (setQuantifier? expression (',' expression)*)? ')' #functionCall
| functionName '(' (setQuantifier? functionParam (',' functionParam)*)? ')' #functionCall
// | identifier '->' expression #lambda
// | '(' identifier (',' identifier)+ ')' '->' expression #lambda
| value=primaryExpression LS_BRACKET index=valueExpression RS_BRACKET #subscript
@ -725,16 +728,22 @@ primaryExpression
;
functionName
: reservedKeywords
: reservedKeywordsUsedAsFuncName
| nonReservedKeywords
| uid
;
// A single argument of a function call (also used for the arguments of a
// LATERAL TABLE function call in tablePrimary). An argument is either one of
// the keyword/symbol tokens from reservedKeywordsUsedAsFuncParam, a bare time
// unit keyword, or a general expression.
// The keyword alternatives are listed before `expression`, so they are the
// lower-numbered alternatives when more than one could match the same input.
// NOTE(review): every token in timePointUnit also appears in timeIntervalUnit,
// so the timePointUnit alternative looks unreachable for a lone keyword —
// confirm whether it is still needed here.
functionParam
: reservedKeywordsUsedAsFuncParam
| timeIntervalUnit
| timePointUnit
| expression
;
// A dereference target, expressed as a single uid.
// NOTE(review): the rule that consumes this is not visible in this view.
dereferenceDefinition
: uid
;
// base common
correlationName
@ -754,7 +763,7 @@ errorCapturingMultiUnitsInterval
;
multiUnitsInterval
: (intervalValue intervalTimeUnit)+
: (intervalValue timeIntervalUnit)+
;
errorCapturingUnitToUnitInterval
@ -762,7 +771,7 @@ errorCapturingUnitToUnitInterval
;
unitToUnitInterval
: value=intervalValue from=intervalTimeUnit KW_TO to=intervalTimeUnit
: value=intervalValue from=timeIntervalUnit KW_TO to=timeIntervalUnit
;
intervalValue
@ -770,11 +779,6 @@ intervalValue
| STRING_LITERAL
;
intervalTimeUnit // TODO: 需要整理 interval 时间粒度比如 SECOND、DAY
: identifier
| reservedKeywords
;
columnAlias
: KW_AS? identifier identifierList?
;
@ -853,34 +857,62 @@ tablePropertyValue
;
logicalOperator
: KW_AND | '&' '&' | KW_OR | '|' '|'
: KW_AND
| '&' '&'
| KW_OR
| '|' '|'
;
comparisonOperator
: '=' | '>' | '<' | '<' '=' | '>' '='
| '<' '>' | '!' '=' | '<' '=' '>'
: '='
| '>'
| '<'
| '<' '='
| '>' '='
| '<' '>'
| '!' '='
| '<' '=' '>'
;
bitOperator
: '<' '<' | '>' '>' | '&' | '^' | '|'
: '<' '<'
| '>' '>'
| '&'
| '^'
| '|'
;
mathOperator
: '*' | SLASH_SIGN | PENCENT_SIGN | KW_DIV | '+' | '-' | DOUBLE_HYPNEN_SIGN
: '*'
| SLASH_SIGN
| PENCENT_SIGN
| KW_DIV
| '+'
| '-'
| DOUBLE_HYPNEN_SIGN
;
unaryOperator
: '!' | '~' | ADD_SIGN | '-' | KW_NOT
: '!'
| '~'
| ADD_SIGN
| '-'
| KW_NOT
;
// Literal constant values.
constant
: stringLiteral // quoted string
| decimalLiteral // integer
| timeIntervalExpression // KW_INTERVAL keywords
| HYPNEN_SIGN decimalLiteral // negative integer
: timeIntervalExpression
| timePointLiteral
| stringLiteral // quoted string
| HYPNEN_SIGN? decimalLiteral // positive/negative integer
| booleanLiteral // boolean value
| REAL_LITERAL // decimal (fractional) number
| BIT_STRING
| KW_NOT? KW_NULL // null | not null
| KW_NOT? KW_NULL // null | not null
;
// A time-point literal: a time unit keyword (see timePointUnit) immediately
// followed by a string literal. Referenced from the `constant` rule.
// NOTE(review): the accepted format of the string is not constrained by the
// grammar; presumably it is validated downstream — confirm.
timePointLiteral
: timePointUnit stringLiteral
;
stringLiteral
@ -899,16 +931,99 @@ setQuantifier
| KW_ALL
;
// Time unit keywords accepted as the unit of a timePointLiteral and as a bare
// function argument (see functionParam). Singular forms only, ranging from
// YEAR down to MICROSECOND.
timePointUnit
: KW_YEAR
| KW_QUARTER
| KW_MONTH
| KW_WEEK
| KW_DAY
| KW_HOUR
| KW_MINUTE
| KW_SECOND
| KW_MILLISECOND
| KW_MICROSECOND
;
// Time unit keywords accepted in interval expressions (multiUnitsInterval,
// unitToUnitInterval) and as a bare function argument (see functionParam).
// This is a superset of timePointUnit: it additionally allows MILLENNIUM,
// CENTURY, DECADE, NANOSECOND and EPOCH, plus the plural spellings
// YEARS, MONTHS, WEEKS, DAYS, HOURS, MINUTES and SECONDS.
timeIntervalUnit
: KW_MILLENNIUM
| KW_CENTURY
| KW_DECADE
| KW_YEAR
| KW_YEARS
| KW_QUARTER
| KW_MONTH
| KW_MONTHS
| KW_WEEK
| KW_WEEKS
| KW_DAY
| KW_DAYS
| KW_HOUR
| KW_HOURS
| KW_MINUTE
| KW_MINUTES
| KW_SECOND
| KW_SECONDS
| KW_MILLISECOND
| KW_MICROSECOND
| KW_NANOSECOND
| KW_EPOCH
;
// Reserved tokens that may appear on their own as a function argument
// (see functionParam).
// NOTE(review): LEADING/TRAILING/BOTH presumably serve TRIM-style calls and
// ASTERISK_SIGN serves COUNT(*)-style calls — confirm against the test cases.
// ASTERISK_SIGN is a symbol token rather than a KW_ keyword; it is listed here
// so that it can be used as an argument as well.
reservedKeywordsUsedAsFuncParam
: KW_LEADING
| KW_TRAILING
| KW_BOTH
| KW_ALL
| KW_DISTINCT
| ASTERISK_SIGN
;
// The subset of reserved keywords that is also accepted as a function name
// (referenced from functionName). Several time unit keywords (YEAR, QUARTER,
// MONTH, WEEK, HOUR, MINUTE, SECOND) are included, so they can be used both as
// a unit and as the name of a function call.
reservedKeywordsUsedAsFuncName
: KW_ABS
| KW_ARRAY
| KW_AVG
| KW_CAST
| KW_CEIL
| KW_COALESCE
| KW_COLLECT
| KW_COUNT
| KW_DATE
| KW_GROUPING
| KW_HOUR
| KW_IF
| KW_LAG
| KW_LEFT
| KW_MAP
| KW_MINUTE
| KW_MONTH
| KW_OVERLAY
| KW_POSITION
| KW_POWER
| KW_QUARTER
| KW_RANK
| KW_RIGHT
| KW_SECOND
| KW_SUBSTRING
| KW_SUM
| KW_TIME
| KW_TIMESTAMP
| KW_TRUNCATE
| KW_UPPER
| KW_WEEK
| KW_YEAR
;
reservedKeywords
: KW_ABS
| KW_ALL
| ALLOW
| KW_ALLOW
| KW_ALTER
| KW_AND
| KW_ANY
| KW_ARE
| KW_ARRAY
| KW_AS
| KW_ASYMMETRIC
| KW_AT
| KW_AVG
| KW_BEGIN
@ -1053,12 +1168,14 @@ reservedKeywords
| KW_SELECT
| KW_SET
| KW_SHOW
| KW_SIMILAR
| KW_SKIP
| KW_SMALLINT
| KW_START
| KW_STATIC
| KW_SUBSTRING
| KW_SUM
| KW_SYSTEM_TIME
| KW_SYSTEM
| KW_SYSTEM_TIME
| KW_SYSTEM_USER