Feat/spark g4 (#168)

* feat: spark g4 test

* fix: fixed build lint

---------

Co-authored-by: liuyi <liuyi@dtstack.com>
Co-authored-by: dilu <dilu@dtstack.com>
This commit is contained in:
Frank
2023-10-08 14:23:06 +08:00
committed by GitHub
parent 0a9a7d15d3
commit 05da14d007
18 changed files with 26129 additions and 25144 deletions

View File

@ -26,44 +26,6 @@ lexer grammar SparkSqlLexer;
*/
public has_unclosed_bracketed_comment = false;
/**
* Verify whether current token is a valid decimal token (which contains dot).
* Returns true if the character that follows the token is not a digit or letter or underscore.
*
* For example:
* For char stream "2.3", "2." is not a valid decimal token, because it is followed by digit '3'.
* For char stream "2.3_", "2.3" is not a valid decimal token, because it is followed by '_'.
* For char stream "2.3W", "2.3" is not a valid decimal token, because it is followed by 'W'.
* For char stream "12.0D 34.E2+0.12 ", "12.0D" is a valid decimal token because it is followed
* by a space, and "34.E2" is a valid decimal token because it is followed by the symbol '+',
* which is not a digit, letter, or underscore.
*/
public isValidDecimal() {
const nextChar = _input.LA(1);
if (nextChar >= 'A' && nextChar <= 'Z' || nextChar >= '0' && nextChar <= '9' ||
nextChar == '_') {
return false;
} else {
return true;
}
}
/**
* This method is called when we see '/*' and try to match it as a bracketed comment.
* If the next character is '+', the text should be parsed as a hint later, so we cannot
* match it as a bracketed comment.
*
* Returns true if the next character is '+'.
*/
public isHint() {
const nextChar = _input.LA(1);
if (nextChar == '+') {
return true;
} else {
return false;
}
}
/**
* This method is called when the character stream ends, in order to detect an
* unclosed bracketed comment.
@ -71,7 +33,7 @@ lexer grammar SparkSqlLexer;
* and we set the flag and fail later.
*/
public markUnclosedComment() {
has_unclosed_bracketed_comment = true;
this.has_unclosed_bracketed_comment = true;
}
}
@ -488,26 +450,26 @@ INTEGER_VALUE
EXPONENT_VALUE
: DIGIT+ EXPONENT
| DECIMAL_DIGITS EXPONENT {isValidDecimal()}?
| DECIMAL_DIGITS EXPONENT
;
DECIMAL_VALUE
: DECIMAL_DIGITS {isValidDecimal()}?
: DECIMAL_DIGITS
;
FLOAT_LITERAL
: DIGIT+ EXPONENT? 'F'
| DECIMAL_DIGITS EXPONENT? 'F' {isValidDecimal()}?
| DECIMAL_DIGITS EXPONENT? 'F'
;
DOUBLE_LITERAL
: DIGIT+ EXPONENT? 'D'
| DECIMAL_DIGITS EXPONENT? 'D' {isValidDecimal()}?
| DECIMAL_DIGITS EXPONENT? 'D'
;
BIGDECIMAL_LITERAL
: DIGIT+ EXPONENT? 'BD'
| DECIMAL_DIGITS EXPONENT? 'BD' {isValidDecimal()}?
| DECIMAL_DIGITS EXPONENT? 'BD'
;
IDENTIFIER
@ -540,7 +502,7 @@ SIMPLE_COMMENT
;
BRACKETED_COMMENT
: '/*' {!isHint()}? ( BRACKETED_COMMENT | . )*? ('*/' | {markUnclosedComment();} EOF) -> channel(HIDDEN)
: '/*' ( BRACKETED_COMMENT | . )*? ('*/' | {this.markUnclosedComment();} EOF) -> channel(HIDDEN)
;
WS

View File

@ -1166,11 +1166,15 @@ windowSpec
RIGHT_PAREN
;
/**
* The label `start` is replaced with `start_` in this grammar; see the issue linked below.
* https://github.com/tunnelvisionlabs/antlr4ts/issues/417
*/
windowFrame
: frameType=KW_RANGE start=frameBound
| frameType=KW_ROWS start=frameBound
| frameType=KW_RANGE KW_BETWEEN start=frameBound KW_AND end=frameBound
| frameType=KW_ROWS KW_BETWEEN start=frameBound KW_AND end=frameBound
: frameType=KW_RANGE start_=frameBound
| frameType=KW_ROWS start_=frameBound
| frameType=KW_RANGE KW_BETWEEN start_=frameBound KW_AND end=frameBound
| frameType=KW_ROWS KW_BETWEEN start_=frameBound KW_AND end=frameBound
;
frameBound