fix: correct the grammar usage, especially in the parts targeting javascript (#109)

* build: ignore gen folder

* fix: correct the grammar when targeting TypeScript

* fix: move base Lexer and fix JavaScript syntax

* fix: correct the usage of JavaScript in the grammar
This commit is contained in:
Ziv
2023-05-24 15:07:53 +08:00
committed by GitHub
parent 9c82a5d248
commit eeb111b5c5
23 changed files with 231 additions and 215 deletions

View File

@ -178,14 +178,14 @@ PARAM
Operator
: ((OperatorCharacter | ('+' | '-'
{checkLA('-')}?)+ (OperatorCharacter | '/'
{checkLA('*')}?) | '/'
{checkLA('*')}?)+ | // special handling for the single-character operators + and -
{this.checkLA('-')}?)+ (OperatorCharacter | '/'
{this.checkLA('*')}?) | '/'
{this.checkLA('*')}?)+ | // special handling for the single-character operators + and -
[+-])
//TODO somehow rewrite this part without using Actions
{
HandleLessLessGreaterGreater();
this.HandleLessLessGreaterGreater();
}
;
/* This rule handles operators which end with + or -, and sets the token type to Operator. It is comprised of four
@ -202,9 +202,9 @@ Operator
OperatorEndingWithPlusMinus
: (OperatorCharacterNotAllowPlusMinusAtEnd | '-'
{checkLA('-')}? | '/'
{checkLA('*')}?)* OperatorCharacterAllowPlusMinusAtEnd Operator? ('+' | '-'
{checkLA('-')}?)+ -> type (Operator)
{this.checkLA('-')}? | '/'
{this.checkLA('*')}?)* OperatorCharacterAllowPlusMinusAtEnd Operator? ('+' | '-'
{this.checkLA('-')}?)+ -> type (Operator)
;
// Each of the following fragment rules omits the +, -, and / characters, which must always be handled in a special way
@ -2200,11 +2200,11 @@ fragment IdentifierStartChar
[\u00AA\u00B5\u00BA\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF]
| // these are the letters above 0xFF which only need a single UTF-16 code unit
[\u0100-\uD7FF\uE000-\uFFFF]
{charIsLetter()}?
{this.charIsLetter()}?
| // letters which require multiple UTF-16 code units
[\uD800-\uDBFF] [\uDC00-\uDFFF]
{
CheckIfUtf32Letter()
this.CheckIfUtf32Letter()
}?
;
@ -2315,7 +2315,7 @@ UnterminatedUnicodeEscapeStringConstant
BeginDollarStringConstant
: '$' Tag? '$'
{pushTag();} -> pushMode (DollarQuotedStringMode)
{this.pushTag();} -> pushMode (DollarQuotedStringMode)
;
/* "The tag, if any, of a dollar-quoted string follows the same rules as an
* unquoted identifier, except that it cannot contain a dollar sign."
@ -2366,7 +2366,7 @@ Integral
NumericFail
: Digits '..'
{HandleNumericFail();}
{this.HandleNumericFail();}
;
Numeric
@ -2424,7 +2424,7 @@ UnterminatedBlockComment
// Optional assertion to make sure this rule is working as intended
{
UnterminatedBlockCommentDebugAssert();
this.UnterminatedBlockCommentDebugAssert();
}
;
//
@ -2538,7 +2538,6 @@ DollarText
EndDollarStringConstant
: ('$' Tag? '$')
{isTag()}?
{popTag();} -> popMode
{this.isTag()}?
{this.popTag();} -> popMode
;

View File

@ -1957,10 +1957,8 @@ aggregate_with_argtypes_list
createfunc_opt_list
: createfunc_opt_item+
{
ParseRoutineBody(_localctx);
}
// | createfunc_opt_list createfunc_opt_item
this.ParseRoutineBody(localctx);
}
;
common_func_opt_item
@ -4562,7 +4560,6 @@ from pl_gram.y, line ~2982
* at least we need not worry about it appearing as an identifier.
*/
// | INTO
| LATERAL_P
| LEADING
@ -4606,7 +4603,8 @@ from pl_gram.y, line ~2982
/*PLSQL grammar */
/************************************************************************************************************************************************************/ pl_function
/************************************************************************************************************************************************************/
pl_function
: comp_options pl_block opt_semi
;
@ -4925,7 +4923,6 @@ exit_type
: EXIT
| CONTINUE_P
;
//todo implement RETURN statement according to initial grammar line 1754
stmt_return
: RETURN (NEXT sql_expression | QUERY (EXECUTE a_expr opt_for_using_expression | selectstmt) | opt_return_result) SEMI
@ -5324,4 +5321,3 @@ opt_returning_clause_into
: INTO opt_strict into_target
|
;

View File

@ -24,10 +24,6 @@ options {
superClass=PlSqlBaseLexer;
}
@lexer::postinclude {
#include <PlSqlBaseLexer.h>
}
ABORT: 'ABORT';
ABS: 'ABS';
ACCESS: 'ACCESS';
@ -2342,17 +2338,17 @@ INTRODUCER: '_';
SINGLE_LINE_COMMENT: '--' ~('\r' | '\n')* NEWLINE_EOF -> channel(HIDDEN);
MULTI_LINE_COMMENT: '/*' .*? '*/' -> channel(HIDDEN);
// https://docs.oracle.com/cd/E11882_01/server.112/e16604/ch_twelve034.htm#SQPUG054
REMARK_COMMENT: 'REM' {IsNewlineAtPos(-4)}? 'ARK'? (' ' ~('\r' | '\n')*)? NEWLINE_EOF -> channel(HIDDEN);
REMARK_COMMENT: 'REM' {this.IsNewlineAtPos(-4)}? 'ARK'? (' ' ~('\r' | '\n')*)? NEWLINE_EOF -> channel(HIDDEN);
// https://docs.oracle.com/cd/E11882_01/server.112/e16604/ch_twelve032.htm#SQPUG052
PROMPT_MESSAGE: 'PRO' {IsNewlineAtPos(-4)}? 'MPT'? (' ' ~('\r' | '\n')*)? NEWLINE_EOF;
PROMPT_MESSAGE: 'PRO' {this.IsNewlineAtPos(-4)}? 'MPT'? (' ' ~('\r' | '\n')*)? NEWLINE_EOF;
// TODO: should start with a newline
START_CMD
//: 'STA' 'RT'? SPACE ~('\r' | '\n')* NEWLINE_EOF
// https://docs.oracle.com/cd/B19306_01/server.102/b14357/ch12002.htm
// https://docs.oracle.com/cd/B19306_01/server.102/b14357/ch12003.htm
: '@' {IsNewlineAtPos(-2)}? '@'? ~('\r' | '\n')* NEWLINE_EOF
: '@' {this.IsNewlineAtPos(-2)}? '@'? ~('\r' | '\n')* NEWLINE_EOF
;
REGULAR_ID: SIMPLE_LETTER (SIMPLE_LETTER | '$' | '_' | '#' | [0-9])*;
@ -2366,4 +2362,4 @@ fragment QUESTION_MARK : '?';
fragment SIMPLE_LETTER : [A-Z];
fragment FLOAT_FRAGMENT : UNSIGNED_INTEGER* '.'? UNSIGNED_INTEGER+;
fragment NEWLINE : '\r'? '\n';
fragment SPACE : [ \t];
fragment SPACE : [ \t];

View File

@ -25,10 +25,6 @@ options {
superClass=PlSqlBaseParser;
}
@parser::postinclude {
#include <PlSqlBaseParser.h>
}
program: sql_script EOF;
sql_script
@ -2254,7 +2250,7 @@ partial_database_recovery
;
partial_database_recovery_10g
: {isVersion10()}? STANDBY
: {this.isVersion10()}? STANDBY
( TABLESPACE tablespace (',' tablespace)*
| DATAFILE CHAR_STRING | filenumber (',' CHAR_STRING | filenumber)*
)
@ -6760,4 +6756,4 @@ numeric_function_name
| NVL
| ROUND
| SUM
;
;

View File

@ -17,57 +17,55 @@
grammar SparkSql;
@parser::members {
/**
* When false, INTERSECT is given the greater precedence over the other set
* operations (UNION, EXCEPT and MINUS) as per the SQL standard.
*/
// public boolean legacy_setops_precedence_enbled = false;
/**
* When false, a literal with an exponent would be converted into
* double type rather than decimal type.
*/
// public boolean legacy_exponent_literal_as_decimal_enabled = false;
global.legacy_exponent_literal_as_decimal_enabled = false;
/**
* When true, the behavior of keywords follows ANSI SQL standard.
*/
// public boolean SQL_standard_keyword_behavior = false;
global.legacy_setops_precedence_enbled = false;
global.legacy_exponent_literal_as_decimal_enabled = false;
global.SQL_standard_keyword_behavior = false;
/**
* When false, INTERSECT is given the greater precedence over the other set
* operations (UNION, EXCEPT and MINUS) as per the SQL standard.
*/
public legacy_setops_precedence_enbled = false;
/**
* When false, a literal with an exponent would be converted into
* double type rather than decimal type.
*/
public legacy_exponent_literal_as_decimal_enabled = false;
/**
* When true, the behavior of keywords follows ANSI SQL standard.
*/
public SQL_standard_keyword_behavior = false;
}
@lexer::members {
var ctx = this
/**
* Verify whether current token is a valid decimal token (which contains dot).
* Returns true if the character that follows the token is not a digit or letter or underscore.
*
* For example:
* For char stream "2.3", "2." is not a valid decimal token, because it is followed by digit '3'.
* For char stream "2.3_", "2.3" is not a valid decimal token, because it is followed by '_'.
* For char stream "2.3W", "2.3" is not a valid decimal token, because it is followed by 'W'.
* For char stream "12.0D 34.E2+0.12 " 12.0D is a valid decimal token because it is followed
* by a space. 34.E2 is a valid decimal token because it is followed by symbol '+'
* which is not a digit or letter or underscore.
*/
global.isValidDecimal = function() {
let nextChar = ctx._input.LA(1);
return !(nextChar >= 'A' && nextChar <= 'Z' || nextChar >= '0' && nextChar <= '9' || nextChar == '_')
}
/**
 * Decides whether the decimal text matched so far (it contains a dot) may end here.
 * The answer is true when the character following the token is neither a digit,
 * nor an upper-case letter A-Z, nor an underscore.
 *
 * Examples:
 *   "2.3"  : "2." is rejected because the digit '3' follows it.
 *   "2.3_" : "2.3" is rejected because '_' follows it.
 *   "2.3W" : "2.3" is rejected because 'W' follows it.
 *   "12.0D 34.E2+0.12 " : 12.0D is accepted because a space follows it, and
 *                         34.E2 is accepted because the symbol '+' follows it.
 *
 * NOTE(review): lower-case letters are not tested here; presumably the character
 * stream is upper-cased before lexing - confirm against the stream implementation.
 */
isValidDecimal() {
    const following = this.fromCodePoint(this._input.LA(1));
    const isUpperLetter = following >= 'A' && following <= 'Z';
    const isDigit = following >= '0' && following <= '9';
    return !(isUpperLetter || isDigit || following == '_');
}
/**
* This method will be called when we see '/*' and try to match it as a bracketed comment.
* If the next character is '+', it should be parsed as hint later, and we cannot match
* it as a bracketed comment.
*
* Returns true if the next character is '+'.
*/
global.isHint = function() {
let nextChar = ctx._input.LA(1);
return nextChar == '+'
}
/**
 * Consulted right after '/*' has been matched as the opening of a bracketed comment.
 * A '+' directly behind the opener marks a hint, which has to be parsed as a hint later
 * and therefore must not be matched as a bracketed comment.
 *
 * Returns true when the next character is '+'.
 */
isHint() {
    return this.fromCodePoint(this._input.LA(1)) == '+';
}
/**
 * Turns a code point read from the character stream into a string so that it can be
 * compared against character literals.
 *
 * Callers pass the result of LA(1), which is a negative sentinel (EOF) once the input
 * is exhausted. String.fromCodePoint throws a RangeError for negative values, so that
 * case is mapped to the empty string instead; the empty string fails every range and
 * equality comparison the callers perform, i.e. "no character follows".
 *
 * @param codePoint code point returned by the stream's LA(), or a negative value at EOF
 * @returns the character for the code point, or '' when codePoint is negative
 */
fromCodePoint(codePoint) {
    if (codePoint < 0) {
        return '';
    }
    return String.fromCodePoint(codePoint);
}
}
program
@ -471,11 +469,11 @@ multiInsertQueryBody
queryTerm
: queryPrimary #queryTermDefault
| left=queryTerm {legacy_setops_precedence_enbled}?
| left=queryTerm {this.legacy_setops_precedence_enbled}?
operator=(INTERSECT | UNION | EXCEPT | SETMINUS) setQuantifier? right=queryTerm #setOperation
| left=queryTerm {!legacy_setops_precedence_enbled}?
| left=queryTerm {!this.legacy_setops_precedence_enbled}?
operator=INTERSECT setQuantifier? right=queryTerm #setOperation
| left=queryTerm {!legacy_setops_precedence_enbled}?
| left=queryTerm {!this.legacy_setops_precedence_enbled}?
operator=(UNION | EXCEPT | SETMINUS) setQuantifier? right=queryTerm #setOperation
;
@ -928,10 +926,10 @@ windowSpec
;
windowFrame
: frameType=RANGE start=frameBound
| frameType=ROWS start=frameBound
| frameType=RANGE BETWEEN start=frameBound AND end=frameBound
| frameType=ROWS BETWEEN start=frameBound AND end=frameBound
: frameType=RANGE frameStart=frameBound
| frameType=ROWS frameStart=frameBound
| frameType=RANGE BETWEEN frameStart=frameBound AND end=frameBound
| frameType=ROWS BETWEEN frameStart=frameBound AND end=frameBound
;
frameBound
@ -970,14 +968,14 @@ errorCapturingIdentifierExtra
identifier
: strictIdentifier
| {!SQL_standard_keyword_behavior}? strictNonReserved
| {!this.SQL_standard_keyword_behavior}? strictNonReserved
;
strictIdentifier
: IDENTIFIER #unquotedIdentifier
| quotedIdentifier #quotedIdentifierAlternative
| {SQL_standard_keyword_behavior}? ansiNonReserved #unquotedIdentifier
| {!SQL_standard_keyword_behavior}? nonReserved #unquotedIdentifier
| {this.SQL_standard_keyword_behavior}? ansiNonReserved #unquotedIdentifier
| {!this.SQL_standard_keyword_behavior}? nonReserved #unquotedIdentifier
;
quotedIdentifier
@ -985,9 +983,9 @@ quotedIdentifier
;
number
: {!legacy_exponent_literal_as_decimal_enabled}? MINUS? EXPONENT_VALUE #exponentLiteral
| {!legacy_exponent_literal_as_decimal_enabled}? MINUS? DECIMAL_VALUE #decimalLiteral
| {legacy_exponent_literal_as_decimal_enabled}? MINUS? (EXPONENT_VALUE | DECIMAL_VALUE) #legacyDecimalLiteral
: {!this.legacy_exponent_literal_as_decimal_enabled}? MINUS? EXPONENT_VALUE #exponentLiteral
| {!this.legacy_exponent_literal_as_decimal_enabled}? MINUS? DECIMAL_VALUE #decimalLiteral
| {this.legacy_exponent_literal_as_decimal_enabled}? MINUS? (EXPONENT_VALUE | DECIMAL_VALUE) #legacyDecimalLiteral
| MINUS? INTEGER_VALUE #integerLiteral
| MINUS? BIGINT_LITERAL #bigIntLiteral
| MINUS? SMALLINT_LITERAL #smallIntLiteral
@ -1004,7 +1002,7 @@ alterColumnAction
| setOrDrop=(SET | DROP) NOT NULL
;
// When `SQL_standard_keyword_behavior=true`, there are 2 kinds of keywords in Spark SQL.
// When `this.SQL_standard_keyword_behavior=true`, there are 2 kinds of keywords in Spark SQL.
// - Reserved keywords:
// Keywords that are reserved and can't be used as identifiers for table, view, column,
// function, alias, etc.
@ -1770,26 +1768,26 @@ INTEGER_VALUE
EXPONENT_VALUE
: DIGIT+ EXPONENT
| DECIMAL_DIGITS EXPONENT {isValidDecimal()}?
| DECIMAL_DIGITS EXPONENT {this.isValidDecimal()}?
;
DECIMAL_VALUE
: DECIMAL_DIGITS {isValidDecimal()}?
: DECIMAL_DIGITS {this.isValidDecimal()}?
;
FLOAT_LITERAL
: DIGIT+ EXPONENT? 'F'
| DECIMAL_DIGITS EXPONENT? 'F' {isValidDecimal()}?
| DECIMAL_DIGITS EXPONENT? 'F' {this.isValidDecimal()}?
;
DOUBLE_LITERAL
: DIGIT+ EXPONENT? 'D'
| DECIMAL_DIGITS EXPONENT? 'D' {isValidDecimal()}?
| DECIMAL_DIGITS EXPONENT? 'D' {this.isValidDecimal()}?
;
BIGDECIMAL_LITERAL
: DIGIT+ EXPONENT? 'BD'
| DECIMAL_DIGITS EXPONENT? 'BD' {isValidDecimal()}?
| DECIMAL_DIGITS EXPONENT? 'BD' {this.isValidDecimal()}?
;
IDENTIFIER
@ -1826,7 +1824,7 @@ SIMPLE_COMMENT
;
BRACKETED_COMMENT
: '/*' {!isHint()}? (BRACKETED_COMMENT|.)*? '*/' -> channel(HIDDEN)
: '/*' {!this.isHint()}? (BRACKETED_COMMENT|.)*? '*/' -> channel(HIDDEN)
;
WS