fix: correct the grammar usage, especially in the parts targeting javascript (#109)

* build: ignore gen folder
* fix: correct the grammar when targeting Typescript
* fix: move base Lexer and fix javascript syntax
* fix: correct the usage of Javascript in grammar
2023-05-24 15:07:53 +08:00
parent 9c82a5d248
commit eeb111b5c5
23 changed files with 231 additions and 215 deletions
--- a/src/grammar/pgsql/PostgreSQLLexer.g4
+++ b/src/grammar/pgsql/PostgreSQLLexer.g4
@ -178,14 +178,14 @@ PARAM

 Operator
   : ((OperatorCharacter | ('+' | '-'
-   {checkLA('-')}?)+ (OperatorCharacter | '/'
-   {checkLA('*')}?) | '/'
-   {checkLA('*')}?)+ | // special handling for the single-character operators + and -
+   {this.checkLA('-')}?)+ (OperatorCharacter | '/'
+   {this.checkLA('*')}?) | '/'
+   {this.checkLA('*')}?)+ | // special handling for the single-character operators + and -
   [+-])
   //TODO somehow rewrite this part without using Actions

   {
-    HandleLessLessGreaterGreater();
+    this.HandleLessLessGreaterGreater();
   }
   ;
 /* This rule handles operators which end with + or -, and sets the token type to Operator. It is comprised of four
@ -202,9 +202,9 @@ Operator

 OperatorEndingWithPlusMinus
   : (OperatorCharacterNotAllowPlusMinusAtEnd | '-'
-   {checkLA('-')}? | '/'
-   {checkLA('*')}?)* OperatorCharacterAllowPlusMinusAtEnd Operator? ('+' | '-'
-   {checkLA('-')}?)+ -> type (Operator)
+   {this.checkLA('-')}? | '/'
+   {this.checkLA('*')}?)* OperatorCharacterAllowPlusMinusAtEnd Operator? ('+' | '-'
+   {this.checkLA('-')}?)+ -> type (Operator)
   ;
   // Each of the following fragment rules omits the +, -, and / characters, which must always be handled in a special way

@ -2200,11 +2200,11 @@ fragment IdentifierStartChar
   [\u00AA\u00B5\u00BA\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF]
   | // these are the letters above 0xFF which only need a single UTF-16 code unit
   [\u0100-\uD7FF\uE000-\uFFFF]
-   {charIsLetter()}?
+   {this.charIsLetter()}?
   | // letters which require multiple UTF-16 code units
   [\uD800-\uDBFF] [\uDC00-\uDFFF]
   {
-    CheckIfUtf32Letter()
+    this.CheckIfUtf32Letter()
   }?

   ;
@ -2315,7 +2315,7 @@ UnterminatedUnicodeEscapeStringConstant

 BeginDollarStringConstant
   : '$' Tag? '$'
-   {pushTag();} -> pushMode (DollarQuotedStringMode)
+   {this.pushTag();} -> pushMode (DollarQuotedStringMode)
   ;
 /* "The tag, if any, of a dollar-quoted string follows the same rules as an
 * unquoted identifier, except that it cannot contain a dollar sign."
@ -2366,7 +2366,7 @@ Integral

 NumericFail
   : Digits '..'
-   {HandleNumericFail();}
+   {this.HandleNumericFail();}
   ;

 Numeric
@ -2424,7 +2424,7 @@ UnterminatedBlockComment
   // Optional assertion to make sure this rule is working as intended

   {
-            UnterminatedBlockCommentDebugAssert();
+      this.UnterminatedBlockCommentDebugAssert();
   }
   ;
   //
@ -2538,7 +2538,6 @@ DollarText

 EndDollarStringConstant
   : ('$' Tag? '$')
-   {isTag()}?
-   {popTag();} -> popMode
+   {this.isTag()}?
+   {this.popTag();} -> popMode
   ;
-
--- a/src/grammar/pgsql/PostgreSQLParser.g4
+++ b/src/grammar/pgsql/PostgreSQLParser.g4
@ -1957,10 +1957,8 @@ aggregate_with_argtypes_list
 createfunc_opt_list
   : createfunc_opt_item+
   {
-                ParseRoutineBody(_localctx);
-            }
-   //                    | createfunc_opt_list createfunc_opt_item
-
+      this.ParseRoutineBody(localctx);
+   }
   ;

 common_func_opt_item
@ -4562,7 +4560,6 @@ from pl_gram.y, line ~2982
 	 * at least we need not worry about it appearing as an identifier.
 */

-
   //                 | INTO
   | LATERAL_P
   | LEADING
@ -4606,7 +4603,8 @@ from pl_gram.y, line ~2982

 /*PLSQL grammar */

-   /************************************************************************************************************************************************************/ pl_function
+/************************************************************************************************************************************************************/ 
+pl_function
   : comp_options pl_block opt_semi
   ;

@ -4925,7 +4923,6 @@ exit_type
   : EXIT
   | CONTINUE_P
   ;
-   //todo implement RETURN statement according to initial grammar line 1754

 stmt_return
   : RETURN (NEXT sql_expression | QUERY (EXECUTE a_expr opt_for_using_expression | selectstmt) | opt_return_result) SEMI
@ -5324,4 +5321,3 @@ opt_returning_clause_into
   : INTO opt_strict into_target
   |
   ;
-
--- a/src/grammar/plsql/PlSqlLexer.g4
+++ b/src/grammar/plsql/PlSqlLexer.g4
@ -24,10 +24,6 @@ options {
    superClass=PlSqlBaseLexer;
 }

-@lexer::postinclude {
-#include <PlSqlBaseLexer.h>
-}
-
 ABORT:                        'ABORT';
 ABS:                          'ABS';
 ACCESS:                       'ACCESS';
@ -2342,17 +2338,17 @@ INTRODUCER: '_';
 SINGLE_LINE_COMMENT: '--' ~('\r' | '\n')* NEWLINE_EOF                 -> channel(HIDDEN);
 MULTI_LINE_COMMENT:  '/*' .*? '*/'                                    -> channel(HIDDEN);
 // https://docs.oracle.com/cd/E11882_01/server.112/e16604/ch_twelve034.htm#SQPUG054
-REMARK_COMMENT:      'REM' {IsNewlineAtPos(-4)}? 'ARK'? (' ' ~('\r' | '\n')*)? NEWLINE_EOF -> channel(HIDDEN);
+REMARK_COMMENT:      'REM' {this.IsNewlineAtPos(-4)}? 'ARK'? (' ' ~('\r' | '\n')*)? NEWLINE_EOF -> channel(HIDDEN);

 // https://docs.oracle.com/cd/E11882_01/server.112/e16604/ch_twelve032.htm#SQPUG052
-PROMPT_MESSAGE:      'PRO' {IsNewlineAtPos(-4)}? 'MPT'? (' ' ~('\r' | '\n')*)? NEWLINE_EOF;
+PROMPT_MESSAGE:      'PRO' {this.IsNewlineAtPos(-4)}? 'MPT'? (' ' ~('\r' | '\n')*)? NEWLINE_EOF;

 // TODO: should start with a newline
 START_CMD
    //: 'STA' 'RT'? SPACE ~('\r' | '\n')* NEWLINE_EOF
    // https://docs.oracle.com/cd/B19306_01/server.102/b14357/ch12002.htm
    // https://docs.oracle.com/cd/B19306_01/server.102/b14357/ch12003.htm
-    : '@' {IsNewlineAtPos(-2)}? '@'? ~('\r' | '\n')* NEWLINE_EOF
+    : '@' {this.IsNewlineAtPos(-2)}? '@'? ~('\r' | '\n')* NEWLINE_EOF
    ;

 REGULAR_ID: SIMPLE_LETTER (SIMPLE_LETTER | '$' | '_' | '#' | [0-9])*;
@ -2366,4 +2362,4 @@ fragment QUESTION_MARK  : '?';
 fragment SIMPLE_LETTER  : [A-Z];
 fragment FLOAT_FRAGMENT : UNSIGNED_INTEGER* '.'? UNSIGNED_INTEGER+;
 fragment NEWLINE        : '\r'? '\n';
-fragment SPACE          : [ \t];
+fragment SPACE          : [ \t];
--- a/src/grammar/plsql/PlSqlParser.g4
+++ b/src/grammar/plsql/PlSqlParser.g4
@ -25,10 +25,6 @@ options {
    superClass=PlSqlBaseParser;
 }

-@parser::postinclude {
-#include <PlSqlBaseParser.h>
-}
-
 program: sql_script EOF;

 sql_script
@ -2254,7 +2250,7 @@ partial_database_recovery
    ;

 partial_database_recovery_10g
-    : {isVersion10()}? STANDBY
+    : {this.isVersion10()}? STANDBY
      ( TABLESPACE tablespace (',' tablespace)*
      | DATAFILE CHAR_STRING | filenumber (',' CHAR_STRING | filenumber)*
      )
@ -6760,4 +6756,4 @@ numeric_function_name
    | NVL
    | ROUND
    | SUM
-    ;
+    ;
--- a/src/grammar/spark/SparkSql.g4
+++ b/src/grammar/spark/SparkSql.g4
@ -17,57 +17,55 @@
 grammar SparkSql;

@parser::members {
-    /**
-    * When false, INTERSECT is given the greater precedence over the other set
-    * operations (UNION, EXCEPT and MINUS) as per the SQL standard.
-    */
-    //  public boolean legacy_setops_precedence_enbled = false;
-    /**
-    * When false, a literal with an exponent would be converted into
-    * double type rather than decimal type.
-    */
-    //  public boolean legacy_exponent_literal_as_decimal_enabled = false;
-    global.legacy_exponent_literal_as_decimal_enabled = false;
-    /**
-    * When true, the behavior of keywords follows ANSI SQL standard.
-    */
-    //  public boolean SQL_standard_keyword_behavior = false;
-
-    global.legacy_setops_precedence_enbled = false;
-    global.legacy_exponent_literal_as_decimal_enabled = false;
-    global.SQL_standard_keyword_behavior = false;
+/**
+* When false, INTERSECT is given the greater precedence over the other set
+* operations (UNION, EXCEPT and MINUS) as per the SQL standard.
+*/
+public legacy_setops_precedence_enbled = false;
+/**
+* When false, a literal with an exponent would be converted into
+* double type rather than decimal type.
+*/
+public legacy_exponent_literal_as_decimal_enabled = false;
+/**
+* When true, the behavior of keywords follows ANSI SQL standard.
+*/
+public SQL_standard_keyword_behavior = false;
 }

@lexer::members {
-    var ctx = this
-    /**
-    * Verify whether current token is a valid decimal token (which contains dot).
-    * Returns true if the character that follows the token is not a digit or letter or underscore.
-    *
-    * For example:
-    * For char stream "2.3", "2." is not a valid decimal token, because it is followed by digit '3'.
-    * For char stream "2.3_", "2.3" is not a valid decimal token, because it is followed by '_'.
-    * For char stream "2.3W", "2.3" is not a valid decimal token, because it is followed by 'W'.
-    * For char stream "12.0D 34.E2+0.12 "  12.0D is a valid decimal token because it is followed
-    * by a space. 34.E2 is a valid decimal token because it is followed by symbol '+'
-    * which is not a digit or letter or underscore.
-    */
-    global.isValidDecimal = function() {
-        let nextChar = ctx._input.LA(1);
-        return !(nextChar >= 'A' && nextChar <= 'Z' || nextChar >= '0' && nextChar <= '9' || nextChar == '_')
-    }
+/**
+* Verify whether current token is a valid decimal token (which contains dot).
+* Returns true if the character that follows the token is not a digit or letter or underscore.
+*
+* For example:
+* For char stream "2.3", "2." is not a valid decimal token, because it is followed by digit '3'.
+* For char stream "2.3_", "2.3" is not a valid decimal token, because it is followed by '_'.
+* For char stream "2.3W", "2.3" is not a valid decimal token, because it is followed by 'W'.
+* For char stream "12.0D 34.E2+0.12 "  12.0D is a valid decimal token because it is followed
+* by a space. 34.E2 is a valid decimal token because it is followed by symbol '+'
+* which is not a digit or letter or underscore.
+*/
+isValidDecimal() {
+    let nextChar = this.fromCodePoint(this._input.LA(1));
+    return !(nextChar >= 'A' && nextChar <= 'Z' || nextChar >= '0' && nextChar <= '9' || nextChar == '_')
+}

-    /**
-    * This method will be called when we see '/*' and try to match it as a bracketed comment.
-    * If the next character is '+', it should be parsed as hint later, and we cannot match
-    * it as a bracketed comment.
-    *
-    * Returns true if the next character is '+'.
-    */
-    global.isHint = function() {
-        let nextChar = ctx._input.LA(1);
-        return nextChar == '+'
-    }
+/**
+* This method will be called when we see '/*' and try to match it as a bracketed comment.
+* If the next character is '+', it should be parsed as hint later, and we cannot match
+* it as a bracketed comment.
+*
+* Returns true if the next character is '+'.
+*/
+isHint() {
+    let nextChar = this.fromCodePoint(this._input.LA(1));
+    return nextChar == '+'
+}
+
+fromCodePoint(codePoint) {
+    return codePoint < 0 ? '' : String.fromCodePoint(codePoint); // LA() yields EOF (-1) at end of input; String.fromCodePoint would throw RangeError on it
+}
 }

 program
@ -471,11 +469,11 @@ multiInsertQueryBody

 queryTerm
    : queryPrimary                                                                       #queryTermDefault
-    | left=queryTerm {legacy_setops_precedence_enbled}?
+    | left=queryTerm {this.legacy_setops_precedence_enbled}?
        operator=(INTERSECT | UNION | EXCEPT | SETMINUS) setQuantifier? right=queryTerm  #setOperation
-    | left=queryTerm {!legacy_setops_precedence_enbled}?
+    | left=queryTerm {!this.legacy_setops_precedence_enbled}?
        operator=INTERSECT setQuantifier? right=queryTerm                                #setOperation
-    | left=queryTerm {!legacy_setops_precedence_enbled}?
+    | left=queryTerm {!this.legacy_setops_precedence_enbled}?
        operator=(UNION | EXCEPT | SETMINUS) setQuantifier? right=queryTerm              #setOperation
    ;

@ -928,10 +926,10 @@ windowSpec
    ;

 windowFrame
-    : frameType=RANGE start=frameBound
-    | frameType=ROWS start=frameBound
-    | frameType=RANGE BETWEEN start=frameBound AND end=frameBound
-    | frameType=ROWS BETWEEN start=frameBound AND end=frameBound
+    : frameType=RANGE frameStart=frameBound
+    | frameType=ROWS frameStart=frameBound
+    | frameType=RANGE BETWEEN frameStart=frameBound AND end=frameBound
+    | frameType=ROWS BETWEEN frameStart=frameBound AND end=frameBound
    ;

 frameBound
@ -970,14 +968,14 @@ errorCapturingIdentifierExtra

 identifier
    : strictIdentifier
-    | {!SQL_standard_keyword_behavior}? strictNonReserved
+    | {!this.SQL_standard_keyword_behavior}? strictNonReserved
    ;

 strictIdentifier
    : IDENTIFIER              #unquotedIdentifier
    | quotedIdentifier        #quotedIdentifierAlternative
-    | {SQL_standard_keyword_behavior}? ansiNonReserved #unquotedIdentifier
-    | {!SQL_standard_keyword_behavior}? nonReserved    #unquotedIdentifier
+    | {this.SQL_standard_keyword_behavior}? ansiNonReserved #unquotedIdentifier
+    | {!this.SQL_standard_keyword_behavior}? nonReserved    #unquotedIdentifier
    ;

 quotedIdentifier
@ -985,9 +983,9 @@ quotedIdentifier
    ;

 number
-    : {!legacy_exponent_literal_as_decimal_enabled}? MINUS? EXPONENT_VALUE #exponentLiteral
-    | {!legacy_exponent_literal_as_decimal_enabled}? MINUS? DECIMAL_VALUE  #decimalLiteral
-    | {legacy_exponent_literal_as_decimal_enabled}? MINUS? (EXPONENT_VALUE | DECIMAL_VALUE) #legacyDecimalLiteral
+    : {!this.legacy_exponent_literal_as_decimal_enabled}? MINUS? EXPONENT_VALUE #exponentLiteral
+    | {!this.legacy_exponent_literal_as_decimal_enabled}? MINUS? DECIMAL_VALUE  #decimalLiteral
+    | {this.legacy_exponent_literal_as_decimal_enabled}? MINUS? (EXPONENT_VALUE | DECIMAL_VALUE) #legacyDecimalLiteral
    | MINUS? INTEGER_VALUE            #integerLiteral
    | MINUS? BIGINT_LITERAL           #bigIntLiteral
    | MINUS? SMALLINT_LITERAL         #smallIntLiteral
@ -1004,7 +1002,7 @@ alterColumnAction
    | setOrDrop=(SET | DROP) NOT NULL
    ;

-// When `SQL_standard_keyword_behavior=true`, there are 2 kinds of keywords in Spark SQL.
+// When `this.SQL_standard_keyword_behavior=true`, there are 2 kinds of keywords in Spark SQL.
 // - Reserved keywords:
 //     Keywords that are reserved and can't be used as identifiers for table, view, column,
 //     function, alias, etc.
@ -1770,26 +1768,26 @@ INTEGER_VALUE

 EXPONENT_VALUE
    : DIGIT+ EXPONENT
-    | DECIMAL_DIGITS EXPONENT {isValidDecimal()}?
+    | DECIMAL_DIGITS EXPONENT {this.isValidDecimal()}?
    ;

 DECIMAL_VALUE
-    : DECIMAL_DIGITS {isValidDecimal()}?
+    : DECIMAL_DIGITS {this.isValidDecimal()}?
    ;

 FLOAT_LITERAL
    : DIGIT+ EXPONENT? 'F'
-    | DECIMAL_DIGITS EXPONENT? 'F' {isValidDecimal()}?
+    | DECIMAL_DIGITS EXPONENT? 'F' {this.isValidDecimal()}?
    ;

 DOUBLE_LITERAL
    : DIGIT+ EXPONENT? 'D'
-    | DECIMAL_DIGITS EXPONENT? 'D' {isValidDecimal()}?
+    | DECIMAL_DIGITS EXPONENT? 'D' {this.isValidDecimal()}?
    ;

 BIGDECIMAL_LITERAL
    : DIGIT+ EXPONENT? 'BD'
-    | DECIMAL_DIGITS EXPONENT? 'BD' {isValidDecimal()}?
+    | DECIMAL_DIGITS EXPONENT? 'BD' {this.isValidDecimal()}?
    ;

 IDENTIFIER
@ -1826,7 +1824,7 @@ SIMPLE_COMMENT
    ;

 BRACKETED_COMMENT
-    : '/*' {!isHint()}? (BRACKETED_COMMENT|.)*? '*/' -> channel(HIDDEN)
+    : '/*' {!this.isHint()}? (BRACKETED_COMMENT|.)*? '*/' -> channel(HIDDEN)
    ;

 WS