diff --git a/.gitignore b/.gitignore index cf91e49..1b638ac 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,5 @@ package-lock.json dist/ src/**/.antlr coverage -.idea \ No newline at end of file +.idea +gen/ \ No newline at end of file diff --git a/src/grammar/pgsql/PostgreSQLLexer.g4 b/src/grammar/pgsql/PostgreSQLLexer.g4 index 6646eda..622ca2b 100644 --- a/src/grammar/pgsql/PostgreSQLLexer.g4 +++ b/src/grammar/pgsql/PostgreSQLLexer.g4 @@ -178,14 +178,14 @@ PARAM Operator : ((OperatorCharacter | ('+' | '-' - {checkLA('-')}?)+ (OperatorCharacter | '/' - {checkLA('*')}?) | '/' - {checkLA('*')}?)+ | // special handling for the single-character operators + and - + {this.checkLA('-')}?)+ (OperatorCharacter | '/' + {this.checkLA('*')}?) | '/' + {this.checkLA('*')}?)+ | // special handling for the single-character operators + and - [+-]) //TODO somehow rewrite this part without using Actions { - HandleLessLessGreaterGreater(); + this.HandleLessLessGreaterGreater(); } ; /* This rule handles operators which end with + or -, and sets the token type to Operator. It is comprised of four @@ -202,9 +202,9 @@ Operator OperatorEndingWithPlusMinus : (OperatorCharacterNotAllowPlusMinusAtEnd | '-' - {checkLA('-')}? | '/' - {checkLA('*')}?)* OperatorCharacterAllowPlusMinusAtEnd Operator? ('+' | '-' - {checkLA('-')}?)+ -> type (Operator) + {this.checkLA('-')}? | '/' + {this.checkLA('*')}?)* OperatorCharacterAllowPlusMinusAtEnd Operator? ('+' | '-' + {this.checkLA('-')}?)+ -> type (Operator) ; // Each of the following fragment rules omits the +, -, and / characters, which must always be handled in a special way @@ -2200,11 +2200,11 @@ fragment IdentifierStartChar [\u00AA\u00B5\u00BA\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF] | // these are the letters above 0xFF which only need a single UTF-16 code unit [\u0100-\uD7FF\uE000-\uFFFF] - {charIsLetter()}? + {this.charIsLetter()}? | // letters which require multiple UTF-16 code units [\uD800-\uDBFF] [\uDC00-\uDFFF] { - CheckIfUtf32Letter() + this.CheckIfUtf32Letter() }? ; @@ -2315,7 +2315,7 @@ UnterminatedUnicodeEscapeStringConstant BeginDollarStringConstant : '$' Tag? '$' - {pushTag();} -> pushMode (DollarQuotedStringMode) + {this.pushTag();} -> pushMode (DollarQuotedStringMode) ; /* "The tag, if any, of a dollar-quoted string follows the same rules as an * unquoted identifier, except that it cannot contain a dollar sign." @@ -2366,7 +2366,7 @@ Integral NumericFail : Digits '..' - {HandleNumericFail();} + {this.HandleNumericFail();} ; Numeric @@ -2424,7 +2424,7 @@ UnterminatedBlockComment // Optional assertion to make sure this rule is working as intended { - UnterminatedBlockCommentDebugAssert(); + this.UnterminatedBlockCommentDebugAssert(); } ; // @@ -2538,7 +2538,6 @@ DollarText EndDollarStringConstant : ('$' Tag? '$') - {isTag()}? - {popTag();} -> popMode + {this.isTag()}? + {this.popTag();} -> popMode ; - diff --git a/src/grammar/pgsql/PostgreSQLParser.g4 b/src/grammar/pgsql/PostgreSQLParser.g4 index 496d011..f8d1f1c 100644 --- a/src/grammar/pgsql/PostgreSQLParser.g4 +++ b/src/grammar/pgsql/PostgreSQLParser.g4 @@ -1957,10 +1957,8 @@ aggregate_with_argtypes_list createfunc_opt_list : createfunc_opt_item+ { - ParseRoutineBody(_localctx); - } - // | createfunc_opt_list createfunc_opt_item - + this.ParseRoutineBody(localctx); + } ; common_func_opt_item @@ -4562,7 +4560,6 @@ from pl_gram.y, line ~2982 * at least we need not worry about it appearing as an identifier. 
*/ - // | INTO | LATERAL_P | LEADING @@ -4606,7 +4603,8 @@ from pl_gram.y, line ~2982 /*PLSQL grammar */ - /************************************************************************************************************************************************************/ pl_function +/************************************************************************************************************************************************************/ +pl_function : comp_options pl_block opt_semi ; @@ -4925,7 +4923,6 @@ exit_type : EXIT | CONTINUE_P ; - //todo implement RETURN statement according to initial grammar line 1754 stmt_return : RETURN (NEXT sql_expression | QUERY (EXECUTE a_expr opt_for_using_expression | selectstmt) | opt_return_result) SEMI @@ -5324,4 +5321,3 @@ opt_returning_clause_into : INTO opt_strict into_target | ; - diff --git a/src/grammar/plsql/PlSqlLexer.g4 b/src/grammar/plsql/PlSqlLexer.g4 index 4d868ce..4e176b0 100644 --- a/src/grammar/plsql/PlSqlLexer.g4 +++ b/src/grammar/plsql/PlSqlLexer.g4 @@ -24,10 +24,6 @@ options { superClass=PlSqlBaseLexer; } -@lexer::postinclude { -#include -} - ABORT: 'ABORT'; ABS: 'ABS'; ACCESS: 'ACCESS'; @@ -2342,17 +2338,17 @@ INTRODUCER: '_'; SINGLE_LINE_COMMENT: '--' ~('\r' | '\n')* NEWLINE_EOF -> channel(HIDDEN); MULTI_LINE_COMMENT: '/*' .*? '*/' -> channel(HIDDEN); // https://docs.oracle.com/cd/E11882_01/server.112/e16604/ch_twelve034.htm#SQPUG054 -REMARK_COMMENT: 'REM' {IsNewlineAtPos(-4)}? 'ARK'? (' ' ~('\r' | '\n')*)? NEWLINE_EOF -> channel(HIDDEN); +REMARK_COMMENT: 'REM' {this.IsNewlineAtPos(-4)}? 'ARK'? (' ' ~('\r' | '\n')*)? NEWLINE_EOF -> channel(HIDDEN); // https://docs.oracle.com/cd/E11882_01/server.112/e16604/ch_twelve032.htm#SQPUG052 -PROMPT_MESSAGE: 'PRO' {IsNewlineAtPos(-4)}? 'MPT'? (' ' ~('\r' | '\n')*)? NEWLINE_EOF; +PROMPT_MESSAGE: 'PRO' {this.IsNewlineAtPos(-4)}? 'MPT'? (' ' ~('\r' | '\n')*)? NEWLINE_EOF; // TODO: should starts with newline START_CMD //: 'STA' 'RT'? SPACE ~('\r' | '\n')* NEWLINE_EOF // https://docs.oracle.com/cd/B19306_01/server.102/b14357/ch12002.htm // https://docs.oracle.com/cd/B19306_01/server.102/b14357/ch12003.htm - : '@' {IsNewlineAtPos(-2)}? '@'? ~('\r' | '\n')* NEWLINE_EOF + : '@' {this.IsNewlineAtPos(-2)}? '@'? ~('\r' | '\n')* NEWLINE_EOF ; REGULAR_ID: SIMPLE_LETTER (SIMPLE_LETTER | '$' | '_' | '#' | [0-9])*; @@ -2366,4 +2362,4 @@ fragment QUESTION_MARK : '?'; fragment SIMPLE_LETTER : [A-Z]; fragment FLOAT_FRAGMENT : UNSIGNED_INTEGER* '.'? UNSIGNED_INTEGER+; fragment NEWLINE : '\r'? '\n'; -fragment SPACE : [ \t]; +fragment SPACE : [ \t]; \ No newline at end of file diff --git a/src/grammar/plsql/PlSqlParser.g4 b/src/grammar/plsql/PlSqlParser.g4 index d181fd5..15e3594 100644 --- a/src/grammar/plsql/PlSqlParser.g4 +++ b/src/grammar/plsql/PlSqlParser.g4 @@ -25,10 +25,6 @@ options { superClass=PlSqlBaseParser; } -@parser::postinclude { -#include -} - program: sql_script EOF; sql_script @@ -2254,7 +2250,7 @@ partial_database_recovery ; partial_database_recovery_10g - : {isVersion10()}? STANDBY + : {this.isVersion10()}? 
STANDBY ( TABLESPACE tablespace (',' tablespace)* | DATAFILE CHAR_STRING | filenumber (',' CHAR_STRING | filenumber)* ) @@ -6760,4 +6756,4 @@ numeric_function_name | NVL | ROUND | SUM - ; + ; \ No newline at end of file diff --git a/src/grammar/spark/SparkSql.g4 b/src/grammar/spark/SparkSql.g4 index ff9e0ba..9397bdb 100644 --- a/src/grammar/spark/SparkSql.g4 +++ b/src/grammar/spark/SparkSql.g4 @@ -17,57 +17,55 @@ grammar SparkSql; @parser::members { - /** - * When false, INTERSECT is given the greater precedence over the other set - * operations (UNION, EXCEPT and MINUS) as per the SQL standard. - */ - // public boolean legacy_setops_precedence_enbled = false; - /** - * When false, a literal with an exponent would be converted into - * double type rather than decimal type. - */ - // public boolean legacy_exponent_literal_as_decimal_enabled = false; - global.legacy_exponent_literal_as_decimal_enabled = false; - /** - * When true, the behavior of keywords follows ANSI SQL standard. - */ - // public boolean SQL_standard_keyword_behavior = false; - - global.legacy_setops_precedence_enbled = false; - global.legacy_exponent_literal_as_decimal_enabled = false; - global.SQL_standard_keyword_behavior = false; +/** +* When false, INTERSECT is given the greater precedence over the other set +* operations (UNION, EXCEPT and MINUS) as per the SQL standard. +*/ +public legacy_setops_precedence_enbled = false; +/** +* When false, a literal with an exponent would be converted into +* double type rather than decimal type. +*/ +public legacy_exponent_literal_as_decimal_enabled = false; +/** +* When true, the behavior of keywords follows ANSI SQL standard. +*/ +public SQL_standard_keyword_behavior = false; } @lexer::members { - var ctx = this - /** - * Verify whether current token is a valid decimal token (which contains dot). - * Returns true if the character that follows the token is not a digit or letter or underscore. - * - * For example: - * For char stream "2.3", "2." is not a valid decimal token, because it is followed by digit '3'. - * For char stream "2.3_", "2.3" is not a valid decimal token, because it is followed by '_'. - * For char stream "2.3W", "2.3" is not a valid decimal token, because it is followed by 'W'. - * For char stream "12.0D 34.E2+0.12 " 12.0D is a valid decimal token because it is followed - * by a space. 34.E2 is a valid decimal token because it is followed by symbol '+' - * which is not a digit or letter or underscore. - */ - global.isValidDecimal = function() { - let nextChar = ctx._input.LA(1); - return !(nextChar >= 'A' && nextChar <= 'Z' || nextChar >= '0' && nextChar <= '9' || nextChar == '_') - } +/** +* Verify whether current token is a valid decimal token (which contains dot). +* Returns true if the character that follows the token is not a digit or letter or underscore. +* +* For example: +* For char stream "2.3", "2." is not a valid decimal token, because it is followed by digit '3'. +* For char stream "2.3_", "2.3" is not a valid decimal token, because it is followed by '_'. +* For char stream "2.3W", "2.3" is not a valid decimal token, because it is followed by 'W'. +* For char stream "12.0D 34.E2+0.12 " 12.0D is a valid decimal token because it is followed +* by a space. 34.E2 is a valid decimal token because it is followed by symbol '+' +* which is not a digit or letter or underscore. 
+*/ +isValidDecimal() { + let nextChar = this.fromCodePoint(this._input.LA(1)); + return !(nextChar >= 'A' && nextChar <= 'Z' || nextChar >= '0' && nextChar <= '9' || nextChar == '_') +} - /** - * This method will be called when we see '/*' and try to match it as a bracketed comment. - * If the next character is '+', it should be parsed as hint later, and we cannot match - * it as a bracketed comment. - * - * Returns true if the next character is '+'. - */ - global.isHint = function() { - let nextChar = ctx._input.LA(1); - return nextChar == '+' - } +/** +* This method will be called when we see '/*' and try to match it as a bracketed comment. +* If the next character is '+', it should be parsed as hint later, and we cannot match +* it as a bracketed comment. +* +* Returns true if the next character is '+'. +*/ +isHint() { + let nextChar = this.fromCodePoint(this._input.LA(1)); + return nextChar == '+' +} + +fromCodePoint(codePoint) { + return String.fromCodePoint(codePoint); +} } program @@ -471,11 +469,11 @@ multiInsertQueryBody queryTerm : queryPrimary #queryTermDefault - | left=queryTerm {legacy_setops_precedence_enbled}? + | left=queryTerm {this.legacy_setops_precedence_enbled}? operator=(INTERSECT | UNION | EXCEPT | SETMINUS) setQuantifier? right=queryTerm #setOperation - | left=queryTerm {!legacy_setops_precedence_enbled}? + | left=queryTerm {!this.legacy_setops_precedence_enbled}? operator=INTERSECT setQuantifier? right=queryTerm #setOperation - | left=queryTerm {!legacy_setops_precedence_enbled}? + | left=queryTerm {!this.legacy_setops_precedence_enbled}? operator=(UNION | EXCEPT | SETMINUS) setQuantifier? right=queryTerm #setOperation ; @@ -928,10 +926,10 @@ windowSpec ; windowFrame - : frameType=RANGE start=frameBound - | frameType=ROWS start=frameBound - | frameType=RANGE BETWEEN start=frameBound AND end=frameBound - | frameType=ROWS BETWEEN start=frameBound AND end=frameBound + : frameType=RANGE frameStart=frameBound + | frameType=ROWS frameStart=frameBound + | frameType=RANGE BETWEEN frameStart=frameBound AND end=frameBound + | frameType=ROWS BETWEEN frameStart=frameBound AND end=frameBound ; frameBound @@ -970,14 +968,14 @@ errorCapturingIdentifierExtra identifier : strictIdentifier - | {!SQL_standard_keyword_behavior}? strictNonReserved + | {!this.SQL_standard_keyword_behavior}? strictNonReserved ; strictIdentifier : IDENTIFIER #unquotedIdentifier | quotedIdentifier #quotedIdentifierAlternative - | {SQL_standard_keyword_behavior}? ansiNonReserved #unquotedIdentifier - | {!SQL_standard_keyword_behavior}? nonReserved #unquotedIdentifier + | {this.SQL_standard_keyword_behavior}? ansiNonReserved #unquotedIdentifier + | {!this.SQL_standard_keyword_behavior}? nonReserved #unquotedIdentifier ; quotedIdentifier @@ -985,9 +983,9 @@ quotedIdentifier ; number - : {!legacy_exponent_literal_as_decimal_enabled}? MINUS? EXPONENT_VALUE #exponentLiteral - | {!legacy_exponent_literal_as_decimal_enabled}? MINUS? DECIMAL_VALUE #decimalLiteral - | {legacy_exponent_literal_as_decimal_enabled}? MINUS? (EXPONENT_VALUE | DECIMAL_VALUE) #legacyDecimalLiteral + : {!this.legacy_exponent_literal_as_decimal_enabled}? MINUS? EXPONENT_VALUE #exponentLiteral + | {!this.legacy_exponent_literal_as_decimal_enabled}? MINUS? DECIMAL_VALUE #decimalLiteral + | {this.legacy_exponent_literal_as_decimal_enabled}? MINUS? (EXPONENT_VALUE | DECIMAL_VALUE) #legacyDecimalLiteral | MINUS? INTEGER_VALUE #integerLiteral | MINUS? BIGINT_LITERAL #bigIntLiteral | MINUS? 
SMALLINT_LITERAL #smallIntLiteral @@ -1004,7 +1002,7 @@ alterColumnAction | setOrDrop=(SET | DROP) NOT NULL ; -// When `SQL_standard_keyword_behavior=true`, there are 2 kinds of keywords in Spark SQL. +// When `this.SQL_standard_keyword_behavior=true`, there are 2 kinds of keywords in Spark SQL. // - Reserved keywords: // Keywords that are reserved and can't be used as identifiers for table, view, column, // function, alias, etc. @@ -1770,26 +1768,26 @@ INTEGER_VALUE EXPONENT_VALUE : DIGIT+ EXPONENT - | DECIMAL_DIGITS EXPONENT {isValidDecimal()}? + | DECIMAL_DIGITS EXPONENT {this.isValidDecimal()}? ; DECIMAL_VALUE - : DECIMAL_DIGITS {isValidDecimal()}? + : DECIMAL_DIGITS {this.isValidDecimal()}? ; FLOAT_LITERAL : DIGIT+ EXPONENT? 'F' - | DECIMAL_DIGITS EXPONENT? 'F' {isValidDecimal()}? + | DECIMAL_DIGITS EXPONENT? 'F' {this.isValidDecimal()}? ; DOUBLE_LITERAL : DIGIT+ EXPONENT? 'D' - | DECIMAL_DIGITS EXPONENT? 'D' {isValidDecimal()}? + | DECIMAL_DIGITS EXPONENT? 'D' {this.isValidDecimal()}? ; BIGDECIMAL_LITERAL : DIGIT+ EXPONENT? 'BD' - | DECIMAL_DIGITS EXPONENT? 'BD' {isValidDecimal()}? + | DECIMAL_DIGITS EXPONENT? 'BD' {this.isValidDecimal()}? ; IDENTIFIER @@ -1826,7 +1824,7 @@ SIMPLE_COMMENT ; BRACKETED_COMMENT - : '/*' {!isHint()}? (BRACKETED_COMMENT|.)*? '*/' -> channel(HIDDEN) + : '/*' {!this.isHint()}? (BRACKETED_COMMENT|.)*? '*/' -> channel(HIDDEN) ; WS diff --git a/src/lib/pgsql/PostgreSQLLexer.ts b/src/lib/pgsql/PostgreSQLLexer.ts index 03588a0..33f91ba 100644 --- a/src/lib/pgsql/PostgreSQLLexer.ts +++ b/src/lib/pgsql/PostgreSQLLexer.ts @@ -1,10 +1,11 @@ -// dt-sql-parser/src/grammar/pgsql/PostgreSQLLexer.g4 by ANTLR 4.12.0 +// Generated from /Users/ziv/github.com/dt-sql-parser/src/grammar/pgsql/PostgreSQLLexer.g4 by ANTLR 4.12.0 // noinspection ES6UnusedImports,JSUnusedGlobalSymbols,JSUnusedLocalSymbols import { ATN, ATNDeserializer, CharStream, DecisionState, DFA, + Lexer, LexerATNSimulator, RuleContext, PredictionContextCache, @@ -12,7 +13,7 @@ import { } from "antlr4"; -import PostgreSQLLexerBase from './base/PostgreSQLLexerBase'; +import PostgreSQLLexerBase from './PostgreSQLLexerBase'; export default class PostgreSQLLexer extends PostgreSQLLexerBase { public static readonly Dollar = 1; @@ -1195,9 +1196,11 @@ export default class PostgreSQLLexer extends PostgreSQLLexerBase { "EndDollarStringConstant", ]; + /* This field stores the tags which are used to detect the end of a dollar-quoted string literal. 
*/ + constructor(input: CharStream) { super(input); this._interp = new LexerATNSimulator(this, PostgreSQLLexer._ATN, PostgreSQLLexer.DecisionsToDFA, new PredictionContextCache()); @@ -1244,7 +1247,9 @@ export default class PostgreSQLLexer extends PostgreSQLLexerBase { private Operator_action(localctx: RuleContext, actionIndex: number): void { switch (actionIndex) { case 0: + this.HandleLessLessGreaterGreater(); + break; } } @@ -1266,7 +1271,7 @@ export default class PostgreSQLLexer extends PostgreSQLLexerBase { switch (actionIndex) { case 3: - this.UnterminatedBlockCommentDebugAssert(); + this.UnterminatedBlockCommentDebugAssert(); break; } diff --git a/src/lib/pgsql/base/PostgreSQLLexerBase.ts b/src/lib/pgsql/PostgreSQLLexerBase.ts similarity index 100% rename from src/lib/pgsql/base/PostgreSQLLexerBase.ts rename to src/lib/pgsql/PostgreSQLLexerBase.ts diff --git a/src/lib/pgsql/PostgreSQLParser.ts b/src/lib/pgsql/PostgreSQLParser.ts index fcdfd65..e3c07ce 100644 --- a/src/lib/pgsql/PostgreSQLParser.ts +++ b/src/lib/pgsql/PostgreSQLParser.ts @@ -1,19 +1,26 @@ -// dt-sql-parser/src/grammar/pgsql/PostgreSQLParser.g4 by ANTLR 4.12.0 +// Generated from /Users/ziv/github.com/dt-sql-parser/src/grammar/pgsql/PostgreSQLParser.g4 by ANTLR 4.12.0 // noinspection ES6UnusedImports,JSUnusedGlobalSymbols,JSUnusedLocalSymbols import { ATN, ATNDeserializer, DecisionState, DFA, FailedPredicateException, - RecognitionException, NoViableAltException, - ParserATNSimulator, - RuleContext, ParserRuleContext, PredictionContextCache, - TerminalNode, + RecognitionException, NoViableAltException, BailErrorStrategy, + Parser, ParserATNSimulator, + RuleContext, ParserRuleContext, PredictionMode, PredictionContextCache, + TerminalNode, RuleNode, Token, TokenStream, + Interval, IntervalSet } from 'antlr4'; import PostgreSQLParserListener from "./PostgreSQLParserListener.js"; import PostgreSQLParserVisitor from "./PostgreSQLParserVisitor.js"; -import PostgreSQLParserBase from './base/PostgreSQLParserBase'; +// for running tests with parameters, TODO: discuss strategy for typed parameters in CI +// eslint-disable-next-line no-unused-vars +type int = number; + + + +import PostgreSQLParserBase from './PostgreSQLParserBase'; export default class PostgreSQLParser extends PostgreSQLParserBase { public static readonly Dollar = 1; @@ -30836,7 +30843,9 @@ export default class PostgreSQLParser extends PostgreSQLParserBase { this._errHandler.sync(this); _alt = this._interp.adaptivePredict(this._input, 283, this._ctx); } while (_alt !== 2 && _alt !== ATN.INVALID_ALT_NUMBER); - this.ParseRoutineBody(localctx); + + this.ParseRoutineBody(localctx); + } } catch (re) { @@ -55874,7 +55883,6 @@ export default class PostgreSQLParser extends PostgreSQLParserBase { let _parentctx: ParserRuleContext = this._ctx; let _parentState: number = this.state; let localctx: B_exprContext = new B_exprContext(this, this._ctx, _parentState); - // @ts-ignore let _prevctx: B_exprContext = localctx; let _startState: number = 1192; this.enterRecursionRule(localctx, 1192, PostgreSQLParser.RULE_b_expr, _p); @@ -97208,10 +97216,6 @@ export class AltertsconfigurationstmtContext extends ParserRuleContext { public CONFIGURATION(): TerminalNode { return this.getToken(PostgreSQLParser.CONFIGURATION, 0); } - // @ts-ignore - public any_name_list(): Any_nameContext[] { - return this.getTypedRuleContexts(Any_nameContext) as Any_nameContext[]; - } public any_name(i: number): Any_nameContext { return this.getTypedRuleContext(Any_nameContext, i) as Any_nameContext; } @@ 
-97230,7 +97234,6 @@ export class AltertsconfigurationstmtContext extends ParserRuleContext { public any_with(): Any_withContext { return this.getTypedRuleContext(Any_withContext, 0) as Any_withContext; } - // @ts-ignore public any_name_list(): Any_name_listContext { return this.getTypedRuleContext(Any_name_listContext, 0) as Any_name_listContext; } diff --git a/src/lib/pgsql/base/PostgreSQLParserBase.ts b/src/lib/pgsql/PostgreSQLParserBase.ts similarity index 97% rename from src/lib/pgsql/base/PostgreSQLParserBase.ts rename to src/lib/pgsql/PostgreSQLParserBase.ts index 1b91956..f0609c1 100644 --- a/src/lib/pgsql/base/PostgreSQLParserBase.ts +++ b/src/lib/pgsql/PostgreSQLParserBase.ts @@ -1,7 +1,7 @@ /* eslint-disable new-cap,camelcase */ import { Parser, CharStreams, CommonTokenStream } from 'antlr4'; -import PostgreSQLLexer from '../PostgreSQLLexer'; -import PostgreSQLParser from '../PostgreSQLParser'; +import PostgreSQLLexer from './PostgreSQLLexer'; +import PostgreSQLParser from './PostgreSQLParser'; export default class PostgreSQLParserBase extends Parser { diff --git a/src/lib/pgsql/PostgreSQLParserListener.ts b/src/lib/pgsql/PostgreSQLParserListener.ts index ec46556..d3ad29a 100644 --- a/src/lib/pgsql/PostgreSQLParserListener.ts +++ b/src/lib/pgsql/PostgreSQLParserListener.ts @@ -1,4 +1,4 @@ -// dt-sql-parser/src/grammar/pgsql/PostgreSQLParser.g4 by ANTLR 4.12.0 +// Generated from /Users/ziv/github.com/dt-sql-parser/src/grammar/pgsql/PostgreSQLParser.g4 by ANTLR 4.12.0 import {ParseTreeListener} from "antlr4"; diff --git a/src/lib/pgsql/PostgreSQLParserVisitor.ts b/src/lib/pgsql/PostgreSQLParserVisitor.ts index 414bf42..6a9927e 100644 --- a/src/lib/pgsql/PostgreSQLParserVisitor.ts +++ b/src/lib/pgsql/PostgreSQLParserVisitor.ts @@ -1,4 +1,4 @@ -// dt-sql-parser/src/grammar/pgsql/PostgreSQLParser.g4 by ANTLR 4.12.0 +// Generated from /Users/ziv/github.com/dt-sql-parser/src/grammar/pgsql/PostgreSQLParser.g4 by ANTLR 4.12.0 import {ParseTreeVisitor} from 'antlr4'; diff --git a/src/lib/plsql/base/PlSqlBaseLexer.ts b/src/lib/plsql/PlSqlBaseLexer.ts similarity index 100% rename from src/lib/plsql/base/PlSqlBaseLexer.ts rename to src/lib/plsql/PlSqlBaseLexer.ts diff --git a/src/lib/plsql/base/PlSqlBaseParser.ts b/src/lib/plsql/PlSqlBaseParser.ts similarity index 100% rename from src/lib/plsql/base/PlSqlBaseParser.ts rename to src/lib/plsql/PlSqlBaseParser.ts diff --git a/src/lib/plsql/PlSqlLexer.ts b/src/lib/plsql/PlSqlLexer.ts index 4f5c73d..4fab841 100644 --- a/src/lib/plsql/PlSqlLexer.ts +++ b/src/lib/plsql/PlSqlLexer.ts @@ -1,18 +1,17 @@ -// dt-sql-parser/src/grammar/plsql/PlSqlLexer.g4 by ANTLR 4.12.0 +// Generated from /Users/ziv/github.com/dt-sql-parser/src/grammar/plsql/PlSqlLexer.g4 by ANTLR 4.12.0 // noinspection ES6UnusedImports,JSUnusedGlobalSymbols,JSUnusedLocalSymbols import { ATN, ATNDeserializer, CharStream, DecisionState, DFA, + Lexer, LexerATNSimulator, RuleContext, PredictionContextCache, Token } from "antlr4"; - - -import PlSqlBaseLexer from './base/PlSqlBaseLexer'; +import PlSqlBaseLexer from './PlSqlBaseLexer'; export default class PlSqlLexer extends PlSqlBaseLexer { public static readonly ABORT = 1; diff --git a/src/lib/plsql/PlSqlParser.ts b/src/lib/plsql/PlSqlParser.ts index ad63c39..7b20c81 100644 --- a/src/lib/plsql/PlSqlParser.ts +++ b/src/lib/plsql/PlSqlParser.ts @@ -1,20 +1,24 @@ -// dt-sql-parser/src/grammar/plsql/PlSqlParser.g4 by ANTLR 4.12.0 +// Generated from /Users/ziv/github.com/dt-sql-parser/src/grammar/plsql/PlSqlParser.g4 by ANTLR 4.12.0 // 
noinspection ES6UnusedImports,JSUnusedGlobalSymbols,JSUnusedLocalSymbols import { ATN, ATNDeserializer, DecisionState, DFA, FailedPredicateException, - RecognitionException, NoViableAltException, ParserATNSimulator, - RuleContext, ParserRuleContext, PredictionContextCache, - TerminalNode, - Token, CommonTokenStream, + RecognitionException, NoViableAltException, BailErrorStrategy, + Parser, ParserATNSimulator, + RuleContext, ParserRuleContext, PredictionMode, PredictionContextCache, + TerminalNode, RuleNode, + Token, TokenStream, + Interval, IntervalSet } from 'antlr4'; - import PlSqlParserListener from "./PlSqlParserListener.js"; import PlSqlParserVisitor from "./PlSqlParserVisitor.js"; -import PlSqlBaseParser from './base/PlSqlBaseParser'; +// for running tests with parameters, TODO: discuss strategy for typed parameters in CI +// eslint-disable-next-line no-unused-vars +type int = number; +import PlSqlBaseParser from './PlSqlBaseParser'; export default class PlSqlParser extends PlSqlBaseParser { public static readonly ABORT = 1; @@ -6984,7 +6988,7 @@ export default class PlSqlParser extends PlSqlBaseParser { return new FailedPredicateException(this, predicate, message); } - constructor(input: CommonTokenStream) { + constructor(input: TokenStream) { super(input); this._interp = new ParserATNSimulator(this, PlSqlParser._ATN, PlSqlParser.DecisionsToDFA, new PredictionContextCache()); } @@ -55509,7 +55513,7 @@ export default class PlSqlParser extends PlSqlBaseParser { { this.state = 6733; if (!(this.isVersion10())) { - throw this.createFailedPredicateException("isVersion10()"); + throw this.createFailedPredicateException("this.isVersion10()"); } this.state = 6734; this.match(PlSqlParser.STANDBY); @@ -100098,7 +100102,6 @@ export default class PlSqlParser extends PlSqlBaseParser { let _parentctx: ParserRuleContext = this._ctx; let _parentState: number = this.state; let localctx: Logical_expressionContext = new Logical_expressionContext(this, this._ctx, _parentState); - // @ts-ignore let _prevctx: Logical_expressionContext = localctx; let _startState: number = 1238; this.enterRecursionRule(localctx, 1238, PlSqlParser.RULE_logical_expression, _p); @@ -100424,7 +100427,6 @@ export default class PlSqlParser extends PlSqlBaseParser { let _parentctx: ParserRuleContext = this._ctx; let _parentState: number = this.state; let localctx: Relational_expressionContext = new Relational_expressionContext(this, this._ctx, _parentState); - // @ts-ignore let _prevctx: Relational_expressionContext = localctx; let _startState: number = 1246; this.enterRecursionRule(localctx, 1246, PlSqlParser.RULE_relational_expression, _p); @@ -100800,7 +100802,6 @@ export default class PlSqlParser extends PlSqlBaseParser { let _parentctx: ParserRuleContext = this._ctx; let _parentState: number = this.state; let localctx: ConcatenationContext = new ConcatenationContext(this, this._ctx, _parentState); - // @ts-ignore let _prevctx: ConcatenationContext = localctx; let _startState: number = 1256; this.enterRecursionRule(localctx, 1256, PlSqlParser.RULE_concatenation, _p); diff --git a/src/lib/plsql/PlSqlParserListener.ts b/src/lib/plsql/PlSqlParserListener.ts index bcc9d96..ea45583 100644 --- a/src/lib/plsql/PlSqlParserListener.ts +++ b/src/lib/plsql/PlSqlParserListener.ts @@ -1,4 +1,4 @@ -// dt-sql-parser/src/grammar/plsql/PlSqlParser.g4 by ANTLR 4.12.0 +// Generated from /Users/ziv/github.com/dt-sql-parser/src/grammar/plsql/PlSqlParser.g4 by ANTLR 4.12.0 import {ParseTreeListener} from "antlr4"; diff --git 
a/src/lib/plsql/PlSqlParserVisitor.ts b/src/lib/plsql/PlSqlParserVisitor.ts index 4867108..0db5002 100644 --- a/src/lib/plsql/PlSqlParserVisitor.ts +++ b/src/lib/plsql/PlSqlParserVisitor.ts @@ -1,4 +1,4 @@ -// dt-sql-parser/src/grammar/plsql/PlSqlParser.g4 by ANTLR 4.12.0 +// Generated from /Users/ziv/github.com/dt-sql-parser/src/grammar/plsql/PlSqlParser.g4 by ANTLR 4.12.0 import {ParseTreeVisitor} from 'antlr4'; diff --git a/src/lib/spark/SparkSqlLexer.ts b/src/lib/spark/SparkSqlLexer.ts index c1bb033..dd03541 100644 --- a/src/lib/spark/SparkSqlLexer.ts +++ b/src/lib/spark/SparkSqlLexer.ts @@ -1,19 +1,17 @@ -// dt-sql-parser/src/grammar/spark/SparkSql.g4 by ANTLR 4.12.0 +// Generated from /Users/ziv/github.com/dt-sql-parser/src/grammar/spark/SparkSql.g4 by ANTLR 4.12.0 // noinspection ES6UnusedImports,JSUnusedGlobalSymbols,JSUnusedLocalSymbols import { ATN, ATNDeserializer, CharStream, DecisionState, DFA, + Lexer, LexerATNSimulator, RuleContext, PredictionContextCache, Token } from "antlr4"; - -import SparkSqlBaseLexer from "./base/SparkSqlBaseLexer"; - -export default class SparkSqlLexer extends SparkSqlBaseLexer { +export default class SparkSqlLexer extends Lexer { public static readonly T__0 = 1; public static readonly T__1 = 2; public static readonly T__2 = 3; @@ -607,6 +605,41 @@ export default class SparkSqlLexer extends SparkSqlBaseLexer { "WS", "UNRECOGNIZED", ]; + + /** + * Verify whether current token is a valid decimal token (which contains dot). + * Returns true if the character that follows the token is not a digit or letter or underscore. + * + * For example: + * For char stream "2.3", "2." is not a valid decimal token, because it is followed by digit '3'. + * For char stream "2.3_", "2.3" is not a valid decimal token, because it is followed by '_'. + * For char stream "2.3W", "2.3" is not a valid decimal token, because it is followed by 'W'. + * For char stream "12.0D 34.E2+0.12 " 12.0D is a valid decimal token because it is followed + * by a space. 34.E2 is a valid decimal token because it is followed by symbol '+' + * which is not a digit or letter or underscore. + */ + isValidDecimal() { + let nextChar = this.fromCodePoint(this._input.LA(1)); + return !(nextChar >= 'A' && nextChar <= 'Z' || nextChar >= '0' && nextChar <= '9' || nextChar == '_') + } + + /** + * This method will be called when we see '/*' and try to match it as a bracketed comment. + * If the next character is '+', it should be parsed as hint later, and we cannot match + * it as a bracketed comment. + * + * Returns true if the next character is '+'. 
+ */ + isHint() { + let nextChar = this.fromCodePoint(this._input.LA(1)); + return nextChar == '+' + } + + fromCodePoint(codePoint) { + return String.fromCodePoint(codePoint); + } + + constructor(input: CharStream) { super(input); this._interp = new LexerATNSimulator(this, SparkSqlLexer._ATN, SparkSqlLexer.DecisionsToDFA, new PredictionContextCache()); diff --git a/src/lib/spark/SparkSqlListener.ts b/src/lib/spark/SparkSqlListener.ts index ae89078..9996267 100644 --- a/src/lib/spark/SparkSqlListener.ts +++ b/src/lib/spark/SparkSqlListener.ts @@ -1,4 +1,4 @@ -// dt-sql-parser/src/grammar/spark/SparkSql.g4 by ANTLR 4.12.0 +// Generated from /Users/ziv/github.com/dt-sql-parser/src/grammar/spark/SparkSql.g4 by ANTLR 4.12.0 import {ParseTreeListener} from "antlr4"; diff --git a/src/lib/spark/SparkSqlParser.ts b/src/lib/spark/SparkSqlParser.ts index 16dddb7..3806a82 100644 --- a/src/lib/spark/SparkSqlParser.ts +++ b/src/lib/spark/SparkSqlParser.ts @@ -1,24 +1,22 @@ -// dt-sql-parser/src/grammar/spark/SparkSql.g4 by ANTLR 4.12.0 +// Generated from /Users/ziv/github.com/dt-sql-parser/src/grammar/spark/SparkSql.g4 by ANTLR 4.12.0 // noinspection ES6UnusedImports,JSUnusedGlobalSymbols,JSUnusedLocalSymbols import { ATN, ATNDeserializer, DecisionState, DFA, FailedPredicateException, - RecognitionException, NoViableAltException, + RecognitionException, NoViableAltException, BailErrorStrategy, Parser, ParserATNSimulator, - RuleContext, ParserRuleContext, PredictionContextCache, - TerminalNode, + RuleContext, ParserRuleContext, PredictionMode, PredictionContextCache, + TerminalNode, RuleNode, Token, TokenStream, + Interval, IntervalSet } from 'antlr4'; import SparkSqlListener from "./SparkSqlListener.js"; import SparkSqlVisitor from "./SparkSqlVisitor.js"; // for running tests with parameters, TODO: discuss strategy for typed parameters in CI // eslint-disable-next-line no-unused-vars - -const legacy_setops_precedence_enbled = false; -const legacy_exponent_literal_as_decimal_enabled = false; -const SQL_standard_keyword_behavior = false; +type int = number; export default class SparkSqlParser extends Parser { public static readonly T__0 = 1; @@ -882,6 +880,22 @@ export default class SparkSqlParser extends Parser { return new FailedPredicateException(this, predicate, message); } + + /** + * When false, INTERSECT is given the greater precedence over the other set + * operations (UNION, EXCEPT and MINUS) as per the SQL standard. + */ + public legacy_setops_precedence_enbled = false; + /** + * When false, a literal with an exponent would be converted into + * double type rather than decimal type. + */ + public legacy_exponent_literal_as_decimal_enabled = false; + /** + * When true, the behavior of keywords follows ANSI SQL standard. 
+ */ + public SQL_standard_keyword_behavior = false; + constructor(input: TokenStream) { super(input); this._interp = new ParserATNSimulator(this, SparkSqlParser._ATN, SparkSqlParser.DecisionsToDFA, new PredictionContextCache()); @@ -6339,8 +6353,8 @@ export default class SparkSqlParser extends Parser { throw this.createFailedPredicateException("this.precpred(this._ctx, 3)"); } this.state = 1614; - if (!(legacy_setops_precedence_enbled)) { - throw this.createFailedPredicateException("legacy_setops_precedence_enbled"); + if (!(this.legacy_setops_precedence_enbled)) { + throw this.createFailedPredicateException("this.legacy_setops_precedence_enbled"); } this.state = 1615; (localctx as SetOperationContext)._operator = this._input.LT(1); @@ -6376,8 +6390,8 @@ export default class SparkSqlParser extends Parser { throw this.createFailedPredicateException("this.precpred(this._ctx, 2)"); } this.state = 1621; - if (!(!legacy_setops_precedence_enbled)) { - throw this.createFailedPredicateException("!legacy_setops_precedence_enbled"); + if (!(!this.legacy_setops_precedence_enbled)) { + throw this.createFailedPredicateException("!this.legacy_setops_precedence_enbled"); } this.state = 1622; (localctx as SetOperationContext)._operator = this.match(SparkSqlParser.INTERSECT); @@ -6405,8 +6419,8 @@ export default class SparkSqlParser extends Parser { throw this.createFailedPredicateException("this.precpred(this._ctx, 1)"); } this.state = 1628; - if (!(!legacy_setops_precedence_enbled)) { - throw this.createFailedPredicateException("!legacy_setops_precedence_enbled"); + if (!(!this.legacy_setops_precedence_enbled)) { + throw this.createFailedPredicateException("!this.legacy_setops_precedence_enbled"); } this.state = 1629; (localctx as SetOperationContext)._operator = this._input.LT(1); @@ -12272,7 +12286,7 @@ export default class SparkSqlParser extends Parser { this.state = 2894; localctx._frameType = this.match(SparkSqlParser.RANGE); this.state = 2895; - localctx._start = this.frameBound(); + localctx._frameStart = this.frameBound(); } break; case 2: @@ -12281,7 +12295,7 @@ export default class SparkSqlParser extends Parser { this.state = 2896; localctx._frameType = this.match(SparkSqlParser.ROWS); this.state = 2897; - localctx._start = this.frameBound(); + localctx._frameStart = this.frameBound(); } break; case 3: @@ -12292,7 +12306,7 @@ export default class SparkSqlParser extends Parser { this.state = 2899; this.match(SparkSqlParser.BETWEEN); this.state = 2900; - localctx._start = this.frameBound(); + localctx._frameStart = this.frameBound(); this.state = 2901; this.match(SparkSqlParser.AND); this.state = 2902; @@ -12307,7 +12321,7 @@ export default class SparkSqlParser extends Parser { this.state = 2905; this.match(SparkSqlParser.BETWEEN); this.state = 2906; - localctx._start = this.frameBound(); + localctx._frameStart = this.frameBound(); this.state = 2907; this.match(SparkSqlParser.AND); this.state = 2908; @@ -12642,8 +12656,8 @@ export default class SparkSqlParser extends Parser { this.enterOuterAlt(localctx, 2); { this.state = 2956; - if (!(!SQL_standard_keyword_behavior)) { - throw this.createFailedPredicateException("!SQL_standard_keyword_behavior"); + if (!(!this.SQL_standard_keyword_behavior)) { + throw this.createFailedPredicateException("!this.SQL_standard_keyword_behavior"); } this.state = 2957; this.strictNonReserved(); @@ -12694,8 +12708,8 @@ export default class SparkSqlParser extends Parser { this.enterOuterAlt(localctx, 3); { this.state = 2962; - if (!(SQL_standard_keyword_behavior)) { - 
throw this.createFailedPredicateException("SQL_standard_keyword_behavior"); + if (!(this.SQL_standard_keyword_behavior)) { + throw this.createFailedPredicateException("this.SQL_standard_keyword_behavior"); } this.state = 2963; this.ansiNonReserved(); @@ -12706,8 +12720,8 @@ export default class SparkSqlParser extends Parser { this.enterOuterAlt(localctx, 4); { this.state = 2964; - if (!(!SQL_standard_keyword_behavior)) { - throw this.createFailedPredicateException("!SQL_standard_keyword_behavior"); + if (!(!this.SQL_standard_keyword_behavior)) { + throw this.createFailedPredicateException("!this.SQL_standard_keyword_behavior"); } this.state = 2965; this.nonReserved(); @@ -12768,8 +12782,8 @@ export default class SparkSqlParser extends Parser { this.enterOuterAlt(localctx, 1); { this.state = 2970; - if (!(!legacy_exponent_literal_as_decimal_enabled)) { - throw this.createFailedPredicateException("!legacy_exponent_literal_as_decimal_enabled"); + if (!(!this.legacy_exponent_literal_as_decimal_enabled)) { + throw this.createFailedPredicateException("!this.legacy_exponent_literal_as_decimal_enabled"); } this.state = 2972; this._errHandler.sync(this); @@ -12790,8 +12804,8 @@ export default class SparkSqlParser extends Parser { this.enterOuterAlt(localctx, 2); { this.state = 2975; - if (!(!legacy_exponent_literal_as_decimal_enabled)) { - throw this.createFailedPredicateException("!legacy_exponent_literal_as_decimal_enabled"); + if (!(!this.legacy_exponent_literal_as_decimal_enabled)) { + throw this.createFailedPredicateException("!this.legacy_exponent_literal_as_decimal_enabled"); } this.state = 2977; this._errHandler.sync(this); @@ -12812,8 +12826,8 @@ export default class SparkSqlParser extends Parser { this.enterOuterAlt(localctx, 3); { this.state = 2980; - if (!(legacy_exponent_literal_as_decimal_enabled)) { - throw this.createFailedPredicateException("legacy_exponent_literal_as_decimal_enabled"); + if (!(this.legacy_exponent_literal_as_decimal_enabled)) { + throw this.createFailedPredicateException("this.legacy_exponent_literal_as_decimal_enabled"); } this.state = 2982; this._errHandler.sync(this); @@ -13173,15 +13187,15 @@ export default class SparkSqlParser extends Parser { case 0: return this.precpred(this._ctx, 3); case 1: - return legacy_setops_precedence_enbled; + return this.legacy_setops_precedence_enbled; case 2: return this.precpred(this._ctx, 2); case 3: - return !legacy_setops_precedence_enbled; + return !this.legacy_setops_precedence_enbled; case 4: return this.precpred(this._ctx, 1); case 5: - return !legacy_setops_precedence_enbled; + return !this.legacy_setops_precedence_enbled; } return true; } @@ -13223,27 +13237,27 @@ export default class SparkSqlParser extends Parser { private identifier_sempred(localctx: IdentifierContext, predIndex: number): boolean { switch (predIndex) { case 16: - return !SQL_standard_keyword_behavior; + return !this.SQL_standard_keyword_behavior; } return true; } private strictIdentifier_sempred(localctx: StrictIdentifierContext, predIndex: number): boolean { switch (predIndex) { case 17: - return SQL_standard_keyword_behavior; + return this.SQL_standard_keyword_behavior; case 18: - return !SQL_standard_keyword_behavior; + return !this.SQL_standard_keyword_behavior; } return true; } private number_sempred(localctx: NumberContext, predIndex: number): boolean { switch (predIndex) { case 19: - return !legacy_exponent_literal_as_decimal_enabled; + return !this.legacy_exponent_literal_as_decimal_enabled; case 20: - return 
!legacy_exponent_literal_as_decimal_enabled; + return !this.legacy_exponent_literal_as_decimal_enabled; case 21: - return legacy_exponent_literal_as_decimal_enabled; + return this.legacy_exponent_literal_as_decimal_enabled; } return true; } @@ -24999,7 +25013,7 @@ export class WindowDefContext extends WindowSpecContext { export class WindowFrameContext extends ParserRuleContext { public _frameType!: Token; - public _start!: FrameBoundContext; + public _frameStart!: FrameBoundContext; public _end!: FrameBoundContext; constructor(parser?: SparkSqlParser, parent?: ParserRuleContext, invokingState?: number) { super(parent, invokingState); diff --git a/src/lib/spark/SparkSqlVisitor.ts b/src/lib/spark/SparkSqlVisitor.ts index d670c5a..66833fe 100644 --- a/src/lib/spark/SparkSqlVisitor.ts +++ b/src/lib/spark/SparkSqlVisitor.ts @@ -1,4 +1,4 @@ -// dt-sql-parser/src/grammar/spark/SparkSql.g4 by ANTLR 4.12.0 +// Generated from /Users/ziv/github.com/dt-sql-parser/src/grammar/spark/SparkSql.g4 by ANTLR 4.12.0 import {ParseTreeVisitor} from 'antlr4'; diff --git a/src/lib/spark/base/SparkSqlBaseLexer.ts b/src/lib/spark/base/SparkSqlBaseLexer.ts deleted file mode 100644 index d56179c..0000000 --- a/src/lib/spark/base/SparkSqlBaseLexer.ts +++ /dev/null @@ -1,25 +0,0 @@ -import { Lexer } from "antlr4"; - -export default class SparkSqlBaseLexer extends Lexer { - - isValidDecimal() { - let nextChar = this.fromCodePoint(this._input.LA(1)); - return !(nextChar >= 'A' && nextChar <= 'Z' || nextChar >= '0' && nextChar <= '9' || nextChar == '_') - } - - /** - * This method will be called when we see '/*' and try to match it as a bracketed comment. - * If the next character is '+', it should be parsed as hint later, and we cannot match - * it as a bracketed comment. - * - * Returns true if the next character is '+'. - */ - isHint() { - let nextChar = this.fromCodePoint(this._input.LA(1)); - return nextChar == '+' - } - - fromCodePoint(codePoint) { - return String.fromCodePoint(codePoint); - } -}
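
The Spark-side changes above move the `legacy_setops_precedence_enbled`, `legacy_exponent_literal_as_decimal_enabled`, and `SQL_standard_keyword_behavior` switches from module-level constants onto the generated `SparkSqlParser` as public instance fields, and fold the former `SparkSqlBaseLexer` helpers (`isValidDecimal`, `isHint`, `fromCodePoint`) directly into `SparkSqlLexer`. The sketch below shows how the resulting API could be exercised; it is illustrative only and not part of the diff. The import paths, the sample SQL string, and the use of `program()` as the entry rule are assumptions based on this repository's layout, and `CharStreams`/`CommonTokenStream` are used the same way `PostgreSQLParserBase.ts` already imports them.

```typescript
// Usage sketch only (assumed paths and entry rule; not part of the change).
import { CharStreams, CommonTokenStream } from 'antlr4';
import SparkSqlLexer from './src/lib/spark/SparkSqlLexer';
import SparkSqlParser from './src/lib/spark/SparkSqlParser';

const sql = 'SELECT 12.0D FROM t UNION SELECT x FROM u INTERSECT SELECT y FROM v';

// The lexer now carries isValidDecimal()/isHint() itself, so no separate
// SparkSqlBaseLexer is required any more.
const lexer = new SparkSqlLexer(CharStreams.fromString(sql));
const parser = new SparkSqlParser(new CommonTokenStream(lexer));

// The flags are instance fields after this change, so they can be toggled per
// parser instance instead of being frozen module-level constants.
parser.legacy_setops_precedence_enbled = true;   // left-to-right set-operation precedence
parser.SQL_standard_keyword_behavior = false;    // keep non-reserved keywords usable as identifiers

const tree = parser.program();
console.log(tree.toStringTree(parser.ruleNames, parser));
```

Making the flags instance fields also keeps the semantic predicates (`{this.legacy_setops_precedence_enbled}?` and friends) consistent with the `this.`-prefixed actions introduced throughout the grammars, which is what the TypeScript target requires.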