feat: migrate to antlr4ng (#267)

* feat: replace antlr4ts with antlr4ng

* feat: switch caseInsensitive option on

* feat: recompile all g4 files

* feat: update parser to fit antlr4ng

* test: update tests to fit antlr4ng
This commit is contained in:
Hayden
2024-02-26 20:25:09 +08:00
committed by GitHub
parent 5ce89cb421
commit 195878da9b
112 changed files with 648433 additions and 659067 deletions

View File

@ -4,10 +4,11 @@ import {
Token,
CharStreams,
CommonTokenStream,
CodePointCharStream,
CharStream,
ParserRuleContext,
} from 'antlr4ts';
import { ParseTreeWalker, ParseTreeListener } from 'antlr4ts/tree';
ParseTreeWalker,
ParseTreeListener,
} from 'antlr4ng';
import { CandidatesCollection, CodeCompletionCore } from 'antlr4-c3';
import { findCaretTokenIndex } from './utils/findCaretTokenIndex';
import {
@ -38,7 +39,7 @@ export default abstract class BasicParser<
P extends IParser<PRC> = IParser<PRC>,
> {
/** members for cache start */
protected _charStreams: CodePointCharStream;
protected _charStreams: CharStream;
protected _lexer: L;
protected _tokenStream: CommonTokenStream;
protected _parser: P;
@ -60,7 +61,7 @@ export default abstract class BasicParser<
* Create a antlr4 Lexer instance.
* @param input source string
*/
protected abstract createLexerFromCharStream(charStreams: CodePointCharStream): L;
protected abstract createLexerFromCharStream(charStreams: CharStream): L;
/**
* Create Parser by CommonTokenStream
@ -92,7 +93,7 @@ export default abstract class BasicParser<
* @param input string
*/
public createLexer(input: string, errorListener?: ErrorListener<any>) {
const charStreams = CharStreams.fromString(input.toUpperCase());
const charStreams = CharStreams.fromString(input);
const lexer = this.createLexerFromCharStream(charStreams);
if (errorListener) {
lexer.removeErrorListeners();
@ -126,7 +127,7 @@ export default abstract class BasicParser<
*/
public parse(input: string, errorListener?: ErrorListener<any>) {
const parser = this.createParser(input, errorListener);
parser.buildParseTree = true;
parser.buildParseTrees = true;
parser.errorHandler = new ErrorStrategy();
return parser.program();
@ -139,7 +140,7 @@ export default abstract class BasicParser<
*/
private createParserWithCache(input: string): P {
this._parseTree = null;
this._charStreams = CharStreams.fromString(input.toUpperCase());
this._charStreams = CharStreams.fromString(input);
this._lexer = this.createLexerFromCharStream(this._charStreams);
this._lexer.removeErrorListeners();
@ -154,7 +155,7 @@ export default abstract class BasicParser<
this._tokenStream.fill();
this._parser = this.createParserFromTokenStream(this._tokenStream);
this._parser.buildParseTree = true;
this._parser.buildParseTrees = true;
this._parser.errorHandler = new ErrorStrategy();
return this._parser;
@ -239,13 +240,13 @@ export default abstract class BasicParser<
const res = splitListener.statementsContext.map((context) => {
const { start, stop } = context;
return {
startIndex: start.startIndex,
endIndex: stop.stopIndex,
startIndex: start.start,
endIndex: stop.stop,
startLine: start.line,
endLine: stop.line,
startColumn: start.charPositionInLine + 1,
endColumn: stop.charPositionInLine + 1 + stop.text.length,
text: this._parsedInput.slice(start.startIndex, stop.stopIndex + 1),
startColumn: start.column + 1,
endColumn: stop.column + 1 + stop.text.length,
text: this._parsedInput.slice(start.start, stop.stop + 1),
};
});
@ -317,8 +318,8 @@ export default abstract class BasicParser<
}
// A boundary consisting of the index of the input.
const startIndex = startStatement?.start?.startIndex ?? 0;
const stopIndex = stopStatement?.stop?.stopIndex ?? input.length - 1;
const startIndex = startStatement?.start?.start ?? 0;
const stopIndex = stopStatement?.stop?.stop ?? input.length - 1;
/**
* Save offset of the tokenIndex in the range of input
@ -340,7 +341,7 @@ export default abstract class BasicParser<
const parser = this.createParserFromTokenStream(tokenStream);
parser.removeErrorListeners();
parser.buildParseTree = true;
parser.buildParseTrees = true;
parser.errorHandler = new ErrorStrategy();
sqlParserIns = parser;
@ -362,12 +363,12 @@ export default abstract class BasicParser<
(syntaxCtx) => {
const wordRanges: WordRange[] = syntaxCtx.wordRanges.map((token) => {
return {
text: this._parsedInput.slice(token.startIndex, token.stopIndex + 1),
startIndex: token.startIndex,
endIndex: token.stopIndex,
text: this._parsedInput.slice(token.start, token.stop + 1),
startIndex: token.start,
endIndex: token.stop,
line: token.line,
startColumn: token.charPositionInLine + 1,
stopColumn: token.charPositionInLine + 1 + token.text.length,
startColumn: token.column + 1,
stopColumn: token.column + 1 + token.text.length,
};
});
return {

View File

@ -1,10 +1,12 @@
import { DefaultErrorStrategy } from 'antlr4ts/DefaultErrorStrategy';
import { Parser } from 'antlr4ts/Parser';
import { InputMismatchException } from 'antlr4ts/InputMismatchException';
import { IntervalSet } from 'antlr4ts/misc/IntervalSet';
import { ParserRuleContext } from 'antlr4ts/ParserRuleContext';
import { RecognitionException } from 'antlr4ts/RecognitionException';
import { Token } from 'antlr4ts/Token';
import {
DefaultErrorStrategy,
Parser,
InputMismatchException,
IntervalSet,
ParserRuleContext,
RecognitionException,
Token,
} from 'antlr4ng';
/**
* Base on DefaultErrorStrategy.
@ -33,7 +35,7 @@ export class ErrorStrategy extends DefaultErrorStrategy {
if (!this.lastErrorStates) {
this.lastErrorStates = new IntervalSet();
}
this.lastErrorStates.add(recognizer.state);
this.lastErrorStates.addOne(recognizer.state);
let followSet: IntervalSet = this.getErrorRecoverySet(recognizer);
this.consumeUntil(recognizer, followSet);
}
@ -43,11 +45,7 @@ export class ErrorStrategy extends DefaultErrorStrategy {
if (this.nextTokensContext === undefined) {
e = new InputMismatchException(recognizer);
} else {
e = new InputMismatchException(
recognizer,
this.nextTokensState,
this.nextTokensContext
);
e = new InputMismatchException(recognizer);
}
// Mark the context as an anomaly

View File

@ -1,5 +1,10 @@
import { Token, Recognizer, ANTLRErrorListener, RecognitionException } from 'antlr4ts';
import { ATNSimulator } from 'antlr4ts/atn/ATNSimulator';
import {
Token,
Recognizer,
ANTLRErrorListener,
RecognitionException,
ATNSimulator,
} from 'antlr4ng';
/**
* Converted from {@link SyntaxError}.
@ -20,7 +25,7 @@ export interface ParseError {
* The type of error resulting from lexical parsing and parsing.
*/
export interface SyntaxError<T> {
readonly recognizer: Recognizer<T, ATNSimulator>;
readonly recognizer: Recognizer<ATNSimulator>;
readonly offendingSymbol: Token;
readonly line: number;
readonly charPositionInLine: number;
@ -34,15 +39,21 @@ export interface SyntaxError<T> {
*/
export type ErrorListener<T> = (parseError: ParseError, originalError: SyntaxError<T>) => void;
export default class ParseErrorListener implements ANTLRErrorListener<Token> {
export default class ParseErrorListener implements ANTLRErrorListener {
private _errorListener: ErrorListener<Token>;
constructor(errorListener: ErrorListener<Token>) {
this._errorListener = errorListener;
}
reportAmbiguity() {}
reportAttemptingFullContext() {}
reportContextSensitivity() {}
syntaxError(
recognizer: Recognizer<Token, ATNSimulator>,
recognizer: Recognizer<ATNSimulator>,
offendingSymbol,
line: number,
charPositionInLine: number,

View File

@ -1,4 +1,4 @@
import { Token } from 'antlr4ts';
import { Token } from 'antlr4ng';
import { CaretPosition } from '../basic-parser-types';
/**
@ -15,15 +15,11 @@ export function findCaretTokenIndex(caretPosition: CaretPosition, allTokens: Tok
while (left <= right) {
const mid = left + ((right - left) >> 1);
const token = allTokens[mid];
if (
token.line > caretLine ||
(token.line === caretLine && token.charPositionInLine + 1 >= caretCol)
) {
if (token.line > caretLine || (token.line === caretLine && token.column + 1 >= caretCol)) {
right = mid - 1;
} else if (
token.line < caretLine ||
(token.line === caretLine &&
token.charPositionInLine + token.text.length + 1 < caretCol)
(token.line === caretLine && token.column + token.text.length + 1 < caretCol)
) {
left = mid + 1;
} else {

View File

@ -1,4 +1,4 @@
import { Token } from 'antlr4ts';
import { Token } from 'antlr4ng';
import { CandidatesCollection } from 'antlr4-c3';
import { FlinkSqlLexer } from '../lib/flinksql/FlinkSqlLexer';
import {
@ -139,7 +139,10 @@ export class FlinkSqlSplitListener implements FlinkSqlParserListener {
this._statementsContext.push(ctx);
};
enterSingleStatement = (ctx: SingleStatementContext) => {};
visitTerminal() {}
visitErrorNode() {}
enterEveryRule() {}
exitEveryRule() {}
get statementsContext() {
return this._statementsContext;

View File

@ -1,4 +1,4 @@
import { Token } from 'antlr4ts';
import { Token } from 'antlr4ng';
import { CandidatesCollection } from 'antlr4-c3';
import { HiveSqlLexer } from '../lib/hive/HiveSqlLexer';
import { HiveSqlParser, ProgramContext, StatementContext } from '../lib/hive/HiveSqlParser';
@ -130,7 +130,10 @@ export class HiveSqlSplitListener implements HiveSqlParserListener {
this._statementContext.push(ctx);
};
enterStatement = (ctx: StatementContext) => {};
visitTerminal() {}
visitErrorNode() {}
enterEveryRule() {}
exitEveryRule() {}
get statementsContext() {
return this._statementContext;

View File

@ -1,4 +1,4 @@
import { Token } from 'antlr4ts';
import { Token } from 'antlr4ng';
import { CandidatesCollection } from 'antlr4-c3';
import { ImpalaSqlLexer } from '../lib/impala/ImpalaSqlLexer';
import {
@ -135,7 +135,10 @@ export class ImpalaSqlSplitListener implements ImpalaSqlParserListener {
this._statementContext.push(ctx);
};
enterSingleStatement = (ctx: SingleStatementContext) => {};
visitTerminal() {}
visitErrorNode() {}
enterEveryRule() {}
exitEveryRule() {}
get statementsContext() {
return this._statementContext;

View File

@ -1,4 +1,4 @@
import { Token } from 'antlr4ts';
import { Token } from 'antlr4ng';
import { CandidatesCollection } from 'antlr4-c3';
import { MySqlLexer } from '../lib/mysql/MySqlLexer';
import { MySqlParser, ProgramContext, SingleStatementContext } from '../lib/mysql/MySqlParser';
@ -130,7 +130,10 @@ export class MysqlSplitListener implements MySqlParserListener {
this._statementsContext.push(ctx);
};
enterSingleStatement = (ctx: SingleStatementContext) => {};
visitTerminal() {}
visitErrorNode() {}
enterEveryRule() {}
exitEveryRule() {}
get statementsContext() {
return this._statementsContext;

View File

@ -1,4 +1,4 @@
import { Token } from 'antlr4ts';
import { Token } from 'antlr4ng';
import { CandidatesCollection } from 'antlr4-c3';
import { PostgreSQLLexer } from '../lib/pgsql/PostgreSQLLexer';
import { PostgreSQLParser, ProgramContext, SingleStmtContext } from '../lib/pgsql/PostgreSQLParser';
@ -152,7 +152,10 @@ export class PgSqlSplitListener implements PostgreSQLParserListener {
this._statementsContext.push(ctx);
};
enterSingleStmt = (ctx: SingleStmtContext) => {};
visitTerminal() {}
visitErrorNode() {}
enterEveryRule() {}
exitEveryRule() {}
get statementsContext() {
return this._statementsContext;

View File

@ -1,4 +1,4 @@
import { Token } from 'antlr4ts';
import { Token } from 'antlr4ng';
import { CandidatesCollection } from 'antlr4-c3';
import { PlSqlLexer } from '../lib/plsql/PlSqlLexer';
import { PlSqlParser, ProgramContext } from '../lib/plsql/PlSqlParser';

View File

@ -1,4 +1,4 @@
import { Token } from 'antlr4ts';
import { Token } from 'antlr4ng';
import { CandidatesCollection } from 'antlr4-c3';
import { SparkSqlLexer } from '../lib/spark/SparkSqlLexer';
import {
@ -135,7 +135,10 @@ export class SparkSqlSplitListener implements SparkSqlParserListener {
this._statementsContext.push(ctx);
};
enterSingleStatement = (ctx: SingleStatementContext) => {};
visitTerminal() {}
visitErrorNode() {}
enterEveryRule() {}
exitEveryRule() {}
get statementsContext() {
return this._statementsContext;

View File

@ -1,4 +1,4 @@
import { Token } from 'antlr4ts';
import { Token } from 'antlr4ng';
import { CandidatesCollection } from 'antlr4-c3';
import { TrinoSqlLexer } from '../lib/trinosql/TrinoSqlLexer';
import {
@ -135,6 +135,11 @@ export class TrinoSqlSplitListener implements TrinoSqlListener {
this._statementsContext.push(ctx);
};
visitTerminal() {}
visitErrorNode() {}
enterEveryRule() {}
exitEveryRule() {}
get statementsContext() {
return this._statementsContext;
}