feat: migrate to antlr4ng (#267)

* feat: replace antlr4ts with antlr4ng

* feat: switch caseInsensitive option on

* feat: recompile all g4 files

* feat: update parser to fit antlr4ng

* test: update tests to fit antlr4ng
This commit is contained in:
Hayden
2024-02-26 20:25:09 +08:00
committed by GitHub
parent 5ce89cb421
commit 195878da9b
112 changed files with 648433 additions and 659067 deletions

View File

@ -4,10 +4,11 @@ import {
Token,
CharStreams,
CommonTokenStream,
CodePointCharStream,
CharStream,
ParserRuleContext,
} from 'antlr4ts';
import { ParseTreeWalker, ParseTreeListener } from 'antlr4ts/tree';
ParseTreeWalker,
ParseTreeListener,
} from 'antlr4ng';
import { CandidatesCollection, CodeCompletionCore } from 'antlr4-c3';
import { findCaretTokenIndex } from './utils/findCaretTokenIndex';
import {
@ -38,7 +39,7 @@ export default abstract class BasicParser<
P extends IParser<PRC> = IParser<PRC>,
> {
/** members for cache start */
protected _charStreams: CodePointCharStream;
protected _charStreams: CharStream;
protected _lexer: L;
protected _tokenStream: CommonTokenStream;
protected _parser: P;
@ -60,7 +61,7 @@ export default abstract class BasicParser<
* Create a antlr4 Lexer instance.
* @param input source string
*/
protected abstract createLexerFromCharStream(charStreams: CodePointCharStream): L;
protected abstract createLexerFromCharStream(charStreams: CharStream): L;
/**
* Create Parser by CommonTokenStream
@ -92,7 +93,7 @@ export default abstract class BasicParser<
* @param input string
*/
public createLexer(input: string, errorListener?: ErrorListener<any>) {
const charStreams = CharStreams.fromString(input.toUpperCase());
const charStreams = CharStreams.fromString(input);
const lexer = this.createLexerFromCharStream(charStreams);
if (errorListener) {
lexer.removeErrorListeners();
@ -126,7 +127,7 @@ export default abstract class BasicParser<
*/
public parse(input: string, errorListener?: ErrorListener<any>) {
const parser = this.createParser(input, errorListener);
parser.buildParseTree = true;
parser.buildParseTrees = true;
parser.errorHandler = new ErrorStrategy();
return parser.program();
@ -139,7 +140,7 @@ export default abstract class BasicParser<
*/
private createParserWithCache(input: string): P {
this._parseTree = null;
this._charStreams = CharStreams.fromString(input.toUpperCase());
this._charStreams = CharStreams.fromString(input);
this._lexer = this.createLexerFromCharStream(this._charStreams);
this._lexer.removeErrorListeners();
@ -154,7 +155,7 @@ export default abstract class BasicParser<
this._tokenStream.fill();
this._parser = this.createParserFromTokenStream(this._tokenStream);
this._parser.buildParseTree = true;
this._parser.buildParseTrees = true;
this._parser.errorHandler = new ErrorStrategy();
return this._parser;
@ -239,13 +240,13 @@ export default abstract class BasicParser<
const res = splitListener.statementsContext.map((context) => {
const { start, stop } = context;
return {
startIndex: start.startIndex,
endIndex: stop.stopIndex,
startIndex: start.start,
endIndex: stop.stop,
startLine: start.line,
endLine: stop.line,
startColumn: start.charPositionInLine + 1,
endColumn: stop.charPositionInLine + 1 + stop.text.length,
text: this._parsedInput.slice(start.startIndex, stop.stopIndex + 1),
startColumn: start.column + 1,
endColumn: stop.column + 1 + stop.text.length,
text: this._parsedInput.slice(start.start, stop.stop + 1),
};
});
@ -317,8 +318,8 @@ export default abstract class BasicParser<
}
// A boundary consisting of the index of the input.
const startIndex = startStatement?.start?.startIndex ?? 0;
const stopIndex = stopStatement?.stop?.stopIndex ?? input.length - 1;
const startIndex = startStatement?.start?.start ?? 0;
const stopIndex = stopStatement?.stop?.stop ?? input.length - 1;
/**
* Save offset of the tokenIndex in the range of input
@ -340,7 +341,7 @@ export default abstract class BasicParser<
const parser = this.createParserFromTokenStream(tokenStream);
parser.removeErrorListeners();
parser.buildParseTree = true;
parser.buildParseTrees = true;
parser.errorHandler = new ErrorStrategy();
sqlParserIns = parser;
@ -362,12 +363,12 @@ export default abstract class BasicParser<
(syntaxCtx) => {
const wordRanges: WordRange[] = syntaxCtx.wordRanges.map((token) => {
return {
text: this._parsedInput.slice(token.startIndex, token.stopIndex + 1),
startIndex: token.startIndex,
endIndex: token.stopIndex,
text: this._parsedInput.slice(token.start, token.stop + 1),
startIndex: token.start,
endIndex: token.stop,
line: token.line,
startColumn: token.charPositionInLine + 1,
stopColumn: token.charPositionInLine + 1 + token.text.length,
startColumn: token.column + 1,
stopColumn: token.column + 1 + token.text.length,
};
});
return {

View File

@ -1,10 +1,12 @@
import { DefaultErrorStrategy } from 'antlr4ts/DefaultErrorStrategy';
import { Parser } from 'antlr4ts/Parser';
import { InputMismatchException } from 'antlr4ts/InputMismatchException';
import { IntervalSet } from 'antlr4ts/misc/IntervalSet';
import { ParserRuleContext } from 'antlr4ts/ParserRuleContext';
import { RecognitionException } from 'antlr4ts/RecognitionException';
import { Token } from 'antlr4ts/Token';
import {
DefaultErrorStrategy,
Parser,
InputMismatchException,
IntervalSet,
ParserRuleContext,
RecognitionException,
Token,
} from 'antlr4ng';
/**
* Base on DefaultErrorStrategy.
@ -33,7 +35,7 @@ export class ErrorStrategy extends DefaultErrorStrategy {
if (!this.lastErrorStates) {
this.lastErrorStates = new IntervalSet();
}
this.lastErrorStates.add(recognizer.state);
this.lastErrorStates.addOne(recognizer.state);
let followSet: IntervalSet = this.getErrorRecoverySet(recognizer);
this.consumeUntil(recognizer, followSet);
}
@ -43,11 +45,7 @@ export class ErrorStrategy extends DefaultErrorStrategy {
if (this.nextTokensContext === undefined) {
e = new InputMismatchException(recognizer);
} else {
e = new InputMismatchException(
recognizer,
this.nextTokensState,
this.nextTokensContext
);
e = new InputMismatchException(recognizer);
}
// Mark the context as an anomaly

View File

@ -1,5 +1,10 @@
import { Token, Recognizer, ANTLRErrorListener, RecognitionException } from 'antlr4ts';
import { ATNSimulator } from 'antlr4ts/atn/ATNSimulator';
import {
Token,
Recognizer,
ANTLRErrorListener,
RecognitionException,
ATNSimulator,
} from 'antlr4ng';
/**
* Converted from {@link SyntaxError}.
@ -20,7 +25,7 @@ export interface ParseError {
* The type of error resulting from lexical parsing and parsing.
*/
export interface SyntaxError<T> {
readonly recognizer: Recognizer<T, ATNSimulator>;
readonly recognizer: Recognizer<ATNSimulator>;
readonly offendingSymbol: Token;
readonly line: number;
readonly charPositionInLine: number;
@ -34,15 +39,21 @@ export interface SyntaxError<T> {
*/
export type ErrorListener<T> = (parseError: ParseError, originalError: SyntaxError<T>) => void;
export default class ParseErrorListener implements ANTLRErrorListener<Token> {
export default class ParseErrorListener implements ANTLRErrorListener {
private _errorListener: ErrorListener<Token>;
constructor(errorListener: ErrorListener<Token>) {
this._errorListener = errorListener;
}
reportAmbiguity() {}
reportAttemptingFullContext() {}
reportContextSensitivity() {}
syntaxError(
recognizer: Recognizer<Token, ATNSimulator>,
recognizer: Recognizer<ATNSimulator>,
offendingSymbol,
line: number,
charPositionInLine: number,

View File

@ -1,4 +1,4 @@
import { Token } from 'antlr4ts';
import { Token } from 'antlr4ng';
import { CaretPosition } from '../basic-parser-types';
/**
@ -15,15 +15,11 @@ export function findCaretTokenIndex(caretPosition: CaretPosition, allTokens: Tok
while (left <= right) {
const mid = left + ((right - left) >> 1);
const token = allTokens[mid];
if (
token.line > caretLine ||
(token.line === caretLine && token.charPositionInLine + 1 >= caretCol)
) {
if (token.line > caretLine || (token.line === caretLine && token.column + 1 >= caretCol)) {
right = mid - 1;
} else if (
token.line < caretLine ||
(token.line === caretLine &&
token.charPositionInLine + token.text.length + 1 < caretCol)
(token.line === caretLine && token.column + token.text.length + 1 < caretCol)
) {
left = mid + 1;
} else {

View File

@ -1,4 +1,4 @@
import { Token } from 'antlr4ts';
import { Token } from 'antlr4ng';
import { CandidatesCollection } from 'antlr4-c3';
import { FlinkSqlLexer } from '../lib/flinksql/FlinkSqlLexer';
import {
@ -139,7 +139,10 @@ export class FlinkSqlSplitListener implements FlinkSqlParserListener {
this._statementsContext.push(ctx);
};
enterSingleStatement = (ctx: SingleStatementContext) => {};
visitTerminal() {}
visitErrorNode() {}
enterEveryRule() {}
exitEveryRule() {}
get statementsContext() {
return this._statementsContext;

View File

@ -1,4 +1,4 @@
import { Token } from 'antlr4ts';
import { Token } from 'antlr4ng';
import { CandidatesCollection } from 'antlr4-c3';
import { HiveSqlLexer } from '../lib/hive/HiveSqlLexer';
import { HiveSqlParser, ProgramContext, StatementContext } from '../lib/hive/HiveSqlParser';
@ -130,7 +130,10 @@ export class HiveSqlSplitListener implements HiveSqlParserListener {
this._statementContext.push(ctx);
};
enterStatement = (ctx: StatementContext) => {};
visitTerminal() {}
visitErrorNode() {}
enterEveryRule() {}
exitEveryRule() {}
get statementsContext() {
return this._statementContext;

View File

@ -1,4 +1,4 @@
import { Token } from 'antlr4ts';
import { Token } from 'antlr4ng';
import { CandidatesCollection } from 'antlr4-c3';
import { ImpalaSqlLexer } from '../lib/impala/ImpalaSqlLexer';
import {
@ -135,7 +135,10 @@ export class ImpalaSqlSplitListener implements ImpalaSqlParserListener {
this._statementContext.push(ctx);
};
enterSingleStatement = (ctx: SingleStatementContext) => {};
visitTerminal() {}
visitErrorNode() {}
enterEveryRule() {}
exitEveryRule() {}
get statementsContext() {
return this._statementContext;

View File

@ -1,4 +1,4 @@
import { Token } from 'antlr4ts';
import { Token } from 'antlr4ng';
import { CandidatesCollection } from 'antlr4-c3';
import { MySqlLexer } from '../lib/mysql/MySqlLexer';
import { MySqlParser, ProgramContext, SingleStatementContext } from '../lib/mysql/MySqlParser';
@ -130,7 +130,10 @@ export class MysqlSplitListener implements MySqlParserListener {
this._statementsContext.push(ctx);
};
enterSingleStatement = (ctx: SingleStatementContext) => {};
visitTerminal() {}
visitErrorNode() {}
enterEveryRule() {}
exitEveryRule() {}
get statementsContext() {
return this._statementsContext;

View File

@ -1,4 +1,4 @@
import { Token } from 'antlr4ts';
import { Token } from 'antlr4ng';
import { CandidatesCollection } from 'antlr4-c3';
import { PostgreSQLLexer } from '../lib/pgsql/PostgreSQLLexer';
import { PostgreSQLParser, ProgramContext, SingleStmtContext } from '../lib/pgsql/PostgreSQLParser';
@ -152,7 +152,10 @@ export class PgSqlSplitListener implements PostgreSQLParserListener {
this._statementsContext.push(ctx);
};
enterSingleStmt = (ctx: SingleStmtContext) => {};
visitTerminal() {}
visitErrorNode() {}
enterEveryRule() {}
exitEveryRule() {}
get statementsContext() {
return this._statementsContext;

View File

@ -1,4 +1,4 @@
import { Token } from 'antlr4ts';
import { Token } from 'antlr4ng';
import { CandidatesCollection } from 'antlr4-c3';
import { PlSqlLexer } from '../lib/plsql/PlSqlLexer';
import { PlSqlParser, ProgramContext } from '../lib/plsql/PlSqlParser';

View File

@ -1,4 +1,4 @@
import { Token } from 'antlr4ts';
import { Token } from 'antlr4ng';
import { CandidatesCollection } from 'antlr4-c3';
import { SparkSqlLexer } from '../lib/spark/SparkSqlLexer';
import {
@ -135,7 +135,10 @@ export class SparkSqlSplitListener implements SparkSqlParserListener {
this._statementsContext.push(ctx);
};
enterSingleStatement = (ctx: SingleStatementContext) => {};
visitTerminal() {}
visitErrorNode() {}
enterEveryRule() {}
exitEveryRule() {}
get statementsContext() {
return this._statementsContext;

View File

@ -1,4 +1,4 @@
import { Token } from 'antlr4ts';
import { Token } from 'antlr4ng';
import { CandidatesCollection } from 'antlr4-c3';
import { TrinoSqlLexer } from '../lib/trinosql/TrinoSqlLexer';
import {
@ -135,6 +135,11 @@ export class TrinoSqlSplitListener implements TrinoSqlListener {
this._statementsContext.push(ctx);
};
visitTerminal() {}
visitErrorNode() {}
enterEveryRule() {}
exitEveryRule() {}
get statementsContext() {
return this._statementsContext;
}