feat: add ErrorStrategy (#230)

* refactor: rename errorHandler to errorListener

* feat: add ErrorStrategy to mark context exceptions

* test: errorStrategy unit tests
This commit is contained in:
Hayden
2023-12-11 17:34:49 +08:00
committed by GitHub
parent 61a00d7bb8
commit ee8b468778
12 changed files with 548 additions and 30 deletions

View File

@ -18,7 +18,7 @@ export * from './lib/impala/ImpalaSqlParserVisitor';
export { SyntaxContextType } from './parser/common/basic-parser-types';
export type * from './parser/common/basic-parser-types';
export type { SyntaxError, ParseError, ErrorHandler } from './parser/common/parseErrorListener';
export type { SyntaxError, ParseError, ErrorListener } from './parser/common/parseErrorListener';
/**
* @deprecated legacy, will be removed.

View File

@ -17,7 +17,8 @@ import {
WordRange,
TextSlice,
} from './basic-parser-types';
import ParseErrorListener, { ParseError, ErrorHandler } from './parseErrorListener';
import ParseErrorListener, { ParseError, ErrorListener } from './parseErrorListener';
import { ErrorStrategy } from './errorStrategy';
interface IParser<IParserRuleContext extends ParserRuleContext> extends Parser {
// Customized in our parser
@ -46,7 +47,7 @@ export default abstract class BasicParser<
protected _parseErrors: ParseError[] = [];
/** members for cache end */
private _errorHandler: ErrorHandler<any> = (error) => {
private _errorListener: ErrorListener<any> = (error) => {
this._parseErrors.push(error);
};
@ -90,7 +91,7 @@ export default abstract class BasicParser<
* Create an antlr4 lexer from input.
* @param input string
*/
public createLexer(input: string, errorListener?: ErrorHandler<any>) {
public createLexer(input: string, errorListener?: ErrorListener<any>) {
const charStreams = CharStreams.fromString(input.toUpperCase());
const lexer = this.createLexerFormCharStream(charStreams);
if (errorListener) {
@ -104,7 +105,7 @@ export default abstract class BasicParser<
* Create an antlr4 parser from input.
* @param input string
*/
public createParser(input: string, errorListener?: ErrorHandler<any>) {
public createParser(input: string, errorListener?: ErrorListener<any>) {
const lexer = this.createLexer(input, errorListener);
const tokenStream = new CommonTokenStream(lexer);
const parser = this.createParserFromTokenStream(tokenStream);
@ -123,9 +124,10 @@ export default abstract class BasicParser<
* @param errorListener listens for parser errors and lexer errors.
* @returns parseTree
*/
public parse(input: string, errorListener?: ErrorHandler<any>) {
public parse(input: string, errorListener?: ErrorListener<any>) {
const parser = this.createParser(input, errorListener);
parser.buildParseTree = true;
parser.errorHandler = new ErrorStrategy();
return parser.program();
}
@ -141,7 +143,7 @@ export default abstract class BasicParser<
this._lexer = this.createLexerFormCharStream(this._charStreams);
this._lexer.removeErrorListeners();
this._lexer.addErrorListener(new ParseErrorListener(this._errorHandler));
this._lexer.addErrorListener(new ParseErrorListener(this._errorListener));
this._tokenStream = new CommonTokenStream(this._lexer);
/**
@ -153,6 +155,7 @@ export default abstract class BasicParser<
this._parser = this.createParserFromTokenStream(this._tokenStream);
this._parser.buildParseTree = true;
this._parser.errorHandler = new ErrorStrategy();
return this._parser;
}
@ -165,7 +168,7 @@ export default abstract class BasicParser<
* @param errorListener listen errors
* @returns parseTree
*/
private parseWithCache(input: string, errorListener?: ErrorHandler<any>) {
private parseWithCache(input: string, errorListener?: ErrorListener<any>) {
// Avoid parsing the same input repeatedly.
if (this._parsedInput === input && !errorListener) {
return this._parseTree;
@ -175,7 +178,7 @@ export default abstract class BasicParser<
this._parsedInput = input;
parser.removeErrorListeners();
parser.addErrorListener(new ParseErrorListener(this._errorHandler));
parser.addErrorListener(new ParseErrorListener(this._errorListener));
this._parseTree = parser.program();
@ -317,6 +320,7 @@ export default abstract class BasicParser<
const parser = this.createParserFromTokenStream(tokenStream);
parser.removeErrorListeners();
parser.buildParseTree = true;
parser.errorHandler = new ErrorStrategy();
sqlParserIns = parser;
c3Context = parser.program();

View File

@ -0,0 +1,75 @@
import { DefaultErrorStrategy } from 'antlr4ts/DefaultErrorStrategy';
import { Parser } from 'antlr4ts/Parser';
import { InputMismatchException } from 'antlr4ts/InputMismatchException';
import { IntervalSet } from 'antlr4ts/misc/IntervalSet';
import { ParserRuleContext } from 'antlr4ts/ParserRuleContext';
import { RecognitionException } from 'antlr4ts/RecognitionException';
import { Token } from 'antlr4ts/Token';
/**
 * Assigns `e` to the `exception` property of `context` and of every ancestor
 * reachable through `parent`, so the whole chain is marked as erroneous.
 *
 * @param context innermost rule context to start from; nothing happens when it is undefined
 * @param e the exception to record on each context in the chain
 */
function markContextChain(context: ParserRuleContext | undefined, e: RecognitionException): void {
    for (
        let current: ParserRuleContext | undefined = context;
        current;
        current = current.parent
    ) {
        current.exception = e;
    }
}

/**
 * Based on DefaultErrorStrategy.
 * The difference is that it assigns the exception to `context.exception`
 * (for the current rule context and all of its ancestors) whenever an error
 * is encountered, before the regular recovery steps run.
 */
export class ErrorStrategy extends DefaultErrorStrategy {
    /**
     * Marks the current context chain with `e`, then resynchronizes the parser
     * by consuming tokens until one in the error recovery set is found.
     *
     * @param recognizer the parser that reported the error
     * @param e the exception to record on the context chain
     */
    public recover(recognizer: Parser, e: RecognitionException): void {
        // Mark the context as an anomaly
        markContextChain(recognizer.context, e);

        // Error recovery
        // Same input index and same state as the previous error: consume one
        // token before resynchronizing so the parser moves forward.
        if (
            this.lastErrorIndex === recognizer.inputStream.index &&
            this.lastErrorStates &&
            this.lastErrorStates.contains(recognizer.state)
        ) {
            recognizer.consume();
        }
        this.lastErrorIndex = recognizer.inputStream.index;
        if (!this.lastErrorStates) {
            this.lastErrorStates = new IntervalSet();
        }
        this.lastErrorStates.add(recognizer.state);
        const followSet: IntervalSet = this.getErrorRecoverySet(recognizer);
        this.consumeUntil(recognizer, followSet);
    }

    /**
     * Marks the current context chain with an InputMismatchException, then
     * attempts single-token deletion followed by single-token insertion.
     *
     * The exception is created and recorded before recovery is attempted, so
     * the contexts stay marked even when one of the recovery steps succeeds.
     *
     * @param recognizer the parser that encountered the mismatched token
     * @returns the matched token after a successful single-token deletion, or
     * the symbol produced by `getMissingSymbol` after a successful insertion
     * @throws InputMismatchException when neither recovery step succeeds
     */
    public recoverInline(recognizer: Parser): Token {
        let e: RecognitionException;
        if (this.nextTokensContext === undefined) {
            e = new InputMismatchException(recognizer);
        } else {
            e = new InputMismatchException(
                recognizer,
                this.nextTokensState,
                this.nextTokensContext
            );
        }

        // Mark the context as an anomaly
        markContextChain(recognizer.context, e);

        // Error recovery
        const matchedSymbol = this.singleTokenDeletion(recognizer);
        if (matchedSymbol) {
            // The extra token was dropped; move past the expected one.
            recognizer.consume();
            return matchedSymbol;
        }
        if (this.singleTokenInsertion(recognizer)) {
            return this.getMissingSymbol(recognizer);
        }
        throw e;
    }
}

View File

@ -25,16 +25,16 @@ export interface SyntaxError<T> {
}
/**
* ErrorHandler will be invoked when it encounters a parsing error.
* ErrorListener will be invoked when it encounters a parsing error.
* Includes lexical errors and parsing errors.
*/
export type ErrorHandler<T> = (parseError: ParseError, originalError: SyntaxError<T>) => void;
export type ErrorListener<T> = (parseError: ParseError, originalError: SyntaxError<T>) => void;
export default class ParseErrorListener implements ANTLRErrorListener<Token> {
private _errorHandler;
private _errorListener;
constructor(errorListener: ErrorHandler<Token>) {
this._errorHandler = errorListener;
constructor(errorListener: ErrorListener<Token>) {
this._errorListener = errorListener;
}
syntaxError(
@ -49,8 +49,8 @@ export default class ParseErrorListener implements ANTLRErrorListener<Token> {
if (offendingSymbol && offendingSymbol.text !== null) {
endCol = charPositionInLine + offendingSymbol.text.length;
}
if (this._errorHandler) {
this._errorHandler(
if (this._errorListener) {
this._errorListener(
{
startLine: line,
endLine: line,