feat: FlinkSQL supports auto complete (#115)

* feat: add antlr4-c3 dependencies

* feat: distinguish table, catalog and database from uid

* feat: move semicolon from sqlStatements to sqlStatement

* chore: move antlr4ts-cli to devDependencies

* feat: improve basic parser and support suggestions of token and syntax

* feat: implement suggest method in sql parsers

* test: flink sql suggestion test cases

* feat: optimize ts defination of suggestion

* feat: add split listener and optimize performance of auto-completion

* test: supplementary flink suggestion unit tests
This commit is contained in:
Hayden
2023-06-09 11:22:53 +08:00
committed by GitHub
parent 2637f90295
commit 1b02ff5d75
25 changed files with 4521 additions and 3418 deletions

View File

@ -0,0 +1,71 @@
/**
* The insertion position of the candidate list.
* Such as cursor position
*/
export interface CaretPosition {
/** start at 1 */
lineNumber: number;
/** start at 1 */
column: number;
}
/**
* Syntax context type at caret position
*/
export enum SyntaxContextType {
/** catalog name */
CATALOG = 'catalog',
/** database name path, such as catalog.db */
DATABASE = 'database',
/** database name path will be created */
DATABASE_CREATE = 'databaseCreate',
/** table name path, such as catalog.db.tb */
TABLE = 'table',
/** table name path will be created */
TABLE_CREATE = 'tableCreate'
}
export interface WordRange {
/** content of word */
text: string;
/** start at 0 */
startIndex: number;
stopIndex: number;
/** start at 1 */
line: number;
/** start at 1 */
startColumn: number;
stopColumn: number;
}
/**
* Suggested information analyzed from the input
*/
export interface SyntaxSuggestion<T = WordRange> {
syntaxContextType: SyntaxContextType;
wordRanges: T[];
}
/**
* Suggested information analyzed from the input
*/
export interface Suggestions<T = WordRange> {
/**
* Suggestions about syntax
*/
syntax: SyntaxSuggestion<T>[];
/**
* Suggestions about keywords
*/
keywords: string[];
}
export interface TextSlice {
startIndex: number;
endIndex: number;
startLine: number;
endLine: number;
startColumn: number;
endColumn: number;
text: string;
}

View File

@ -1,86 +1,164 @@
import { Parser } from 'antlr4ts';
import { ParseTreeWalker } from 'antlr4ts/tree';
import {
Parser,
Lexer,
Token,
CharStreams,
CommonTokenStream,
CodePointCharStream,
ParserRuleContext
} from 'antlr4ts';
import { ParseTreeWalker, ParseTreeListener } from 'antlr4ts/tree';
import { CandidatesCollection, CodeCompletionCore } from 'antlr4-c3';
import { findCaretTokenIndex } from '../../utils/findCaretTokenIndex';
import {
CaretPosition,
Suggestions,
SyntaxSuggestion,
WordRange,
TextSlice
} from './basic-parser-types';
import ParserErrorListener, {
ParserError,
ErrorHandler,
ParserErrorCollector,
} from './parserErrorListener';
interface IParser extends Parser {
// Lost in type definition
ruleNames: string[];
interface IParser<IParserRuleContext extends ParserRuleContext> extends Parser {
// Customized in our parser
program(): any;
program(): IParserRuleContext;
}
interface SplitListener extends ParseTreeListener {
statementsContext: ParserRuleContext[];
}
/**
* Custom Parser class, subclass needs extends it.
*/
export default abstract class BasicParser {
private _parser: IParser;
public parse(
input: string,
errorListener?: ErrorHandler<any>,
) {
const parser = this.createParser(input);
this._parser = parser;
parser.removeErrorListeners();
parser.addErrorListener(new ParserErrorListener(errorListener));
const parserTree = parser.program();
return parserTree;
}
public validate(input: string): ParserError[] {
const lexerError = []; const syntaxErrors = [];
const parser = this.createParser(input);
this._parser = parser;
parser.removeErrorListeners();
parser.addErrorListener(new ParserErrorCollector(syntaxErrors));
parser.program();
return lexerError.concat(syntaxErrors);
}
export default abstract class BasicParser<
L extends Lexer = Lexer,
PRC extends ParserRuleContext = ParserRuleContext,
P extends IParser<PRC> = IParser<PRC>
> {
protected _charStreams: CodePointCharStream;
protected _lexer: L;
protected _tokenStream: CommonTokenStream;
protected _parser: P;
protected _parserTree: PRC;
protected _errorCollector: ParserErrorCollector = new ParserErrorCollector();
protected _parsedInput: string = null;
/**
* Create antrl4 Lexer object
* preferredRules for antlr4-c3
*/
public abstract preferredRules: Set<number>;
/**
* Create antrl4 Lexer instance
* @param input source string
*/
public abstract createLexer(input: string);
public abstract createLexerFormCharStream(charStreams: CodePointCharStream): L;
/**
* Create Parser by lexer
* @param lexer Lexer
* Create Parser by CommonTokenStream
* @param tokenStream CommonTokenStream
*/
public abstract createParserFromLexer(lexer);
public abstract createParserFromTokenStream(tokenStream: CommonTokenStream): P;
/**
* convert candidates to suggestions
* @param candidates candidate list
* @param allTokens all tokens from input
* @param caretTokenIndex tokenIndex of caretPosition
* @param tokenIndexOffset offset of the tokenIndex in the candidates
* compared to the tokenIndex in allTokens
*/
public abstract processCandidates(
candidates: CandidatesCollection,
allTokens: Token[],
caretTokenIndex: number,
tokenIndexOffset: number,
): Suggestions<Token>;
/**
* Get all Tokens of input string
* @param input string
* splitListener instance Getter
*/
protected abstract get splitListener (): SplitListener;
/**
* If it is invoked multiple times in a row and the input parameters is the same
* this method returns the parsing result directly for the first time
* unless the errorListener parameter is passed
* @param input source string
* @param errorListener listen errors
* @returns parserTree
*/
public parse(
input: string,
errorListener?: ErrorHandler<any>
) {
// Avoid parsing the same input repeatedly
if(this._parsedInput === input && !errorListener) {
return;
}
const parser = this.createParser(input);
this._parsedInput = input;
parser.removeErrorListeners();
this._errorCollector.clear();
parser.addErrorListener(this._errorCollector);
if(errorListener) {
parser.addErrorListener(new ParserErrorListener(errorListener));
}
this._parserTree = parser.program();
return this._parserTree;
}
/**
* validate input string and return syntax errors
* @param input source string
* @returns syntax errors
*/
public validate(input: string): ParserError[] {
this.parse(input);
const lexerError = [];
return lexerError.concat(this._errorCollector.parserErrors);
}
/**
* Get all Tokens of input string'<EOF>' is not included
* @param input source string
* @returns Token[]
*/
public getAllTokens(input: string): string[] {
const lexer = this.createLexer(input);
return lexer.getAllTokens().map(token => token.text);
public getAllTokens(input: string): Token[] {
this.parse(input);
let allTokens = this._tokenStream.getTokens();
if(allTokens[allTokens.length - 1].text === '<EOF>') {
allTokens = allTokens.slice(0, -1)
}
return allTokens
};
/**
* Get Parser instance by input string
* @param input
* @param input string
*/
public createParser(input: string): IParser {
const lexer = this.createLexer(input);
const parser: any = this.createParserFromLexer(lexer);
parser.buildParseTrees = true;
this._parser = parser;
public createParser(input: string): P {
this._parserTree = null;
this._charStreams = CharStreams.fromString(input.toUpperCase());
this._lexer = this.createLexerFormCharStream(this._charStreams);
return parser;
this._tokenStream = new CommonTokenStream(this._lexer);
this._tokenStream.fill();
this._parser = this.createParserFromTokenStream(this._tokenStream);
this._parser.buildParseTree = true;
return this._parser
}
/**
@ -88,18 +166,15 @@ export default abstract class BasicParser {
* @param string input
*/
public parserTreeToString(input: string): string {
const parser = this.createParser(input);
this._parser = parser;
const tree = parser.program();
return tree.toStringTree(parser.ruleNames);
this.parse(input);
return this._parserTree.toStringTree(this._parser.ruleNames);
}
/**
* Get List-like style tree string
* @param parserTree
* @param parserTree ProgramRuleContext
*/
public toString(parserTree: any): string {
public toString(parserTree: PRC): string {
return parserTree.toStringTree(this._parser.ruleNames);
}
@ -107,7 +182,130 @@ export default abstract class BasicParser {
* @param listener Listener instance extends ParserListener
* @param parserTree parser Tree
*/
public listen(listener: any, parserTree: any) {
public listen<PTL extends ParseTreeListener = ParseTreeListener>(listener: PTL, parserTree: PRC) {
ParseTreeWalker.DEFAULT.walk(listener, parserTree);
}
/**
* split input into statements
* @param input source string
*/
public splitSQL(input): TextSlice[] {
this.parse(input);
const splitListener = this.splitListener;
this.listen(splitListener, this._parserTree);
const res = splitListener.statementsContext.map(context => {
const { start, stop } = context;
return {
startIndex: start.startIndex,
endIndex: stop.stopIndex,
startLine: start.line,
endLine: stop.line,
startColumn: start.charPositionInLine + 1,
endColumn: stop.charPositionInLine + stop.text.length,
text: this._parsedInput.slice(start.startIndex, stop.stopIndex + 1),
}
})
return res;
}
/**
* Get suggestions of syntax and token at caretPosition
* @param input source string
* @param caretPosition caret position, such as cursor position
* @returns suggestion
*/
public getSuggestionAtCaretPosition(input: string, caretPosition: CaretPosition): Suggestions | null {
const splitListener = this.splitListener;
// TODO: add splitListener to all sqlParser implements add remove following if
if(!splitListener) return null;
this.parse(input);
let sqlParserIns = this._parser;
let allTokens = this.getAllTokens(input);
let caretTokenIndex = findCaretTokenIndex(caretPosition, allTokens);
let c3Context: ParserRuleContext = this._parserTree;
let tokenIndexOffset: number = 0;
if(!caretTokenIndex && caretTokenIndex !== 0) return null;
/**
* Split sql by statement.
* Try to collect candidates from the caret statement only.
*/
this.listen(splitListener, this._parserTree);
// If there are multiple statements.
if (splitListener.statementsContext.length) {
// find statement rule context where caretPosition is located.
const caretStatementContext = splitListener?.statementsContext.find(ctx => {
return caretTokenIndex <= ctx.stop?.tokenIndex && caretTokenIndex >= ctx.start.tokenIndex;
});
if(caretStatementContext) {
c3Context = caretStatementContext
} else {
const lastIndex = splitListener.statementsContext.length > 1
? 2
: 1;
const lastStatementToken= splitListener
.statementsContext[splitListener?.statementsContext.length - lastIndex]
.stop;
/**
* If caretStatementContext is not found and it follows all statements.
* Reparses part of the input following the penultimate statement.
* And c3 will collect candidates in the new parserTreeContext.
*/
if (caretTokenIndex > lastStatementToken?.tokenIndex) {
/**
* Save offset of the tokenIndex in the partInput
* compared to the tokenIndex in the whole input
*/
tokenIndexOffset = lastStatementToken?.tokenIndex + 1;
// Correct caretTokenIndex
caretTokenIndex = caretTokenIndex - tokenIndexOffset;
const inputSlice = input.slice(lastStatementToken.stopIndex + 1);
const charStreams = CharStreams.fromString(inputSlice.toUpperCase());
const lexer = this.createLexerFormCharStream(charStreams);
const tokenStream = new CommonTokenStream(lexer);
tokenStream.fill();
const parser = this.createParserFromTokenStream(tokenStream);
parser.buildParseTree = true;
sqlParserIns = parser;
c3Context = parser.program();
}
}
}
const core = new CodeCompletionCore(sqlParserIns);
core.preferredRules = this.preferredRules;
const candidates = core.collectCandidates(caretTokenIndex, c3Context);
const originalSuggestions = this.processCandidates(candidates, allTokens, caretTokenIndex, tokenIndexOffset);
const syntaxSuggestions: SyntaxSuggestion<WordRange>[] = originalSuggestions.syntax
.map(syntaxCtx => {
const wordRanges: WordRange[] = syntaxCtx.wordRanges.map(token => {
return {
text: this._parsedInput.slice(token.startIndex, token.stopIndex + 1),
startIndex: token.startIndex,
stopIndex: token.stopIndex,
line: token.line,
startColumn: token.charPositionInLine + 1,
stopColumn: token.charPositionInLine + token.text.length
}
})
return {
syntaxContextType: syntaxCtx.syntaxContextType,
wordRanges,
}
})
return {
syntax: syntaxSuggestions,
keywords: originalSuggestions.keywords
}
}
}

View File

@ -1,4 +1,6 @@
import { Token, Recognizer, ParserErrorListener, RecognitionException } from 'antlr4ts';
import { ATNSimulator } from 'antlr4ts/atn/ATNSimulator'
export interface ParserError {
startLine: number;
endLine: number;
@ -8,56 +10,70 @@ export interface ParserError {
}
export interface SyntaxError<T> {
recognizer: Recognizer<T, any>;
recognizer: Recognizer<T, ATNSimulator>;
offendingSymbol: Token;
line: number;
charPositionInLine: number;
msg: string;
e: any;
e: RecognitionException;
}
type ErrorOffendingSymbol = {
text: string;
};
export type ErrorHandler<T> = (err: ParserError, errOption: SyntaxError<T>) => void;
export class ParserErrorCollector implements ParserErrorListener {
private _errors: ParserError[];
constructor(error: ParserError[]) {
this._errors = error;
}
private _parseErrors: ParserError[] = [];
private _syntaxErrors: SyntaxError<Token>[] = [];
syntaxError(
recognizer: Recognizer<ErrorOffendingSymbol, any>, offendingSymbol: ErrorOffendingSymbol, line: number,
charPositionInLine: number, msg: string, e: RecognitionException,
recognizer: Recognizer<Token, ATNSimulator>,
offendingSymbol: Token,
line: number,
charPositionInLine: number,
msg: string,
e: RecognitionException,
) {
let endCol = charPositionInLine + 1;
if (offendingSymbol && offendingSymbol.text !== null) {
endCol = charPositionInLine + offendingSymbol.text.length;
}
this._errors.push({
this._parseErrors.push({
startLine: line,
endLine: line,
startCol: charPositionInLine,
endCol: endCol,
message: msg,
});
this._syntaxErrors.push({
e,
line,
msg,
recognizer,
offendingSymbol,
charPositionInLine,
})
}
clear() {
this._parseErrors = [];
this._syntaxErrors = [];
}
get parserErrors () {
return this._parseErrors
}
}
export default class CustomParserErrorListener implements ParserErrorListener {
private _errorHandler;
constructor(errorListener: ErrorHandler<ErrorOffendingSymbol>) {
constructor(errorListener: ErrorHandler<Token>) {
this._errorHandler = errorListener;
}
syntaxError(
recognizer: Recognizer<ErrorOffendingSymbol, any>, offendingSymbol: ErrorOffendingSymbol, line: number,
charPositionInLine: number, msg: string, e: any,
recognizer: Recognizer<Token, ATNSimulator>, offendingSymbol: Token, line: number,
charPositionInLine: number, msg: string, e: RecognitionException,
) {
let endCol = charPositionInLine + 1;
if (offendingSymbol && offendingSymbol.text !== null) {
@ -81,4 +97,3 @@ export default class CustomParserErrorListener implements ParserErrorListener {
}
}
}