import {
    Parser,
    Lexer,
    Token,
    CharStreams,
    CommonTokenStream,
    CodePointCharStream,
    ParserRuleContext,
} from 'antlr4ts';
import { ParseTreeWalker, ParseTreeListener } from 'antlr4ts/tree';
import { CandidatesCollection, CodeCompletionCore } from 'antlr4-c3';
import { findCaretTokenIndex } from '../../utils/findCaretTokenIndex';
import {
    CaretPosition,
    Suggestions,
    SyntaxSuggestion,
    WordRange,
    TextSlice,
} from './basic-parser-types';
import ParserErrorListener, {
    ParserError,
    ErrorHandler,
    ParserErrorCollector,
} from './parserErrorListener';

/**
 * A generated parser that exposes a `program` entry rule.
 * The type parameter is the rule context type returned by that entry rule.
 */
interface IParser<IParserRuleContext extends ParserRuleContext> extends Parser {
    // Customized in our parser
    program(): IParserRuleContext;
}

/**
 * A parse tree listener that collects one rule context per statement
 * while the tree is being walked.
 */
interface SplitListener extends ParseTreeListener {
    statementsContext: ParserRuleContext[];
}

/**
 * Base class for the concrete SQL parsers; subclasses must extend it and
 * provide the lexer/parser factories, the preferred rules for antlr4-c3,
 * the candidate post-processing and the statement split listener.
 *
 * Type parameters:
 * - `L`   the lexer type
 * - `PRC` the rule context type returned by the parser's `program` rule
 * - `P`   the parser type
 */
export default abstract class BasicParser<
    L extends Lexer = Lexer,
    PRC extends ParserRuleContext = ParserRuleContext,
    P extends IParser<PRC> = IParser<PRC>,
> {
    /** Char stream of the most recent cached parse. */
    protected _charStreams: CodePointCharStream;
    /** Lexer of the most recent cached parse. */
    protected _lexer: L;
    /** Token stream (already filled) of the most recent cached parse. */
    protected _tokenStream: CommonTokenStream;
    /** Parser of the most recent cached parse. */
    protected _parser: P;
    /** Parse tree produced by the most recent call to `parse`. */
    protected _parserTree: PRC;
    /** Collects the syntax errors reported during the most recent parse. */
    protected _errorCollector: ParserErrorCollector = new ParserErrorCollector();
    /** Raw input of the most recent parse; used by `parse` to skip re-parsing. */
    protected _parsedInput: string = null;

    /**
     * PreferredRules for antlr4-c3
     */
    protected abstract preferredRules: Set<number>;

    /**
     * Create an antlr4 Lexer instance
     * @param charStreams char stream built from the source string
     */
    protected abstract createLexerFormCharStream(charStreams: CodePointCharStream): L;

    /**
     * Create Parser by CommonTokenStream
     * @param tokenStream CommonTokenStream
     */
    protected abstract createParserFromTokenStream(tokenStream: CommonTokenStream): P;

    /**
     * Convert candidates to suggestions
     * @param candidates candidate list
     * @param allTokens all tokens from input
     * @param caretTokenIndex tokenIndex of caretPosition
     * @param tokenIndexOffset offset of the tokenIndex in the candidates
     * compared to the tokenIndex in allTokens
     */
    protected abstract processCandidates(
        candidates: CandidatesCollection,
        allTokens: Token[],
        caretTokenIndex: number,
        tokenIndexOffset: number
    ): Suggestions;

    /**
     * Get splitListener instance.
     */
    protected abstract get splitListener(): SplitListener;

    /**
     * Create an antlr4 lexer from input.
     * The input is upper-cased before it is handed to the lexer.
     * @param input source string
     * @returns a new, uncached lexer
     */
    public createLexer(input: string) {
        const charStreams = CharStreams.fromString(input.toUpperCase());
        return this.createLexerFormCharStream(charStreams);
    }

    /**
     * Create an antlr4 parser from input.
     * @param input source string
     * @returns a new, uncached parser
     */
    public createParser(input: string) {
        const lexer = this.createLexer(input);
        const tokenStream = new CommonTokenStream(lexer);
        return this.createParserFromTokenStream(tokenStream);
    }

    /**
     * Create an antlr4 parser from input and cache the char stream, lexer,
     * token stream and parser on this instance.
     * The previously cached parse tree is discarded.
     * @param input source string
     * @returns the cached parser
     */
    protected createParserWithCache(input: string): P {
        this._parserTree = null;
        this._charStreams = CharStreams.fromString(input.toUpperCase());
        this._lexer = this.createLexerFormCharStream(this._charStreams);

        this._tokenStream = new CommonTokenStream(this._lexer);
        // Tokenize eagerly so that getTokens() returns every token.
        this._tokenStream.fill();

        this._parser = this.createParserFromTokenStream(this._tokenStream);
        this._parser.buildParseTree = true;

        return this._parser;
    }

    /**
     * Parse the input and return its parse tree.
     * If it is invoked multiple times in a row with the same input,
     * the cached parse tree is returned directly,
     * unless the errorListener parameter is passed.
     * @param input source string
     * @param errorListener listen errors
     * @returns parserTree
     */
    public parse(input: string, errorListener?: ErrorHandler): PRC {
        // Avoid parsing the same input repeatedly.
        if (this._parsedInput === input && !errorListener) {
            return this._parserTree;
        }

        const parser = this.createParserWithCache(input);
        this._parsedInput = input;

        // Replace the default listeners with our collector so that
        // validate() can report the errors of this parse.
        parser.removeErrorListeners();
        this._errorCollector.clear();
        parser.addErrorListener(this._errorCollector);
        if (errorListener) {
            parser.addErrorListener(new ParserErrorListener(errorListener));
        }

        this._parserTree = parser.program();
        return this._parserTree;
    }

    /**
     * Validate input string and return syntax errors if any exist.
     * @param input source string
     * @returns syntax errors (a new array)
     */
    public validate(input: string): ParserError[] {
        this.parse(input);
        const lexerError: ParserError[] = [];
        return lexerError.concat(this._errorCollector.parserErrors);
    }

    /**
     * Get all tokens of the input string; the trailing '<EOF>' token is not included.
     * @param input source string
     * @returns Token[]
     */
    public getAllTokens(input: string): Token[] {
        this.parse(input);
        const allTokens = this._tokenStream.getTokens();
        const lastToken = allTokens[allTokens.length - 1];
        // Identify the end-of-file token by its type rather than by its text.
        return lastToken?.type === Token.EOF ? allTokens.slice(0, -1) : allTokens;
    }

    /**
     * Convert the parse tree of the input to a string;
     * it's convenient to use in unit tests.
     * @param input source string
     */
    public parserTreeToString(input: string): string {
        this.parse(input);
        return this._parserTree.toStringTree(this._parser.ruleNames);
    }

    /**
     * Get List-like style tree string
     * @param parserTree ProgramRuleContext
     */
    public toString(parserTree: PRC): string {
        return parserTree.toStringTree(this._parser.ruleNames);
    }

    /**
     * Walk the parse tree with the given listener.
     * @param listener Listener instance extends ParserListener
     * @param parserTree parser Tree
     */
    public listen<PTL extends ParseTreeListener = ParseTreeListener>(
        listener: PTL,
        parserTree: PRC
    ) {
        ParseTreeWalker.DEFAULT.walk(listener, parserTree);
    }

    /**
     * Split input into statements.
     * If a syntax error exists it will return null.
     * @param input source string
     * @returns one slice per statement, or null when the input has syntax errors
     */
    public splitSQLByStatement(input: string): TextSlice[] | null {
        // Statement contexts of an erroneous input may lack a stop token,
        // so bail out before walking the tree.
        const errors = this.validate(input);
        if (errors.length > 0) return null;

        const splitListener = this.splitListener;
        this.listen(splitListener, this._parserTree);

        return splitListener.statementsContext.map((context) => {
            const { start, stop } = context;
            return {
                startIndex: start.startIndex,
                endIndex: stop.stopIndex,
                startLine: start.line,
                endLine: stop.line,
                // Columns are reported 1-based.
                startColumn: start.charPositionInLine + 1,
                endColumn: stop.charPositionInLine + stop.text.length,
                text: this._parsedInput.slice(start.startIndex, stop.stopIndex + 1),
            };
        });
    }

    /**
     * Get suggestions of syntax and token at caretPosition
     * @param input source string
     * @param caretPosition caret position, such as cursor position
     * @returns suggestion, or null when there is no split listener
     * or no token index can be found for the caret
     */
    public getSuggestionAtCaretPosition(
        input: string,
        caretPosition: CaretPosition
    ): Suggestions | null {
        const splitListener = this.splitListener;
        // TODO: add splitListener to all sqlParser implements add remove following if
        if (!splitListener) return null;

        this.parse(input);
        let sqlParserIns = this._parser;
        const allTokens = this.getAllTokens(input);
        let caretTokenIndex = findCaretTokenIndex(caretPosition, allTokens);
        let c3Context: ParserRuleContext = this._parserTree;
        let tokenIndexOffset: number = 0;

        // Index 0 is a valid token index; any other falsy value means "not found".
        if (!caretTokenIndex && caretTokenIndex !== 0) return null;

        /**
         * Split sql by statement.
         * Try to collect candidates from the caret statement only.
         */
        this.listen(splitListener, this._parserTree);
        const statements = splitListener.statementsContext;

        // If there are multiple statements.
        if (statements.length > 1) {
            // find statement rule context where caretPosition is located.
            const caretStatementContext = statements.find((ctx) => {
                return (
                    caretTokenIndex <= ctx.stop?.tokenIndex &&
                    caretTokenIndex >= ctx.start.tokenIndex
                );
            });

            if (caretStatementContext) {
                c3Context = caretStatementContext;
            } else {
                // Start token of the last statement.
                const lastStatementStart = statements[statements.length - 1].start;
                /**
                 * If caretStatementContext is not found and it follows all statements.
                 * Reparses part of the input following the penultimate statement.
                 * And c3 will collect candidates in the new parserTreeContext.
                 */
                if (caretTokenIndex > lastStatementStart?.tokenIndex) {
                    /**
                     * Save offset of the tokenIndex in the partInput
                     * compared to the tokenIndex in the whole input
                     */
                    tokenIndexOffset = lastStatementStart?.tokenIndex;
                    // Correct caretTokenIndex
                    caretTokenIndex = caretTokenIndex - tokenIndexOffset;

                    const inputSlice = input.slice(lastStatementStart.startIndex);
                    const lexer = this.createLexer(inputSlice);
                    const tokenStream = new CommonTokenStream(lexer);
                    tokenStream.fill();

                    const parser = this.createParserFromTokenStream(tokenStream);
                    parser.removeErrorListeners();
                    parser.buildParseTree = true;

                    sqlParserIns = parser;
                    c3Context = parser.program();
                }
            }
        }

        const core = new CodeCompletionCore(sqlParserIns);
        core.preferredRules = this.preferredRules;

        const candidates = core.collectCandidates(caretTokenIndex, c3Context);
        const originalSuggestions = this.processCandidates(
            candidates,
            allTokens,
            caretTokenIndex,
            tokenIndexOffset
        );

        // Turn the tokens of every syntax suggestion into plain word ranges,
        // taking the text from the original (not upper-cased) input.
        const syntaxSuggestions: SyntaxSuggestion[] = originalSuggestions.syntax.map(
            (syntaxCtx) => {
                const wordRanges: WordRange[] = syntaxCtx.wordRanges.map((token) => {
                    return {
                        text: this._parsedInput.slice(token.startIndex, token.stopIndex + 1),
                        startIndex: token.startIndex,
                        stopIndex: token.stopIndex,
                        line: token.line,
                        // Columns are reported 1-based.
                        startColumn: token.charPositionInLine + 1,
                        stopColumn: token.charPositionInLine + token.text.length,
                    };
                });
                return {
                    syntaxContextType: syntaxCtx.syntaxContextType,
                    wordRanges,
                };
            }
        );

        return {
            syntax: syntaxSuggestions,
            keywords: originalSuggestions.keywords,
        };
    }
}