2023-10-13 11:16:36 +08:00
|
|
|
|
import {
|
|
|
|
|
Parser,
|
|
|
|
|
Lexer,
|
2023-06-09 11:22:53 +08:00
|
|
|
|
Token,
|
2023-10-13 11:16:36 +08:00
|
|
|
|
CharStreams,
|
|
|
|
|
CommonTokenStream,
|
2023-06-09 11:22:53 +08:00
|
|
|
|
CodePointCharStream,
|
2023-10-13 11:16:36 +08:00
|
|
|
|
ParserRuleContext,
|
2023-06-09 11:22:53 +08:00
|
|
|
|
} from 'antlr4ts';
|
|
|
|
|
import { ParseTreeWalker, ParseTreeListener } from 'antlr4ts/tree';
|
|
|
|
|
import { CandidatesCollection, CodeCompletionCore } from 'antlr4-c3';
|
|
|
|
|
import { findCaretTokenIndex } from '../../utils/findCaretTokenIndex';
|
2023-10-13 11:16:36 +08:00
|
|
|
|
import {
|
2023-06-09 11:22:53 +08:00
|
|
|
|
CaretPosition,
|
|
|
|
|
Suggestions,
|
|
|
|
|
SyntaxSuggestion,
|
|
|
|
|
WordRange,
|
2023-10-13 11:16:36 +08:00
|
|
|
|
TextSlice,
|
2023-06-09 11:22:53 +08:00
|
|
|
|
} from './basic-parser-types';
|
2020-09-11 17:39:10 +08:00
|
|
|
|
import ParserErrorListener, {
|
|
|
|
|
ParserError,
|
|
|
|
|
ErrorHandler,
|
|
|
|
|
ParserErrorCollector,
|
|
|
|
|
} from './parserErrorListener';
|
|
|
|
|
|
2023-06-09 11:22:53 +08:00
|
|
|
|
/**
 * Minimal shape a generated parser must have to be used by BasicParser:
 * an antlr4ts `Parser` that also exposes a `program` entry rule.
 */
interface IParser<IParserRuleContext extends ParserRuleContext> extends Parser {
    // Customized in our parser
    program(): IParserRuleContext;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Parse-tree listener that exposes the statement rule contexts it holds
 * through `statementsContext`.
 * NOTE(review): presumably the list is filled while the tree is walked — confirm
 * against the concrete listener implementations.
 */
interface SplitListener extends ParseTreeListener {
    statementsContext: ParserRuleContext[];
}
|
|
|
|
|
|
2020-09-11 17:39:10 +08:00
|
|
|
|
/**
 * Abstract base class for the concrete parsers; subclasses must extend it.
 *
 * Subclasses supply the lexer/parser factories, the preferred rules for antlr4-c3,
 * the candidate post-processing and the split listener. This class provides
 * parsing with a single-entry cache, validation, tokenization, statement
 * splitting and caret-based suggestions on top of them.
 */
export default abstract class BasicParser<
    L extends Lexer = Lexer,
    PRC extends ParserRuleContext = ParserRuleContext,
    P extends IParser<PRC> = IParser<PRC>,
> {
    /** Char stream created by the most recent `createParserWithCache` call. */
    protected _charStreams: CodePointCharStream;
    /** Lexer created by the most recent `createParserWithCache` call. */
    protected _lexer: L;
    /** Token stream (already filled) created by the most recent `createParserWithCache` call. */
    protected _tokenStream: CommonTokenStream;
    /** Parser created by the most recent `createParserWithCache` call. */
    protected _parser: P;
    /** Tree produced by the most recent `parse` call; reset to null while a new parser is built. */
    protected _parserTree: PRC;
    /** Error listener attached to the cached parser on every `parse`; cleared before each run. */
    protected _errorCollector: ParserErrorCollector = new ParserErrorCollector();
    /** Input most recently handed to `parse`; used to skip re-parsing identical input. */
    protected _parsedInput: string = null;

    /**
     * PreferredRules for antlr4-c3
     */
    protected abstract preferredRules: Set<number>;

    /**
     * Create an antlr4 Lexer instance
     * @param charStreams char stream the lexer reads from
     */
    protected abstract createLexerFormCharStream(charStreams: CodePointCharStream): L;

    /**
     * Create Parser by CommonTokenStream
     * @param tokenStream CommonTokenStream
     */
    protected abstract createParserFromTokenStream(tokenStream: CommonTokenStream): P;

    /**
     * Convert candidates to suggestions
     * @param candidates candidate list
     * @param allTokens all tokens from input
     * @param caretTokenIndex tokenIndex of caretPosition
     * @param tokenIndexOffset offset of the tokenIndex in the candidates
     * compared to the tokenIndex in allTokens
     */
    protected abstract processCandidates(
        candidates: CandidatesCollection,
        allTokens: Token[],
        caretTokenIndex: number,
        tokenIndexOffset: number
    ): Suggestions<Token>;

    /**
     * Get splitListener instance.
     */
    protected abstract get splitListener(): SplitListener;

    /**
     * Create an antlr4 lexer from input.
     * The char stream is built from the upper-cased input.
     * NOTE(review): presumably so grammars with upper-case keywords match
     * case-insensitively — confirm against the grammars.
     * @param input string
     * @returns a new lexer; nothing is cached on the instance
     */
    public createLexer(input: string): L {
        const charStreams = CharStreams.fromString(input.toUpperCase());
        return this.createLexerFormCharStream(charStreams);
    }

    /**
     * Create an antlr4 parser from input.
     * @param input string
     * @returns a new parser; nothing is cached on the instance
     */
    public createParser(input: string): P {
        const lexer = this.createLexer(input);
        const tokenStream = new CommonTokenStream(lexer);
        return this.createParserFromTokenStream(tokenStream);
    }

    /**
     * Create an antlr4 parser from input.
     * The char stream, lexer, token stream and parser are cached on the instance,
     * and the previously cached parse tree is discarded.
     * @param input string
     * @returns the cached parser
     */
    protected createParserWithCache(input: string): P {
        // The old tree belongs to the previous input, so drop it first.
        this._parserTree = null;
        this._charStreams = CharStreams.fromString(input.toUpperCase());
        this._lexer = this.createLexerFormCharStream(this._charStreams);

        this._tokenStream = new CommonTokenStream(this._lexer);
        // Load every token up front so the token list is complete.
        this._tokenStream.fill();

        this._parser = this.createParserFromTokenStream(this._tokenStream);
        this._parser.buildParseTree = true;

        return this._parser;
    }

    /**
     * If it is invoked multiple times in a row with the same input,
     * this method returns the parsing result of the first call directly,
     * unless the errorListener parameter is passed.
     * @param input source string
     * @param errorListener listen errors
     * @returns parserTree
     */
    public parse(input: string, errorListener?: ErrorHandler<any>): PRC {
        // Avoid parsing the same input repeatedly.
        if (this._parsedInput === input && !errorListener) {
            return this._parserTree;
        }

        const parser = this.createParserWithCache(input);
        this._parsedInput = input;

        // Replace the parser's listeners with the collector (and the caller's handler, if any).
        parser.removeErrorListeners();
        this._errorCollector.clear();
        parser.addErrorListener(this._errorCollector);
        if (errorListener) {
            parser.addErrorListener(new ParserErrorListener(errorListener));
        }

        this._parserTree = parser.program();

        return this._parserTree;
    }

    /**
     * Validate input string and return syntax errors if exists.
     * @param input source string
     * @returns syntax errors
     */
    public validate(input: string): ParserError[] {
        this.parse(input);
        // No lexer errors are gathered here; only the collector's parser errors are returned.
        const lexerErrors: ParserError[] = [];
        return lexerErrors.concat(this._errorCollector.parserErrors);
    }

    /**
     * Get all Tokens of input string, the trailing EOF token is not included
     * @param input source string
     * @returns Token[]
     */
    public getAllTokens(input: string): Token[] {
        this.parse(input);
        const allTokens = this._tokenStream.getTokens();
        // Identify EOF by token type instead of by its display text.
        const lastToken = allTokens[allTokens.length - 1];
        return lastToken?.type === Token.EOF ? allTokens.slice(0, -1) : allTokens;
    }

    /**
     * Convert the parse tree of input to a string; convenient to use in unit tests.
     * @param input source string
     */
    public parserTreeToString(input: string): string {
        this.parse(input);
        return this._parserTree.toStringTree(this._parser.ruleNames);
    }

    /**
     * Get List-like style tree string.
     * Uses the rule names of the cached parser, so a parser must already have
     * been created through `parse`.
     * @param parserTree ProgramRuleContext
     */
    public toString(parserTree: PRC): string {
        return parserTree.toStringTree(this._parser.ruleNames);
    }

    /**
     * Walk parserTree with the given listener.
     * @param listener Listener instance extends ParserListener
     * @param parserTree parser Tree
     */
    public listen<PTL extends ParseTreeListener = ParseTreeListener>(
        listener: PTL,
        parserTree: PRC
    ) {
        ParseTreeWalker.DEFAULT.walk(listener, parserTree);
    }

    /**
     * Split input into statements.
     * If a syntax error exists, or no split listener is available, it returns null.
     * @param input source string
     * @returns one slice per statement context, or null
     */
    public splitSQLByStatement(input: string): TextSlice[] | null {
        // With syntax errors a statement may lack a stop token, so bail out as documented.
        const errors = this.validate(input);
        if (errors.length > 0) {
            return null;
        }

        const splitListener = this.splitListener;
        // Same guard as getSuggestionAtCaretPosition: not every implementation has a listener yet.
        if (!splitListener) return null;

        this.listen(splitListener, this._parserTree);

        return splitListener.statementsContext.map((context) => {
            const { start, stop } = context;
            return {
                startIndex: start.startIndex,
                endIndex: stop.stopIndex,
                startLine: start.line,
                endLine: stop.line,
                // Columns are reported 1-based; charPositionInLine is offset by one here.
                startColumn: start.charPositionInLine + 1,
                endColumn: stop.charPositionInLine + stop.text.length,
                text: this._parsedInput.slice(start.startIndex, stop.stopIndex + 1),
            };
        });
    }

    /**
     * Get suggestions of syntax and token at caretPosition
     * @param input source string
     * @param caretPosition caret position, such as cursor position
     * @returns suggestion, or null when there is no split listener or no caret token
     */
    public getSuggestionAtCaretPosition(
        input: string,
        caretPosition: CaretPosition
    ): Suggestions | null {
        const splitListener = this.splitListener;
        // TODO: add splitListener to all sqlParser implements and remove following if
        if (!splitListener) return null;

        this.parse(input);
        let sqlParserIns = this._parser;
        const allTokens = this.getAllTokens(input);
        let caretTokenIndex = findCaretTokenIndex(caretPosition, allTokens);
        let c3Context: ParserRuleContext = this._parserTree;
        let tokenIndexOffset: number = 0;

        // Index 0 is a valid position; only a missing index aborts.
        if (!caretTokenIndex && caretTokenIndex !== 0) return null;

        /**
         * Split sql by statement.
         * Try to collect candidates from the caret statement only.
         */
        this.listen(splitListener, this._parserTree);
        const statements = splitListener.statementsContext;

        // If there are multiple statements.
        if (statements.length > 1) {
            // find statement rule context where caretPosition is located.
            const caretStatementContext = statements.find((ctx) => {
                return (
                    caretTokenIndex <= ctx.stop?.tokenIndex &&
                    caretTokenIndex >= ctx.start.tokenIndex
                );
            });

            if (caretStatementContext) {
                c3Context = caretStatementContext;
            } else {
                const lastStatementToken = statements[statements.length - 1].start;
                /**
                 * If caretStatementContext is not found and it follows all statements.
                 * Reparses part of the input following the penultimate statement.
                 * And c3 will collect candidates in the new parserTreeContext.
                 */
                if (caretTokenIndex > lastStatementToken?.tokenIndex) {
                    /**
                     * Save offset of the tokenIndex in the partInput
                     * compared to the tokenIndex in the whole input
                     */
                    tokenIndexOffset = lastStatementToken.tokenIndex;
                    // Correct caretTokenIndex
                    caretTokenIndex = caretTokenIndex - tokenIndexOffset;

                    // Re-parse only the tail with a standalone parser; the cached state is untouched.
                    const inputSlice = input.slice(lastStatementToken.startIndex);
                    const lexer = this.createLexer(inputSlice);
                    const tokenStream = new CommonTokenStream(lexer);
                    tokenStream.fill();
                    const parser = this.createParserFromTokenStream(tokenStream);
                    parser.removeErrorListeners();
                    parser.buildParseTree = true;
                    sqlParserIns = parser;
                    c3Context = parser.program();
                }
            }
        }

        const core = new CodeCompletionCore(sqlParserIns);
        core.preferredRules = this.preferredRules;

        const candidates = core.collectCandidates(caretTokenIndex, c3Context);
        const originalSuggestions = this.processCandidates(
            candidates,
            allTokens,
            caretTokenIndex,
            tokenIndexOffset
        );

        // Turn the tokens of every syntax suggestion into plain word ranges.
        const syntaxSuggestions: SyntaxSuggestion<WordRange>[] = originalSuggestions.syntax.map(
            (syntaxCtx) => {
                const wordRanges: WordRange[] = syntaxCtx.wordRanges.map((token) => {
                    return {
                        // Slice from the input as the caller typed it rather than using token.text.
                        text: this._parsedInput.slice(token.startIndex, token.stopIndex + 1),
                        startIndex: token.startIndex,
                        stopIndex: token.stopIndex,
                        line: token.line,
                        startColumn: token.charPositionInLine + 1,
                        stopColumn: token.charPositionInLine + token.text.length,
                    };
                });
                return {
                    syntaxContextType: syntaxCtx.syntaxContextType,
                    wordRanges,
                };
            }
        );

        return {
            syntax: syntaxSuggestions,
            keywords: originalSuggestions.keywords,
        };
    }
}
|