feat: collect entity (#265)
* feat: add text and word utils
* feat: add entity collector class
* refactor: rename SyntaxContextType to EntityContextType
* refactor: improve EntityCollector
* feat: improve mysql parser grammar
* feat: add mysql entity collector
* test: mysql entity collector tests
* feat: remove useless method
* feat: improve spark grammar file
* feat: add spark entity collector
* test: spark entity collector unit tests
* feat: remove useless code
* feat: add queryStatement label
* feat: add createDatabaseStmt
* feat: add trino entity collector
* feat: rename trinosql to trino
* test: trino collect entity unit tests
* test: fix spark test
* feat(impala): support impala entity collector (#256)
* Feat/collect entity hive (#263)
* feat(hive): support hive collect entity
* feat(hive): update tableAllColumns
* feat: replace antlr4ts with antlr4ng
* feat(pgsql): pgsql collect entity (#268)
* feat(pgsql): pgsql collect entity
* feat(pgsql): optimize some names

---------

Co-authored-by: zhaoge <>

* feat: get word text by token.text
* feat: support collecting db/function and add splitListener (#270)
* feat: support collecting db/function and add splitListener
* feat: remove SplitListener interface in baseParser to use SplitListener in root
* fix(mysql): fix SHOW CREATE xxx not collected as createXXXEntity type
* test: fix pgsql unit tests
* Feat/error recover predicate (#274)
* feat: optimize pgsql grammar
* feat: add sql parser base
* feat: apply SQLParserBase
* feat: add getAllEntities method
* test: test collecting a table when a column is missing
* feat: compose collect and suggestion (#276)
* feat: mark stmt which contains the caret
* test: correct name of getAllEntities
* test: remove missing-column unit tests
* test: add suggestionWithEntity tests
* feat: flink collect entity (#277)
* feat: improve flink sql parser
* feat: support flink entity collector
* test: flink entity collect unit test
* feat: move combine entities to parent class

---------

Co-authored-by: 霜序 <976060700@qq.com>
Co-authored-by: XCynthia <942884029@qq.com>
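Taken together, these commits add an entity-collection layer on top of each dialect parser. A minimal usage sketch for the Spark parser added below (the getAllEntities name comes from the commits above; the import path, exact signature, and shape of the returned entities are assumptions, not shown on this page):

import SparkSQL from './src/parser/spark'; // entry added by this commit; path assumed

const parser = new SparkSQL();
// Assumed behavior: getAllEntities walks the parse tree with SparkEntityCollector
// and returns the collected entities.
const entities = parser.getAllEntities(
    'CREATE TABLE db.new_tb AS SELECT id, name FROM db.old_tb;'
);
// Expected (assumed): a TABLE_CREATE entity for db.new_tb and a TABLE entity
// for db.old_tb, each with its word text taken from token.text.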
133 src/parser/spark/index.ts Normal file
@@ -0,0 +1,133 @@
import { Token } from 'antlr4ng';
import { CandidatesCollection } from 'antlr4-c3';
import { SparkSqlLexer } from '../../lib/spark/SparkSqlLexer';
import { SparkSqlParser, ProgramContext } from '../../lib/spark/SparkSqlParser';
import BasicParser from '../common/basicParser';
import { Suggestions, EntityContextType, SyntaxSuggestion } from '../common/basic-parser-types';
import { StmtContextType } from '../common/entityCollector';
import SparkSqlSplitListener from './sparkSplitListener';
import SparkEntityCollector from './sparkEntityCollector';

export { SparkSqlSplitListener, SparkEntityCollector };

export default class SparkSQL extends BasicParser<SparkSqlLexer, ProgramContext, SparkSqlParser> {
    protected createLexerFromCharStream(charStreams) {
        const lexer = new SparkSqlLexer(charStreams);
        return lexer;
    }

    protected createParserFromTokenStream(tokenStream) {
        const parser = new SparkSqlParser(tokenStream);
        return parser;
    }

    protected preferredRules: Set<number> = new Set([
        SparkSqlParser.RULE_namespaceName,
        SparkSqlParser.RULE_namespaceNameCreate,
        SparkSqlParser.RULE_tableName,
        SparkSqlParser.RULE_tableNameCreate,
        SparkSqlParser.RULE_viewName,
        SparkSqlParser.RULE_viewNameCreate,
        SparkSqlParser.RULE_functionName,
        SparkSqlParser.RULE_functionNameCreate,
        SparkSqlParser.RULE_columnName,
        SparkSqlParser.RULE_columnNameCreate,
    ]);

    protected get splitListener() {
        return new SparkSqlSplitListener();
    }

    protected createEntityCollector(input: string, caretTokenIndex?: number) {
        return new SparkEntityCollector(input, caretTokenIndex);
    }

    protected processCandidates(
        candidates: CandidatesCollection,
        allTokens: Token[],
        caretTokenIndex: number,
        tokenIndexOffset: number
    ): Suggestions<Token> {
        const originalSyntaxSuggestions: SyntaxSuggestion<Token>[] = [];
        const keywords: string[] = [];

        for (const candidate of candidates.rules) {
            const [ruleType, candidateRule] = candidate;
            const startTokenIndex = candidateRule.startTokenIndex + tokenIndexOffset;
            const tokenRanges = allTokens.slice(
                startTokenIndex,
                caretTokenIndex + tokenIndexOffset + 1
            );

            let syntaxContextType: EntityContextType | StmtContextType;
            switch (ruleType) {
                case SparkSqlParser.RULE_namespaceName: {
                    syntaxContextType = EntityContextType.DATABASE;
                    break;
                }
                case SparkSqlParser.RULE_namespaceNameCreate: {
                    syntaxContextType = EntityContextType.DATABASE_CREATE;
                    break;
                }
                case SparkSqlParser.RULE_tableName: {
                    syntaxContextType = EntityContextType.TABLE;
                    break;
                }
                case SparkSqlParser.RULE_tableNameCreate: {
                    syntaxContextType = EntityContextType.TABLE_CREATE;
                    break;
                }
                case SparkSqlParser.RULE_viewName: {
                    syntaxContextType = EntityContextType.VIEW;
                    break;
                }
                case SparkSqlParser.RULE_viewNameCreate: {
                    syntaxContextType = EntityContextType.VIEW_CREATE;
                    break;
                }
                case SparkSqlParser.RULE_functionName: {
                    syntaxContextType = EntityContextType.FUNCTION;
                    break;
                }
                case SparkSqlParser.RULE_functionNameCreate: {
                    syntaxContextType = EntityContextType.FUNCTION_CREATE;
                    break;
                }
                case SparkSqlParser.RULE_columnName: {
                    syntaxContextType = EntityContextType.COLUMN;
                    break;
                }
                case SparkSqlParser.RULE_columnNameCreate: {
                    syntaxContextType = EntityContextType.COLUMN_CREATE;
                    break;
                }
                default:
                    break;
            }

            if (syntaxContextType) {
                originalSyntaxSuggestions.push({
                    syntaxContextType,
                    wordRanges: tokenRanges,
                });
            }
        }

        for (const candidate of candidates.tokens) {
            const symbolicName = this._parser.vocabulary.getSymbolicName(candidate[0]);
            const displayName = this._parser.vocabulary.getDisplayName(candidate[0]);
            if (symbolicName && symbolicName.startsWith('KW_')) {
                const keyword =
                    displayName.startsWith("'") && displayName.endsWith("'")
                        ? displayName.slice(1, -1)
                        : displayName;
                keywords.push(keyword);
            }
        }

        return {
            syntax: originalSyntaxSuggestions,
            keywords,
        };
    }
}
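processCandidates translates antlr4-c3 rule candidates into EntityContextType slots and keeps only KW_-prefixed tokens as keyword completions, stripping the surrounding quotes from their display names. A hedged sketch of how the resulting Suggestions<Token> might be consumed (the getSuggestionAtCaretPosition name and caret shape are assumptions inferred from the "compose collect and suggestion" commit, not part of this diff):

const parser = new SparkSQL();
// Hypothetical consumer of the Suggestions<Token> produced above.
const suggestions = parser.getSuggestionAtCaretPosition(
    'SELECT  FROM db.tb;',
    { lineNumber: 1, column: 8 } // caret right after SELECT
);
for (const item of suggestions?.syntax ?? []) {
    // e.g. EntityContextType.COLUMN, plus the tokens from the rule start to the caret
    console.log(item.syntaxContextType, item.wordRanges.map((token) => token.text));
}
console.log(suggestions?.keywords); // keyword texts, quotes already stripped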
151 src/parser/spark/sparkEntityCollector.ts Normal file
@@ -0,0 +1,151 @@
import type {
    NamespaceNameContext,
    NamespaceNameCreateContext,
    SingleStatementContext,
    TableNameContext,
    TableNameCreateContext,
    ViewNameContext,
    ViewNameCreateContext,
    FunctionNameCreateContext,
    ColumnNameCreateContext,
    CreateTableContext,
    CreateTableLikeContext,
    ReplaceTableContext,
    QueryStatementContext,
    InsertFromQueryContext,
    MultipleInsertContext,
    CreateViewContext,
    CreateTempViewUsingContext,
    CreateNamespaceContext,
    CreateFunctionContext,
} from '../../lib/spark/SparkSqlParser';
import type { SparkSqlParserListener } from '../../lib/spark/SparkSqlParserListener';
import { EntityContextType } from '../common/basic-parser-types';
import EntityCollector, { StmtContextType } from '../common/entityCollector';

export default class SparkEntityCollector
    extends EntityCollector
    implements SparkSqlParserListener
{
    /** ====== Entity Begin */
    exitNamespaceName(ctx: NamespaceNameContext) {
        this.pushEntity(ctx, EntityContextType.DATABASE);
    }

    exitNamespaceNameCreate(ctx: NamespaceNameCreateContext) {
        this.pushEntity(ctx, EntityContextType.DATABASE_CREATE);
    }

    exitTableName(ctx: TableNameContext) {
        this.pushEntity(ctx, EntityContextType.TABLE);
    }

    exitTableNameCreate(ctx: TableNameCreateContext) {
        this.pushEntity(ctx, EntityContextType.TABLE_CREATE);
    }

    exitViewName(ctx: ViewNameContext) {
        this.pushEntity(ctx, EntityContextType.VIEW);
    }

    exitViewNameCreate(ctx: ViewNameCreateContext) {
        this.pushEntity(ctx, EntityContextType.VIEW_CREATE);
    }

    exitFunctionNameCreate(ctx: FunctionNameCreateContext) {
        this.pushEntity(ctx, EntityContextType.FUNCTION_CREATE);
    }

    exitColumnNameCreate(ctx: ColumnNameCreateContext) {
        this.pushEntity(ctx, EntityContextType.COLUMN_CREATE);
    }

    /** ====== Statement Begin */
    enterSingleStatement(ctx: SingleStatementContext) {
        this.pushStmt(ctx, StmtContextType.COMMON_STMT);
    }

    exitSingleStatement(ctx: SingleStatementContext) {
        this.popStmt();
    }

    enterCreateTable(ctx: CreateTableContext) {
        this.pushStmt(ctx, StmtContextType.CREATE_TABLE_STMT);
    }

    exitCreateTable(ctx: CreateTableContext) {
        this.popStmt();
    }

    enterCreateTableLike(ctx: CreateTableLikeContext) {
        this.pushStmt(ctx, StmtContextType.CREATE_TABLE_STMT);
    }

    exitCreateTableLike(ctx: CreateTableLikeContext) {
        this.popStmt();
    }

    enterReplaceTable(ctx: ReplaceTableContext) {
        this.pushStmt(ctx, StmtContextType.CREATE_TABLE_STMT);
    }

    exitReplaceTable(ctx: ReplaceTableContext) {
        this.popStmt();
    }

    enterCreateView(ctx: CreateViewContext) {
        this.pushStmt(ctx, StmtContextType.CREATE_VIEW_STMT);
    }

    exitCreateView(ctx: CreateViewContext) {
        this.popStmt();
    }

    enterCreateTempViewUsing(ctx: CreateTempViewUsingContext) {
        this.pushStmt(ctx, StmtContextType.CREATE_VIEW_STMT);
    }

    exitCreateTempViewUsing(ctx: CreateTempViewUsingContext) {
        this.popStmt();
    }

    enterQueryStatement(ctx: QueryStatementContext) {
        this.pushStmt(ctx, StmtContextType.SELECT_STMT);
    }

    exitQueryStatement(ctx: QueryStatementContext) {
        this.popStmt();
    }

    enterInsertFromQuery(ctx: InsertFromQueryContext) {
        this.pushStmt(ctx, StmtContextType.INSERT_STMT);
    }

    exitInsertFromQuery(ctx: InsertFromQueryContext) {
        this.popStmt();
    }

    enterMultipleInsert(ctx: MultipleInsertContext) {
        this.pushStmt(ctx, StmtContextType.INSERT_STMT);
    }

    exitMultipleInsert(ctx: MultipleInsertContext) {
        this.popStmt();
    }

    enterCreateNamespace(ctx: CreateNamespaceContext) {
        this.pushStmt(ctx, StmtContextType.CREATE_DATABASE_STMT);
    }

    exitCreateNamespace(ctx: CreateNamespaceContext) {
        this.popStmt();
    }

    enterCreateFunction(ctx: CreateFunctionContext) {
        this.pushStmt(ctx, StmtContextType.CREATE_FUNCTION_STMT);
    }

    exitCreateFunction(ctx: CreateFunctionContext) {
        this.popStmt();
    }
}
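Each statement type is handled by an enter/exit pair: enter* pushes a statement context, exit* pops it, so any entity collected in between can be attributed to the enclosing statement (and later merged by the "combine entities" logic the commit message says moved to the parent class). A hedged trace for a CTAS statement (the callback order is inferred from the grammar; the attribution details live in the base EntityCollector, not in this diff):

// Assumed callback order for:
//   CREATE TABLE db.new_tb AS SELECT id FROM db.old_tb;
//
// enterSingleStatement      -> pushStmt(COMMON_STMT)
//   enterCreateTable        -> pushStmt(CREATE_TABLE_STMT)
//     exitTableNameCreate   -> pushEntity(TABLE_CREATE)   // db.new_tb
//     enterQueryStatement   -> pushStmt(SELECT_STMT)
//       exitTableName       -> pushEntity(TABLE)          // db.old_tb
//     exitQueryStatement    -> popStmt()
//   exitCreateTable         -> popStmt()
// exitSingleStatement       -> popStmt()
const parser = new SparkSQL();
const entities = parser.getAllEntities(
    'CREATE TABLE db.new_tb AS SELECT id FROM db.old_tb;'
);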
13 src/parser/spark/sparkSplitListener.ts Normal file
@@ -0,0 +1,13 @@
import { SingleStatementContext } from '../../lib/spark/SparkSqlParser';

import { SparkSqlParserListener } from '../../lib/spark/SparkSqlParserListener';
import SplitListener from '../common/splitListener';

export default class SparkSqlSplitListener
    extends SplitListener<SingleStatementContext>
    implements SparkSqlParserListener
{
    exitSingleStatement = (ctx: SingleStatementContext) => {
        this._statementsContext.push(ctx);
    };
}
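The split listener records every top-level SingleStatementContext, which is what lets the base parser work out which statement contains the caret and limit collection and suggestion to that slice. A hedged sketch (the parse and listen helpers and the public statementsContext accessor are assumptions about the BasicParser/SplitListener base classes, which are not part of this diff):

// Hypothetical: count the statements the split listener sees.
const sparkParser = new SparkSQL();
const splitListener = new SparkSqlSplitListener();
sparkParser.listen(splitListener, sparkParser.parse('SHOW TABLES;\nSELECT 1;'));
console.log(splitListener.statementsContext.length); // expected: 2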