feat: collect entity (#265)

* feat: add text and word utils

* feat: add entity collector class

* refactor: rename SyntaxContextType to EntityContextType

* refactor: improve EntityCollector

* feat: improve mysql parser grammar

* feat: add mysql entity collector

* test: mysql entity collector tests

* feat: remove useless method

* feat: improve spark grammar file

* feat: add spark entity collector

* test: spark entity collector unit tests

* feat: remove useless code

* feat: add queryStatement label

* feat: add createDatabaseStmt

* feat: add trino entity collector

* feat: rename trinosql to trino

* test: trino collect entity unit tests

* test: fix spark test

* feat(impala): support impala entity collector (#256)

* Feat/collect entity hive (#263)

* feat(hive): support hive collect entity

* feat(hive): update tableAllColumns

* feat: replace antlr4ts with antlr4ng

* feat(pgsql): pgsql collect entity (#268)

* feat(pgsql): pgsql collect entity

* feat(pgsql): optimize some names

---------

Co-authored-by: zhaoge <>

* feat: get word text by token.text

* feat: support collect db/function and add splitListener (#270)

* feat: support collect db/function and add splitListener

* feat: remove SplitListener interface in baseParser to use SplitListener in root

* fix(mysql): fix show create xxx not collected as createXXXEntity type

* test: fix pgsql unit tests

* Feat/error recover predicate (#274)

* feat: optimize pgsql grammar

* feat: add sql parser base

* feat: apply SQLParserBase

* feat: add getAllEntities method

* test: test collect table when missing column

* feat: compose collect and suggestion (#276)

* feat: mark stmt which contains caret

* test: correct name of getAllEntities

* test: remove missing-column unit tests

* test: add suggestionWithEntity tests

* feat: flink collect entity (#277)

* feat: improve flink sql parser

* feat: support flink entity collector

* test: flink entity collector unit tests

* feat: move combine entities to parent class

---------

Co-authored-by: 霜序 <976060700@qq.com>
Co-authored-by: XCynthia <942884029@qq.com>
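
For context, a minimal sketch of the collector API these commits introduce. The dt-sql-parser entry point and the exact signatures are assumptions inferred from the commit list and the files below, not verified against the published package:

import { SparkSQL } from 'dt-sql-parser'; // assumed package entry point

const spark = new SparkSQL();
// SparkEntityCollector (see below) walks the parse tree and records each
// table/view/database/function name it visits.
const entities = spark.getAllEntities('INSERT INTO db.tb SELECT id FROM db.tb2;');
// entities should roughly contain one context per collected name,
// e.g. the table names db.tb and db.tb2.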
Author: Hayden
Date: 2024-03-26 14:28:27 +08:00 (committed by GitHub)
Commit: a99721162b (parent: 3f62ad0d32)
230 changed files with 56908 additions and 46672 deletions

src/parser/spark/index.ts (new file, 133 lines)

@@ -0,0 +1,133 @@
import { CharStream, Token, TokenStream } from 'antlr4ng';
import { CandidatesCollection } from 'antlr4-c3';
import { SparkSqlLexer } from '../../lib/spark/SparkSqlLexer';
import { SparkSqlParser, ProgramContext } from '../../lib/spark/SparkSqlParser';
import BasicParser from '../common/basicParser';
import { Suggestions, EntityContextType, SyntaxSuggestion } from '../common/basic-parser-types';
import { StmtContextType } from '../common/entityCollector';
import SparkSqlSplitListener from './sparkSplitListener';
import SparkEntityCollector from './sparkEntityCollector';
export { SparkSqlSplitListener, SparkEntityCollector };
export default class SparkSQL extends BasicParser<SparkSqlLexer, ProgramContext, SparkSqlParser> {
protected createLexerFromCharStream(charStreams: CharStream) {
const lexer = new SparkSqlLexer(charStreams);
return lexer;
}
protected createParserFromTokenStream(tokenStream: TokenStream) {
const parser = new SparkSqlParser(tokenStream);
return parser;
}
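// Rules at which antlr4-c3 stops collecting and reports a candidate,
// instead of expanding them into lower-level tokens.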
protected preferredRules: Set<number> = new Set([
SparkSqlParser.RULE_namespaceName,
SparkSqlParser.RULE_namespaceNameCreate,
SparkSqlParser.RULE_tableName,
SparkSqlParser.RULE_tableNameCreate,
SparkSqlParser.RULE_viewName,
SparkSqlParser.RULE_viewNameCreate,
SparkSqlParser.RULE_functionName,
SparkSqlParser.RULE_functionNameCreate,
SparkSqlParser.RULE_columnName,
SparkSqlParser.RULE_columnNameCreate,
]);
protected get splitListener() {
return new SparkSqlSplitListener();
}
protected createEntityCollector(input: string, caretTokenIndex?: number) {
return new SparkEntityCollector(input, caretTokenIndex);
}
protected processCandidates(
candidates: CandidatesCollection,
allTokens: Token[],
caretTokenIndex: number,
tokenIndexOffset: number
): Suggestions<Token> {
const originalSyntaxSuggestions: SyntaxSuggestion<Token>[] = [];
const keywords: string[] = [];
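// Map every rule candidate reported by antlr4-c3 onto the entity
// context type that the suggestion consumers understand.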
for (const candidate of candidates.rules) {
const [ruleType, candidateRule] = candidate;
const startTokenIndex = candidateRule.startTokenIndex + tokenIndexOffset;
const tokenRanges = allTokens.slice(
startTokenIndex,
caretTokenIndex + tokenIndexOffset + 1
);
let syntaxContextType: EntityContextType | StmtContextType | undefined;
switch (ruleType) {
case SparkSqlParser.RULE_namespaceName: {
syntaxContextType = EntityContextType.DATABASE;
break;
}
case SparkSqlParser.RULE_namespaceNameCreate: {
syntaxContextType = EntityContextType.DATABASE_CREATE;
break;
}
case SparkSqlParser.RULE_tableName: {
syntaxContextType = EntityContextType.TABLE;
break;
}
case SparkSqlParser.RULE_tableNameCreate: {
syntaxContextType = EntityContextType.TABLE_CREATE;
break;
}
case SparkSqlParser.RULE_viewName: {
syntaxContextType = EntityContextType.VIEW;
break;
}
case SparkSqlParser.RULE_viewNameCreate: {
syntaxContextType = EntityContextType.VIEW_CREATE;
break;
}
case SparkSqlParser.RULE_functionName: {
syntaxContextType = EntityContextType.FUNCTION;
break;
}
case SparkSqlParser.RULE_functionNameCreate: {
syntaxContextType = EntityContextType.FUNCTION_CREATE;
break;
}
case SparkSqlParser.RULE_columnName: {
syntaxContextType = EntityContextType.COLUMN;
break;
}
case SparkSqlParser.RULE_columnNameCreate: {
syntaxContextType = EntityContextType.COLUMN_CREATE;
break;
}
default:
break;
}
if (syntaxContextType) {
originalSyntaxSuggestions.push({
syntaxContextType,
wordRanges: tokenRanges,
});
}
}
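// Keyword candidates: only tokens whose symbolic name starts with KW_
// are suggested, and quoted display names are unwrapped first.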
for (const candidate of candidates.tokens) {
const symbolicName = this._parser.vocabulary.getSymbolicName(candidate[0]);
const displayName = this._parser.vocabulary.getDisplayName(candidate[0]);
if (symbolicName && symbolicName.startsWith('KW_')) {
const keyword =
displayName.startsWith("'") && displayName.endsWith("'")
? displayName.slice(1, -1)
: displayName;
keywords.push(keyword);
}
}
return {
syntax: originalSyntaxSuggestions,
keywords,
};
}
}
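
A companion sketch of how the processCandidates mapping above surfaces through the completion API. The method name getSuggestionAtCaretPosition and the caret-position shape are assumptions:

import { SparkSQL } from 'dt-sql-parser'; // assumed package entry point

const spark = new SparkSQL();
const sql = 'SELECT * FROM ';
// With the caret right after FROM, antlr4-c3 should report RULE_tableName,
// which the switch above maps to EntityContextType.TABLE.
const suggestions = spark.getSuggestionAtCaretPosition(sql, {
    lineNumber: 1,
    column: sql.length + 1, // 1-based column just past 'FROM '
});
// suggestions.syntax   -> [{ syntaxContextType: TABLE, wordRanges: [...] }]
// suggestions.keywords -> keyword strings with surrounding quotes stripped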

src/parser/spark/sparkEntityCollector.ts (new file, 151 lines)

@@ -0,0 +1,151 @@
import type {
NamespaceNameContext,
NamespaceNameCreateContext,
SingleStatementContext,
TableNameContext,
TableNameCreateContext,
ViewNameContext,
ViewNameCreateContext,
FunctionNameCreateContext,
ColumnNameCreateContext,
CreateTableContext,
CreateTableLikeContext,
ReplaceTableContext,
QueryStatementContext,
InsertFromQueryContext,
MultipleInsertContext,
CreateViewContext,
CreateTempViewUsingContext,
CreateNamespaceContext,
CreateFunctionContext,
} from '../../lib/spark/SparkSqlParser';
import type { SparkSqlParserListener } from '../../lib/spark/SparkSqlParserListener';
import { EntityContextType } from '../common/basic-parser-types';
import EntityCollector, { StmtContextType } from '../common/entityCollector';
export default class SparkEntityCollector
extends EntityCollector
implements SparkSqlParserListener
{
/** ====== Entity Begin */
exitNamespaceName(ctx: NamespaceNameContext) {
this.pushEntity(ctx, EntityContextType.DATABASE);
}
exitNamespaceNameCreate(ctx: NamespaceNameCreateContext) {
this.pushEntity(ctx, EntityContextType.DATABASE_CREATE);
}
exitTableName(ctx: TableNameContext) {
this.pushEntity(ctx, EntityContextType.TABLE);
}
exitTableNameCreate(ctx: TableNameCreateContext) {
this.pushEntity(ctx, EntityContextType.TABLE_CREATE);
}
exitViewName(ctx: ViewNameContext) {
this.pushEntity(ctx, EntityContextType.VIEW);
}
exitViewNameCreate(ctx: ViewNameCreateContext) {
this.pushEntity(ctx, EntityContextType.VIEW_CREATE);
}
exitFunctionNameCreate(ctx: FunctionNameCreateContext) {
this.pushEntity(ctx, EntityContextType.FUNCTION_CREATE);
}
exitColumnNameCreate(ctx: ColumnNameCreateContext) {
this.pushEntity(ctx, EntityContextType.COLUMN_CREATE);
}
/** ===== Statement begin */
enterSingleStatement(ctx: SingleStatementContext) {
this.pushStmt(ctx, StmtContextType.COMMON_STMT);
}
exitSingleStatement(ctx: SingleStatementContext) {
this.popStmt();
}
enterCreateTable(ctx: CreateTableContext) {
this.pushStmt(ctx, StmtContextType.CREATE_TABLE_STMT);
}
exitCreateTable(ctx: CreateTableContext) {
this.popStmt();
}
enterCreateTableLike(ctx: CreateTableLikeContext) {
this.pushStmt(ctx, StmtContextType.CREATE_TABLE_STMT);
}
exitCreateTableLike(ctx: CreateTableLikeContext) {
this.popStmt();
}
enterReplaceTable(ctx: ReplaceTableContext) {
this.pushStmt(ctx, StmtContextType.CREATE_TABLE_STMT);
}
exitReplaceTable(ctx: ReplaceTableContext) {
this.popStmt();
}
enterCreateView(ctx: CreateViewContext) {
this.pushStmt(ctx, StmtContextType.CREATE_VIEW_STMT);
}
exitCreateView(ctx: CreateViewContext) {
this.popStmt();
}
enterCreateTempViewUsing(ctx: CreateTempViewUsingContext) {
this.pushStmt(ctx, StmtContextType.CREATE_VIEW_STMT);
}
exitCreateTempViewUsing(ctx: CreateTempViewUsingContext) {
this.popStmt();
}
enterQueryStatement(ctx: QueryStatementContext) {
this.pushStmt(ctx, StmtContextType.SELECT_STMT);
}
exitQueryStatement(ctx: QueryStatementContext) {
this.popStmt();
}
enterInsertFromQuery(ctx: InsertFromQueryContext) {
this.pushStmt(ctx, StmtContextType.INSERT_STMT);
}
exitInsertFromQuery(ctx: InsertFromQueryContext) {
this.popStmt();
}
enterMultipleInsert(ctx: MultipleInsertContext) {
this.pushStmt(ctx, StmtContextType.INSERT_STMT);
}
exitMultipleInsert(ctx: MultipleInsertContext) {
this.popStmt();
}
enterCreateNamespace(ctx: CreateNamespaceContext) {
this.pushStmt(ctx, StmtContextType.CREATE_DATABASE_STMT);
}
exitCreateNamespace(ctx: CreateNamespaceContext) {
this.popStmt();
}
enterCreateFunction(ctx: CreateFunctionContext) {
this.pushStmt(ctx, StmtContextType.CREATE_FUNCTION_STMT);
}
exitCreateFunction(ctx: CreateFunctionContext) {
this.popStmt();
}
}

src/parser/spark/sparkSplitListener.ts (new file, 13 lines)

@@ -0,0 +1,13 @@
import { SingleStatementContext } from '../../lib/spark/SparkSqlParser';
import { SparkSqlParserListener } from '../../lib/spark/SparkSqlParserListener';
import SplitListener from '../common/splitListener';
export default class SparkSqlSplitListener
extends SplitListener<SingleStatementContext>
implements SparkSqlParserListener
{
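// Collect every top-level statement context; the shared SplitListener
// base uses these to split multi-statement input into single statements.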
exitSingleStatement = (ctx: SingleStatementContext) => {
this._statementsContext.push(ctx);
};
}