feat: collect entity (#265)

* feat: add text and word utils

* feat: add entity collector class

* refactor: rename SyntaxContextType to EntityContextType

* refactor: improve EntityCollector

* feat: improve mysql parser grammar

* feat: add mysql entity collector

* test: mysql entity collector tests

* feat: remove useless method

* feat: improve spark grammar file

* feat: add spark entity collector

* test: spark entity collector unit tests

* feat: remove useless code

* feat: add queryStatement label

* feat: add createDatabaseStmt

* feat: add trino entity collector

* feat: rename trinosql to trino

* test: trino collect entity unit tests

* test: fix spark test

* feat(impala): support impala entity collector (#256)

* Feat/collect entity hive (#263)

* feat(hive): support hive collect entity

* feat(hive): update tableAllColumns

* feat: replace antlr4ts with antlr4ng

* feat(pgsql): pgsql collect entity (#268)

* feat(pgsql): pgsql collect entity

* feat(pgsql): optimize some name

---------

Co-authored-by: zhaoge <>

* feat: get word text by token.text

* feat: support collect db/function and add splitListener (#270)

* feat: support collect db/function and add splitListener

* feat: remove SplitListener interface in baseParser to use SplitListener in root

* fix(mysql): show create xxx should not be collected as createXXXEntity type

* test: fix pgsql unit tests

* Feat/error recover predicate (#274)

* feat: optimize pgsql grammar

* feat: add sql parser base

* feat: apply SQLParserBase

* feat: add getAllEntities method

* test: test collect table when missing column

* feat: compose collect and suggestion (#276)

* feat: mark stmt which contain caret

* test: correct name of getAllEntities

* test: remove misscolumn unit tests

* test: add suggestionWithEntity tests

* feat: flink collect entity (#277)

* feat: improve flink sql parser

* feat: support flink entity collector

* test: flink entity collect unit test

* feat: move combine entities to parent class

---------

Co-authored-by: 霜序 <976060700@qq.com>
Co-authored-by: XCynthia <942884029@qq.com>
This commit is contained in:
Hayden
2024-03-26 14:28:27 +08:00
committed by GitHub
parent 3f62ad0d32
commit a99721162b
230 changed files with 56908 additions and 46672 deletions

View File

@ -0,0 +1,11 @@
SELECT FROM my_db.tb;
SELECT name, calculate_age(birthdate) AS age, FROM students;
INSERT INTO insert_tb SELECT FROM from_tb;
INSERT INTO insert_tb SELECT id, age, FROM from_tb;
CREATE TABLE sorted_census_data AS SELECT FROM unsorted_census_data;
CREATE TABLE sorted_census_data AS SELECT id, age, FROM unsorted_census_data;

View File

@ -47,3 +47,11 @@ SELECT id, n FROM tbl GROUP BY ;
SELECT id, n FROM tbl ORDER BY name, i ;
SELECT id FROM tb1 GROUP BY ROLLUP( );
SHOW CREATE FUNCTION func_name;
SHOW CREATE TABLE tbl_name;
SHOW CREATE DATABASE IF NOT EXISTS db_name;
SHOW CREATE VIEW test.v;

View File

@ -1,7 +1,7 @@
import fs from 'fs';
import path from 'path';
import MySQL from 'src/parser/mysql';
import { CaretPosition, SyntaxContextType } from 'src/parser/common/basic-parser-types';
import { CaretPosition, EntityContextType } from 'src/parser/common/basic-parser-types';
const syntaxSql = fs.readFileSync(
path.join(__dirname, 'fixtures', 'multipleStatement.sql'),
@ -18,7 +18,7 @@ describe('MySQL Multiple Statements Syntax Suggestion', () => {
};
const syntaxes = parser.getSuggestionAtCaretPosition(syntaxSql, pos)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.TABLE
(syn) => syn.syntaxContextType === EntityContextType.TABLE
);
expect(suggestion).not.toBeUndefined();
@ -32,7 +32,7 @@ describe('MySQL Multiple Statements Syntax Suggestion', () => {
};
const syntaxes = parser.getSuggestionAtCaretPosition(syntaxSql, pos)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.TABLE_CREATE
(syn) => syn.syntaxContextType === EntityContextType.TABLE_CREATE
);
expect(suggestion).not.toBeUndefined();
@ -46,7 +46,7 @@ describe('MySQL Multiple Statements Syntax Suggestion', () => {
};
const syntaxes = parser.getSuggestionAtCaretPosition(syntaxSql, pos)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.TABLE
(syn) => syn.syntaxContextType === EntityContextType.TABLE
);
expect(suggestion).not.toBeUndefined();
@ -60,7 +60,7 @@ describe('MySQL Multiple Statements Syntax Suggestion', () => {
};
const syntaxes = parser.getSuggestionAtCaretPosition(syntaxSql, pos)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.TABLE
(syn) => syn.syntaxContextType === EntityContextType.TABLE
);
expect(suggestion).not.toBeUndefined();

View File

@ -0,0 +1,156 @@
import fs from 'fs';
import path from 'path';
import MySQL from 'src/parser/mysql';
import { CaretPosition, EntityContextType } from 'src/parser/common/basic-parser-types';
import { commentOtherLine } from 'test/helper';
const syntaxSql = fs.readFileSync(
path.join(__dirname, 'fixtures', 'suggestionWithEntity.sql'),
'utf-8'
);
describe('MySQL Syntax Suggestion with collect entity', () => {
    const mysql = new MySQL();

    /** Any member value of `EntityContextType`, derived from the imported value itself. */
    type ContextKind = (typeof EntityContextType)[keyof typeof EntityContextType];

    /** What a single collected entity is expected to look like. */
    type ExpectedEntity = {
        /** Source text of the entity, e.g. `my_db.tb`. */
        readonly text: string;
        /** Context type the entity must be reported with. */
        readonly kind: ContextKind;
    };

    /**
     * Shared scenario for every case in this suite.
     *
     * 1. Builds the SQL under test with `commentOtherLine` for the given fixture line
     *    (NOTE(review): presumably this neutralises every other fixture line — confirm
     *    against test/helper).
     * 2. Asks for suggestions at the caret and requires a COLUMN suggestion whose
     *    word ranges are empty.
     * 3. Collects all entities for the same SQL and caret and checks, in order, their
     *    text, their context type and that the owning statement is flagged as
     *    containing the caret.
     *
     * @param lineNumber line of the fixture the caret is on
     * @param column column of the caret on that line
     * @param expectedEntities entities expected from `getAllEntities`, in order
     */
    const checkColumnSuggestionAndEntities = (
        lineNumber: number,
        column: number,
        expectedEntities: readonly ExpectedEntity[]
    ): void => {
        const caret: CaretPosition = { lineNumber, column };
        const isolatedSql = commentOtherLine(syntaxSql, caret.lineNumber);

        // Suggestion part: a column is expected at the caret, with nothing typed yet.
        const suggestions = mysql.getSuggestionAtCaretPosition(isolatedSql, caret);
        const columnSuggestion = suggestions?.syntax?.find(
            (item) => item.syntaxContextType === EntityContextType.COLUMN
        );
        expect(columnSuggestion).not.toBeUndefined();
        expect(columnSuggestion?.wordRanges.map((word) => word.text)).toEqual([]);

        // Entity part: collected only after the suggestion checks have passed.
        const collected = mysql.getAllEntities(isolatedSql, caret);
        expect(collected.length).toBe(expectedEntities.length);
        expectedEntities.forEach((expected, index) => {
            const entity = collected[index];
            expect(entity.text).toBe(expected.text);
            expect(entity.entityContextType).toBe(expected.kind);
            expect(entity.belongStmt.isContainCaret).toBeTruthy();
        });
    };

    test('select with no columns', () => {
        checkColumnSuggestionAndEntities(1, 8, [
            { text: 'my_db.tb', kind: EntityContextType.TABLE },
        ]);
    });

    test('select with columns with trailing comma', () => {
        checkColumnSuggestionAndEntities(3, 47, [
            { text: 'students', kind: EntityContextType.TABLE },
        ]);
    });

    test('insert into table as select with no column', () => {
        checkColumnSuggestionAndEntities(5, 30, [
            { text: 'insert_tb', kind: EntityContextType.TABLE },
            { text: 'from_tb', kind: EntityContextType.TABLE },
        ]);
    });

    test('insert into table as select with trailing comma', () => {
        checkColumnSuggestionAndEntities(7, 39, [
            { text: 'insert_tb', kind: EntityContextType.TABLE },
            { text: 'from_tb', kind: EntityContextType.TABLE },
        ]);
    });

    test('create table as select with no column', () => {
        checkColumnSuggestionAndEntities(9, 43, [
            { text: 'sorted_census_data', kind: EntityContextType.TABLE_CREATE },
            { text: 'unsorted_census_data', kind: EntityContextType.TABLE },
        ]);
    });

    test('create table as select with trailing comma', () => {
        checkColumnSuggestionAndEntities(11, 52, [
            { text: 'sorted_census_data', kind: EntityContextType.TABLE_CREATE },
            { text: 'unsorted_census_data', kind: EntityContextType.TABLE },
        ]);
    });
});

View File

@ -1,7 +1,7 @@
import fs from 'fs';
import path from 'path';
import MySQL from 'src/parser/mysql';
import { SyntaxContextType, CaretPosition } from 'src/parser/common/basic-parser-types';
import { EntityContextType, CaretPosition } from 'src/parser/common/basic-parser-types';
import { commentOtherLine } from 'test/helper';
const syntaxSql = fs.readFileSync(
@ -26,7 +26,7 @@ describe('MySQL Syntax Suggestion', () => {
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.TABLE
(syn) => syn.syntaxContextType === EntityContextType.TABLE
);
expect(suggestion).not.toBeUndefined();
@ -43,7 +43,7 @@ describe('MySQL Syntax Suggestion', () => {
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.TABLE
(syn) => syn.syntaxContextType === EntityContextType.TABLE
);
expect(suggestion).not.toBeUndefined();
@ -60,7 +60,7 @@ describe('MySQL Syntax Suggestion', () => {
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.TABLE_CREATE
(syn) => syn.syntaxContextType === EntityContextType.TABLE_CREATE
);
expect(suggestion).not.toBeUndefined();
@ -77,7 +77,7 @@ describe('MySQL Syntax Suggestion', () => {
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.TABLE
(syn) => syn.syntaxContextType === EntityContextType.TABLE
);
expect(suggestion).not.toBeUndefined();
@ -94,7 +94,7 @@ describe('MySQL Syntax Suggestion', () => {
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.VIEW_CREATE
(syn) => syn.syntaxContextType === EntityContextType.VIEW_CREATE
);
expect(suggestion).not.toBeUndefined();
@ -111,7 +111,7 @@ describe('MySQL Syntax Suggestion', () => {
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.VIEW
(syn) => syn.syntaxContextType === EntityContextType.VIEW
);
expect(suggestion).not.toBeUndefined();
@ -128,7 +128,7 @@ describe('MySQL Syntax Suggestion', () => {
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.FUNCTION_CREATE
(syn) => syn.syntaxContextType === EntityContextType.FUNCTION_CREATE
);
expect(suggestion).not.toBeUndefined();
@ -145,7 +145,7 @@ describe('MySQL Syntax Suggestion', () => {
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.FUNCTION
(syn) => syn.syntaxContextType === EntityContextType.FUNCTION
);
expect(suggestion).not.toBeUndefined();
@ -162,7 +162,7 @@ describe('MySQL Syntax Suggestion', () => {
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.DATABASE_CREATE
(syn) => syn.syntaxContextType === EntityContextType.DATABASE_CREATE
);
expect(suggestion).not.toBeUndefined();
@ -179,7 +179,7 @@ describe('MySQL Syntax Suggestion', () => {
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.DATABASE
(syn) => syn.syntaxContextType === EntityContextType.DATABASE
);
expect(suggestion).not.toBeUndefined();
@ -196,7 +196,7 @@ describe('MySQL Syntax Suggestion', () => {
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.COLUMN
(syn) => syn.syntaxContextType === EntityContextType.COLUMN
);
expect(suggestion).not.toBeUndefined();
@ -213,7 +213,7 @@ describe('MySQL Syntax Suggestion', () => {
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.COLUMN
(syn) => syn.syntaxContextType === EntityContextType.COLUMN
);
expect(suggestion).not.toBeUndefined();
@ -230,7 +230,7 @@ describe('MySQL Syntax Suggestion', () => {
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.COLUMN
(syn) => syn.syntaxContextType === EntityContextType.COLUMN
);
expect(suggestion).not.toBeUndefined();
@ -247,7 +247,7 @@ describe('MySQL Syntax Suggestion', () => {
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.COLUMN_CREATE
(syn) => syn.syntaxContextType === EntityContextType.COLUMN_CREATE
);
expect(suggestion).not.toBeUndefined();
@ -264,7 +264,7 @@ describe('MySQL Syntax Suggestion', () => {
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.COLUMN
(syn) => syn.syntaxContextType === EntityContextType.COLUMN
);
expect(suggestion).not.toBeUndefined();
@ -281,7 +281,7 @@ describe('MySQL Syntax Suggestion', () => {
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.COLUMN
(syn) => syn.syntaxContextType === EntityContextType.COLUMN
);
expect(suggestion).not.toBeUndefined();
@ -298,7 +298,7 @@ describe('MySQL Syntax Suggestion', () => {
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.COLUMN
(syn) => syn.syntaxContextType === EntityContextType.COLUMN
);
expect(suggestion).not.toBeUndefined();
@ -315,7 +315,7 @@ describe('MySQL Syntax Suggestion', () => {
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.COLUMN
(syn) => syn.syntaxContextType === EntityContextType.COLUMN
);
expect(suggestion).not.toBeUndefined();
@ -332,7 +332,7 @@ describe('MySQL Syntax Suggestion', () => {
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.COLUMN
(syn) => syn.syntaxContextType === EntityContextType.COLUMN
);
expect(suggestion).not.toBeUndefined();
@ -349,7 +349,7 @@ describe('MySQL Syntax Suggestion', () => {
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.COLUMN
(syn) => syn.syntaxContextType === EntityContextType.COLUMN
);
expect(suggestion).not.toBeUndefined();
@ -366,7 +366,7 @@ describe('MySQL Syntax Suggestion', () => {
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.COLUMN
(syn) => syn.syntaxContextType === EntityContextType.COLUMN
);
expect(suggestion).not.toBeUndefined();
@ -383,7 +383,7 @@ describe('MySQL Syntax Suggestion', () => {
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.COLUMN
(syn) => syn.syntaxContextType === EntityContextType.COLUMN
);
expect(suggestion).not.toBeUndefined();
@ -400,7 +400,7 @@ describe('MySQL Syntax Suggestion', () => {
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.COLUMN
(syn) => syn.syntaxContextType === EntityContextType.COLUMN
);
expect(suggestion).not.toBeUndefined();
@ -417,7 +417,7 @@ describe('MySQL Syntax Suggestion', () => {
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.COLUMN
(syn) => syn.syntaxContextType === EntityContextType.COLUMN
);
expect(suggestion).not.toBeUndefined();
@ -434,10 +434,78 @@ describe('MySQL Syntax Suggestion', () => {
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.COLUMN
(syn) => syn.syntaxContextType === EntityContextType.COLUMN
);
expect(suggestion).not.toBeUndefined();
expect(suggestion?.wordRanges.map((token) => token.text)).toEqual([]);
});
test('show create function', () => {
    // Caret on fixture line 51 (`SHOW CREATE FUNCTION func_name;`).
    const caret: CaretPosition = { lineNumber: 51, column: 31 };
    const isolatedSql = commentOtherLine(syntaxSql, caret.lineNumber);
    const suggestions = parser.getSuggestionAtCaretPosition(isolatedSql, caret);

    // SHOW CREATE refers to an existing function, so FUNCTION (not FUNCTION_CREATE) is looked up.
    const functionSuggestion = suggestions?.syntax?.find(
        (item) => item.syntaxContextType === EntityContextType.FUNCTION
    );

    expect(functionSuggestion).not.toBeUndefined();
    const words = functionSuggestion?.wordRanges.map((word) => word.text);
    expect(words).toEqual(['func_name']);
});
test('show create table', () => {
    // Caret on fixture line 53 (`SHOW CREATE TABLE tbl_name;`).
    const caret: CaretPosition = { lineNumber: 53, column: 27 };
    const isolatedSql = commentOtherLine(syntaxSql, caret.lineNumber);
    const suggestions = parser.getSuggestionAtCaretPosition(isolatedSql, caret);

    // SHOW CREATE refers to an existing table, so TABLE (not TABLE_CREATE) is looked up.
    const tableSuggestion = suggestions?.syntax?.find(
        (item) => item.syntaxContextType === EntityContextType.TABLE
    );

    expect(tableSuggestion).not.toBeUndefined();
    const words = tableSuggestion?.wordRanges.map((word) => word.text);
    expect(words).toEqual(['tbl_name']);
});
test('show create dbName', () => {
    // Caret on fixture line 55 (`SHOW CREATE DATABASE IF NOT EXISTS db_name;`).
    const caret: CaretPosition = { lineNumber: 55, column: 43 };
    const isolatedSql = commentOtherLine(syntaxSql, caret.lineNumber);
    const suggestions = parser.getSuggestionAtCaretPosition(isolatedSql, caret);

    // SHOW CREATE refers to an existing database, so DATABASE (not DATABASE_CREATE) is looked up.
    const databaseSuggestion = suggestions?.syntax?.find(
        (item) => item.syntaxContextType === EntityContextType.DATABASE
    );

    expect(databaseSuggestion).not.toBeUndefined();
    const words = databaseSuggestion?.wordRanges.map((word) => word.text);
    expect(words).toEqual(['db_name']);
});
test('show create view', () => {
    // Caret on fixture line 57 (`SHOW CREATE VIEW test.v;`).
    const caret: CaretPosition = { lineNumber: 57, column: 24 };
    const isolatedSql = commentOtherLine(syntaxSql, caret.lineNumber);
    const suggestions = parser.getSuggestionAtCaretPosition(isolatedSql, caret);

    // SHOW CREATE refers to an existing view, so VIEW (not VIEW_CREATE) is looked up.
    const viewSuggestion = suggestions?.syntax?.find(
        (item) => item.syntaxContextType === EntityContextType.VIEW
    );

    expect(viewSuggestion).not.toBeUndefined();
    // The qualified name is reported as separate words, including the dot.
    const words = viewSuggestion?.wordRanges.map((word) => word.text);
    expect(words).toEqual(['test', '.', 'v']);
});
});