test: hive complete data types' check and unit tests (#160)

* feat: add Authorization SQL and update syntax file

* test: hive complete data types' check and unit tests

* feat: hive add syntax complete automatically

* feat: update hive's syntax complete

---------

Co-authored-by: zhaoge <>
XCynthia 2023-10-08 14:14:03 +08:00 committed by GitHub
parent b8c47d0ac9
commit 0a9a7d15d3
7 changed files with 162 additions and 40 deletions

View File

@@ -22,7 +22,15 @@ export enum SyntaxContextType {
     /** table name path, such as catalog.db.tb */
     TABLE = 'table',
     /** table name path will be created */
-    TABLE_CREATE = 'tableCreate'
+    TABLE_CREATE = 'tableCreate',
+    /** view name */
+    VIEW = 'view',
+    /** function name */
+    FUNCTION = 'function',
+    /** principal name */
+    PRINCIPAL = 'principal',
+    /** hint arg name */
+    HINT_ARG = 'hintArg',
 }
 
 export interface WordRange {
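The new enum members give completion consumers a typed way to tell what kind of name is expected at the caret. Below is a minimal sketch of how an editor integration might branch on them; it is illustrative only, and the import assumes the package re-exports SyntaxContextType from its entry point.

import { SyntaxContextType } from 'dt-sql-parser'; // assumed re-export

// Map a suggestion's context type to a completion-item kind label.
function completionKindFor(type: SyntaxContextType): string {
    switch (type) {
        case SyntaxContextType.TABLE:
        case SyntaxContextType.TABLE_CREATE:
            return 'Table';
        case SyntaxContextType.VIEW:
            return 'View';
        case SyntaxContextType.FUNCTION:
            return 'Function';
        case SyntaxContextType.PRINCIPAL:
            return 'Principal';
        case SyntaxContextType.HINT_ARG:
            return 'Hint argument';
        default:
            return 'Keyword';
    }
}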

View File

@@ -1,9 +1,10 @@
 import { Token } from 'antlr4ts';
 import { CandidatesCollection } from 'antlr4-c3';
 import { HiveSqlLexer } from '../lib/hive/HiveSqlLexer';
-import { HiveSqlParser, ProgramContext } from '../lib/hive/HiveSqlParser';
+import { HiveSqlParser, ProgramContext, StatementContext, ExplainStatementContext, ExecStatementContext } from '../lib/hive/HiveSqlParser';
 import BasicParser from './common/basicParser';
-import { Suggestions } from './common/basic-parser-types';
+import { HiveSqlParserListener } from '../lib/hive/HiveSqlParserListener';
+import { SyntaxContextType, Suggestions, SyntaxSuggestion } from './common/basic-parser-types';
 
 export default class HiveSQL extends BasicParser<HiveSqlLexer, ProgramContext, HiveSqlParser> {
@@ -16,21 +17,93 @@ export default class HiveSQL extends BasicParser<HiveSqlLexer, ProgramContext, HiveSqlParser> {
         return new HiveSqlParser(tokenStream);
     }
 
+    protected preferredRules: Set<number> = new Set([
+        HiveSqlParser.RULE_tableName, // table name
+        HiveSqlParser.RULE_viewName, // view name
+        HiveSqlParser.RULE_functionIdentifier, // function name
+        HiveSqlParser.RULE_principalIdentifier, // USER/ROLE/GROUP name
+        HiveSqlParser.RULE_hintArgName, // hint name
+    ]);
+
     protected get splitListener () {
-        return null as any;
+        return new HiveSqlSplitListener();
     }
 
-    protected preferredRules: Set<number> = new Set();
-
     protected processCandidates(
         candidates: CandidatesCollection,
         allTokens: Token[],
-        caretTokenIndex: number
+        caretTokenIndex: number,
+        tokenIndexOffset: number,
     ): Suggestions<Token> {
+        const originalSyntaxSuggestions: SyntaxSuggestion<Token>[] = [];
+        const keywords: string[] = [];
+
+        for (let candidate of candidates.rules) {
+            const [ruleType, candidateRule] = candidate;
+            const startTokenIndex = candidateRule.startTokenIndex + tokenIndexOffset;
+            const tokenRanges = allTokens.slice(startTokenIndex, caretTokenIndex + tokenIndexOffset + 1);
+
+            let syntaxContextType: SyntaxContextType;
+            switch (ruleType) {
+                case HiveSqlParser.RULE_tableName: {
+                    syntaxContextType = SyntaxContextType.TABLE;
+                    break;
+                }
+                case HiveSqlParser.RULE_viewName: {
+                    syntaxContextType = SyntaxContextType.VIEW;
+                    break;
+                }
+                case HiveSqlParser.RULE_functionIdentifier: {
+                    syntaxContextType = SyntaxContextType.FUNCTION;
+                    break;
+                }
+                case HiveSqlParser.RULE_principalIdentifier: {
+                    syntaxContextType = SyntaxContextType.PRINCIPAL;
+                    break;
+                }
+                case HiveSqlParser.RULE_hintArgName: {
+                    syntaxContextType = SyntaxContextType.HINT_ARG;
+                    break;
+                }
+                default:
+                    break;
+            }
+
+            if (syntaxContextType) {
+                originalSyntaxSuggestions.push({
+                    syntaxContextType,
+                    wordRanges: tokenRanges,
+                });
+            }
+        }
+
+        for (let candidate of candidates.tokens) {
+            const symbolicName = this._parser.vocabulary.getSymbolicName(candidate[0]);
+            const displayName = this._parser.vocabulary.getDisplayName(candidate[0]);
+            if (symbolicName && symbolicName.startsWith('KW_')) {
+                const keyword = displayName.startsWith("'") && displayName.endsWith("'")
+                    ? displayName.slice(1, -1)
+                    : displayName;
+                keywords.push(keyword);
+            }
+        }
+
         return {
-            syntax: [],
-            keywords: []
-        }
+            syntax: originalSyntaxSuggestions,
+            keywords: keywords,
+        };
     }
 }
+
+export class HiveSqlSplitListener implements HiveSqlParserListener {
+    private _statementContext: StatementContext[] = [];
+
+    exitStatement = (ctx: StatementContext) => {
+        this._statementContext.push(ctx);
+    }
+
+    enterStatement = (ctx: StatementContext) => {
+    };
+
+    get statementsContext() {
+        return this._statementContext;
+    }
+}
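With preferredRules and the split listener in place, antlr4-c3 candidates flow through processCandidates into typed suggestions. A rough usage sketch follows, assuming BasicParser exposes getSuggestionAtCaretPosition taking a { lineNumber, column } caret position; treat the import path and column arithmetic as illustrative (the unit tests import src/parser/hive directly).

import HiveSQL from './src/parser/hive'; // adjust the import to your checkout

const parser = new HiveSQL();
const sql = `SELECT id FROM db.`;
// Caret right after "db." — the tableName rule should be among the candidates.
const suggestions = parser.getSuggestionAtCaretPosition(sql, {
    lineNumber: 1,
    column: sql.length + 1,
});

suggestions?.syntax.forEach((item) => {
    // wordRanges holds the tokens of the partially typed name, e.g. "db" and "."
    console.log(item.syntaxContextType, item.wordRanges.map((token) => token.text));
});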

View File

@@ -6,8 +6,8 @@ import HiveSQL from '../../../src/parser/hive';
 describe('HiveSQL Listener Tests', () => {
     const parser = new HiveSQL();
 
     test('Listener enterSelectList', async () => {
-        const expectTableName = 'userName';
-        const sql = `select ${expectTableName} from user1 where inc_day='20190601' limit 1000;`;
+        const expectTableName = 'username';
+        const sql = `select ${expectTableName} from tablename where inc_day='20190601' limit 1000;`;
         const parserTree = parser.parse(sql);
 
         let result = '';

View File

@@ -1,20 +0,0 @@
-import HiveSQL from '../../../src/parser/hive';
-
-describe('HiveSQL Syntax Tests', () => {
-    const parser = new HiveSQL();
-    test('Create Table Statement', () => {
-        const sql = 'CREATE TABLE person(name STRING,age INT);';
-        const result = parser.validate(sql);
-        expect(result.length).toBe(0);
-    });
-    test('Create Table Statement', () => {
-        const sql = `alter table dm_gis.table_name add if not exists partition (inc_day = '20190601');`;
-        const result = parser.validate(sql);
-        expect(result.length).toBe(0);
-    });
-    test('Wrong Select Statement', () => {
-        const sql = 'SELECT add ABC FROM WHERE;';
-        const result = parser.validate(sql);
-        expect(result.length).toBe(1);
-    });
-});

View File

@@ -0,0 +1,16 @@
+import HiveSQL from '../../../../src/parser/hive';
+import { readSQL } from '../../../helper';
+
+const parser = new HiveSQL();
+
+const features = {
+    dataTypes: readSQL(__dirname, 'dataTypes.sql'),
+};
+
+describe('HiveSQL Check Data Types Tests', () => {
+    features.dataTypes.forEach((dataType) => {
+        it(dataType, () => {
+            expect(parser.validate(dataType).length).toBe(0);
+        });
+    });
+});
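For context, readSQL is the shared test helper that loads a fixture file and splits it into standalone statements so that each statement becomes its own test case. A minimal reimplementation sketch under that assumption (the real helper lives in test/helper.ts and may resolve fixtures differently):

import * as fs from 'fs';
import * as path from 'path';

// Read dirname/fixtures/fileName and split on statement terminators.
// Naive split: adequate for these fixtures, which contain no ';' inside literals.
function readSQL(dirname: string, fileName: string): string[] {
    const content = fs.readFileSync(path.join(dirname, 'fixtures', fileName), 'utf-8');
    return content
        .split(';')
        .map((statement) => statement.trim())
        .filter(Boolean)
        .map((statement) => statement + ';');
}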

View File

@@ -28,7 +28,7 @@ CREATE TEMPORARY EXTERNAL TABLE page_view(
     page_url STRING,
     referrer_url STRING,
     ip STRING COMMENT 'IP Address of the User'
-) COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003' STORED AS SEQUENCEFILE;
+) COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003' STORED AS TEXTFILE;
 
 CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS page_view(
     viewTime INT,
@@ -36,7 +36,7 @@ CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS page_view(
     page_url STRING,
     referrer_url STRING,
     ip STRING COMMENT 'IP Address of the User'
-) COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003' STORED AS SEQUENCEFILE;
+) COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003' STORED AS RCFILE;
 
 CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS page_view(
     viewTime INT,
@@ -44,7 +44,7 @@ CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS page_view(
     page_url STRING,
     referrer_url STRING,
     ip STRING COMMENT 'IP Address of the User'
-) COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003' STORED AS SEQUENCEFILE LOCATION '/hsd_path';
+) COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003' STORED AS ORC LOCATION '/hsd_path';
 
 CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS page_view(
     viewTime INT,
@@ -52,7 +52,7 @@ CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS page_view(
     page_url STRING,
     referrer_url STRING,
     ip STRING COMMENT 'IP Address of the User'
-) COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003' STORED AS SEQUENCEFILE LOCATION '/hsd_path' AS
+) COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003' STORED AS PARQUET LOCATION '/hsd_path' AS
 SELECT
     (key % 1024) new_key,
     concat(key, value) key_value_pair
@@ -62,9 +62,9 @@ FROM
 CREATE TABLE list_bucket_single (key STRING, value STRING)
-SKEWED BY (key) ON (1,5,6) STORED AS DIRECTORIES;
+SKEWED BY (key) ON (1,5,6) STORED AS AVRO;
 
-CREATE TRANSACTIONAL TABLE transactional_table_test(key STRING, value STRING) PARTITIONED BY(ds STRING) STORED AS ORC;
+CREATE TRANSACTIONAL TABLE transactional_table_test(key STRING, value STRING) PARTITIONED BY(ds STRING) STORED AS INPUTFORMAT 'inputfilename' OUTPUTFORMAT 'outputfilename';
 
 CREATE TABLE IF NOT EXISTS copy_table LIKE origin_table;
@@ -104,4 +104,4 @@ CREATE MANAGED TABLE managed_table (
     name STRING COMMENT '名称'
 ) COMMENT '测试分桶' CLUSTERED BY(id) SORTED BY (id) INTO 4 BUCKETS STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler';
 
-CREATE TABLE list_bucket_multiple (col1 STRING, col2 INT, col3 STRING) SKEWED BY (col1, col2) ON (('s1', 1), ('s3', 3), ('s13', 13), ('s78', 78)) STORED AS DIRECTORIES;
+CREATE TABLE list_bucket_multiple (col1 STRING, col2 INT, col3 STRING) SKEWED BY (col1, col2) ON (('s1', 1), ('s3', 3), ('s13', 13), ('s78', 78)) STORED AS JSONFILE;

View File

@ -0,0 +1,45 @@
-- TINYINT
-- | SMALLINT
-- | INT
-- | BIGINT
-- | BOOLEAN
-- | FLOAT
-- | DOUBLE
-- | DOUBLE PRECISION -- (Note: Available in Hive 2.2.0 and later)
-- | STRING
-- | BINARY -- (Note: Available in Hive 0.8.0 and later)
-- | TIMESTAMP -- (Note: Available in Hive 0.8.0 and later)
-- | DECIMAL -- (Note: Available in Hive 0.11.0 and later)
-- | DECIMAL(precision, scale) -- (Note: Available in Hive 0.13.0 and later)
-- | DATE -- (Note: Available in Hive 0.12.0 and later)
-- | VARCHAR -- (Note: Available in Hive 0.12.0 and later)
-- | CHAR -- (Note: Available in Hive 0.13.0 and later)
-- | ARRAY<data_type> (Note: negative values and non-constant expressions are allowed as of Hive 0.14.)
-- | MAP<primitive_type, data_type> (Note: negative values and non-constant expressions are allowed as of Hive 0.14.)
-- | STRUCT<col_name : data_type [COMMENT col_comment], ...>
-- | UNIONTYPE<data_type, data_type, ...> (Note: Only available starting with Hive 0.7.0.)
CREATE TABLE test_table (
viewTime INT,
userid BIGINT,
page_url STRING,
age TINYINT,
instance FLOAT,
isAduit BOOLEAN,
score DOUBLE,
aver DOUBLE PRECISION,
somename BINARY,
someid DECIMAL,
birth TIMESTAMP,
schooldt DATE,
someint SMALLINT,
colvarchar VARCHAR(1),
colchar CHAR(2),
coldecil DECIMAL(3,4),
list ARRAY <STRING>,
realmap MAP <STRING, DATE>,
realstruct STRUCT<col_name: STRING>,
collast UNIONTYPE <DOUBLE, STRING>
);
-- INTERVAL
UPDATE tablenames SET birthdt=INTERVAL '1-2' YEAR TO MONTH;
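A quick spot-check of this fixture outside Jest, mirroring the data-types suite above; a sketch only, with the import path adjusted to wherever the parser lives in your checkout.

import HiveSQL from './src/parser/hive'; // adjust the import to your checkout

const parser = new HiveSQL();
// validate returns the list of parse errors; an empty list means the
// INTERVAL year-to-month literal parsed cleanly.
const errors = parser.validate(`UPDATE tablenames SET birthdt=INTERVAL '1-2' YEAR TO MONTH;`);
console.assert(errors.length === 0, 'expected no parse errors');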