feat(spark): support materialized view for spark sql (#262)

* feat(spark): support materialized view for spark sql

* fix(spark): code review update

* fix(spark): update spark materialized view and zorder grammar

* test(spark): add syntaxSuggestion test of materialized view

---------

Co-authored-by: jialan <jialan@dtstack.com>
JackWang032 2024-02-26 17:25:19 +08:00 committed by GitHub
parent 081ff7f067
commit 5ce89cb421
26 changed files with 10156 additions and 9101 deletions


@@ -142,12 +142,14 @@ KW_DESCRIBE : 'DESCRIBE';
KW_DFS : 'DFS';
KW_DIRECTORIES : 'DIRECTORIES';
KW_DIRECTORY : 'DIRECTORY';
KW_DISABLE : 'DISABLE';
KW_DISTINCT : 'DISTINCT';
KW_DISTRIBUTE : 'DISTRIBUTE';
KW_DIV : 'DIV';
KW_DOUBLE : 'DOUBLE';
KW_DROP : 'DROP';
KW_ELSE : 'ELSE';
KW_ENABLE : 'ENABLE';
KW_END : 'END';
KW_ESCAPE : 'ESCAPE';
KW_ESCAPED : 'ESCAPED';
@@ -224,6 +226,7 @@ KW_LOCKS : 'LOCKS';
KW_LOGICAL : 'LOGICAL';
KW_LONG : 'LONG';
KW_MACRO : 'MACRO';
KW_MATERIALIZED : 'MATERIALIZED';
KW_MAP : 'MAP';
KW_MATCHED : 'MATCHED';
KW_MERGE : 'MERGE';
@@ -252,6 +255,7 @@ KW_OF : 'OF';
KW_OFFSET : 'OFFSET';
KW_ON : 'ON';
KW_ONLY : 'ONLY';
KW_OPTIMIZE : 'OPTIMIZE';
KW_OPTION : 'OPTION';
KW_OPTIONS : 'OPTIONS';
KW_OR : 'OR';
@@ -294,6 +298,7 @@ KW_REPLACE : 'REPLACE';
KW_RESET : 'RESET';
KW_RESPECT : 'RESPECT';
KW_RESTRICT : 'RESTRICT';
KW_REWRITE : 'REWRITE';
KW_REVOKE : 'REVOKE';
KW_RIGHT : 'RIGHT';
KW_RLIKE : 'RLIKE';
@@ -396,6 +401,7 @@ KW_WITHIN : 'WITHIN';
KW_YEAR : 'YEAR';
KW_YEARS : 'YEARS';
KW_ZONE : 'ZONE';
KW_ZORDER : 'ZORDER';
//--SPARK-KEYWORD-LIST-END
//============================
// End of the keywords list


@@ -92,8 +92,11 @@ statement
)* KW_PURGE?
| KW_ALTER KW_TABLE tableName (partitionSpec)? KW_SET locationSpec
| KW_ALTER KW_TABLE tableName KW_RECOVER KW_PARTITIONS
| KW_ALTER KW_MATERIALIZED KW_VIEW viewName (KW_ENABLE | KW_DISABLE) KW_REWRITE
| KW_ALTER KW_MATERIALIZED KW_VIEW viewName KW_SET KW_TBLPROPERTIES propertyList
| KW_DROP KW_TABLE (ifExists)? tableName KW_PURGE?
| KW_DROP KW_VIEW (ifExists)? viewName
| KW_DROP KW_MATERIALIZED KW_VIEW (ifExists)? viewName
| KW_CREATE (KW_OR KW_REPLACE)? (KW_GLOBAL? KW_TEMPORARY)? KW_VIEW (ifNotExists)? viewNameCreate identifierCommentList? (
commentSpec
| (KW_PARTITIONED KW_ON identifierList)
@@ -106,6 +109,19 @@ statement
| KW_CREATE (KW_OR KW_REPLACE)? KW_TEMPORARY? KW_FUNCTION (ifNotExists)? functionNameCreate KW_AS className=stringLit (
KW_USING resource (COMMA resource)*
)?
|
// Materialized view syntax developed in-house by DTStack; Spark itself does not support it yet.
KW_CREATE KW_MATERIALIZED KW_VIEW (ifNotExists)? viewNameCreate tableProvider? (
(KW_OPTIONS options=propertyList)
| (KW_PARTITIONED KW_BY partitioning=partitionFieldList)
| skewSpec
| bucketSpec
| rowFormat
| createFileFormat
| locationSpec
| commentSpec
| (KW_TBLPROPERTIES tableProps=propertyList)
)* KW_AS query
| KW_DROP KW_TEMPORARY? KW_FUNCTION (ifExists)? functionName
| KW_DECLARE (KW_OR KW_REPLACE)? KW_VARIABLE? functionName dataType? variableDefaultExpression?
| KW_DROP KW_TEMPORARY KW_VARIABLE (ifExists)? (tableName | viewName | functionName)
@@ -122,6 +138,10 @@ statement
| KW_SHOW KW_CREATE KW_TABLE tableName (KW_AS KW_SERDE)?
| KW_SHOW KW_CURRENT dbSchema
| KW_SHOW KW_CATALOGS (KW_LIKE? pattern=stringLit)?
| KW_SHOW KW_MATERIALIZED KW_VIEWS ((KW_FROM | KW_IN) db_name=dbSchemaName)? (
KW_LIKE? pattern=stringLit
)?
| KW_SHOW KW_CREATE KW_MATERIALIZED KW_VIEW viewName (KW_AS KW_SERDE)?
| (KW_DESC | KW_DESCRIBE) KW_FUNCTION KW_EXTENDED? describeFuncName
| (KW_DESC | KW_DESCRIBE) KW_DATABASE KW_EXTENDED? dbSchemaName
| (KW_DESC | KW_DESCRIBE) KW_TABLE? option=(KW_EXTENDED | KW_FORMATTED)? tableName partitionSpec? describeColName?
@@ -131,6 +151,7 @@ statement
| KW_REFRESH KW_TABLE tableName
| KW_REFRESH KW_FUNCTION functionName
| KW_REFRESH (stringLit | .*?)
| KW_REFRESH KW_MATERIALIZED KW_VIEW viewName
| KW_CACHE KW_LAZY? KW_TABLE tableName (KW_OPTIONS options=propertyList)? (KW_AS? query)?
| KW_UNCACHE KW_TABLE (ifExists)? tableName
| KW_CLEAR KW_CACHE
@@ -154,6 +175,7 @@ statement
KW_USING indexType=identifier
)? LEFT_PAREN multipartIdentifierPropertyList RIGHT_PAREN (KW_OPTIONS options=propertyList)?
| KW_DROP KW_INDEX (ifExists)? identifier KW_ON KW_TABLE? tableName
| KW_OPTIMIZE tableName whereClause? zorderClause
| unsupportedHiveNativeCommands .*?
;
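
The new statement alternatives can be smoke-tested end to end through the library's public facade. A minimal sketch, using only the `SparkSQL` entry point and `validate()` helper that the test suites below already rely on, with statements borrowed from the new SQL fixtures:

```typescript
import SparkSQL from 'src/parser/spark';

const parser = new SparkSQL();

// One statement per new grammar alternative, taken from the fixtures below.
const statements = [
    'CREATE MATERIALIZED VIEW IF NOT EXISTS mv AS SELECT id FROM students;',
    'ALTER MATERIALIZED VIEW mv ENABLE REWRITE;',
    'DROP MATERIALIZED VIEW IF EXISTS mv;',
    'REFRESH MATERIALIZED VIEW userDB.mv;',
    "SHOW MATERIALIZED VIEWS IN userdb LIKE 'test_*';",
    'OPTIMIZE students WHERE id = 1 ZORDER BY id, name;',
];

statements.forEach((sql) => {
    // validate() returns the list of parse errors; an empty list means the
    // statement is accepted by the updated grammar.
    console.assert(parser.validate(sql).length === 0, `rejected: ${sql}`);
});
```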
@@ -1233,6 +1255,10 @@ windowClause
: KW_WINDOW namedWindow (COMMA namedWindow)*
;
zorderClause
: KW_ZORDER KW_BY columnNameSeq
;
namedWindow
: name=errorCapturingIdentifier KW_AS windowSpec
;
@@ -1436,10 +1462,12 @@ ansiNonReserved
| KW_DFS
| KW_DIRECTORIES
| KW_DIRECTORY
| KW_DISABLE
| KW_DISTRIBUTE
| KW_DIV
| KW_DOUBLE
| KW_DROP
| KW_ENABLE
| KW_ESCAPED
| KW_EXCHANGE
| KW_EXCLUDE
@@ -1495,6 +1523,7 @@ ansiNonReserved
| KW_MACRO
| KW_MAP
| KW_MATCHED
| KW_MATERIALIZED
| KW_MERGE
| KW_MICROSECOND
| KW_MICROSECONDS
@@ -1514,6 +1543,7 @@ ansiNonReserved
| KW_NULLS
| KW_NUMERIC
| KW_OF
| KW_OPTIMIZE
| KW_OPTION
| KW_OPTIONS
| KW_OUT
@@ -1549,6 +1579,7 @@ ansiNonReserved
| KW_RESPECT
| KW_RESTRICT
| KW_REVOKE
| KW_REWRITE
| KW_RLIKE
| KW_REGEXP
| KW_ROLE
@@ -1632,6 +1663,7 @@ ansiNonReserved
| KW_YEAR
| KW_YEARS
| KW_ZONE
| KW_ZORDER
//--ANSI-NON-RESERVED-END
;
@@ -1747,12 +1779,14 @@ nonReserved
| KW_DFS
| KW_DIRECTORIES
| KW_DIRECTORY
| KW_DISABLE
| KW_DISTINCT
| KW_DISTRIBUTE
| KW_DIV
| KW_DOUBLE
| KW_DROP
| KW_ELSE
| KW_ENABLE
| KW_END
| KW_ESCAPE
| KW_ESCAPED
@@ -1824,6 +1858,7 @@ nonReserved
| KW_MACRO
| KW_MAP
| KW_MATCHED
| KW_MATERIALIZED
| KW_MERGE
| KW_MICROSECOND
| KW_MICROSECONDS
@@ -1847,6 +1882,7 @@ nonReserved
| KW_OF
| KW_OFFSET
| KW_ONLY
| KW_OPTIMIZE
| KW_OPTION
| KW_OPTIONS
| KW_OR
@@ -1890,6 +1926,7 @@ nonReserved
| KW_RESPECT
| KW_RESTRICT
| KW_REVOKE
| KW_REWRITE
| KW_RLIKE
| KW_REGEXP
| KW_ROLE
@@ -1986,5 +2023,6 @@ nonReserved
| KW_YEAR
| KW_YEARS
| KW_ZONE
| KW_ZORDER
//--DEFAULT-NON-RESERVED-END
;
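
Adding the new tokens to both `ansiNonReserved` and `nonReserved` keeps them usable as ordinary identifiers, so existing schemas that happen to use these words should not break. A quick hedged check of that property (assuming the non-reserved lists feed the identifier rules the same way they do in upstream Spark's grammar):

```typescript
import SparkSQL from 'src/parser/spark';

const parser = new SparkSQL();

// MATERIALIZED, OPTIMIZE, ZORDER, ENABLE, DISABLE and REWRITE are declared
// non-reserved above, so they should still parse as table/column names.
const legacySql = [
    'SELECT zorder, materialized FROM optimize;',
    'CREATE TABLE rewrite (enable INT, disable INT);',
];

legacySql.forEach((sql) => {
    console.assert(parser.validate(sql).length === 0, `rejected: ${sql}`);
});
```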

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large

File diff suppressed because one or more lines are too long


@@ -179,6 +179,7 @@ import { ComplexColTypeListContext } from "./SparkSqlParser";
import { ComplexColTypeContext } from "./SparkSqlParser";
import { WhenClauseContext } from "./SparkSqlParser";
import { WindowClauseContext } from "./SparkSqlParser";
import { ZorderClauseContext } from "./SparkSqlParser";
import { NamedWindowContext } from "./SparkSqlParser";
import { WindowSpecContext } from "./SparkSqlParser";
import { WindowFrameContext } from "./SparkSqlParser";
@@ -2144,6 +2145,17 @@ export interface SparkSqlParserListener extends ParseTreeListener {
 */
exitWindowClause?: (ctx: WindowClauseContext) => void;
/**
 * Enter a parse tree produced by `SparkSqlParser.zorderClause`.
 * @param ctx the parse tree
 */
enterZorderClause?: (ctx: ZorderClauseContext) => void;
/**
 * Exit a parse tree produced by `SparkSqlParser.zorderClause`.
 * @param ctx the parse tree
 */
exitZorderClause?: (ctx: ZorderClauseContext) => void;
/**
 * Enter a parse tree produced by `SparkSqlParser.namedWindow`.
 * @param ctx the parse tree
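
Downstream code can hook the new rule like any other generated callback. Below is a minimal listener sketch that collects `ZORDER BY` clauses; the generated-file import paths and the `parse()` call on the facade are assumptions here, so wire it to however your entry point actually exposes the ANTLR parse tree:

```typescript
import { ParseTreeWalker } from 'antlr4ts/tree/ParseTreeWalker';
import SparkSQL from 'src/parser/spark';
import { SparkSqlParserListener } from 'src/lib/spark/SparkSqlParserListener';
import { ZorderClauseContext } from 'src/lib/spark/SparkSqlParser';

// Collects the raw text of every ZORDER BY clause in a script.
class ZorderCollector implements SparkSqlParserListener {
    readonly clauses: string[] = [];

    enterZorderClause = (ctx: ZorderClauseContext): void => {
        // ctx.text concatenates token text without whitespace, e.g. 'ZORDERBYid,name'.
        this.clauses.push(ctx.text);
    };
}

const parser = new SparkSQL();
// Assumption: the facade exposes a parse() method returning the root context.
const tree = parser.parse('OPTIMIZE students WHERE id = 1 ZORDER BY id, name;');

const collector = new ZorderCollector();
ParseTreeWalker.DEFAULT.walk(collector, tree);
console.log(collector.clauses);
```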


@@ -179,6 +179,7 @@ import { ComplexColTypeListContext } from "./SparkSqlParser";
import { ComplexColTypeContext } from "./SparkSqlParser";
import { WhenClauseContext } from "./SparkSqlParser";
import { WindowClauseContext } from "./SparkSqlParser";
import { ZorderClauseContext } from "./SparkSqlParser";
import { NamedWindowContext } from "./SparkSqlParser";
import { WindowSpecContext } from "./SparkSqlParser";
import { WindowFrameContext } from "./SparkSqlParser";
@@ -1443,6 +1444,13 @@ export interface SparkSqlParserVisitor<Result> extends ParseTreeVisitor<Result>
 */
visitWindowClause?: (ctx: WindowClauseContext) => Result;
/**
 * Visit a parse tree produced by `SparkSqlParser.zorderClause`.
 * @param ctx the parse tree
 * @return the visitor result
 */
visitZorderClause?: (ctx: ZorderClauseContext) => Result;
/**
 * Visit a parse tree produced by `SparkSqlParser.namedWindow`.
 * @param ctx the parse tree


@@ -47,3 +47,21 @@ SELECT id, n FROM tbl GROUP BY ;
SELECT id, n FROM tbl ORDER BY name, i ;
SELECT id FROM tb1 GROUP BY ROLLUP( );
CREATE MATERIALIZED VIEW db.mv;
DROP MATERIALIZED VIEW db.mv;
ALTER MATERIALIZED VIEW db.mv;
REFRESH MATERIALIZED VIEW db.mv;
SHOW CREATE MATERIALIZED VIEW db.mv;
SHOW MATERIALIZED VIEWS from db;
OPTIMIZE db.tb;
OPTIMIZE db.tb ZORDER BY ;
OPTIMIZE db.tb ZORDER BY name, i;


@@ -442,4 +442,157 @@ describe('Spark SQL Syntax Suggestion', () => {
expect(suggestion).not.toBeUndefined();
expect(suggestion?.wordRanges.map((token) => token.text)).toEqual([]);
});
test('Create materialized view', () => {
const pos: CaretPosition = {
lineNumber: 51,
column: 31,
};
const syntaxes = parser.getSuggestionAtCaretPosition(
commentOtherLine(syntaxSql, pos.lineNumber),
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.VIEW_CREATE
);
expect(suggestion).not.toBeUndefined();
expect(suggestion?.wordRanges.map((token) => token.text)).toEqual(['db', '.', 'mv']);
});
test('Drop materialized view', () => {
const pos: CaretPosition = {
lineNumber: 53,
column: 29,
};
const syntaxes = parser.getSuggestionAtCaretPosition(
commentOtherLine(syntaxSql, pos.lineNumber),
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.VIEW
);
expect(suggestion).not.toBeUndefined();
expect(suggestion?.wordRanges.map((token) => token.text)).toEqual(['db', '.', 'mv']);
});
test('Alter materialized view', () => {
const pos: CaretPosition = {
lineNumber: 55,
column: 30,
};
const syntaxes = parser.getSuggestionAtCaretPosition(
commentOtherLine(syntaxSql, pos.lineNumber),
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.VIEW
);
expect(suggestion).not.toBeUndefined();
expect(suggestion?.wordRanges.map((token) => token.text)).toEqual(['db', '.', 'mv']);
});
test('Refresh materialized view', () => {
const pos: CaretPosition = {
lineNumber: 57,
column: 32,
};
const syntaxes = parser.getSuggestionAtCaretPosition(
commentOtherLine(syntaxSql, pos.lineNumber),
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.VIEW
);
expect(suggestion).not.toBeUndefined();
expect(suggestion?.wordRanges.map((token) => token.text)).toEqual(['db', '.', 'mv']);
});
test('Show create materialized view', () => {
const pos: CaretPosition = {
lineNumber: 59,
column: 36,
};
const syntaxes = parser.getSuggestionAtCaretPosition(
commentOtherLine(syntaxSql, pos.lineNumber),
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.VIEW
);
expect(suggestion).not.toBeUndefined();
expect(suggestion?.wordRanges.map((token) => token.text)).toEqual(['db', '.', 'mv']);
});
test('Show materialized views from database', () => {
const pos: CaretPosition = {
lineNumber: 61,
column: 32,
};
const syntaxes = parser.getSuggestionAtCaretPosition(
commentOtherLine(syntaxSql, pos.lineNumber),
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.DATABASE
);
expect(suggestion).not.toBeUndefined();
expect(suggestion?.wordRanges.map((token) => token.text)).toEqual(['db']);
});
test('Optimize table', () => {
const pos: CaretPosition = {
lineNumber: 63,
column: 15,
};
const syntaxes = parser.getSuggestionAtCaretPosition(
commentOtherLine(syntaxSql, pos.lineNumber),
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.TABLE
);
expect(suggestion).not.toBeUndefined();
expect(suggestion?.wordRanges.map((token) => token.text)).toEqual(['db', '.', 'tb']);
});
test('Optimize table zorder by empty', () => {
const pos: CaretPosition = {
lineNumber: 65,
column: 26,
};
const syntaxes = parser.getSuggestionAtCaretPosition(
commentOtherLine(syntaxSql, pos.lineNumber),
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.COLUMN
);
expect(suggestion).not.toBeUndefined();
expect(suggestion?.wordRanges.map((token) => token.text)).toEqual([]);
});
test('Optimize table zorder by columns', () => {
const pos: CaretPosition = {
lineNumber: 67,
column: 33,
};
const syntaxes = parser.getSuggestionAtCaretPosition(
commentOtherLine(syntaxSql, pos.lineNumber),
pos
)?.syntax;
const suggestion = syntaxes?.find(
(syn) => syn.syntaxContextType === SyntaxContextType.COLUMN
);
expect(suggestion).not.toBeUndefined();
expect(suggestion?.wordRanges.map((token) => token.text)).toEqual(['i']);
});
});


@@ -19,7 +19,15 @@ describe('Spark SQL Token Suggestion', () => {
pos
)?.keywords;
-expect(suggestion).toEqual(['TABLE', 'INDEX', 'VIEW', 'DATABASE', 'NAMESPACE', 'SCHEMA']);
+expect(suggestion).toEqual([
+'TABLE',
+'INDEX',
+'VIEW',
+'MATERIALIZED',
+'DATABASE',
+'NAMESPACE',
+'SCHEMA',
+]);
});
test('After CREATE', () => {
@@ -36,6 +44,7 @@ describe('Spark SQL Token Suggestion', () => {
'TEMPORARY',
'INDEX',
'ROLE',
'MATERIALIZED',
'FUNCTION',
'OR',
'GLOBAL',
@@ -102,6 +111,7 @@ describe('Spark SQL Token Suggestion', () => {
'INDEX',
'ROLE',
'FUNCTION',
'MATERIALIZED',
'VIEW',
'TABLE',
'DATABASE',
@@ -157,6 +167,7 @@ describe('Spark SQL Token Suggestion', () => {
'PRINCIPALS',
'ROLE',
'GRANT',
'MATERIALIZED',
'CATALOGS',
'FUNCTIONS',
'ALL',


@@ -4,12 +4,13 @@ import { readSQL } from 'test/helper';
const parser = new SparkSQL();
const features = {
-alertDatabase: readSQL(__dirname, 'alertDatabase.sql'),
-alertTable: readSQL(__dirname, 'alertTable.sql'),
-alertView: readSQL(__dirname, 'alertView.sql'),
+alterDatabase: readSQL(__dirname, 'alterDatabase.sql'),
+alterTable: readSQL(__dirname, 'alterTable.sql'),
+alterView: readSQL(__dirname, 'alterView.sql'),
+alterMaterializedView: readSQL(__dirname, 'alterMaterializedView.sql'),
};
-describe('SparkSQL Alert Syntax Tests', () => {
+describe('SparkSQL Alter Syntax Tests', () => {
Object.keys(features).forEach((key) => {
features[key].forEach((sql) => {
it(sql, () => {


@@ -7,6 +7,7 @@ const features = {
createDatabase: readSQL(__dirname, 'createDatabase.sql'),
createFunction: readSQL(__dirname, 'createFunction.sql'),
createView: readSQL(__dirname, 'createView.sql'),
createMaterializedView: readSQL(__dirname, 'createMaterializedView.sql'),
};
describe('SparkSQL Create Syntax Tests', () => {


@@ -8,6 +8,7 @@ const features = {
dropFunction: readSQL(__dirname, 'dropFunction.sql'),
dropTable: readSQL(__dirname, 'dropTable.sql'),
dropView: readSQL(__dirname, 'dropView.sql'),
dropMaterializedView: readSQL(__dirname, 'dropMaterializedView.sql'),
};
describe('SparkSQL Drop Syntax Tests', () => {


@@ -0,0 +1,13 @@
-- ALTER MATERIALIZED VIEW view_identifier ENABLE|DISABLE REWRITE;
ALTER MATERIALIZED VIEW mv ENABLE REWRITE;
ALTER MATERIALIZED VIEW userDB.mv ENABLE REWRITE;
ALTER MATERIALIZED VIEW mv DISABLE REWRITE;
-- ALTER MATERIALIZED VIEW view_identifier SET TBLPROPERTIES ( property_name=property_value, ... );
ALTER MATERIALIZED VIEW mv SET TBLPROPERTIES ('mv.enableAutoRefresh'='true', 'mv.refreshInterval'='10min');
ALTER MATERIALIZED VIEW userDB.mv SET TBLPROPERTIES ('mv.enableAutoRefresh'='true', 'mv.refreshInterval'='10min');


@@ -0,0 +1,99 @@
/**
** Notes:
** 1. MATERIALIZED VIEW syntax is not yet officially supported by Spark.
** 2. The following syntax is supported by DTStack's self-developed Spark component.
**/
-- CREATE MATERIALIZED VIEW [ IF NOT EXISTS ] view_identifier
-- [ USING data_source ]
-- [ OPTIONS ( key1=val1, key2=val2, ... ) ]
-- [ PARTITIONED BY ( col_name1, col_name2, ... ) ]
-- [ SKEWED BY ( col_name, col_name, ... )
-- ON ( ( col_value, col_value, ... ), ( col_value, col_value, ... ), ... )
-- [ STORED AS DIRECTORIES ] ]
-- [ CLUSTERED BY ( col_name3, col_name4, ... )
-- [ SORTED BY ( col_name [ ASC | DESC ], ... ) ]
-- INTO num_buckets BUCKETS ]
-- [ ROW FORMAT row_format ]
-- [ [ STORED AS file_format ]
-- | STORED BY 'storage.handler.class.name' [ WITH SERDEPROPERTIES (...) ] ) ]
-- [ LOCATION hdfs_path ]
-- [ COMMENT table_comment ]
-- [ TBLPROPERTIES (property_name=property_value, ...) ]
-- AS select_statement;
CREATE MATERIALIZED VIEW mv AS SELECT id FROM students;
CREATE MATERIALIZED VIEW userDB.mv AS SELECT id FROM students;
CREATE MATERIALIZED VIEW IF NOT EXISTS mv AS SELECT id FROM students;
-- Use data source
CREATE MATERIALIZED VIEW mv USING CSV AS SELECT id FROM students;
-- Use parquet data source with parquet storage options
CREATE MATERIALIZED VIEW mv
USING PARQUET
OPTIONS (
'parquet.bloom.filter.enabled'='true',
'parquet.bloom.filter.enabled#age'='false'
)
AS SELECT id, age FROM students;
CREATE MATERIALIZED VIEW mv
PARTITIONED BY (id)
AS SELECT id FROM students;
CREATE MATERIALIZED VIEW mv
SKEWED BY (id) ON (1,5,6)
AS SELECT id FROM students;
CREATE MATERIALIZED VIEW mv
SKEWED BY (id) ON (1,5,6) STORED AS DIRECTORIES
AS SELECT id FROM students;
-- Create bucketed materialized view
CREATE MATERIALIZED VIEW mv
CLUSTERED BY (id) SORTED BY (id) INTO 3 BUCKETS
AS SELECT id FROM students;
-- Use row format
CREATE MATERIALIZED VIEW mv
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
"input.regex" = ".*"
)
STORED AS TEXTFILE
AS SELECT id FROM students;
-- Use file format with 'stored as'
CREATE MATERIALIZED VIEW mv
STORED AS TEXTFILE
AS SELECT id FROM students;
-- Use file format with 'stored by'
CREATE MATERIALIZED VIEW mv
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
"hbase.columns.mapping" = "cf:string",
"hbase.table.name" = "hbase_table_0"
)
AS SELECT id FROM students;
-- Specify view storage path
CREATE MATERIALIZED VIEW mv
STORED AS PARQUET
LOCATION 'hdfs://mv/'
AS SELECT id FROM students;
-- Add mv comment
CREATE MATERIALIZED VIEW mv
STORED AS PARQUET
LOCATION 'hdfs://mv/'
COMMENT 'A materialized view'
AS SELECT id FROM students;
-- Set refresh properties
CREATE MATERIALIZED VIEW mv
TBLPROPERTIES("mv.enableAutoRefresh"="true", "mv.refreshInterval"="10min")
AS SELECT id FROM students;


@@ -0,0 +1,7 @@
-- DROP MATERIALIZED VIEW [ IF EXISTS ] view_identifier
DROP MATERIALIZED VIEW mv;
DROP MATERIALIZED VIEW userDB.mv;
DROP MATERIALIZED VIEW IF EXISTS mv;


@@ -0,0 +1,7 @@
-- OPTIMIZE table_identifier [ WHERE where_expression ] ZORDER BY col1, col2, ...
OPTIMIZE students ZORDER BY id, name;
OPTIMIZE userDB.students ZORDER BY id, name;
OPTIMIZE students WHERE id=1 ZORDER BY id, name;


@@ -15,3 +15,9 @@ REFRESH FUNCTION db1.func1;
REFRESH TABLE tbl1;
REFRESH TABLE tempDB.view1;
-- REFRESH MATERIALIZED VIEW view_identifier
REFRESH MATERIALIZED VIEW mv;
REFRESH MATERIALIZED VIEW userDB.mv;


@@ -99,3 +99,29 @@ SHOW VIEWS IN global_temp;
SHOW VIEWS FROM default LIKE 'sam*';
SHOW VIEWS LIKE 'sam|suj|temp*';
-- SHOW MATERIALIZED VIEWS [ { FROM | IN } database_name ] [ LIKE? regex_pattern ];
SHOW MATERIALIZED VIEWS;
SHOW MATERIALIZED VIEWS IN userdb;
SHOW MATERIALIZED VIEWS FROM userdb;
SHOW MATERIALIZED VIEWS LIKE 'test_view1|test_view2';
SHOW MATERIALIZED VIEWS IN userdb LIKE 'test_view1|test_view2';
SHOW MATERIALIZED VIEWS FROM userdb LIKE 'test_view1|test_view2';
SHOW MATERIALIZED VIEWS "test_*";
SHOW MATERIALIZED VIEWS IN userdb "test_*";
-- SHOW CREATE MATERIALIZED VIEW view_identifier [ AS SERDE ];
SHOW CREATE MATERIALIZED VIEW mv;
SHOW CREATE MATERIALIZED VIEW userdb.mv;
SHOW CREATE MATERIALIZED VIEW mv AS SERDE;


@@ -0,0 +1,16 @@
import SparkSQL from 'src/parser/spark';
import { readSQL } from 'test/helper';
const parser = new SparkSQL();
const features = {
optimize: readSQL(__dirname, 'optimize.sql'),
};
describe('Spark Optimize Syntax Tests', () => {
features.optimize.forEach((itemSql) => {
it(itemSql, () => {
expect(parser.validate(itemSql).length).toBe(0);
});
});
});