feat(spark): support materialized view for spark sql (#262)
* feat(spark): support materialized view for spark sql * fix(spark): code review update * fix(spark): update spark materialized view and zorder grammar * test(spark): add syntaxSuggestion test of materialized view --------- Co-authored-by: jialan <jialan@dtstack.com>
This commit is contained in:
@ -46,4 +46,22 @@ SELECT id, n FROM tbl GROUP BY ;
|
||||
|
||||
SELECT id, n FROM tbl ORDER BY name, i ;
|
||||
|
||||
SELECT id FROM tb1 GROUP BY ROLLUP( );
|
||||
SELECT id FROM tb1 GROUP BY ROLLUP( );
|
||||
|
||||
CREATE MATERIALIZED VIEW db.mv;
|
||||
|
||||
DROP MATERIALIZED VIEW db.mv;
|
||||
|
||||
ALTER MATERIALIZED VIEW db.mv;
|
||||
|
||||
REFRESH MATERIALIZED VIEW db.mv;
|
||||
|
||||
SHOW CREATE MATERIALIZED VIEW db.mv;
|
||||
|
||||
SHOW MATERIALIZED VIEWS from db;
|
||||
|
||||
OPTIMIZE db.tb;
|
||||
|
||||
OPTIMIZE db.tb ZORDER BY ;
|
||||
|
||||
OPTIMIZE db.tb ZORDER BY name, i;
|
@ -442,4 +442,157 @@ describe('Spark SQL Syntax Suggestion', () => {
|
||||
expect(suggestion).not.toBeUndefined();
|
||||
expect(suggestion?.wordRanges.map((token) => token.text)).toEqual([]);
|
||||
});
|
||||
|
||||
test('Create materialized view', () => {
|
||||
const pos: CaretPosition = {
|
||||
lineNumber: 51,
|
||||
column: 31,
|
||||
};
|
||||
const syntaxes = parser.getSuggestionAtCaretPosition(
|
||||
commentOtherLine(syntaxSql, pos.lineNumber),
|
||||
pos
|
||||
)?.syntax;
|
||||
const suggestion = syntaxes?.find(
|
||||
(syn) => syn.syntaxContextType === SyntaxContextType.VIEW_CREATE
|
||||
);
|
||||
|
||||
expect(suggestion).not.toBeUndefined();
|
||||
expect(suggestion?.wordRanges.map((token) => token.text)).toEqual(['db', '.', 'mv']);
|
||||
});
|
||||
|
||||
test('Drop materialized view', () => {
|
||||
const pos: CaretPosition = {
|
||||
lineNumber: 53,
|
||||
column: 29,
|
||||
};
|
||||
const syntaxes = parser.getSuggestionAtCaretPosition(
|
||||
commentOtherLine(syntaxSql, pos.lineNumber),
|
||||
pos
|
||||
)?.syntax;
|
||||
const suggestion = syntaxes?.find(
|
||||
(syn) => syn.syntaxContextType === SyntaxContextType.VIEW
|
||||
);
|
||||
|
||||
expect(suggestion).not.toBeUndefined();
|
||||
expect(suggestion?.wordRanges.map((token) => token.text)).toEqual(['db', '.', 'mv']);
|
||||
});
|
||||
|
||||
test('Alter materialized view', () => {
|
||||
const pos: CaretPosition = {
|
||||
lineNumber: 55,
|
||||
column: 30,
|
||||
};
|
||||
const syntaxes = parser.getSuggestionAtCaretPosition(
|
||||
commentOtherLine(syntaxSql, pos.lineNumber),
|
||||
pos
|
||||
)?.syntax;
|
||||
const suggestion = syntaxes?.find(
|
||||
(syn) => syn.syntaxContextType === SyntaxContextType.VIEW
|
||||
);
|
||||
|
||||
expect(suggestion).not.toBeUndefined();
|
||||
expect(suggestion?.wordRanges.map((token) => token.text)).toEqual(['db', '.', 'mv']);
|
||||
});
|
||||
|
||||
test('Refresh materialized view', () => {
|
||||
const pos: CaretPosition = {
|
||||
lineNumber: 57,
|
||||
column: 32,
|
||||
};
|
||||
const syntaxes = parser.getSuggestionAtCaretPosition(
|
||||
commentOtherLine(syntaxSql, pos.lineNumber),
|
||||
pos
|
||||
)?.syntax;
|
||||
const suggestion = syntaxes?.find(
|
||||
(syn) => syn.syntaxContextType === SyntaxContextType.VIEW
|
||||
);
|
||||
|
||||
expect(suggestion).not.toBeUndefined();
|
||||
expect(suggestion?.wordRanges.map((token) => token.text)).toEqual(['db', '.', 'mv']);
|
||||
});
|
||||
|
||||
test('Show create materialized view', () => {
|
||||
const pos: CaretPosition = {
|
||||
lineNumber: 59,
|
||||
column: 36,
|
||||
};
|
||||
const syntaxes = parser.getSuggestionAtCaretPosition(
|
||||
commentOtherLine(syntaxSql, pos.lineNumber),
|
||||
pos
|
||||
)?.syntax;
|
||||
const suggestion = syntaxes?.find(
|
||||
(syn) => syn.syntaxContextType === SyntaxContextType.VIEW
|
||||
);
|
||||
|
||||
expect(suggestion).not.toBeUndefined();
|
||||
expect(suggestion?.wordRanges.map((token) => token.text)).toEqual(['db', '.', 'mv']);
|
||||
});
|
||||
|
||||
test('Show all materialized from database', () => {
|
||||
const pos: CaretPosition = {
|
||||
lineNumber: 61,
|
||||
column: 32,
|
||||
};
|
||||
const syntaxes = parser.getSuggestionAtCaretPosition(
|
||||
commentOtherLine(syntaxSql, pos.lineNumber),
|
||||
pos
|
||||
)?.syntax;
|
||||
const suggestion = syntaxes?.find(
|
||||
(syn) => syn.syntaxContextType === SyntaxContextType.DATABASE
|
||||
);
|
||||
|
||||
expect(suggestion).not.toBeUndefined();
|
||||
expect(suggestion?.wordRanges.map((token) => token.text)).toEqual(['db']);
|
||||
});
|
||||
|
||||
test('Optimize table', () => {
|
||||
const pos: CaretPosition = {
|
||||
lineNumber: 63,
|
||||
column: 15,
|
||||
};
|
||||
const syntaxes = parser.getSuggestionAtCaretPosition(
|
||||
commentOtherLine(syntaxSql, pos.lineNumber),
|
||||
pos
|
||||
)?.syntax;
|
||||
const suggestion = syntaxes?.find(
|
||||
(syn) => syn.syntaxContextType === SyntaxContextType.TABLE
|
||||
);
|
||||
|
||||
expect(suggestion).not.toBeUndefined();
|
||||
expect(suggestion?.wordRanges.map((token) => token.text)).toEqual(['db', '.', 'tb']);
|
||||
});
|
||||
|
||||
test('Optimize table zorder by empty', () => {
|
||||
const pos: CaretPosition = {
|
||||
lineNumber: 65,
|
||||
column: 26,
|
||||
};
|
||||
const syntaxes = parser.getSuggestionAtCaretPosition(
|
||||
commentOtherLine(syntaxSql, pos.lineNumber),
|
||||
pos
|
||||
)?.syntax;
|
||||
const suggestion = syntaxes?.find(
|
||||
(syn) => syn.syntaxContextType === SyntaxContextType.COLUMN
|
||||
);
|
||||
|
||||
expect(suggestion).not.toBeUndefined();
|
||||
expect(suggestion?.wordRanges.map((token) => token.text)).toEqual([]);
|
||||
});
|
||||
|
||||
test('Optimize table zorder by columns', () => {
|
||||
const pos: CaretPosition = {
|
||||
lineNumber: 67,
|
||||
column: 33,
|
||||
};
|
||||
const syntaxes = parser.getSuggestionAtCaretPosition(
|
||||
commentOtherLine(syntaxSql, pos.lineNumber),
|
||||
pos
|
||||
)?.syntax;
|
||||
const suggestion = syntaxes?.find(
|
||||
(syn) => syn.syntaxContextType === SyntaxContextType.COLUMN
|
||||
);
|
||||
|
||||
expect(suggestion).not.toBeUndefined();
|
||||
expect(suggestion?.wordRanges.map((token) => token.text)).toEqual(['i']);
|
||||
});
|
||||
});
|
||||
|
@ -19,7 +19,15 @@ describe('Spark SQL Token Suggestion', () => {
|
||||
pos
|
||||
)?.keywords;
|
||||
|
||||
expect(suggestion).toEqual(['TABLE', 'INDEX', 'VIEW', 'DATABASE', 'NAMESPACE', 'SCHEMA']);
|
||||
expect(suggestion).toEqual([
|
||||
'TABLE',
|
||||
'INDEX',
|
||||
'VIEW',
|
||||
'MATERIALIZED',
|
||||
'DATABASE',
|
||||
'NAMESPACE',
|
||||
'SCHEMA',
|
||||
]);
|
||||
});
|
||||
|
||||
test('After CREATE', () => {
|
||||
@ -36,6 +44,7 @@ describe('Spark SQL Token Suggestion', () => {
|
||||
'TEMPORARY',
|
||||
'INDEX',
|
||||
'ROLE',
|
||||
'MATERIALIZED',
|
||||
'FUNCTION',
|
||||
'OR',
|
||||
'GLOBAL',
|
||||
@ -102,6 +111,7 @@ describe('Spark SQL Token Suggestion', () => {
|
||||
'INDEX',
|
||||
'ROLE',
|
||||
'FUNCTION',
|
||||
'MATERIALIZED',
|
||||
'VIEW',
|
||||
'TABLE',
|
||||
'DATABASE',
|
||||
@ -157,6 +167,7 @@ describe('Spark SQL Token Suggestion', () => {
|
||||
'PRINCIPALS',
|
||||
'ROLE',
|
||||
'GRANT',
|
||||
'MATERIALIZED',
|
||||
'CATALOGS',
|
||||
'FUNCTIONS',
|
||||
'ALL',
|
||||
|
@ -4,12 +4,13 @@ import { readSQL } from 'test/helper';
|
||||
const parser = new SparkSQL();
|
||||
|
||||
const features = {
|
||||
alertDatabase: readSQL(__dirname, 'alertDatabase.sql'),
|
||||
alertTable: readSQL(__dirname, 'alertTable.sql'),
|
||||
alertView: readSQL(__dirname, 'alertView.sql'),
|
||||
alterDatabase: readSQL(__dirname, 'alterDatabase.sql'),
|
||||
altertTable: readSQL(__dirname, 'alterTable.sql'),
|
||||
alterView: readSQL(__dirname, 'alterView.sql'),
|
||||
alterMaterializedView: readSQL(__dirname, 'alterMaterializedView.sql'),
|
||||
};
|
||||
|
||||
describe('SparkSQL Alert Syntax Tests', () => {
|
||||
describe('SparkSQL Alter Syntax Tests', () => {
|
||||
Object.keys(features).forEach((key) => {
|
||||
features[key].forEach((sql) => {
|
||||
it(sql, () => {
|
@ -7,6 +7,7 @@ const features = {
|
||||
createDatabase: readSQL(__dirname, 'createDatabase.sql'),
|
||||
createFunction: readSQL(__dirname, 'createFunction.sql'),
|
||||
createView: readSQL(__dirname, 'createView.sql'),
|
||||
createMaterializedView: readSQL(__dirname, 'createMaterializedView.sql'),
|
||||
};
|
||||
|
||||
describe('SparkSQL Create Syntax Tests', () => {
|
||||
|
@ -8,6 +8,7 @@ const features = {
|
||||
dropFunction: readSQL(__dirname, 'dropFunction.sql'),
|
||||
dropTable: readSQL(__dirname, 'dropTable.sql'),
|
||||
dropView: readSQL(__dirname, 'dropView.sql'),
|
||||
dropMaterializedView: readSQL(__dirname, 'dropMaterializedView.sql'),
|
||||
};
|
||||
|
||||
describe('SparkSQL Drop Syntax Tests', () => {
|
||||
|
13
test/parser/spark/syntax/fixtures/alterMaterializedView.sql
Normal file
13
test/parser/spark/syntax/fixtures/alterMaterializedView.sql
Normal file
@ -0,0 +1,13 @@
|
||||
-- ALTER MATERIALIZED VIEW view_identifier ENABLE|DISABLE REWRITE;
|
||||
|
||||
ALTER MATERIALIZED VIEW mv ENABLE REWRITE;
|
||||
|
||||
ALTER MATERIALIZED VIEW userDB.mv ENABLE REWRITE;
|
||||
|
||||
ALTER MATERIALIZED VIEW mv DISABLE REWRITE;
|
||||
|
||||
-- ALTER MATERIALIZED VIEW view_identifier SET TBLPROPERTIES ( property_name=property_value, ... );
|
||||
|
||||
ALTER MATERIALIZED VIEW mv SET TBLPROPERTIES ('mv.enableAutoRefresh'='true', 'mv.refreshInterval'='10min');
|
||||
|
||||
ALTER MATERIALIZED VIEW userDB.mv SET TBLPROPERTIES ('mv.enableAutoRefresh'='true', 'mv.refreshInterval'='10min');
|
99
test/parser/spark/syntax/fixtures/createMaterializedView.sql
Normal file
99
test/parser/spark/syntax/fixtures/createMaterializedView.sql
Normal file
@ -0,0 +1,99 @@
|
||||
/**
|
||||
** Notes:
|
||||
** 1. MATERIALIZED VIEW syntax is not yet officially supported by Spark.
|
||||
** 2. Support for the following syntax is based on a component developed in-house by dtstack.
|
||||
**/
|
||||
|
||||
-- CREATE MATERIALIZED VIEW [ IF NOT EXISTS ] view_identifier
|
||||
-- [ USING data_source ]
|
||||
-- [ OPTIONS ( key1=val1, key2=val2, ... ) ]
|
||||
-- [ PARTITIONED BY ( col_name1, col_name2, ... ) ]
|
||||
-- [ SKEWED BY ( col_name, col_name, ... )
|
||||
-- ON ( ( col_value, col_value, ... ), ( col_value, col_value, ... ), ... )
|
||||
-- [ STORED AS DIRECTORIES ] ]
|
||||
-- [ CLUSTERED BY ( col_name3, col_name4, ... )
|
||||
-- [ SORTED BY ( col_name [ ASC | DESC ], ... ) ]
|
||||
-- INTO num_buckets BUCKETS ]
|
||||
-- [ ROW FORMAT row_format ]
|
||||
-- [ [ STORED AS file_format ]
|
||||
-- | STORED BY 'storage.handler.class.name' [ WITH SERDEPROPERTIES (...) ] ]
|
||||
-- [ LOCATION hdfs_path ]
|
||||
-- [ COMMENT table_comment ]
|
||||
-- [ TBLPROPERTIES (property_name=property_value, ...) ]
|
||||
-- AS select_statement;
|
||||
|
||||
CREATE MATERIALIZED VIEW mv AS SELECT id FROM students;
|
||||
|
||||
CREATE MATERIALIZED VIEW userDB.mv AS SELECT id FROM students;
|
||||
|
||||
CREATE MATERIALIZED VIEW IF NOT EXISTS mv AS SELECT id FROM students;
|
||||
|
||||
-- Use data source
|
||||
CREATE MATERIALIZED VIEW mv USING CSV AS SELECT id FROM students;
|
||||
|
||||
-- Use parquet data source with parquet storage options
|
||||
CREATE MATERIALIZED VIEW mv
|
||||
USING PARQUET
|
||||
OPTIONS (
|
||||
'parquet.bloom.filter.enabled'='true',
|
||||
'parquet.bloom.filter.enabled#age'='false'
|
||||
)
|
||||
AS SELECT id, age FROM students;
|
||||
|
||||
CREATE MATERIALIZED VIEW mv
|
||||
PARTITIONED BY (id)
|
||||
AS SELECT id FROM students;
|
||||
|
||||
CREATE MATERIALIZED VIEW mv
|
||||
SKEWED BY (id) ON (1,5,6)
|
||||
AS SELECT id FROM students;
|
||||
|
||||
CREATE MATERIALIZED VIEW mv
|
||||
SKEWED BY (id) ON (1,5,6) STORED AS DIRECTORIES
|
||||
AS SELECT id FROM students;
|
||||
|
||||
-- Create bucketed materialized view
|
||||
CREATE MATERIALIZED VIEW mv
|
||||
CLUSTERED BY (id) SORTED BY (id) INTO 3 BUCKETS
|
||||
AS SELECT id FROM students;
|
||||
|
||||
-- Use row format
|
||||
CREATE MATERIALIZED VIEW mv
|
||||
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
|
||||
WITH SERDEPROPERTIES (
|
||||
"input.regex" = ".*"
|
||||
)
|
||||
STORED AS TEXTFILE
|
||||
AS SELECT id FROM students;
|
||||
|
||||
-- Use file format with 'stored as'
|
||||
CREATE MATERIALIZED VIEW mv
|
||||
STORED AS TEXTFILE
|
||||
AS SELECT id FROM students;
|
||||
|
||||
-- Use file format with 'stored by'
|
||||
CREATE MATERIALIZED VIEW mv
|
||||
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
|
||||
WITH SERDEPROPERTIES (
|
||||
"hbase.columns.mapping" = "cf:string",
|
||||
"hbase.table.name" = "hbase_table_0"
|
||||
)
|
||||
AS SELECT id FROM students;
|
||||
|
||||
-- Specify view storage path
|
||||
CREATE MATERIALIZED VIEW mv
|
||||
STORED AS PARQUET
|
||||
LOCATION 'hdfs://mv/'
|
||||
AS SELECT id FROM students;
|
||||
|
||||
-- Add mv comment
|
||||
CREATE MATERIALIZED VIEW mv
|
||||
STORED AS PARQUET
|
||||
LOCATION 'hdfs://mv/'
|
||||
COMMENT 'A materialized view'
|
||||
AS SELECT id FROM students;
|
||||
|
||||
-- Set refresh properties
|
||||
CREATE MATERIALIZED VIEW mv
|
||||
TBLPROPERTIES("mv.enableAutoRefresh"="true", "mv.refreshInterval"="10min")
|
||||
AS SELECT id FROM students;
|
@ -0,0 +1,7 @@
|
||||
-- DROP MATERIALIZED VIEW [ IF EXISTS ] view_identifier
|
||||
|
||||
DROP MATERIALIZED VIEW mv;
|
||||
|
||||
DROP MATERIALIZED VIEW userDB.mv;
|
||||
|
||||
DROP MATERIALIZED VIEW IF EXISTS mv;
|
7
test/parser/spark/syntax/fixtures/optimize.sql
Normal file
7
test/parser/spark/syntax/fixtures/optimize.sql
Normal file
@ -0,0 +1,7 @@
|
||||
-- OPTIMIZE table_identifier [ WHERE where_expression ] ZORDER BY col1, col2...
|
||||
|
||||
OPTIMIZE students ZORDER BY id, name;
|
||||
|
||||
OPTIMIZE userDB.students ZORDER BY id, name;
|
||||
|
||||
OPTIMIZE students WHERE id=1 ZORDER BY id, name;
|
@ -15,3 +15,9 @@ REFRESH FUNCTION db1.func1;
|
||||
REFRESH TABLE tbl1;
|
||||
|
||||
REFRESH TABLE tempDB.view1;
|
||||
|
||||
-- REFRESH MATERIALIZED VIEW view_identifier
|
||||
|
||||
REFRESH MATERIALIZED VIEW mv;
|
||||
|
||||
REFRESH MATERIALIZED VIEW userDB.mv;
|
||||
|
@ -99,3 +99,29 @@ SHOW VIEWS IN global_temp;
|
||||
SHOW VIEWS FROM default LIKE 'sam*';
|
||||
|
||||
SHOW VIEWS LIKE 'sam|suj|temp*';
|
||||
|
||||
-- SHOW MATERIALIZED VIEWS [ { FROM | IN } database_name ] [ [ LIKE ] regex_pattern ];
|
||||
|
||||
SHOW MATERIALIZED VIEWS;
|
||||
|
||||
SHOW MATERIALIZED VIEWS IN userdb;
|
||||
|
||||
SHOW MATERIALIZED VIEWS FROM userdb;
|
||||
|
||||
SHOW MATERIALIZED VIEWS LIKE 'test_view1|test_view2';
|
||||
|
||||
SHOW MATERIALIZED VIEWS IN userdb LIKE 'test_view1|test_view2';
|
||||
|
||||
SHOW MATERIALIZED VIEWS FROM userdb LIKE 'test_view1|test_view2';
|
||||
|
||||
SHOW MATERIALIZED VIEWS "test_*";
|
||||
|
||||
SHOW MATERIALIZED VIEWS IN userdb "test_*";
|
||||
|
||||
-- SHOW CREATE MATERIALIZED VIEW view_identifier [ AS SERDE ];
|
||||
|
||||
SHOW CREATE MATERIALIZED VIEW mv;
|
||||
|
||||
SHOW CREATE MATERIALIZED VIEW userdb.mv;
|
||||
|
||||
SHOW CREATE MATERIALIZED VIEW mv AS SERDE;
|
16
test/parser/spark/syntax/optimize.test.ts
Normal file
16
test/parser/spark/syntax/optimize.test.ts
Normal file
@ -0,0 +1,16 @@
|
||||
import SparkSQL from 'src/parser/spark';
|
||||
import { readSQL } from 'test/helper';
|
||||
|
||||
const parser = new SparkSQL();
|
||||
|
||||
const features = {
|
||||
set: readSQL(__dirname, 'optimize.sql'),
|
||||
};
|
||||
|
||||
describe('Spark Optimize Syntax Tests', () => {
|
||||
features.set.forEach((itemSql) => {
|
||||
it(itemSql, () => {
|
||||
expect(parser.validate(itemSql).length).toBe(0);
|
||||
});
|
||||
});
|
||||
});
|
Reference in New Issue
Block a user