From d13a92914d877409ff896445446503520b072304 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=90=89=E6=98=93?= Date: Mon, 9 Oct 2023 09:49:48 +0800 Subject: [PATCH] feat: spark sql DDL test (#170) Co-authored-by: liuyi --- test/parser/spark/syntax/alert.test.ts | 20 ++++ test/parser/spark/syntax/create.test.ts | 20 ++++ test/parser/spark/syntax/drop.test.ts | 21 ++++ .../spark/syntax/fixtures/alertDatabase.sql | 13 +++ .../spark/syntax/fixtures/alertTable.sql | 80 ++++++++++++++ .../spark/syntax/fixtures/alertView.sql | 22 ++++ .../syntax/fixtures/createDataSourceTable.sql | 68 ++++++++++++ .../spark/syntax/fixtures/createDatabase.sql | 23 ++++ .../spark/syntax/fixtures/createFunction.sql | 19 ++++ .../syntax/fixtures/createHiveFormatTable.sql | 101 ++++++++++++++++++ .../spark/syntax/fixtures/createTableLike.sql | 24 +++++ .../spark/syntax/fixtures/createView.sql | 10 ++ .../spark/syntax/fixtures/dropDatabase.sql | 14 +++ .../spark/syntax/fixtures/dropFunction.sql | 11 ++ .../spark/syntax/fixtures/dropTable.sql | 10 ++ .../parser/spark/syntax/fixtures/dropView.sql | 9 ++ .../spark/syntax/fixtures/repairTable.sql | 14 +++ .../spark/syntax/fixtures/truncateTable.sql | 6 ++ .../spark/syntax/fixtures/useDatabase.sql | 4 + test/parser/spark/syntax/table.test.ts | 23 ++++ test/parser/spark/syntax/useDatabase.test.ts | 18 ++++ 21 files changed, 530 insertions(+) create mode 100644 test/parser/spark/syntax/alert.test.ts create mode 100644 test/parser/spark/syntax/create.test.ts create mode 100644 test/parser/spark/syntax/drop.test.ts create mode 100644 test/parser/spark/syntax/fixtures/alertDatabase.sql create mode 100644 test/parser/spark/syntax/fixtures/alertTable.sql create mode 100644 test/parser/spark/syntax/fixtures/alertView.sql create mode 100644 test/parser/spark/syntax/fixtures/createDataSourceTable.sql create mode 100644 test/parser/spark/syntax/fixtures/createDatabase.sql create mode 100644 test/parser/spark/syntax/fixtures/createFunction.sql create mode 100644 
test/parser/spark/syntax/fixtures/createHiveFormatTable.sql create mode 100644 test/parser/spark/syntax/fixtures/createTableLike.sql create mode 100644 test/parser/spark/syntax/fixtures/createView.sql create mode 100644 test/parser/spark/syntax/fixtures/dropDatabase.sql create mode 100644 test/parser/spark/syntax/fixtures/dropFunction.sql create mode 100644 test/parser/spark/syntax/fixtures/dropTable.sql create mode 100644 test/parser/spark/syntax/fixtures/dropView.sql create mode 100644 test/parser/spark/syntax/fixtures/repairTable.sql create mode 100644 test/parser/spark/syntax/fixtures/truncateTable.sql create mode 100644 test/parser/spark/syntax/fixtures/useDatabase.sql create mode 100644 test/parser/spark/syntax/table.test.ts create mode 100644 test/parser/spark/syntax/useDatabase.test.ts diff --git a/test/parser/spark/syntax/alert.test.ts b/test/parser/spark/syntax/alert.test.ts new file mode 100644 index 0000000..c0c7a29 --- /dev/null +++ b/test/parser/spark/syntax/alert.test.ts @@ -0,0 +1,20 @@ +import SparkSQL from '../../../../src/parser/spark'; +import { readSQL } from '../../../helper'; + +const parser = new SparkSQL(); + +const features = { + alertDatabase: readSQL(__dirname, 'alertDatabase.sql'), + alertTable: readSQL(__dirname, 'alertTable.sql'), + alertView: readSQL(__dirname, 'alertView.sql'), +}; + +describe('SparkSQL Alter Syntax Tests', () => { + Object.keys(features).forEach((key) => { + features[key].forEach((sql) => { + it(sql, () => { + expect(parser.validate(sql).length).toBe(0); + }); + }); + }); +}); diff --git a/test/parser/spark/syntax/create.test.ts b/test/parser/spark/syntax/create.test.ts new file mode 100644 index 0000000..da58b5d --- /dev/null +++ b/test/parser/spark/syntax/create.test.ts @@ -0,0 +1,20 @@ +import SparkSQL from '../../../../src/parser/spark'; +import { readSQL } from '../../../helper'; + +const parser = new SparkSQL(); + +const features = { + createDatabase: readSQL(__dirname, 'createDatabase.sql'), + 
createFunction: readSQL(__dirname, 'createFunction.sql'), + createView: readSQL(__dirname, 'createView.sql'), +}; + +describe('SparkSQL Create Syntax Tests', () => { + Object.keys(features).forEach((key) => { + features[key].forEach((sql) => { + it(sql, () => { + expect(parser.validate(sql).length).toBe(0); + }); + }); + }); +}); diff --git a/test/parser/spark/syntax/drop.test.ts b/test/parser/spark/syntax/drop.test.ts new file mode 100644 index 0000000..399b96d --- /dev/null +++ b/test/parser/spark/syntax/drop.test.ts @@ -0,0 +1,21 @@ +import SparkSQL from '../../../../src/parser/spark'; +import { readSQL } from '../../../helper'; + +const parser = new SparkSQL(); + +const features = { + dropDatabase: readSQL(__dirname, 'dropDatabase.sql'), + dropFunction: readSQL(__dirname, 'dropFunction.sql'), + dropTable: readSQL(__dirname, 'dropTable.sql'), + dropView: readSQL(__dirname, 'dropView.sql'), +}; + +describe('SparkSQL Drop Syntax Tests', () => { + Object.keys(features).forEach((key) => { + features[key].forEach((sql) => { + it(sql, () => { + expect(parser.validate(sql).length).toBe(0); + }); + }); + }); +}); diff --git a/test/parser/spark/syntax/fixtures/alertDatabase.sql b/test/parser/spark/syntax/fixtures/alertDatabase.sql new file mode 100644 index 0000000..96ab74e --- /dev/null +++ b/test/parser/spark/syntax/fixtures/alertDatabase.sql @@ -0,0 +1,13 @@ +-- Syntax ALTER PROPERTIES +-- ALTER { DATABASE | SCHEMA | NAMESPACE } database_name SET { DBPROPERTIES | PROPERTIES } ( property_name = property_value [ , ... 
] ) + +ALTER DATABASE inventory SET DBPROPERTIES ('Edited-by' = 'John', 'Edit-date' = '01/01/2001'); +ALTER DATABASE inventory SET PROPERTIES ('Edited-by' = 'John', 'Edit-date' = '01/01/2001'); + + +-- Syntax ALTER LOCATION +-- ALTER { DATABASE | SCHEMA | NAMESPACE } database_name SET LOCATION 'new_location' + +ALTER DATABASE inventory SET LOCATION 'file:/temp/spark-warehouse/new_inventory.db'; +ALTER SCHEMA inventory SET LOCATION 'file:/temp/spark-warehouse/new_inventory.db'; +ALTER NAMESPACE inventory SET LOCATION 'file:/temp/spark-warehouse/new_inventory.db'; diff --git a/test/parser/spark/syntax/fixtures/alertTable.sql b/test/parser/spark/syntax/fixtures/alertTable.sql new file mode 100644 index 0000000..083cfb2 --- /dev/null +++ b/test/parser/spark/syntax/fixtures/alertTable.sql @@ -0,0 +1,80 @@ +-- Syntax RENAME +-- ALTER TABLE table_identifier RENAME TO table_identifier +ALTER TABLE Student RENAME TO StudentInfo; +-- ALTER TABLE table_identifier partition_spec RENAME TO partition_spec +ALTER TABLE default.StudentInfo PARTITION (age='10') RENAME TO PARTITION (age='15'); +ALTER TABLE default.StudentInfo PARTITION (age=10) RENAME TO PARTITION (age=12.323); + + +-- Syntax ADD COLUMNS +-- ALTER TABLE table_identifier ADD COLUMNS ( col_spec [ , ... ] ) +ALTER TABLE StudentInfo ADD COLUMNS (LastName string, DOB timestamp); + + +-- Syntax DROP COLUMNS +-- ALTER TABLE table_identifier DROP { COLUMN | COLUMNS } [ ( ] col_name [ , ... 
] [ ) ] +ALTER TABLE StudentInfo DROP columns (LastName, DOB); + + +-- Syntax RENAME COLUMN +-- ALTER TABLE table_identifier RENAME COLUMN col_name TO col_name +ALTER TABLE StudentInfo RENAME COLUMN name TO FirstName; + + +-- Syntax ALTER OR CHANGE COLUMN +-- ALTER TABLE table_identifier { ALTER | CHANGE } [ COLUMN ] col_name alterColumnAction +ALTER TABLE StudentInfo ALTER COLUMN FirstName COMMENT "new comment"; +ALTER TABLE StudentInfo CHANGE COLUMN FirstName COMMENT "new comment"; +ALTER TABLE StudentInfo ALTER FirstName COMMENT "new comment"; +ALTER TABLE StudentInfo CHANGE FirstName COMMENT "new comment"; + + +-- Syntax REPLACE COLUMNS +-- ALTER TABLE table_identifier [ partition_spec ] REPLACE COLUMNS [ ( ] qualified_col_type_with_position_list [ ) ] +ALTER TABLE StudentInfo REPLACE COLUMNS (name string, ID int COMMENT 'new comment'); +ALTER TABLE StudentInfo REPLACE COLUMNS name string, ID int COMMENT 'new comment'; + + +-- Syntax ADD PARTITION +-- ALTER TABLE table_identifier ADD [IF NOT EXISTS] ( partition_spec [ partition_spec ... ] ) +ALTER TABLE StudentInfo ADD IF NOT EXISTS PARTITION (age=18); +ALTER TABLE StudentInfo ADD PARTITION (age=18); +-- Adding multiple partitions to the table +ALTER TABLE StudentInfo ADD IF NOT EXISTS PARTITION (age=18) PARTITION (age=20); +ALTER TABLE StudentInfo ADD PARTITION (age=18) PARTITION (age=20); + +-- Syntax DROP PARTITION +-- ALTER TABLE table_identifier DROP [ IF EXISTS ] partition_spec [PURGE] +ALTER TABLE StudentInfo DROP IF EXISTS PARTITION (age=18); +ALTER TABLE StudentInfo DROP PARTITION (age=18); + +-- Syntax SET TABLE PROPERTIES +-- ALTER TABLE table_identifier SET TBLPROPERTIES ( key1 = val1, key2 = val2, ... ) +ALTER TABLE dbx.tab1 SET TBLPROPERTIES ('comment' = 'A table comment.'); + +-- Unset Table Properties +-- ALTER TABLE table_identifier UNSET TBLPROPERTIES [ IF EXISTS ] ( key1, key2, ... 
) +ALTER TABLE dbx.tab1 UNSET TBLPROPERTIES IF EXISTS ('winner'); +ALTER TABLE dbx.tab1 UNSET TBLPROPERTIES ('winner'); + + +-- Syntax SET SERDE +-- ALTER TABLE table_identifier [ partition_spec ] SET SERDEPROPERTIES ( key1 = val1, key2 = val2, ... ) +ALTER TABLE test_tab SET SERDE 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'; +-- ALTER TABLE table_identifier [ partition_spec ] SET SERDE serde_class_name [ WITH SERDEPROPERTIES ( key1 = val1, key2 = val2, ... ) ] +ALTER TABLE dbx.tab1 SET SERDE 'org.apache.hadoop' WITH SERDEPROPERTIES ('k' = 'v', 'kay' = 'vee'); + + +-- Syntax SET LOCATION And SET FILE FORMAT +-- ALTER TABLE table_identifier [ partition_spec ] SET FILEFORMAT file_format +ALTER TABLE loc_orc SET FILEFORMAT orc; +ALTER TABLE p1 partition (month=2, day=2) SET FILEFORMAT parquet; + +-- ALTER TABLE table_identifier [ partition_spec ] SET LOCATION 'new_location' +ALTER TABLE dbx.tab1 PARTITION (a='1', b='2') SET LOCATION '/path/to/part/ways'; +ALTER TABLE dbx.tab1 SET LOCATION '/path/to/part/ways'; + + +-- Syntax RECOVER PARTITIONS +-- ALTER TABLE table_identifier RECOVER PARTITIONS +ALTER TABLE dbx.tab1 RECOVER PARTITIONS; diff --git a/test/parser/spark/syntax/fixtures/alertView.sql b/test/parser/spark/syntax/fixtures/alertView.sql new file mode 100644 index 0000000..24d1bbb --- /dev/null +++ b/test/parser/spark/syntax/fixtures/alertView.sql @@ -0,0 +1,22 @@ +-- Syntax RENAME View +-- ALTER VIEW view_identifier RENAME TO view_identifier +ALTER VIEW tempdb1.v1 RENAME TO tempdb1.v2; + + +-- Syntax SET View Properties +-- ALTER VIEW view_identifier SET TBLPROPERTIES ( property_key = property_val [ , ... ] ) +ALTER VIEW tempdb1.v2 SET TBLPROPERTIES ('created.by.user' = "John", 'created.date' = '01-01-2001' ); + + +-- Syntax UNSET View Properties +-- ALTER VIEW view_identifier UNSET TBLPROPERTIES [ IF EXISTS ] ( property_key [ , ... 
] ) +ALTER VIEW tempdb1.v2 UNSET TBLPROPERTIES IF EXISTS ('created.by.user', 'created.date'); +ALTER VIEW tempdb1.v2 UNSET TBLPROPERTIES ('created.by.user', 'created.date'); + + +-- Syntax ALTER View AS SELECT +-- ALTER VIEW view_identifier AS select_statement +ALTER VIEW tempdb1.v2 AS SELECT * FROM tempdb1.v1; + + + diff --git a/test/parser/spark/syntax/fixtures/createDataSourceTable.sql b/test/parser/spark/syntax/fixtures/createDataSourceTable.sql new file mode 100644 index 0000000..3e0f314 --- /dev/null +++ b/test/parser/spark/syntax/fixtures/createDataSourceTable.sql @@ -0,0 +1,68 @@ +-- Syntax +-- CREATE TABLE [ IF NOT EXISTS ] table_identifier +-- [ ( col_name1 col_type1 [ COMMENT col_comment1 ], ... ) ] +-- USING data_source +-- [ OPTIONS ( key1=val1, key2=val2, ... ) ] +-- [ PARTITIONED BY ( col_name1, col_name2, ... ) ] +-- [ CLUSTERED BY ( col_name3, col_name4, ... ) +-- [ SORTED BY ( col_name [ ASC | DESC ], ... ) ] +-- INTO num_buckets BUCKETS ] +-- [ LOCATION path ] +-- [ COMMENT table_comment ] +-- [ TBLPROPERTIES ( key1=val1, key2=val2, ... 
) ] +-- [ AS select_statement ] + + +--Use data source +CREATE TABLE student (id INT, name STRING, age INT) USING CSV; +CREATE TABLE IF NOT EXISTS student (id INT, name STRING, age INT) USING CSV; + +--Use data from another table +CREATE TABLE student_copy USING CSV AS SELECT * FROM student; + +--Omit the USING clause, which uses the default data source (parquet by default) +CREATE TABLE student (id INT, name STRING, age INT); + +--Use parquet data source with parquet storage options +--The columns 'id' and 'name' enable the bloom filter during writing parquet file, +--column 'age' does not enable +CREATE TABLE student_parquet(id INT, name STRING, age INT) USING PARQUET + OPTIONS ( + 'parquet.bloom.filter.enabled'='true', + 'parquet.bloom.filter.enabled#age'='false' + ); + +--Specify table comment and properties +CREATE TABLE student (id INT, name STRING, age INT) USING CSV + LOCATION 'file:/temp/spark-warehouse/new_inventory.db' + COMMENT 'this is a comment' + TBLPROPERTIES ('foo'='bar'); + +--Specify table comment and properties with different clauses order +CREATE TABLE student (id INT, name STRING, age INT) USING CSV + TBLPROPERTIES ('foo'='bar') + COMMENT 'this is a comment'; + +--Create partitioned and bucketed table +CREATE TABLE student (id INT, name STRING, age INT) + USING CSV + PARTITIONED BY (age) + CLUSTERED BY (Id) + SORTED BY (Id ASC) INTO 4 buckets; + +--Create partitioned and bucketed table through CTAS +CREATE TABLE student_partition_bucket + USING parquet + PARTITIONED BY (age) + CLUSTERED BY (id) INTO 4 buckets + AS SELECT * FROM student; + +--Create bucketed table through CTAS and CTE +CREATE TABLE student_bucket + USING parquet + CLUSTERED BY (id) INTO 4 buckets ( + WITH tmpTable AS ( + SELECT * FROM student WHERE id > 100 + ) + SELECT * FROM tmpTable +); diff --git a/test/parser/spark/syntax/fixtures/createDatabase.sql b/test/parser/spark/syntax/fixtures/createDatabase.sql new file mode 100644 index 0000000..ff2a2f0 --- /dev/null +++ 
b/test/parser/spark/syntax/fixtures/createDatabase.sql @@ -0,0 +1,23 @@ +-- Syntax +-- CREATE { DATABASE | SCHEMA } [ IF NOT EXISTS ] database_name [ COMMENT database_comment ] [ LOCATION database_directory ] [ WITH DBPROPERTIES ( property_name = property_value [ , ... ] ) ] + +CREATE DATABASE IF NOT EXISTS customer_db; +CREATE DATABASE customer_db; + +CREATE SCHEMA IF NOT EXISTS customer_db; +CREATE SCHEMA customer_db; + + +CREATE DATABASE IF NOT EXISTS customer_db COMMENT 'This is customer database' LOCATION '/user' WITH DBPROPERTIES (ID=001, Name='John'); +CREATE DATABASE IF NOT EXISTS customer_db LOCATION '/user' WITH DBPROPERTIES (ID=001, Name='John'); +CREATE DATABASE IF NOT EXISTS customer_db WITH DBPROPERTIES (ID=001, Name='John'); +CREATE DATABASE customer_db COMMENT 'This is customer database' LOCATION '/user' WITH DBPROPERTIES (ID=001, Name='John'); +CREATE DATABASE customer_db LOCATION '/user' WITH DBPROPERTIES (ID=001, Name='John'); +CREATE DATABASE customer_db WITH DBPROPERTIES (ID=001, Name='John'); + +CREATE SCHEMA IF NOT EXISTS customer_db COMMENT 'This is customer database' LOCATION '/user' WITH DBPROPERTIES (ID=001, Name='John'); +CREATE SCHEMA IF NOT EXISTS customer_db LOCATION '/user' WITH DBPROPERTIES (ID=001, Name='John'); +CREATE SCHEMA IF NOT EXISTS customer_db WITH DBPROPERTIES (ID=001, Name='John'); +CREATE SCHEMA customer_db COMMENT 'This is customer database' LOCATION '/user' WITH DBPROPERTIES (ID=001, Name='John'); +CREATE SCHEMA customer_db LOCATION '/user' WITH DBPROPERTIES (ID=001, Name='John'); +CREATE SCHEMA customer_db WITH DBPROPERTIES (ID=001, Name='John'); diff --git a/test/parser/spark/syntax/fixtures/createFunction.sql b/test/parser/spark/syntax/fixtures/createFunction.sql new file mode 100644 index 0000000..5f124c7 --- /dev/null +++ b/test/parser/spark/syntax/fixtures/createFunction.sql @@ -0,0 +1,19 @@ +-- Syntax +-- CREATE [ OR REPLACE ] [ TEMPORARY ] FUNCTION [ IF NOT EXISTS ] function_name AS class_name [ 
resource_locations ] + +CREATE OR REPLACE TEMPORARY FUNCTION IF NOT EXISTS simple_udf AS 'SimpleUdfR' USING JAR '/tmp/SimpleUdfR.jar'; +CREATE OR REPLACE TEMPORARY FUNCTION IF NOT EXISTS simple_udf AS 'SimpleUdfR'; + +CREATE OR REPLACE FUNCTION IF NOT EXISTS simple_udf AS 'SimpleUdfR'; +CREATE TEMPORARY FUNCTION IF NOT EXISTS simple_udf AS 'SimpleUdfR'; +CREATE FUNCTION IF NOT EXISTS simple_udf AS 'SimpleUdfR'; + +CREATE OR REPLACE FUNCTION simple_udf AS 'SimpleUdfR'; +CREATE TEMPORARY FUNCTION simple_udf AS 'SimpleUdfR'; +CREATE FUNCTION simple_udf AS 'SimpleUdfR'; + + +CREATE FUNCTION simple_udf AS 'SimpleUdf' USING JAR '/tmp/SimpleUdf.jar'; + +CREATE TEMPORARY FUNCTION simple_temp_udf AS 'SimpleUdf' USING JAR '/tmp/SimpleUdf.jar'; + diff --git a/test/parser/spark/syntax/fixtures/createHiveFormatTable.sql b/test/parser/spark/syntax/fixtures/createHiveFormatTable.sql new file mode 100644 index 0000000..413ed17 --- /dev/null +++ b/test/parser/spark/syntax/fixtures/createHiveFormatTable.sql @@ -0,0 +1,101 @@ +-- Syntax +-- CREATE [ EXTERNAL ] TABLE [ IF NOT EXISTS ] table_identifier +-- [ ( col_name1[:] col_type1 [ COMMENT col_comment1 ], ... ) ] +-- [ COMMENT table_comment ] +-- [ PARTITIONED BY ( col_name2[:] col_type2 [ COMMENT col_comment2 ], ... ) +-- | ( col_name1, col_name2, ... ) ] +-- [ CLUSTERED BY ( col_name1, col_name2, ...) +-- [ SORTED BY ( col_name1 [ ASC | DESC ], col_name2 [ ASC | DESC ], ... ) ] +-- INTO num_buckets BUCKETS ] +-- [ ROW FORMAT row_format ] +-- [ STORED AS file_format ] +-- [ LOCATION path ] +-- [ TBLPROPERTIES ( key1=val1, key2=val2, ... 
) ] +-- [ AS select_statement ] + + +--Use hive format +CREATE TABLE student (id INT, name STRING, age INT) STORED AS ORC; + +--Use data from another table +CREATE TABLE student_copy STORED AS ORC + AS SELECT * FROM student; + +--Specify table comment and properties +CREATE TABLE student (id INT, name STRING, age INT) + COMMENT 'this is a comment' + STORED AS ORC + TBLPROPERTIES ('foo'='bar'); + +--Specify table comment and properties with different clauses order +CREATE TABLE student (id INT, name STRING, age INT) + STORED AS ORC + TBLPROPERTIES ('foo'='bar') + COMMENT 'this is a comment'; + +--Create partitioned table +CREATE TABLE student (id INT, name STRING) + PARTITIONED BY (age INT) + STORED AS ORC; + +--Create partitioned table with different clauses order +CREATE TABLE student (id INT, name STRING) + STORED AS ORC + PARTITIONED BY (age INT); + +--Use Row Format and file format +CREATE TABLE IF NOT EXISTS student (id INT, name STRING) + ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + STORED AS TEXTFILE; + +--Use complex datatype +CREATE EXTERNAL TABLE family( + name STRING, + friends ARRAY<STRING>, + children MAP<STRING, INT>, + address STRUCT<street: STRING, city: STRING> + ) + ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ESCAPED BY '\\' + COLLECTION ITEMS TERMINATED BY '_' + MAP KEYS TERMINATED BY ':' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'foonull' + STORED AS TEXTFILE + LOCATION '/tmp/family/'; + +--Use predefined custom SerDe +CREATE TABLE avroExample + ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' + STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' + OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' + TBLPROPERTIES ('avro.schema.literal'='{ "namespace": "org.apache.hive", + "name": "first_schema", + "type": "record", + "fields": [ + { "name":"string1", "type":"string" }, + { "name":"string2", "type":"string" } + ] }'); + +--Use personalized custom SerDe(we may need to `ADD JAR xxx.jar` first to ensure we can find the 
serde_class, +--or you may run into `CLASSNOTFOUND` exception) +ADD JAR /tmp/hive_serde_example.jar; + +CREATE EXTERNAL TABLE family (id INT, name STRING) + ROW FORMAT SERDE 'com.ly.spark.serde.SerDeExample' + STORED AS INPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleInputFormat' + OUTPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleOutputFormat' + LOCATION '/tmp/family/'; + +--Use `CLUSTERED BY` clause to create bucket table without `SORTED BY` +CREATE TABLE clustered_by_test1 (ID INT, AGE STRING) + CLUSTERED BY (ID) + INTO 4 BUCKETS + STORED AS ORC; + +--Use `CLUSTERED BY` clause to create bucket table with `SORTED BY` +CREATE TABLE clustered_by_test2 (ID INT, NAME STRING) + PARTITIONED BY (YEAR STRING) + CLUSTERED BY (ID, NAME) + SORTED BY (ID ASC) + INTO 3 BUCKETS + STORED AS PARQUET; diff --git a/test/parser/spark/syntax/fixtures/createTableLike.sql b/test/parser/spark/syntax/fixtures/createTableLike.sql new file mode 100644 index 0000000..366bd6e --- /dev/null +++ b/test/parser/spark/syntax/fixtures/createTableLike.sql @@ -0,0 +1,24 @@ +-- Syntax +-- CREATE TABLE [IF NOT EXISTS] table_identifier LIKE source_table_identifier +-- USING data_source +-- [ ROW FORMAT row_format ] +-- [ STORED AS file_format ] +-- [ TBLPROPERTIES ( key1=val1, key2=val2, ... 
) ] +-- [ LOCATION path ] + + +-- Create table using an existing table +CREATE TABLE Student_Duple like Student; +CREATE TABLE IF NOT EXISTS Student_Duple like Student; + +-- Create table like using a data source +CREATE TABLE Student_Duple like Student USING CSV; + +-- Table is created as external table at the location specified +CREATE TABLE Student_Duple like Student location '/root1/home'; + +-- Create table like using a rowformat +CREATE TABLE Student_Duple like Student + ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + STORED AS TEXTFILE + TBLPROPERTIES ('owner'='xxxx'); diff --git a/test/parser/spark/syntax/fixtures/createView.sql b/test/parser/spark/syntax/fixtures/createView.sql new file mode 100644 index 0000000..a1f3cdd --- /dev/null +++ b/test/parser/spark/syntax/fixtures/createView.sql @@ -0,0 +1,10 @@ +-- Syntax +-- CREATE [ OR REPLACE ] [ [ GLOBAL ] TEMPORARY ] VIEW [ IF NOT EXISTS ] view_identifier create_view_clauses AS query + +CREATE OR REPLACE VIEW experienced_employee (ID COMMENT 'Unique identification number', Name) COMMENT 'View for experienced employees' AS SELECT id, name FROM all_employee WHERE working_years > 5; +CREATE VIEW experienced_employee (ID COMMENT 'Unique identification number', Name) COMMENT 'View for experienced employees' AS SELECT id, name FROM all_employee WHERE working_years > 5; + +CREATE OR REPLACE GLOBAL TEMPORARY VIEW IF NOT EXISTS subscribed_movies AS SELECT mo.member_id, mb.full_name, mo.movie_title FROM movies AS mo INNER JOIN members AS mb ON mo.member_id = mb.id; +CREATE OR REPLACE GLOBAL TEMPORARY VIEW subscribed_movies AS SELECT mo.member_id, mb.full_name, mo.movie_title FROM movies AS mo INNER JOIN members AS mb ON mo.member_id = mb.id; +CREATE GLOBAL TEMPORARY VIEW IF NOT EXISTS subscribed_movies AS SELECT mo.member_id, mb.full_name, mo.movie_title FROM movies AS mo INNER JOIN members AS mb ON mo.member_id = mb.id; +CREATE GLOBAL TEMPORARY VIEW subscribed_movies AS SELECT mo.member_id, mb.full_name, 
mo.movie_title FROM movies AS mo INNER JOIN members AS mb ON mo.member_id = mb.id; diff --git a/test/parser/spark/syntax/fixtures/dropDatabase.sql b/test/parser/spark/syntax/fixtures/dropDatabase.sql new file mode 100644 index 0000000..5725baa --- /dev/null +++ b/test/parser/spark/syntax/fixtures/dropDatabase.sql @@ -0,0 +1,14 @@ +-- Syntax +-- DROP { DATABASE | SCHEMA } [ IF EXISTS ] dbname [ RESTRICT | CASCADE ] + + +CREATE DATABASE inventory_db COMMENT 'This database is used to maintain Inventory'; +CREATE SCHEMA inventory_db COMMENT 'This database is used to maintain Inventory'; + +DROP DATABASE inventory_db CASCADE; +DROP SCHEMA inventory_db CASCADE; + +DROP DATABASE IF EXISTS inventory_db CASCADE; +DROP SCHEMA IF EXISTS inventory_db CASCADE; +DROP DATABASE inventory_db RESTRICT; +DROP SCHEMA inventory_db RESTRICT; diff --git a/test/parser/spark/syntax/fixtures/dropFunction.sql b/test/parser/spark/syntax/fixtures/dropFunction.sql new file mode 100644 index 0000000..e600b0b --- /dev/null +++ b/test/parser/spark/syntax/fixtures/dropFunction.sql @@ -0,0 +1,11 @@ +-- Syntax +-- DROP [ TEMPORARY ] FUNCTION [ IF EXISTS ] function_name + + +DROP FUNCTION test_avg; +DROP TEMPORARY FUNCTION test_avg; + +DROP TEMPORARY FUNCTION IF EXISTS test_avg; +DROP FUNCTION IF EXISTS test_avg; + + diff --git a/test/parser/spark/syntax/fixtures/dropTable.sql b/test/parser/spark/syntax/fixtures/dropTable.sql new file mode 100644 index 0000000..1e4a2fe --- /dev/null +++ b/test/parser/spark/syntax/fixtures/dropTable.sql @@ -0,0 +1,10 @@ +-- Syntax +-- DROP TABLE [ IF EXISTS ] table_identifier [ PURGE ] + +DROP TABLE userDB.employable; + +DROP TABLE IF EXISTS employable; +DROP TABLE employable; + +DROP TABLE IF EXISTS employable PURGE; +DROP TABLE employable PURGE; diff --git a/test/parser/spark/syntax/fixtures/dropView.sql b/test/parser/spark/syntax/fixtures/dropView.sql new file mode 100644 index 0000000..952bfa1 --- /dev/null +++ b/test/parser/spark/syntax/fixtures/dropView.sql @@ 
-0,0 +1,9 @@ +-- Syntax +-- DROP VIEW [ IF EXISTS ] view_identifier + + +DROP VIEW employeeView; + +DROP VIEW userDB.employeeView; + +DROP VIEW IF EXISTS employeeView; diff --git a/test/parser/spark/syntax/fixtures/repairTable.sql b/test/parser/spark/syntax/fixtures/repairTable.sql new file mode 100644 index 0000000..7df97f7 --- /dev/null +++ b/test/parser/spark/syntax/fixtures/repairTable.sql @@ -0,0 +1,14 @@ +-- Syntax +-- [MSCK] REPAIR TABLE table_identifier [{ADD|DROP|SYNC} PARTITIONS] + + +REPAIR TABLE t1; +MSCK REPAIR TABLE t1; + +REPAIR TABLE t1 ADD PARTITIONS; +REPAIR TABLE t1 DROP PARTITIONS; +REPAIR TABLE t1 SYNC PARTITIONS; + +MSCK REPAIR TABLE t1 ADD PARTITIONS; +MSCK REPAIR TABLE t1 DROP PARTITIONS; +MSCK REPAIR TABLE t1 SYNC PARTITIONS; diff --git a/test/parser/spark/syntax/fixtures/truncateTable.sql b/test/parser/spark/syntax/fixtures/truncateTable.sql new file mode 100644 index 0000000..e046e8d --- /dev/null +++ b/test/parser/spark/syntax/fixtures/truncateTable.sql @@ -0,0 +1,6 @@ +-- Syntax +-- TRUNCATE TABLE table_identifier [ partition_spec ] + +TRUNCATE TABLE Student partition(age=10); + +TRUNCATE TABLE Student; diff --git a/test/parser/spark/syntax/fixtures/useDatabase.sql b/test/parser/spark/syntax/fixtures/useDatabase.sql new file mode 100644 index 0000000..7e284a3 --- /dev/null +++ b/test/parser/spark/syntax/fixtures/useDatabase.sql @@ -0,0 +1,4 @@ +-- Syntax +-- USE database_name + +USE userDB; diff --git a/test/parser/spark/syntax/table.test.ts b/test/parser/spark/syntax/table.test.ts new file mode 100644 index 0000000..513d8a4 --- /dev/null +++ b/test/parser/spark/syntax/table.test.ts @@ -0,0 +1,23 @@ +import SparkSQL from '../../../../src/parser/spark'; +import { readSQL } from '../../../helper'; + +const parser = new SparkSQL(); + +const features = { + createDataSourceTable: readSQL(__dirname, 'createDataSourceTable.sql'), + createHiveFormatTable: readSQL(__dirname, 'createHiveFormatTable.sql'), + createTableLike: readSQL(__dirname, 
'createTableLike.sql'), + + repairTable: readSQL(__dirname, 'repairTable.sql'), + truncateTable: readSQL(__dirname, 'truncateTable.sql'), +}; + +describe('SparkSQL Table Syntax Tests', () => { + Object.keys(features).forEach((key) => { + features[key].forEach((sql) => { + it(sql, () => { + expect(parser.validate(sql).length).toBe(0); + }); + }); + }); +}); diff --git a/test/parser/spark/syntax/useDatabase.test.ts b/test/parser/spark/syntax/useDatabase.test.ts new file mode 100644 index 0000000..23c7233 --- /dev/null +++ b/test/parser/spark/syntax/useDatabase.test.ts @@ -0,0 +1,18 @@ +import SparkSQL from '../../../../src/parser/spark'; +import { readSQL } from '../../../helper'; + +const parser = new SparkSQL(); + +const features = { + useDatabase: readSQL(__dirname, 'useDatabase.sql'), +}; + +describe('SparkSQL Use Database Syntax Tests', () => { + Object.keys(features).forEach((key) => { + features[key].forEach((sql) => { + it(sql, () => { + expect(parser.validate(sql).length).toBe(0); + }); + }); + }); +});