feat: support impala (#184)

* feat(impala): add impala sqlLexer

* feat(impala): add impala grammar

* feat(impala): add alter table sql

* feat(impala): update alter table sql

* feat(impala): add alter db sql

* feat(impala): add alter view sql

* feat(impala): add compute stats/comment statement and update partition_desc for alter table

* feat(impala): add drop statement sql

* feat(impala): add revoke and grant sql

* feat(impala): add create db/function/role/view sql

* feat(impala): add describe/explain/invalidate_metadata/load_data sql

* feat(impala): add refresh/set/shutdown sql

* feat(impala): add truncate_table/use/values sql

* fix(impala): update shutdown and invalidate_metadata

* feat(impala): add show/update/upsert sql

* feat(impala): add create/insert sql

* feat(impala): add select and delete sql

* feat(impala): add impala tokens and fix todo

* feat(impala): update impala parser and some unit tests

* feat(impala): add syntax suggestion

* feat(impala): add syntax suggestion

* feat(impala): update unit tests

* feat(impala): remove reference

* fix(impala): add statement for sqlname and collect tableName

* fix(impala): fix syntax suggestion unit test

* fix(impala): update syntax suggestion and collect column

* feat(impala): add collect column create

Author: 霜序
Date: 2023-11-28 21:11:07 +08:00
Committed by: GitHub
Parent: db05cb3e4f
Commit: e203f1a48a
68 changed files with 38979 additions and 4 deletions
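
A rough sketch of how the validation and syntax-suggestion features listed above would be used, assuming the new dialect is exposed as an ImpalaSQL class with the same validate / getSuggestionAtCaretPosition surface as the library's existing dialect classes (the names below are illustrative, not taken from this diff):

// Sketch only: the ImpalaSQL export and method names are assumed to mirror
// the other dialect classes; adjust to the actual API.
import { ImpalaSQL } from 'dt-sql-parser';

const parser = new ImpalaSQL();

// Validation: returns a list of parse errors, empty when the statement is accepted.
const errors = parser.validate("ALTER TABLE my_table ADD COLUMNS (id INT COMMENT 'Identifier');");
console.log(errors.length === 0 ? 'valid' : errors);

// Syntax suggestion: candidate keywords/entities at a caret position,
// here directly after "INSERT INTO " on line 1.
const suggestion = parser.getSuggestionAtCaretPosition('INSERT INTO ', {
  lineNumber: 1,
  column: 13,
});
console.log(suggestion?.keywords, suggestion?.syntax);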

@@ -0,0 +1,2 @@
-- ALTER DATABASE database_name SET OWNER USER user_name;
ALTER DATABASE my_db SET OWNER USER 'impala';

@@ -0,0 +1,133 @@
-- ALTER TABLE [old_db_name.]old_table_name RENAME TO [new_db_name.]new_table_name
ALTER TABLE old_table_name RENAME TO new_table_name;
ALTER TABLE old_db_name.old_table_name RENAME TO new_table_name;
ALTER TABLE old_table_name RENAME TO new_db_name.new_table_name;
ALTER TABLE old_db_name.old_table_name RENAME TO new_db_name.new_table_name;
-- ALTER TABLE name ADD [IF NOT EXISTS] COLUMNS (col_spec[, col_spec ...])
ALTER TABLE my_table ADD COLUMNS (id INT COMMENT 'Identifier', name STRING COMMENT 'Name');
ALTER TABLE my_table ADD IF NOT EXISTS COLUMNS (age INT COMMENT 'Age');
ALTER TABLE my_table ADD COLUMNS (email STRING COMMENT 'Email');
-- ALTER TABLE name REPLACE COLUMNS (col_spec[, col_spec ...])
ALTER TABLE my_table REPLACE COLUMNS (age INT COMMENT 'Updated Age');
ALTER TABLE my_table REPLACE COLUMNS (email STRING COMMENT 'Updated Email', address STRING COMMENT 'Updated Address');
-- ALTER TABLE name ADD COLUMN [IF NOT EXISTS] col_spec
ALTER TABLE my_table ADD COLUMN age INT COMMENT 'Updated Age';
ALTER TABLE my_table ADD COLUMN IF NOT EXISTS age INT COMMENT 'Updated Age';
-- ALTER TABLE name DROP [COLUMN] column_name
ALTER TABLE my_table DROP COLUMN column_name;
ALTER TABLE my_table DROP column_name;
-- ALTER TABLE name CHANGE column_name col_spec
ALTER TABLE my_table CHANGE COLUMN age INT COMMENT 'Updated Age';
-- ALTER TABLE name SET OWNER USER user_name
ALTER TABLE my_table SET OWNER USER user_name;
/* Kudu tables only.
ALTER TABLE name ALTER [COLUMN] column_name
{ SET kudu_storage_attr attr_value
| DROP DEFAULT } */
ALTER TABLE my_table ALTER COLUMN size SET BLOCK_SIZE 1024 ;
ALTER TABLE my_table ALTER COLUMN id SET DEFAULT 0 ;
ALTER TABLE my_table ALTER COLUMN name SET ENCODING 'plain' ;
ALTER TABLE my_table ALTER COLUMN address SET COMPRESSION 'lz4';
ALTER TABLE my_table ALTER COLUMN id DROP DEFAULT;
/* Non-Kudu tables only.
ALTER TABLE name ALTER [COLUMN] column_name
SET COMMENT 'comment_text' */
ALTER TABLE my_table ALTER age SET COMMENT '年龄';
ALTER TABLE my_table ALTER COLUMN age SET COMMENT '年龄';
/* ALTER TABLE name ADD [IF NOT EXISTS] PARTITION (partition_spec)
[location_spec]
[cache_spec] */
ALTER TABLE my_table ADD PARTITION (date = '2023-01-01');
ALTER TABLE my_table ADD IF NOT EXISTS PARTITION (date = '2023-01-01');
ALTER TABLE my_table ADD PARTITION (country = 'USA') LOCATION '/path/to/partition';
ALTER TABLE my_table ADD IF NOT EXISTS PARTITION (category = 'Books') LOCATION '/path/to/books_partition';
ALTER TABLE my_table ADD PARTITION (date = '2023-01-01') CACHED IN 'pool_name' WITH REPLICATION = 3;
ALTER TABLE my_table ADD IF NOT EXISTS PARTITION (date = '2023-01-01') CACHED IN 'pool_name' WITH REPLICATION = 3;
ALTER TABLE my_table ADD PARTITION (country = 'USA') UNCACHED;
ALTER TABLE my_table ADD IF NOT EXISTS PARTITION (country = 'USA') UNCACHED;
-- ALTER TABLE name ADD [IF NOT EXISTS] RANGE PARTITION kudu_partition_spec
ALTER TABLE my_table ADD RANGE PARTITION VALUE = 100;
ALTER TABLE my_table ADD IF NOT EXISTS RANGE PARTITION 10 < VALUES < 20;
/* ALTER TABLE name DROP [IF EXISTS] PARTITION (partition_spec)
[PURGE] */
ALTER TABLE my_table DROP PARTITION (date = '2023-01-01');
ALTER TABLE my_table DROP IF EXISTS PARTITION (name = 'impala');
-- ALTER TABLE name DROP [IF EXISTS] RANGE PARTITION kudu_partition_spec
ALTER TABLE my_table DROP RANGE PARTITION VALUE != 100;
ALTER TABLE my_table DROP IF EXISTS RANGE PARTITION 10 >= VALUES < 20;
-- ALTER TABLE name RECOVER PARTITIONS
ALTER TABLE my_table RECOVER PARTITIONS;
/* ALTER TABLE name [PARTITION (partition_spec)]
SET { FILEFORMAT file_format
| ROW FORMAT row_format
| LOCATION 'hdfs_path_of_directory'
| TBLPROPERTIES (table_properties)
| SERDEPROPERTIES (serde_properties) } */
ALTER TABLE my_table SET FILEFORMAT parquet ;
ALTER TABLE my_table PARTITION (date = '2023-01-01') SET FILEFORMAT orc;
ALTER TABLE my_table SET ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n';
ALTER TABLE my_table SET LOCATION '/path/to/new_location';
ALTER TABLE my_table PARTITION (region = 'Europe') SET LOCATION '/path/to/europe_partition';
ALTER TABLE my_table SET TBLPROPERTIES ('comment' = 'This is a table');
ALTER TABLE my_table PARTITION (category = 'Books') SET TBLPROPERTIES ('compression' = 'gzip');
ALTER TABLE my_table SET SERDEPROPERTIES ('field.delim' = '|');
ALTER TABLE my_table PARTITION (date = '2023-01-01') SET SERDEPROPERTIES ('serialization.null.format' = '\N');
/* ALTER TABLE name SET COLUMN STATS colname
('statsKey'='val', ...) */
alter table t1 set column stats x ('numDVs'='2','numNulls'='0');
alter table t1 set column stats s ('numdvs'='3','maxsize'='4');
-- ALTER TABLE name [PARTITION (partition_spec)] SET { CACHED IN 'pool_name' [WITH REPLICATION = integer] | UNCACHED }
ALTER TABLE my_table SET CACHED IN 'pool_name';
ALTER TABLE my_table PARTITION (date = '2023-01-01') SET CACHED IN 'pool_name';
ALTER TABLE my_table SET CACHED IN 'pool_name' WITH REPLICATION = 3 ;
ALTER TABLE my_table PARTITION (date = '2023-01-01') SET CACHED IN 'pool_name' WITH REPLICATION = 2;
ALTER TABLE my_table SET UNCACHED;
ALTER TABLE my_table PARTITION (date = '2023-01-01') SET UNCACHED;
-- example
alter table historical_data drop partition (year < 1995);
alter table historical_data drop partition (year < 1995, last_name like 'A%');
alter table historical_data drop partition (year = 1996 and month between 1 and 6);
alter table fast_growing_data partition (year = 2016, month in (10,11,12)) set fileformat parquet;
alter table d1.mobile rename to mobile;
alter table d2.mobile rename to d3.mobile;
alter table p1 add partition (month=1, day=1);
alter table p1 add partition (month=1, day=2);
alter table p1 add partition (month=2, day=1);
alter table p1 add partition (month=2, day=2);
alter table p1 partition (month=1, day=1) set location '/usr/external_data/new_years_day';
alter table t1 add partition (yy = 2016, mm = 2);
alter table t1 add partition (yy = 2016, mm = 3);
alter table t1 recover partitions;
alter table analysis_data set tblproperties('numRows'='1001000000', 'STATS_GENERATED_VIA_STATS_TASK'='true');
alter table partitioned_data partition(year=2009, month=4) set tblproperties ('numRows'='30000', 'STATS_GENERATED_VIA_STATS_TASK'='true');
alter table partitioned_data set tblproperties ('numRows'='1030000', 'STATS_GENERATED_VIA_STATS_TASK'='true');
alter table t1 set column stats x ('numDVs'='2','numNulls'='0');
alter table t1 set column stats s ('numdvs'='3','maxsize'='4');
alter table optional_columns drop column a2;
alter table optional_columns drop column a1;
alter table p1 partition (month=2, day=2) set fileformat parquet;
alter table partition_t add partition (y=2000);
ALTER TABLE t1 ADD COLUMNS (y STRING ENCODING prefix_encoding);
ALTER TABLE t1 ADD COLUMNS (z INT DEFAULT 10);
ALTER TABLE t1 ADD COLUMNS (a STRING NOT NULL DEFAULT '', t TIMESTAMP COMPRESSION default_compression);
alter table kt alter column s set encoding prefix_encoding;
alter table kt alter x set block_size 2048;
alter table kt alter column t set compression zlib;

@@ -0,0 +1,43 @@
-- ALTER VIEW [database_name.]view_name [(column_name [COMMENT 'column_comment'][, ...])] AS select_statement;
ALTER VIEW my_view
AS SELECT column_name1, column_name2, column_name3 FROM table_name;
ALTER VIEW my_view
(column_name1 COMMENT 'Column 1', column_name2, column_name3 COMMENT 'Column 3')
AS SELECT column_name1, column_name2, column_name3 FROM table_name;
ALTER VIEW db1.my_view
(column_name1 COMMENT 'Column 1', column_name2, column_name3 COMMENT 'Column 3')
AS SELECT column_name1, column_name2, column_name3 FROM table_name;
ALTER VIEW my_view
(col1 COMMENT 'Description for Column 1', col2, col3 COMMENT 'Description for Column 3')
AS SELECT col1, col2, col3 FROM my_table;
-- ALTER VIEW [database_name.]view_name RENAME TO [database_name.]view_name;
ALTER VIEW db1.v1 RENAME TO db2.v2;
ALTER VIEW v1 RENAME TO db2.v2;
ALTER VIEW v1 RENAME TO v2;
ALTER VIEW db1.v1 RENAME TO v2;
-- ALTER VIEW [database_name.]view_name SET OWNER USER user_name;
ALTER VIEW my_view SET OWNER USER 'name';
ALTER VIEW db.my_view SET OWNER USER 'name';
-- ALTER VIEW [database_name.]view_name SET TBLPROPERTIES ('name' = 'value'[, 'name' = 'value' ...]);
ALTER VIEW v1 SET TBLPROPERTIES ('tblp1' = '1', 'tblp2' = '2', 'tblp3' = '3');
ALTER VIEW db.v1 SET TBLPROPERTIES ('tblp1' = '1', 'tblp2' = '2', 'tblp3' = '3');
ALTER VIEW db.v1 SET TBLPROPERTIES ('tblp1' = '1');
-- ALTER VIEW [database_name.]view_name UNSET TBLPROPERTIES ('name'[, ...]);
ALTER VIEW v1 UNSET TBLPROPERTIES ('tblp1', 'tblp2');
ALTER VIEW db.v1 UNSET TBLPROPERTIES ('tblp1');
ALTER VIEW db.v1 UNSET TBLPROPERTIES ('tblp1', 'tblp2', 'tblp3');
-- example
ALTER VIEW v1 AS SELECT x, UPPER(s) s FROM t2;
ALTER VIEW v1 (c1, c2) AS SELECT x, UPPER(s) s FROM t2;
ALTER VIEW v7 (c1 COMMENT 'Comment for c1', c2) AS SELECT t1.c1, t1.c2 FROM t1;
ALTER VIEW db1.v1 RENAME TO db2.v2;
ALTER VIEW db1.v1 RENAME TO db1.v2;
ALTER VIEW db1.v1 RENAME TO db2.v1;
ALTER VIEW v1 SET TBLPROPERTIES ('tblp1' = '1', 'tblp2' = '2');
ALTER VIEW v1 UNSET TBLPROPERTIES ('tblp1', 'tblp2');

@@ -0,0 +1,15 @@
-- COMMENT ON DATABASE db_name IS {'comment' | NULL}
COMMENT ON DATABASE my_database IS 'This is my database.';
COMMENT ON DATABASE my_database IS NULL;
-- COMMENT ON TABLE [db_name.]table_name IS {'comment' | NULL}
COMMENT ON TABLE my_database.my_table IS 'This is my table.';
COMMENT ON TABLE my_database.my_table IS NULL;
COMMENT ON TABLE my_table IS 'This is my table.';
COMMENT ON TABLE my_table IS NULL;
-- COMMENT ON COLUMN [db_name.]table_name.column_name IS {'comment' | NULL}
COMMENT ON COLUMN my_database.my_table.age IS 'save for age.';
COMMENT ON COLUMN my_database.my_table.age IS NULL;
COMMENT ON COLUMN my_table.age IS 'This is my table.';
COMMENT ON COLUMN my_table.age IS NULL;

@@ -0,0 +1,20 @@
-- COMPUTE STATS [db_name.]table_name [ ( column_list ) ] [TABLESAMPLE SYSTEM(percentage) [REPEATABLE(seed)]]
COMPUTE STATS my_table;
COMPUTE STATS my_table (column1, column2);
COMPUTE STATS my_table TABLESAMPLE SYSTEM(20);
COMPUTE STATS my_table (column1, column2) TABLESAMPLE SYSTEM(2) REPEATABLE(456);
COMPUTE STATS my_table TABLESAMPLE SYSTEM(2) REPEATABLE(456);
-- COMPUTE INCREMENTAL STATS [db_name.]table_name [PARTITION (partition_spec)]
COMPUTE INCREMENTAL STATS my_table;
COMPUTE INCREMENTAL STATS my_table PARTITION (date='2023-11-14');
-- example
compute stats t1;
compute incremental stats int_partitions partition (x < 100);
compute incremental stats int_partitions partition (x in (100, 150, 200));
compute incremental stats int_partitions partition (x between 100 and 175);
compute incremental stats int_partitions partition (x in (100, 150, 200) or x < 100);
compute incremental stats int_partitions partition (x != 150);
compute incremental stats item_partitioned;

@@ -0,0 +1,17 @@
-- CREATE (DATABASE|SCHEMA) [IF NOT EXISTS] database_name [COMMENT 'database_comment'] [LOCATION hdfs_path];
CREATE DATABASE my_db;
CREATE DATABASE IF NOT EXISTS my_db1;
CREATE DATABASE my_db COMMENT 'my first db';
CREATE DATABASE my_db LOCATION '/path/to/partition';
CREATE DATABASE my_db COMMENT 'my first db' LOCATION '/path/to/partition';
CREATE DATABASE IF NOT EXISTS my_db COMMENT 'my first db' LOCATION '/path/to/partition';
CREATE SCHEMA my_schema;
CREATE SCHEMA IF NOT EXISTS my_schema1;
CREATE SCHEMA my_schema COMMENT 'my first schema';
CREATE SCHEMA my_schema LOCATION '/path/to/partition';
CREATE SCHEMA my_schema COMMENT 'my first schema' LOCATION '/path/to/partition';
CREATE SCHEMA IF NOT EXISTS my_schema COMMENT 'my first schema' LOCATION '/path/to/partition';
-- example
create database first_db;

@@ -0,0 +1,83 @@
/* CREATE FUNCTION [IF NOT EXISTS] [db_name.]function_name([arg_type[, arg_type...]])
RETURNS return_type
LOCATION 'hdfs_path_to_dot_so'
SYMBOL='symbol_name' */
CREATE FUNCTION function_name(arg_type1, arg_type2)
RETURNS return_type
LOCATION 'hdfs_path_to_dot_so'
SYMBOL='symbol_name';
CREATE FUNCTION db_name.function_name(arg_type1, arg_type2)
RETURNS return_type
LOCATION 'hdfs_path_to_dot_so'
SYMBOL='symbol_name';
CREATE FUNCTION IF NOT EXISTS function_name(arg_type1, arg_type2)
RETURNS return_type
LOCATION 'hdfs_path_to_dot_so'
SYMBOL='symbol_name';
CREATE FUNCTION IF NOT EXISTS db_name.function_name(arg_type1)
RETURNS return_type
LOCATION 'hdfs_path_to_dot_so'
SYMBOL='symbol_name';
CREATE FUNCTION IF NOT EXISTS db_name.function_name(arg_type1, arg_type2, arg_type3)
RETURNS return_type
LOCATION 'hdfs_path_to_dot_so'
SYMBOL='symbol_name';
/* CREATE FUNCTION [IF NOT EXISTS] [db_name.]function_name
LOCATION 'hdfs_path_to_jar'
SYMBOL='class_name' */
CREATE FUNCTION function_name
LOCATION 'hdfs_path_to_dot_so'
SYMBOL='symbol_name';
CREATE FUNCTION db_name.function_name
LOCATION 'hdfs_path_to_dot_so'
SYMBOL='symbol_name';
CREATE FUNCTION IF NOT EXISTS function_name
LOCATION 'hdfs_path_to_dot_so'
SYMBOL='symbol_name';
CREATE FUNCTION IF NOT EXISTS db_name.function_name
LOCATION 'hdfs_path_to_dot_so'
SYMBOL='symbol_name';
/* CREATE [AGGREGATE] FUNCTION [IF NOT EXISTS] [db_name.]function_name([arg_type[, arg_type...]])
RETURNS return_type
[INTERMEDIATE type_spec]
LOCATION 'hdfs_path'
[INIT_FN='function']
UPDATE_FN='function'
MERGE_FN='function'
[PREPARE_FN='function']
[CLOSEFN='function']
[SERIALIZE_FN='function']
[FINALIZE_FN='function'] */
CREATE AGGREGATE FUNCTION function_name(arg_type1, arg_type2)
RETURNS return_type
LOCATION 'hdfs_path'
UPDATE_FN='update_function'
MERGE_FN='merge_function';
CREATE AGGREGATE FUNCTION db_name.function_name(arg_type1, arg_type2)
RETURNS return_type
LOCATION 'hdfs_path'
UPDATE_FN='update_function'
MERGE_FN='merge_function';
CREATE AGGREGATE FUNCTION IF NOT EXISTS function_name(arg_type1, arg_type2)
RETURNS return_type
LOCATION 'hdfs_path'
UPDATE_FN='update_function'
MERGE_FN='merge_function';
CREATE AGGREGATE FUNCTION function_name(arg_type1, arg_type2)
RETURNS return_type
INTERMEDIATE intermediate_type
LOCATION 'hdfs_path'
INIT_FN ='init_function'
UPDATE_FN='update_function'
MERGE_FN='merge_function'
PREPARE_FN = 'prepare_fn'
CLOSEFN = 'closefn'
SERIALIZE_FN = 'serialize_function'
FINALIZE_FN = 'finalize_function';
-- example
create function my_func location '/user/impala/udfs/udf-examples.jar'
symbol='org.apache.impala.TestUdf';

@@ -0,0 +1,2 @@
-- CREATE ROLE role_name
CREATE ROLE 'impala';

@@ -0,0 +1,181 @@
CREATE EXTERNAL TABLE external_parquet (c1 INT, c2 STRING, c3 TIMESTAMP)
STORED AS PARQUET LOCATION '/user/etl/destination';
create table census (name string, census_year int) partitioned by (year int);
CREATE TABLE census_data (last_name STRING, first_name STRING, state STRING, address STRING)
SORT BY (last_name, state)
STORED AS PARQUET
TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only');
CREATE TABLE sorted_census_data
SORT BY (last_name, state)
STORED AS PARQUET
AS SELECT last_name, first_name, state, address
FROM unsorted_census_data;
CREATE TABLE yy2 (s STRING, year INT);
CREATE TABLE kudu_no_partition_by_clause
(
id bigint PRIMARY KEY, s STRING, b BOOLEAN
)
STORED AS KUDU;
CREATE TABLE kudu_t1 (id BIGINT PRIMARY key, s STRING, b BOOLEAN)
PARTITION BY HASH (id) PARTITIONS 20 STORED AS KUDU;
CREATE TABLE kudu_t2 (id BIGINT, s STRING, b BOOLEAN, PRIMARY KEY (id,s))
PARTITION BY HASH (s) PARTITIONS 30 STORED AS KUDU;
CREATE TABLE kudu_t3 (id BIGINT, year INT, s STRING,
b BOOLEAN, PRIMARY KEY (id,year))
PARTITION BY HASH (id) PARTITIONS 20,
RANGE (year) (PARTITION 1980 <= VALUES < 1990,
PARTITION 1990 <= VALUES < 2000,
PARTITION VALUE = 2001,
PARTITION 2001 < VALUES < 2003)
STORED AS KUDU;
CREATE EXTERNAL TABLE external_t1 STORED AS KUDU
TBLPROPERTIES ('kudu.table_name'='kudu_tbl_created_via_api');
CREATE EXTERNAL TABLE myextkudutbl (
id int PRIMARY KEY,
name string)
PARTITION BY HASH PARTITIONS 8
STORED AS KUDU
TBLPROPERTIES ('external.table.purge'='true');
CREATE TABLE ctas_t1
PRIMARY KEY (id) PARTITION BY HASH (id) PARTITIONS 10
STORED AS KUDU
AS SELECT id, s FROM kudu_t1;
CREATE TABLE pk_multiple_columns
(
col1 BIGINT,
col2 STRING,
col3 BOOLEAN,
PRIMARY KEY (col1, col2)
) PARTITION BY HASH(col2) PARTITIONS 2 STORED AS KUDU;
CREATE TABLE user.inline_pk_rewritten (
id BIGINT NOT NULL ENCODING AUTO_ENCODING COMPRESSION DEFAULT_COMPRESSION,
s STRING NULL ENCODING AUTO_ENCODING COMPRESSION DEFAULT_COMPRESSION,
PRIMARY KEY (id)
)
PARTITION BY HASH (id) PARTITIONS 2
STORED AS KUDU
TBLPROPERTIES ('kudu.master_addresses'='host.example.com');
CREATE TABLE default_vals
(
id BIGINT PRIMARY KEY,
name STRING NOT NULL DEFAULT 'unknown',
address STRING DEFAULT upper('no fixed address'),
age INT DEFAULT -1,
earthling BOOLEAN DEFAULT TRUE,
planet_of_origin STRING DEFAULT 'Earth',
optional_col STRING DEFAULT NULL
) PARTITION BY HASH(id) PARTITIONS 2 STORED AS KUDU;
CREATE TABLE various_encodings
(
id BIGINT PRIMARY KEY,
c1 BIGINT ENCODING PLAIN_ENCODING,
c2 BIGINT ENCODING AUTO_ENCODING,
c3 TINYINT ENCODING BIT_SHUFFLE,
c4 DOUBLE ENCODING BIT_SHUFFLE,
c5 BOOLEAN ENCODING RLE,
c6 STRING ENCODING DICT_ENCODING,
c7 STRING ENCODING PREFIX_ENCODING
) PARTITION BY HASH(id) PARTITIONS 2 STORED AS KUDU;
CREATE TABLE blog_posts
(
user_id STRING ENCODING DICT_ENCODING,
post_id BIGINT ENCODING BIT_SHUFFLE,
subject STRING ENCODING PLAIN_ENCODING,
body STRING COMPRESSION LZ4,
spanish_translation STRING COMPRESSION SNAPPY,
esperanto_translation STRING COMPRESSION ZLIB,
PRIMARY KEY (user_id, post_id)
) PARTITION BY HASH(user_id, post_id) PARTITIONS 2 STORED AS KUDU;
CREATE TABLE various_encodings
(
id BIGINT PRIMARY KEY,
c1 BIGINT ENCODING PLAIN_ENCODING,
c2 BIGINT ENCODING AUTO_ENCODING,
c3 TINYINT ENCODING BIT_SHUFFLE,
c4 DOUBLE ENCODING BIT_SHUFFLE,
c5 BOOLEAN ENCODING RLE,
c6 STRING ENCODING DICT_ENCODING,
c7 STRING ENCODING PREFIX_ENCODING
) PARTITION BY HASH(id) PARTITIONS 2 STORED AS KUDU;
create table million_rows_one_range (id string primary key, s string)
partition by hash(id) partitions 50,
range (partition 'a' <= values < '{')
stored as kudu;
create table million_rows_two_ranges (id string primary key, s string)
partition by hash(id) partitions 50,
range (partition 'a' <= values < '{', partition 'A' <= values < '[', partition value = '00000')
stored as kudu;
create table million_rows_three_ranges (id string primary key, s string)
partition by hash (school) partitions 10,
range (letter_grade) (partition value = 'A', partition value = 'B',
partition value = 'C', partition value = 'D', partition value = 'F')
stored as kudu;
create table hash_t5 (x bigint, y bigint, s string, primary key (x,y))
partition by hash (x) partitions 10, hash (y) partitions 20, hash (y) partitions 20
stored as kudu;
create table range_t1 (x bigint, s string, s2 string, primary key (x, s))
partition by range (x)
(
partition 0 <= values <= 49, partition 50 <= values <= 100,
partition value < 0, partition 100 < values <= 500,
partition values < 0, partition 100 < values
)
stored as kudu;
create table combined_t1 (x bigint, s string, s2 string, primary key (x, s))
partition by hash (x) partitions 10, range (x)
(
partition 0 <= values <= 49, partition 50 <= values <= 100
)
stored as kudu;
CREATE TABLE t5 AS SELECT upper(y) AS s, x+1 AS a, 'Entirely new column' AS n FROM t1;
CREATE TABLE parquet_version_of_t1 STORED AS PARQUET AS SELECT * FROM t1;
create table partitions_yes partitioned by (year, month)
as select s, year, month from partitions_no;
CREATE TABLE my_first_table (id BIGINT, name STRING, PRIMARY KEY (id)) PARTITION BY HASH PARTITIONS 16 STORED AS KUDU TBLPROPERTIES ('kudu.num_tablet_replicas' = '1');
CREATE TABLE parquet_table (id INT) STORED AS PARQUET;
CREATE TABLE games3 (id BIGINT, play ARRAY < MAP < STRING, BIGINT > >) STORED AS PARQUET;
CREATE TABLE games3 (id BIGINT, play ARRAY < MAP < STRING, BIGINT > >) STORED AS PARQUET;
create table unsorted (x bigint);
CREATE TABLE pk(col1 INT, col2 STRING, PRIMARY KEY(col1, col2));
CREATE TABLE fk(id INT, col1 INT, col2 STRING, PRIMARY KEY(id),
FOREIGN KEY(col1, col2) REFERENCES pk(col1, col2));
CREATE TABLE pk(id INT, PRIMARY KEY(id) DISABLE, NOVALIDATE, RELY);
CREATE TABLE fk(id INT, col1 INT, col2 STRING, PRIMARY KEY(id),
FOREIGN KEY(col1, col2) REFERENCES pk(col1, col2));

@@ -0,0 +1,22 @@
/* CREATE VIEW [IF NOT EXISTS] view_name
[(column_name [COMMENT 'column_comment'][, ...])]
[COMMENT 'view_comment']
[TBLPROPERTIES ('name' = 'value'[, ...])]
AS select_statement */
CREATE VIEW my_view AS SELECT * FROM my_table;
CREATE VIEW IF NOT EXISTS my_view AS SELECT * FROM my_table;
CREATE VIEW IF NOT EXISTS my_view (age COMMENT 'this is number col') AS SELECT * FROM my_table;
CREATE VIEW IF NOT EXISTS my_view (age COMMENT 'this is number col') COMMENT 'this is test view' AS SELECT * FROM my_table;
CREATE VIEW my_view (age COMMENT 'this is number col') COMMENT 'this is test view' TBLPROPERTIES ('tblp1' = '1', 'aaa' = '2') AS SELECT * FROM my_table;
CREATE VIEW my_view COMMENT 'this is test view' TBLPROPERTIES ('tblp1' = '1', 'aaa' = '2') AS SELECT * FROM my_table;
CREATE VIEW my_view (age COMMENT 'this is number col', age1 COMMENT 'this is number col') TBLPROPERTIES ('tblp1' = '1') AS SELECT * FROM my_table;
-- example
CREATE VIEW v1 AS SELECT * FROM t1;
CREATE VIEW v2 AS SELECT c1, c3, c7 FROM t1;
CREATE VIEW v3 AS SELECT DISTINCT c1, c3, c7 FROM t1 WHERE c1 IS NOT NULL AND c5 > 0;
CREATE VIEW v4 AS SELECT c4 AS last_name, c6 AS address, c2 AS birth_date FROM t1;
CREATE VIEW v5 AS SELECT c1, CAST(c3 AS STRING) c3, CONCAT(c4,c5) c5, TRIM(c6) c6, "Constant" c8 FROM t1;
CREATE VIEW v6 AS SELECT t1.c1, t2.c2 FROM t1 JOIN t2 ON t1.id = t2.id;
CREATE VIEW v7 (c1 COMMENT 'Comment for c1', c2) COMMENT 'Comment for v7' AS SELECT t1.c1, t1.c2 FROM t1;
CREATE VIEW v7 (c1 , c2) TBLPROPERTIES ('tblp1' = '1') AS SELECT t1.c1, t1.c2 FROM t1;

@@ -0,0 +1,42 @@
-- DELETE [FROM] [database_name.]table_name [ WHERE where_conditions ]
DELETE FROM my_database.my_table;
DELETE my_database.my_table;
DELETE my_table;
DELETE FROM my_table;
DELETE FROM my_table WHERE col1 LIKE 'prefix%';
DELETE FROM my_table WHERE col1 IN (SELECT col2 FROM other_table WHERE col3 = 'value1');
-- example
DELETE FROM my_table;
DELETE my_table;
DELETE FROM my_table WHERE c1 = 100;
DELETE FROM my_table WHERE
(c1 > c2 OR c3 IN ('hello','world')) AND c4 IS NOT NULL;
DELETE FROM t1 WHERE
(c1 IN (1,2,3) AND c2 > c3) OR c4 IS NOT NULL;
DELETE FROM time_series WHERE
year = 2016 AND month IN (11,12) AND day > 15;
DELETE FROM t1 WHERE
c5 IN (SELECT DISTINCT other_col FROM other_my_table);
DELETE FROM my_table WHERE 1 = 0;
DELETE t1 FROM t1 JOIN t2 ON t1.x = t2.x;
DELETE t1 FROM t1 JOIN t2 ON t1.x = t2.x
WHERE t1.y = FALSE and t2.z > 100;
DELETE t1 FROM my_table t1 JOIN other_table t2 ON t1.x = t2.x;
DELETE t2 FROM non_kudu_non_ice_table t1 JOIN kudu_or_ice_table t2 ON t1.x = t2.x;

@@ -0,0 +1,17 @@
-- DESCRIBE [DATABASE] [FORMATTED|EXTENDED] object_name
DESCRIBE my_table;
DESCRIBE DATABASE my_table;
DESCRIBE DATABASE FORMATTED my_table;
DESCRIBE DATABASE EXTENDED my_table;
DESCRIBE FORMATTED my_table;
DESCRIBE EXTENDED my_table;
DESCRIBE FORMATTED my_table;
DESCRIBE EXTENDED my_db_name.my_table.my_col;
DESCRIBE FORMATTED my_db;
-- example
describe my_table;
describe my_database.my_table;
describe formatted my_table;
describe customer.c_orders.item;
describe customer.c_orders.item.o_lineitems;

@@ -0,0 +1,13 @@
-- DROP (DATABASE|SCHEMA) [IF EXISTS] database_name [RESTRICT | CASCADE];
DROP DATABASE my_database;
DROP DATABASE IF EXISTS my_database;
DROP DATABASE my_database RESTRICT;
DROP DATABASE IF EXISTS my_database CASCADE;
DROP SCHEMA my_database;
DROP SCHEMA IF EXISTS my_database;
DROP SCHEMA my_database RESTRICT;
DROP SCHEMA IF EXISTS my_database CASCADE;
-- example
drop database temp;
drop database temp cascade;

@@ -0,0 +1,16 @@
-- DROP [AGGREGATE] FUNCTION [IF EXISTS] [db_name.]function_name(type[, type...])
DROP FUNCTION my_function(integer, text);
DROP FUNCTION IF EXISTS my_function(integer, boolean);
DROP FUNCTION my_database.my_function(integer);
DROP AGGREGATE FUNCTION my_function(integer, text);
DROP AGGREGATE FUNCTION IF EXISTS my_function(integer, text, binary);
DROP AGGREGATE FUNCTION my_database.my_function(integer);
-- DROP FUNCTION [IF EXISTS] [db_name.]function_name
DROP FUNCTION my_db.my_func;
DROP FUNCTION IF EXISTS my_func;
DROP FUNCTION my_func;
DROP FUNCTION IF EXISTS my_db.my_func;
-- example
drop function my_func;

@@ -0,0 +1,2 @@
-- DROP ROLE role_name
DROP ROLE 'impala';

@@ -0,0 +1,12 @@
-- DROP STATS [database_name.]table_name
DROP STATS my_table;
DROP STATS my_db.my_table;
-- DROP INCREMENTAL STATS [database_name.]table_name PARTITION (partition_spec)
DROP INCREMENTAL STATS my_table PARTITION (date = "1111-11-11");
DROP INCREMENTAL STATS my_db.my_table PARTITION (year < 1995 and last_name like 'A%');
-- example
drop incremental stats item_partitioned partition (i_category='Sports');
drop incremental stats item_partitioned partition (i_category='Electronics');
drop stats item_partitioned;

@@ -0,0 +1,10 @@
-- DROP TABLE [IF EXISTS] [db_name.]table_name [PURGE]
DROP TABLE my_table;
DROP TABLE my_db.my_table;
DROP TABLE IF EXISTS my_table;
DROP TABLE IF EXISTS my_db.my_table;
DROP TABLE IF EXISTS my_db.my_table PURGE;
-- example
drop table unimportant;
drop table my_db.trivial;

@@ -0,0 +1,9 @@
-- DROP VIEW [IF EXISTS] [db_name.]view_name
DROP VIEW my_view;
DROP VIEW my_db.my_view;
DROP VIEW IF EXISTS my_view;
DROP VIEW IF EXISTS my_db.my_view;
-- example
drop view unimportant;
drop view my_db.trivial;

@@ -0,0 +1,11 @@
-- EXPLAIN { select_query | ctas_stmt | insert_stmt }
EXPLAIN SELECT * FROM my_table;
EXPLAIN CREATE TABLE t1 (x INT, y STRING);
EXPLAIN INSERT INTO t1 VALUES (1, 'one'), (2, 'two'), (3, 'three');
-- example
EXPLAIN SELECT COUNT(*) FROM customer_address;
EXPLAIN SELECT * FROM functional_kudu.alltypestiny WHERE bigint_col < 1000 / 100;
EXPLAIN SELECT x,y from kudu_table WHERE
x = 1 AND y NOT IN (2,3) AND z = 1
AND a IS NOT NULL AND b > 0 AND length(s) > 5;

@@ -0,0 +1,35 @@
-- GRANT ROLE role_name TO GROUP group_name
GRANT ROLE role_name TO GROUP group_name;
-- GRANT privilege ON object_type object_name TO USER user_name
GRANT ALL ON SERVER my_server TO USER 'impala';
GRANT ALTER ON DATABASE my_database TO USER 'impala';
GRANT CREATE ON TABLE my_table TO USER 'impala';
GRANT DROP ON URI my_uri TO USER 'impala';
GRANT INSERT ON URI my_uri TO USER 'impala';
GRANT REFRESH ON TABLE my_table TO USER 'impala';
GRANT SELECT ON TABLE my_table TO USER 'impala';
GRANT SELECT(column_name) ON TABLE my_table TO USER 'impala';
-- GRANT privilege ON object_type object_name TO GROUP group_name
GRANT ALL ON SERVER my_server TO GROUP 'impala_group';
GRANT ALTER ON DATABASE my_database TO GROUP 'impala_group';
GRANT CREATE ON TABLE my_table TO GROUP 'impala_group';
GRANT DROP ON URI my_uri TO GROUP 'impala_group';
GRANT INSERT ON URI my_uri TO GROUP 'impala_group';
GRANT REFRESH ON TABLE my_table TO GROUP 'impala_group';
GRANT SELECT ON TABLE my_table TO GROUP 'impala_group';
GRANT SELECT(column_name) ON TABLE my_table TO GROUP 'impala_group';
-- GRANT privilege ON object_type object_name TO ROLE role_name
GRANT ALL ON SERVER my_server TO ROLE 'impala_role';
GRANT ALTER ON DATABASE my_database TO ROLE 'impala_role';
GRANT CREATE ON TABLE my_table TO ROLE 'impala_role';
GRANT DROP ON URI my_uri TO ROLE 'impala_role';
GRANT INSERT ON URI my_uri TO ROLE 'impala_role';
GRANT REFRESH ON TABLE my_table TO ROLE 'impala_role';
GRANT SELECT ON TABLE my_table TO ROLE 'impala_role';
GRANT SELECT(column_name) ON TABLE my_table TO ROLE 'impala_role';
-- example
GRANT ALL ON SERVER TO ROLE foo_role;

@@ -0,0 +1,199 @@
/* [with_clause]
INSERT [hint_clause] { INTO | OVERWRITE } [TABLE] table_name
[(column_list)]
[ PARTITION (partition_clause)]
{
[hint_clause] select_statement
| VALUES (value [, value ...]) [, (value [, value ...]) ...]
} */
-- INSERT [hint_clause] { INTO | OVERWRITE } table_name [hint_clause] select_statement
INSERT INTO my_table
SELECT
*
from
my_table1;
INSERT OVERWRITE my_table
SELECT
*
from
my_table1;
INSERT
INTO target_table
SELECT
val1,
val2,
val3
FROM
dual
WHERE
NOT EXISTS (
SELECT
1
FROM
target_table
WHERE
col1 = val1
);
-- INSERT { INTO | OVERWRITE } table_name [(column_list)] [ PARTITION (partition_clause)] select_statement
INSERT INTO
target_table (col1, col2, col3) PARTITION (year = 2016, month IN (10, 11, 12))
SELECT
*
FROM
dual;
INSERT OVERWRITE target_table (col1, col2, col3) PARTITION (year BETWEEN 2016 AND 2030, month IN (10, 11, 12))
SELECT
*
FROM
dual;
-- INSERT [hint_clause] { INTO | OVERWRITE } table_name [hint_clause] VALUES (value [, value ...]) [, (value [, value ...]) ...]
INSERT
OVERWRITE target_table (col1, col2, col3) PARTITION (year BETWEEN 2016 AND 2030, month IN (10, 11, 12))
VALUES
(col1, 'a'),
(col2, 'b'),
(col3, 'xyzzy');
WITH
cte AS (
SELECT
col1,
col2
FROM
source_table
WHERE
condition
)
INSERT INTO
target_table (col1, col2)
SELECT
col1,
col2
FROM
cte;
-- example
insert into
table text_table
select
*
from
default.tab1;
insert overwrite table parquet_table
select
*
from
default.tab1
limit
3;
insert into
val_test_1
values
(100, 99.9 / 10, 'abc', true, now ());
insert overwrite val_test_2
values
(1, 'a'),
(2, 'b'),
(-1, 'xyzzy');
insert into
t2
select
*
from
t1;
insert into
t2
select
c1,
c2
from
t1;
insert into
t2 (c1)
select
c1
from
t1;
insert into
t2 (c2, c1)
select
c1,
c2
from
t1;
insert into
t2 (y)
select
c1
from
t1;
INSERT INTO
t1 PARTITION (x = 10, y = 'a')
SELECT
c1
FROM
some_other_table;
INSERT INTO
t1 PARTITION (x, y)
VALUES
(1, 2, ' c ');
INSERT INTO
t1 (w) PARTITION (x, y)
VALUES
(1, 2, ' c ');
INSERT INTO
t1 PARTITION (x, y = 'c')
VALUES
(1, 2);
INSERT INTO
t1 PARTITION (x = 20, y)
SELECT
*
FROM
source;
with
t1 as (
select
1
),
t2 as (
select
2
)
insert into
tab
select
*
from
t1
union all
select
*
from
t2;
insert into
sample_demo_partitions partition (n = 3)
select
*
from
sample_demo;

@@ -0,0 +1,6 @@
-- INVALIDATE METADATA [[db_name.]table_name]
INVALIDATE METADATA new_table_from_hive;
INVALIDATE METADATA new_db_from_hive.new_table_from_hive;
-- example
INVALIDATE METADATA new_db_from_hive.new_table_from_hive;

@@ -0,0 +1,9 @@
/* LOAD DATA INPATH 'hdfs_file_or_directory_path' [OVERWRITE] INTO TABLE tablename
[PARTITION (partcol1=val1, partcol2=val2 ...)] */
LOAD DATA INPATH 'hdfs_file_or_directory_path' INTO TABLE my_table;
LOAD DATA INPATH 'hdfs_file_or_directory_path' OVERWRITE INTO TABLE my_table;
LOAD DATA INPATH 'hdfs_file_or_directory_path' INTO TABLE my_table PARTITION (country = 'USA', age = 18);
LOAD DATA INPATH 'hdfs_file_or_directory_path' OVERWRITE INTO TABLE my_table PARTITION (country = 'USA', age = 18);
-- example
load data inpath '/user/doc_demo/thousand_strings.txt' into table t1;

@@ -0,0 +1,9 @@
-- REFRESH [db_name.]table_name [PARTITION (key_col1=val1 [, key_col2=val2...])]
REFRESH my_table;
REFRESH my_db.my_table;
REFRESH my_db.my_table PARTITION (age BETWEEN 100 AND 200, key_col2=val2, date = "1110-10-10");
REFRESH table_name PARTITION (key_col1=val1, key_col2=val2, date = "1110-10-10");
-- example
refresh p2 partition (y=0, z=3);
refresh p2 partition (y=0, z=-1);

@@ -0,0 +1,2 @@
-- REFRESH AUTHORIZATION
REFRESH AUTHORIZATION;

@@ -0,0 +1,2 @@
--REFRESH FUNCTIONS db_name
REFRESH FUNCTIONS my_db;

@@ -0,0 +1,32 @@
-- REVOKE ROLE role_name FROM GROUP group_name
REVOKE ROLE my_role FROM GROUP my_group;
-- REVOKE privilege ON object_type object_name FROM USER user_name
REVOKE ALL ON SERVER my_server FROM USER 'impala';
REVOKE ALTER ON DATABASE my_database FROM USER 'impala';
REVOKE CREATE ON TABLE my_table FROM USER 'impala';
REVOKE DROP ON URI my_uri FROM USER 'impala';
REVOKE INSERT ON URI my_uri FROM USER 'impala';
REVOKE REFRESH ON TABLE my_table FROM USER 'impala';
REVOKE SELECT ON TABLE my_table FROM USER 'impala';
REVOKE SELECT(column_name) ON TABLE my_table FROM USER 'impala';
-- REVOKE privilege ON object_type object_name FROM GROUP group_name
REVOKE ALL ON SERVER my_server FROM GROUP 'impala_group';
REVOKE ALTER ON DATABASE my_database FROM GROUP 'impala_group';
REVOKE CREATE ON TABLE my_table FROM GROUP 'impala_group';
REVOKE DROP ON URI my_uri FROM GROUP 'impala_group';
REVOKE INSERT ON URI my_uri FROM GROUP 'impala_group';
REVOKE REFRESH ON TABLE my_table FROM GROUP 'impala_group';
REVOKE SELECT ON TABLE my_table FROM GROUP 'impala';
REVOKE SELECT(column_name) ON TABLE my_table FROM GROUP 'impala_group';
-- REVOKE [GRANT OPTION FOR] privilege ON object_type object_name FROM [ROLE] role_name
REVOKE ALL ON SERVER my_server FROM 'impala_role';
REVOKE GRANT OPTION FOR ALL ON SERVER my_server FROM ROLE 'impala_role';
REVOKE SELECT ON DATABASE my_table FROM ROLE 'impala_role';
REVOKE GRANT OPTION FOR SELECT(column_name) ON TABLE my_table FROM 'impala_role';
-- example
REVOKE GRANT OPTION FOR ALL ON SERVER FROM ROLE foo_role;
REVOKE ALL ON SERVER FROM ROLE foo_role;

@@ -0,0 +1,191 @@
-- example
-- JOINS
SELECT * from a;
SELECT t1.c1, t2.c2 FROM t1 JOIN t2
ON t1.id = t2.id and t1.type_flag = t2.type_flag
WHERE t1.c1 > 100;
SELECT t1.c1, t2.c2 FROM t1 JOIN t2
USING (id, type_flag)
WHERE t1.c1 > 100;
SELECT t1.c1, t2.c2 FROM t1, t2
WHERE
t1.id = t2.id AND t1.type_flag = t2.type_flag
AND t1.c1 > 100;
SELECT lhs.id, rhs.parent, lhs.c1, rhs.c2 FROM tree_data lhs, tree_data rhs WHERE lhs.id = rhs.parent;
SELECT t1.id, c1, c2 FROM t1, t2 WHERE t1.id = t2.id;
SELECT t1.id, c1, c2 FROM t1 JOIN t2 ON t1.id = t2.id;
SELECT t1.id, c1, c2 FROM t1 INNER JOIN t2 ON t1.id = t2.id;
SELECT * FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.id;
SELECT * FROM t1 RIGHT OUTER JOIN t2 ON t1.id = t2.id;
SELECT * FROM t1 FULL OUTER JOIN t2 ON t1.id = t2.id;
SELECT * FROM t1 CROSS JOIN t2 WHERE t1.total > t2.maximum_price;
SELECT * FROM t1 LEFT OUTER JOIN t2 ON t1.int_col < t2.int_col;
SELECT t1.c1, t1.c2, t1.c2 FROM t1 LEFT SEMI JOIN t2 ON t1.id = t2.id;
select t1.c1 as first_id, t2.c2 as second_id from
t1 join t2 on first_id = second_id;
select fact.custno, dimension.custno from
customer_data as fact join customer_address as dimension
using (custno);
-- ORDER BY
SELECT id FROM games ORDER BY score DESC;
SELECT id, item FROM games, games.score
WHERE item > 1000000
ORDER BY id, item desc;
SELECT id, info.key1 AS k, info.value1 AS v from games3, games3.play AS plays, games3.item AS info
WHERE info.KEY1 = 'score' AND info.VALUE1 > 1000000
ORDER BY id, info.value1 desc;
SELECT user_id AS "Top 10 Visitors", SUM(page_views) FROM web_stats
GROUP BY page_views, user_id
ORDER BY SUM(page_views) DESC LIMIT 10;
SELECT page_title AS "Page 3 of search results", page_url FROM search_content
WHERE LOWER(page_title) LIKE '%game%'
ORDER BY page_title LIMIT 10 OFFSET 20;
select x from numbers order by x desc nulls last;
-- GROUP BY
select
ss_item_sk as Item,
count(ss_item_sk) as Times_Purchased,
sum(ss_quantity) as Total_Quantity_Purchased
from store_sales
group by ss_item_sk
order by sum(ss_quantity) desc
limit 5;
select
ss_item_sk as Item,
count(ss_item_sk) as Times_Purchased,
sum(ss_quantity) as Total_Quantity_Purchased
from store_sales
group by ss_item_sk
having times_purchased >= 100
order by sum(ss_quantity)
limit 5;
select ss_wholesale_cost, avg(ss_quantity * ss_sales_price) as avg_revenue_per_sale
from sales
group by ss_wholesale_cost
order by avg_revenue_per_sale desc
limit 5;
select x as "Top 3" from numbers order by x desc limit 3;
SELECT X FROM T1 LIMIT LENGTH('HELLO WORLD');
SELECT x FROM t1 LIMIT cast(truncate(9.9) AS INT);
-- UNION
select * from (select x from few_ints union all select x from few_ints) as t1 order by x;
-- Subqueries
SELECT employee_name, employee_id FROM employees one WHERE
salary > (SELECT avg(salary) FROM employees two WHERE one.dept_id = two.dept_id);
SELECT avg(t1.x), max(t2.y) FROM
(SELECT id, cast(a AS DECIMAL(10,5)) AS x FROM raw_data WHERE a BETWEEN 0 AND 100) AS t1
JOIN
(SELECT id, length(s) AS y FROM raw_data WHERE s LIKE 'A%') AS t2
USING (id);
SELECT count(x) FROM t1 WHERE EXISTS(SELECT 1 FROM t2 WHERE t1.x = t2.y * 10);
SELECT x FROM t1 WHERE x IN (SELECT y FROM t2 WHERE state = 'CA');
SELECT x FROM t1 WHERE y = (SELECT max(z) FROM t2);
SELECT x FROM t1 WHERE y > (SELECT count(z) FROM t2);
SELECT * FROM t1 one WHERE id IN (SELECT parent FROM t1 two WHERE t1.parent = t2.id);
-- TABLESAMPLE
select distinct x from sample_demo tablesample system(50);
select distinct x from sample_demo
tablesample system(50) repeatable (12345);
select count(*) from sample_demo_partitions
tablesample system(50) where n = 1;
-- WITH
with t1 as (select 1), t2 as (select 2) insert into tab select * from t1 union all select * from t2;
-- DISTINCT
SELECT COUNT(DISTINCT c_salutation, c_last_name) FROM customer;
SELECT DISTINCT c_salutation, c_last_name FROM customer;
-- OTHERS
select
r_name,
count(r_nations.item.n_nationkey) as count,
sum(r_nations.item.n_nationkey) as sum,
avg(r_nations.item.n_nationkey) as avg,
min(r_nations.item.n_name) as minimum,
max(r_nations.item.n_name) as maximum,
ndv(r_nations.item.n_nationkey) as distinct_vals
from
region, region.r_nations as r_nations
group by r_name
order by r_name;
select "contains an even number" as assertion from t3 where exists (select z from t3 where z % 2 = 0) limit 1;
select null is distinct from null, null != null;
select
'x' is distinct from 'x ' as string_with_trailing_spaces,
cast('x' as char(5)) is distinct from cast('x ' as char(5)) as char_with_trailing_spaces;
select c_first_name, c_last_name from customer where c_first_name regexp '^J.*';
SELECT
t1.transaction_id as transaction_id1,
t1.customer_id,
t1.transaction_date,
t1.transaction_amount,
t2.transaction_id as subsequent_transaction_id
FROM
transactions t1
LEFT JOIN (
SELECT
transaction_id,
customer_id,
transaction_date,
transaction_amount,
LEAD (transaction_id) OVER (
PARTITION BY
customer_id
ORDER BY
transaction_date
) AS transaction_id
FROM
transactions
) t2 ON t1.transaction_id = t2.transaction_id
AND t1.customer_id = t2.customer_id
AND t1.transaction_date = t2.transaction_date
AND t1.transaction_amount = t2.transaction_amount;
select appx_median(x) from million_numbers;
select count(x) as higher from million_numbers where x > (select appx_median(x) from million_numbers);
select avg(length(s)) from t1;

@@ -0,0 +1,7 @@
-- SET
SET;
-- SET ALL
SET ALL;
-- SET query_option=option_value
set compression_codec=gzip;
set mt_dop = 0;

@@ -0,0 +1,167 @@
-- SHOW DATABASES [[LIKE] 'pattern']
SHOW DATABASES;
SHOW DATABASES 'xxx';
SHOW DATABASES LIKE 'xxx';
-- SHOW SCHEMAS [[LIKE] 'pattern']
SHOW SCHEMAS;
SHOW SCHEMAS 'xxx';
SHOW SCHEMAS LIKE 'xxx';
-- SHOW TABLES [IN database_name] [[LIKE] 'pattern']
SHOW TABLES;
SHOW TABLES 'xxx';
SHOW TABLES LIKE 'xxx';
SHOW TABLES IN my_db;
SHOW TABLES IN my_db 'xxx';
SHOW TABLES IN my_db LIKE 'xxx';
-- SHOW [AGGREGATE | ANALYTIC] FUNCTIONS [IN database_name] [[LIKE] 'pattern']
SHOW FUNCTIONS;
SHOW FUNCTIONS 'xxx';
SHOW FUNCTIONS LIKE 'xxx';
SHOW FUNCTIONS IN my_db;
SHOW FUNCTIONS IN my_db 'xxx';
SHOW FUNCTIONS IN my_db LIKE 'xxx';
SHOW ANALYTIC FUNCTIONS IN my_db LIKE 'xxx';
SHOW AGGREGATE FUNCTIONS IN my_db LIKE 'xxx';
-- SHOW CREATE TABLE [database_name].table_name
SHOW CREATE TABLE my_table;
SHOW CREATE TABLE my_db.my_table;
-- SHOW CREATE VIEW [database_name].view_name
SHOW CREATE VIEW my_table;
SHOW CREATE VIEW my_db.my_table;
-- SHOW TABLE STATS [database_name.]table_name
SHOW TABLE STATS my_table;
SHOW TABLE STATS my_db.my_table;
-- SHOW COLUMN STATS [database_name.]table_name
SHOW COLUMN STATS my_table;
SHOW COLUMN STATS my_db.my_table;
-- SHOW PARTITIONS [database_name.]table_name
SHOW PARTITIONS my_table;
SHOW PARTITIONS my_db.my_table;
-- SHOW [RANGE] PARTITIONS [database_name.]table_name
SHOW RANGE PARTITIONS my_table;
SHOW RANGE PARTITIONS my_db.my_table;
-- SHOW FILES IN [database_name.]table_name [PARTITION (key_col_expression [, key_col_expression])]
SHOW FILES IN my_table;
SHOW FILES IN my_db.my_table;
SHOW FILES IN my_db.my_table PARTITION (
"date" = "1110-11-11",
age BETWEEN 100
AND 200
);
SHOW ROLES;
SHOW CURRENT ROLES;
SHOW ROLE GRANT GROUP group_name;
SHOW GRANT USER user_name;
SHOW GRANT USER user_name ON SERVER;
SHOW GRANT USER user_name ON DATABASE database_name;
SHOW GRANT USER user_name ON TABLE database_name.table_name;
SHOW GRANT USER user_name ON URI my_uri;
SHOW GRANT USER user_name ON COLUMN database_name.table_name.column_name;
SHOW GRANT ROLE role_name;
SHOW GRANT ROLE role_name ON SERVER;
SHOW GRANT ROLE role_name ON DATABASE database_name;
SHOW GRANT ROLE role_name ON TABLE database_name.table_name;
SHOW GRANT ROLE role_name ON URI my_uri;
SHOW GRANT ROLE role_name ON COLUMN database_name.table_name.column_name;
SHOW GRANT GROUP group_name ON SERVER;
SHOW GRANT GROUP group_name ON DATABASE database_name;
SHOW GRANT GROUP group_name ON TABLE database_name.table_name;
SHOW GRANT GROUP group_name ON URI my_uri;
SHOW GRANT GROUP group_name ON COLUMN database_name.table_name.column_name;
-- example
SHOW DATABASES 'a*';
SHOW DATABASES LIKE 'a*';
SHOW TABLES IN some_db LIKE '*fact*';
SHOW TABLES '*dim*|*fact*';
show files in sample_table partition (j < 5);
show files in sample_table partition (
k = 3,
l between 1
and 10
);
show files in sample_table partition (month like 'J%');
show files in unpart_text;
show partitions part_text;
show files in s3_testing.sample_data_s3;
show roles;
show current roles;
show tables in full_db like 'dim*';
show CREATE TABLE numeric_grades_default_letter;
show range partitions numeric_grades_default_letter;
show table stats kudu_table;
show column stats customer;
show functions in _impala_builtins;
show functions in _impala_builtins like '*week*';

@@ -0,0 +1,14 @@
-- :SHUTDOWN();
:SHUTDOWN();
-- :SHUTDOWN([host_name[:port_number]])
:SHUTDOWN("hostname:1234");
:SHUTDOWN('hostname');
-- :SHUTDOWN(deadline)
:SHUTDOWN(0);
:SHUTDOWN(10);
-- :SHUTDOWN([host_name[:port_number]], deadline)
:SHUTDOWN('hostname', 10);
:SHUTDOWN('hostname:11', 10 * 60);

@@ -0,0 +1,10 @@
-- TRUNCATE [TABLE] [IF EXISTS] [db_name.]table_name
TRUNCATE my_table;
TRUNCATE my_db.my_table;
TRUNCATE TABLE my_table;
TRUNCATE IF EXISTS my_db.my_table;
TRUNCATE TABLE IF EXISTS my_db.my_table;
-- example
TRUNCATE TABLE truncate_demo;
TRUNCATE TABLE IF EXISTS staging_table2;

@@ -0,0 +1,146 @@
/* UPDATE [database_name.]table_name SET col = val [, col = val ... ]
[ FROM joined_table_refs ]
[ WHERE where_conditions ] */
UPDATE
my_table
SET
col1 = 1,
col2 = 2,
col3 = 3;
UPDATE
my_db.my_table
SET
col1 = 1;
UPDATE
my_db.my_table
SET
col1 = 1
WHERE
col2 = 1;
UPDATE
my_db.my_table
SET
col1 = 1
WHERE
col2 = 1
AND col3 BETWEEN 100
AND 300;
UPDATE
my_table
SET
col1 = 'new_value'
FROM
other_table
WHERE
my_table.id = other_table.id;
UPDATE
my_table
SET
col1 = (
SELECT
MAX(col2)
FROM
other_table
WHERE
other_table.id = my_table.id
)
WHERE
col3 = 'value';
UPDATE
my_table
SET
col1 = CASE
WHEN col2 > 10 THEN 'High'
WHEN col2 > 5 THEN 'Medium'
ELSE 'Low'
END
WHERE
col3 = 'value';
UPDATE
my_table
SET
col1 = (
SELECT
AVG(col2)
FROM
other_table
WHERE
other_table.id = my_table.id
GROUP BY
other_table.id
)
WHERE
col3 = 'value';
UPDATE
my_table
SET
col1 = other_table.val1
FROM
my_table
JOIN other_table ON my_table.id = other_table.id
WHERE
my_table.col2 = 'value';
--example
UPDATE
kudu_table
SET
c3 = 'not applicable';
UPDATE
kudu_table
SET
c3 = NULL
WHERE
c1 > 100
AND c3 IS NULL;
UPDATE
kudu_table
SET
c3 = 'impossible'
WHERE
1 = 0;
UPDATE
kudu_table
SET
c3 = upper(c3),
c4 = FALSE,
c5 = 0
WHERE
c6 = TRUE;
UPDATE
kudu_table
SET
c3 = upper(c3)
FROM
kudu_table
JOIN non_kudu_table ON kudu_table.id = non_kudu_table.id;
UPDATE
t1
SET
c3 = upper(c3)
FROM
kudu_table t1
JOIN non_kudu_table t2 ON t1.id = t2.id;
UPDATE
t1
SET
c3 = upper(c3)
FROM
kudu_table t1
JOIN non_kudu_table t2 ON t1.id = t2.id
WHERE
c3 != upper(c3);

@@ -0,0 +1,89 @@
/* UPSERT [hint_clause] INTO [TABLE] [db_name.]table_name
[(column_list)]
{
[hint_clause] select_statement
| VALUES (value [, value ...]) [, (value [, value ...]) ...]
} */
UPSERT INTO my_table
SELECT
id,
'new_value1',
'new_value2'
FROM
other_table
WHERE
condition;
UPSERT -- +NOSHUFFLE -- +CLUSTERED
INTO my_table
/* +NOSHUFFLE */
SELECT
id,
'new_value1',
'new_value2'
FROM
other_table
WHERE
condition;
UPSERT INTO my_table -- +SHUFFLE
SELECT
id,
'new_value1',
'new_value2'
FROM
other_table
WHERE
condition;
UPSERT INTO my_table (id, col1, col2)
SELECT
id,
'new_value1',
'new_value2'
FROM
other_table
WHERE
condition;
UPSERT INTO TABLE my_db.my_table (id, col1, col2)
SELECT
id,
'new_value1',
'new_value2'
FROM
other_table
WHERE
condition;
UPSERT INTO my_table (id, col1, col2)
VALUES
(1, 'new_value1', 'new_value2'),
(2, 'new_value3', 'new_value4');
UPSERT -- +NOSHUFFLE -- +CLUSTERED
INTO my_db.my_table
VALUES
(1, 'new_value1', 'new_value2'),
(2, 'new_value3', 'new_value4');
-- example
UPSERT INTO kudu_table (pk, c1, c2, c3)
VALUES
(0, 'hello', 50, true),
(1, 'world', -1, false);
UPSERT INTO production_table
SELECT
*
FROM
staging_table;
UPSERT INTO production_table
SELECT
*
FROM
staging_table
WHERE
c1 IS NOT NULL
AND c2 > 0;

@@ -0,0 +1,2 @@
-- USE db_name
USE my_db;

@@ -0,0 +1,10 @@
-- VALUES (row)[, (row), ...];
VALUES ('r1_c1', 'r1_c2', 'r1_c3');
VALUES ('r1_c1', 'r1_c2', 'r1_c3'), ('r1_c1', 'r1_c2', 'r1_c3');
VALUES ('r1_c1', 'r1_c2', 'r1_c3'), ('r1_c1', 'r1_c2', 'r1_c3'), ('r1_c1', 'r1_c2', 'r1_c3');
VALUES ('r1_c1' as 'hi', 'r1_c2', 'r1_c3'), ('r1_c1', 'r1_c2', 'r1_c3' as 'hi');
-- example
VALUES(4,5,6),(7,8,9);
VALUES(1 AS c1, true AS c2, 'abc' AS c3), (100,false,'xyz');
VALUES (CAST('2019-01-01' AS TIMESTAMP)), ('2019-02-02');