a99721162b
* feat: add text and word utils * feat: add entity collector class * refactor: rename SyntaxContextType to EntityContextType * refactor: improve EntityCollector * feat: improve mysql parser grammar * feat: add mysql entity collector * test: mysql entity collector tests * feat: remove useless method * feat: improve spark grammar file * feat: add spark entity collector * test: spark entity collector unit tests * feat: remove useless code * feat: add queryStatement label * feat: add crateDatabaseStmt * feat: add trino entity collector * feat: rename trinosql to trino * test: trino collect entity unit tests * test: fix spark test * feat(impala): support impale entity collector (#256) * Feat/collect entity hive (#263) * feat(hive): support hive collect entity * feat(hive): update tableAllColumns * feat: replace antlr4ts with antlr4ng * feat(pgsql): pgsql collect entity (#268) * feat(pgsql): pgsql collect entity * feat(pgsql): optimize some name --------- Co-authored-by: zhaoge <> * feat: get word text by token.text * feat: supprt collect db/function and add splitListener (#270) * feat: supprt collect db/function and add splitListner * feat: remove SplitListener interface in baseParser to use SplitListener in root * fix(mysql): fix show create xxx not celloct as createXXXEntity type * test: fix pgsql unit tests * Feat/error recover predicate (#274) * feat: optimize pgsql grammar * feat: add sql parser base * feat: apply SQLParserBase * feat: add geAllEntities method * test: test collect table when missing column * feat: compose collect and suggestion (#276) * feat: mark stmt which contain caret * test: correct name of getAllEntities * test: remove misscolumn unit tests * test: add suggestionWithEntity tests * feat: flink collect entity (#277) * feat: improve flink sql parser * feat: support flink entity collector * test: flink entity collect unit test * feat: move combine entities to parent class --------- Co-authored-by: 霜序 <976060700@qq.com> Co-authored-by: XCynthia <942884029@qq.com>
246 lines
7.2 KiB
SQL
246 lines
7.2 KiB
SQL
-- Select Syntax
|
|
SELECT col1, col2 FROM table_name_1;
|
|
|
|
SELECT ALL col3, col4 FROM table_name_2 WHERE amount > 10 AND region = "US" GROUP BY col5 ORDER BY col6 CLUSTER BY col7 LIMIT 2,5;
|
|
|
|
-- WHERE Clause
|
|
SELECT * FROM sales WHERE amount > 10 AND region = "US";
|
|
|
|
-- ALL and DISTINCT Clauses
|
|
SELECT DISTINCT col1 FROM t1;
|
|
|
|
-- Partition Based Queries
|
|
SELECT page_views.*
|
|
FROM page_views
|
|
WHERE page_views.`date` >= '2018-03-01' AND page_views.`date` <= '2018-03-31';
|
|
|
|
SELECT page_views.*
|
|
FROM page_views JOIN dim_users
|
|
ON (page_views.user_id = dim_users.id AND page_views.adate >= '2008-03-01' AND page_views.adate <= '2008-03-31');
|
|
|
|
-- HAVING Clause
|
|
SELECT col1 FROM t1 GROUP BY col1 HAVING SUM(col2) > 10;
|
|
|
|
SELECT col1 FROM (SELECT col1, SUM(col2) AS col2sum FROM t1 GROUP BY col1) t2 WHERE t2.col2sum > 10;
|
|
|
|
-- LIMIT Clause
|
|
SELECT * FROM customers LIMIT 5;
|
|
|
|
SELECT * FROM customers ORDER BY create_date LIMIT 5;
|
|
|
|
SELECT * FROM customers ORDER BY create_date LIMIT 2,5;
|
|
|
|
-- REGEX Column Specification
|
|
SELECT `(ds|hr)?+.+` FROM sales;
|
|
|
|
-- Syntax of Sort By
|
|
SELECT key, value FROM src SORT BY key ASC, value DESC;
|
|
|
|
-- Distribute By
|
|
SELECT col1, col2 FROM t1 DISTRIBUTE BY col1 SORT BY col1 ASC, col2 DESC;
|
|
|
|
-- Join Syntax
|
|
SELECT a.* FROM a JOIN b ON (a.id = b.id AND a.department = b.department);
|
|
|
|
-- MapJoin Restrictions
|
|
SELECT /*+ MAPJOIN(b) */ a.key, a.value
|
|
FROM a JOIN b ON a.key = b.key;
|
|
|
|
-- Union Syntax
|
|
-- UNION within a FROM Clause
|
|
SELECT u.id, actions.adate
|
|
FROM (
|
|
SELECT av.uid AS uid
|
|
FROM action_video av
|
|
WHERE av.adate = '2008-06-03'
|
|
UNION ALL
|
|
SELECT ac.uid AS uid
|
|
FROM action_comment ac
|
|
WHERE ac.adate = '2008-06-03'
|
|
) actions JOIN users u ON (u.id = actions.uid);
|
|
|
|
-- Unions in DDL and Insert Statements--Applying Subclauses
|
|
SELECT key FROM (SELECT key FROM src ORDER BY key LIMIT 10)subq1
|
|
UNION
|
|
SELECT key FROM (SELECT key FROM src1 ORDER BY key LIMIT 10)subq2;
|
|
|
|
SELECT key FROM src
|
|
UNION
|
|
SELECT key FROM src1
|
|
ORDER BY key LIMIT 10;
|
|
|
|
-- Unions in DDL and Insert Statements--Column Aliases for Schema Matching
|
|
INSERT OVERWRITE TABLE target_table
|
|
SELECT name, id, category FROM source_table_1
|
|
UNION ALL
|
|
SELECT name, id, "Category159" as category FROM source_table_2;
|
|
|
|
-- Unions in DDL and Insert Statements--Column Type Conversion
|
|
SELECT name, id, cast('2001-01-01' as date) d FROM source_table_1
|
|
UNION ALL
|
|
SELECT name, id, hiredate as d FROM source_table_2;
|
|
|
|
-- Sampling Bucketized Table
|
|
SELECT *
|
|
FROM source TABLESAMPLE(BUCKET 3 OUT OF 32 ON rand()) s;
|
|
|
|
-- Block Sampling
|
|
SELECT *
|
|
FROM source TABLESAMPLE(0.1 PERCENT) s;
|
|
|
|
SELECT *
|
|
FROM source TABLESAMPLE(100M) s;
|
|
|
|
-- Subqueries in the FROM Clause
|
|
SELECT t3.col
|
|
FROM (
|
|
SELECT a+b AS col
|
|
FROM t1
|
|
UNION ALL
|
|
SELECT c+d AS col
|
|
FROM t2
|
|
) t3;
|
|
|
|
-- Subqueries in the WHERE Clause
|
|
SELECT A
|
|
FROM T1
|
|
WHERE EXISTS (SELECT B FROM T2 WHERE T1.X = T2.Y);
|
|
|
|
-- explode (array)
|
|
SELECT tf.* FROM (SELECT 0) t lateral view explode(ARRAY('A','B','C')) tf AS col;
|
|
-- explode (map)
|
|
SELECT tf.* FROM (SELECT 0) t lateral view explode(map('A',10,'B',20,'C',30)) tf AS KEY,VALUE;
|
|
-- posexplode (array)
|
|
SELECT tf.* FROM (SELECT 0) t lateral view posexplode(ARRAY('A','B','C')) tf AS pos,val;
|
|
-- inline (array of structs)
|
|
SELECT tf.* FROM (SELECT 0) t lateral view inline(ARRAY(STRUCT('A',10,DATE '2015-01-01'),STRUCT('B',20,DATE '2016-02-02'))) tf AS col1,col2,col3;
|
|
-- stack (values)
|
|
SELECT tf.* FROM (SELECT 0) t lateral view stack(2,'A',10,DATE '2015-01-01','B',20,DATE '2016-01-01') tf AS col0,col1,col2;
|
|
|
|
|
|
-- Lateral View Syntax
|
|
SELECT adid, count(1)
|
|
FROM pageAds LATERAL VIEW explode(adid_list) adTable AS adid
|
|
GROUP BY adid;
|
|
|
|
-- Multiple Lateral Views
|
|
SELECT * FROM exampleTable
|
|
LATERAL VIEW explode(col1) myTable1 AS myCol1
|
|
LATERAL VIEW explode(myCol1) myTable2 AS myCol2;
|
|
|
|
-- Outer Lateral Views
|
|
SELECT * FROM src LATERAL VIEW explode(array()) C AS a limit 10;
|
|
SELECT * FROM src LATERAL VIEW OUTER explode(array()) C AS a limit 10;
|
|
|
|
-- Windowing and Analytics Functions
|
|
SELECT a, COUNT(b) OVER (PARTITION BY c, d)
|
|
FROM T;
|
|
|
|
SELECT a, SUM(b) OVER (PARTITION BY c, d ORDER BY e, f)
|
|
FROM T;
|
|
|
|
SELECT a, AVG(b) OVER (PARTITION BY c ORDER BY d ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
|
|
FROM T;
|
|
|
|
SELECT a, SUM(b) OVER w
|
|
FROM T
|
|
WINDOW w AS (PARTITION BY c ORDER BY d ROWS UNBOUNDED PRECEDING);
|
|
|
|
SELECT a, LEAD(a) OVER (PARTITION BY b ORDER BY C)
|
|
FROM T;
|
|
|
|
SELECT a, LAG(a, 3, 0) OVER (PARTITION BY b ORDER BY C)
|
|
FROM T;
|
|
|
|
SELECT a, COUNT(distinct a) OVER (PARTITION BY b)
|
|
FROM T;
|
|
|
|
-- Common Table Expression
|
|
-- chaining CTEs
|
|
WITH q1 AS ( SELECT key FROM q2 WHERE key = '5'),
|
|
q2 AS ( SELECT key FROM src WHERE key = '5')
|
|
SELECT * FROM (SELECT key FROM q1) a;
|
|
|
|
-- union example
|
|
WITH q1 AS (SELECT * FROM src WHERE key= '5'),
|
|
q2 AS (SELECT * FROM src s2 WHERE key = '4')
|
|
SELECT * FROM q1 union all SELECT * FROM q2;
|
|
|
|
-- view example
|
|
CREATE VIEW v1 AS
|
|
WITH q1 AS ( SELECT key FROM src WHERE key = '5')
|
|
SELECT * FROM q1;
|
|
SELECT * FROM v1;
|
|
|
|
-- view example, name collision
|
|
CREATE VIEW v1 AS
|
|
WITH q1 AS ( SELECT key FROM src WHERE key = '5')
|
|
SELECT * FROM q1;
|
|
WITH q1 AS ( SELECT key FROM src WHERE key = '4')
|
|
SELECT * FROM v1;
|
|
|
|
-- GROUPING SETS clause
|
|
-- GROUPING SET queries and the equivalent GROUP BY queries
|
|
SELECT a, b, SUM(c) FROM tab1 GROUP BY a, b GROUPING SETS ( (a,b) );
|
|
|
|
SELECT a, b, SUM( c ) FROM tab1 GROUP BY a, b GROUPING SETS ( (a, b), a, b, ( ) );
|
|
|
|
-- Grouping__ID function
|
|
SELECT key, value, GROUPING__ID, count(*)
|
|
FROM T1
|
|
GROUP BY key, value WITH ROLLUP;
|
|
|
|
-- Grouping function
|
|
SELECT key, value, GROUPING__ID,
|
|
grouping(key, value), grouping(value, key), grouping(key), grouping(value),
|
|
count(*)
|
|
FROM T1
|
|
GROUP BY key, value WITH ROLLUP;
|
|
|
|
-- LanguageManual Explain -- EXPLAIN Syntax
|
|
EXPLAIN
|
|
FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,4)) GROUP BY src.key;
|
|
|
|
-- LanguageManual Explain -- The CBO Clause
|
|
EXPLAIN CBO
|
|
WITH customer_total_return AS
|
|
(SELECT sr_customer_sk AS ctr_customer_sk,
|
|
sr_store_sk AS ctr_store_sk,
|
|
SUM(SR_FEE) AS ctr_total_return
|
|
FROM store_returns, date_dim
|
|
WHERE sr_returned_date_sk = d_date_sk
|
|
AND d_year =2000
|
|
GROUP BY sr_customer_sk, sr_store_sk)
|
|
SELECT c_customer_id
|
|
FROM customer_total_return ctr1, store, customer
|
|
WHERE ctr1.ctr_total_return > (SELECT AVG(ctr_total_return)*1.2
|
|
FROM customer_total_return ctr2
|
|
WHERE ctr1.ctr_store_sk = ctr2.ctr_store_sk)
|
|
AND s_store_sk = ctr1.ctr_store_sk
|
|
AND s_state = 'NM'
|
|
AND ctr1.ctr_customer_sk = c_customer_sk
|
|
ORDER BY c_customer_id
|
|
LIMIT 100;
|
|
|
|
-- LanguageManual Explain -- The AST Clause
|
|
EXPLAIN AST
|
|
FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,4)) GROUP BY src.key;
|
|
|
|
-- LanguageManual Explain -- The DEPENDENCY Clause
|
|
EXPLAIN DEPENDENCY
|
|
SELECT key, count(1) FROM srcpart WHERE ds IS NOT NULL GROUP BY key;
|
|
|
|
-- LanguageManual Explain -- The AUTHORIZATION Clause
|
|
EXPLAIN AUTHORIZATION
|
|
SELECT * FROM src JOIN srcpart;
|
|
|
|
-- LanguageManual Explain -- The LOCKS Clause
|
|
EXPLAIN LOCKS UPDATE target SET b = 1 WHERE p IN (SELECT t.q1 FROM source t WHERE t.a1=5);
|
|
|
|
-- LanguageManual Explain -- User-level Explain Output
|
|
EXPLAIN select sum(hash(key)), sum(hash(value)) from src_orc_merge_test_part where ds='2012-01-03' and ts='2012-01-03+14:46:31';
|
|
|
|
-- FROM xx SELECT
|
|
FROM table_name_1 SELECT col1, col2;
|
|
|
|
FROM a JOIN b ON (a.id = b.id AND a.department = b.department) SELECT a.*; |