feat: refactoring hive SQL lexer

This commit is contained in:
chenfeng 2020-11-25 14:49:33 +08:00
parent 654d96028c
commit 557e9a3246
2 changed files with 611 additions and 599 deletions

View File

@ -1,3 +1,4 @@
lexer grammar HiveSqlLexer; lexer grammar HiveSqlLexer;
// Lexer rules // Lexer rules
@ -347,9 +348,12 @@ T_COLON : ':' ;
T_COMMA : ',' ; T_COMMA : ',' ;
T_PIPE : '||' ; T_PIPE : '||' ;
T_DIV : '/' ; T_DIV : '/' ;
T_DOT : '.' ;
T_DOT2 : '..' ; T_DOT2 : '..' ;
T_EQUAL : '=' ; T_EQUAL : '=' ;
T_EQUAL2 : '==' ; T_EQUAL2 : '==' ;
T_SHARP : '#' ;
T_NOTE : '!' ;
T_NOTEQUAL : '<>' ; T_NOTEQUAL : '<>' ;
T_NOTEQUAL2 : '!=' ; T_NOTEQUAL2 : '!=' ;
T_GREATER : '>' ; T_GREATER : '>' ;
@ -357,6 +361,8 @@ T_GREATEREQUAL : '>=' ;
T_LESS : '<' ; T_LESS : '<' ;
T_LESSEQUAL : '<=' ; T_LESSEQUAL : '<=' ;
T_MUL : '*' ; T_MUL : '*' ;
// Single-character operator tokens added so parser rules can reference them by name.
// NOTE(review): T_PRECENT looks like a misspelling of PERCENT; renaming it requires
// updating every parser rule that references the token, so it is left unchanged here.
T_PRECENT : '%' ;
T_CALLS : '@' ;
T_OPEN_B : '{' ; T_OPEN_B : '{' ;
T_OPEN_P : '(' ; T_OPEN_P : '(' ;
T_OPEN_SB : '[' ; T_OPEN_SB : '[' ;

View File

@ -5,7 +5,9 @@
The ASF licenses this file to You under the Apache License, Version 2.0 The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -13,14 +15,17 @@
limitations under the License. limitations under the License.
*/ */
// HPL/SQL Procedural SQL Extension Grammar parser grammar HiveSqlParser;
grammar HiveSql;
options { options {
tokenVocab=HiveSqlLexer; tokenVocab=HiveSqlLexer;
} }
// Entry rule: one `block` followed by end of input.
// NOTE(review): the members action on the new side assigns `this._input` from a name
// `input` that is not declared inside the action; confirm that `input` is in scope at
// the place where the target language emits member code, and that the assignment is needed.
program: block EOF; @parser::members {
this._input = input;
}
program : block EOF;
block : ((begin_end_block | stmt) T_GO?)+ ; // Multiple consecutive blocks/statements block : ((begin_end_block | stmt) T_GO?)+ ; // Multiple consecutive blocks/statements
@ -34,7 +39,7 @@ single_block_stmt : // Single BEGIN END blo
; ;
// Matches T_END, guarded by a semantic predicate that rejects the alternative when the
// second lookahead token's text is TRANSACTION (compared case-insensitively).
// The new-side predicate uses `!==`: the earlier form `!x.toUpperCase() === "TRANSACTION"`
// negated the string first and then compared a boolean with a string, which is always
// false and therefore disabled this rule entirely.
block_end : block_end :
{!this._input.LT(2).getText().equalsIgnoreCase("TRANSACTION")}? T_END {this._input.LT(2).text.toUpperCase() !== "TRANSACTION"}? T_END
; ;
proc_block : proc_block :
@ -108,7 +113,7 @@ stmt :
// Statement separator: T_SEMICOLON, or one of '@', '#', '/'.
// The new side references those three characters by token name (T_CALLS, T_SHARP, T_DIV)
// instead of inline literals.
semicolon_stmt : semicolon_stmt :
T_SEMICOLON T_SEMICOLON
| '@' | '#' | '/' | T_CALLS | T_SHARP | T_DIV
; ;
exception_block : // Exception block exception_block : // Exception block
@ -124,7 +129,7 @@ null_stmt : // NULL statement (no operation)
; ;
// The predicate rejects this rule when the next token's text is GO (case-insensitive),
// so GO is not parsed as the start of an expression.
expr_stmt : // Standalone expression expr_stmt : // Standalone expression
{!this._input.LT(1).getText().equalsIgnoreCase("GO")}? expr {this._input.LT(1).text.toUpperCase() !== "GO"}? expr
; ;
assignment_stmt : // Assignment statement assignment_stmt : // Assignment statement
@ -229,7 +234,7 @@ create_local_temp_table_stmt :
; ;
// Table body: a (optionally parenthesized) select_stmt with optional T_AS, or a
// parenthesized column list; optionally followed by create_table_options.
// NOTE(review): the new side no longer contains the `T_LIKE table_name` alternative that
// the old side had; confirm the removal is intended, since input using that form will no
// longer match this rule.
create_table_definition : create_table_definition :
(T_AS? T_OPEN_P select_stmt T_CLOSE_P | T_AS? select_stmt | T_OPEN_P create_table_columns T_CLOSE_P | T_LIKE table_name) create_table_options? (T_AS? T_OPEN_P select_stmt T_CLOSE_P | T_AS? select_stmt | T_OPEN_P create_table_columns T_CLOSE_P) create_table_options?
; ;
create_table_columns : create_table_columns :
@ -365,7 +370,6 @@ alter_table_add_constraint_item :
dtype : // Data types dtype : // Data types
T_CHAR T_CHAR
| T_CHARACTER
| T_BIGINT | T_BIGINT
| T_BINARY_DOUBLE | T_BINARY_DOUBLE
| T_BINARY_FLOAT | T_BINARY_FLOAT
@ -401,7 +405,7 @@ dtype : // Data types
| T_VARCHAR | T_VARCHAR
| T_VARCHAR2 | T_VARCHAR2
| T_XML | T_XML
| ident ('%' (T_TYPE | T_ROWTYPE))? // User-defined or derived data type | ident (T_PRECENT (T_TYPE | T_ROWTYPE))? // User-defined or derived data type
; ;
dtype_len : // Data type length or size specification dtype_len : // Data type length or size specification
@ -471,9 +475,9 @@ create_procedure_stmt :
// Routine parameter list in one of three forms:
//   1. empty parentheses;
//   2. a parenthesized, comma-separated list of create_routine_param_item;
//   3. the same list without parentheses, allowed only when the predicate holds: the next
//      token's text is neither IS nor AS, and the next two tokens are not DYNAMIC RESULT
//      (all comparisons case-insensitive).
create_routine_params : create_routine_params :
T_OPEN_P T_CLOSE_P T_OPEN_P T_CLOSE_P
| T_OPEN_P create_routine_param_item (T_COMMA create_routine_param_item)* T_CLOSE_P | T_OPEN_P create_routine_param_item (T_COMMA create_routine_param_item)* T_CLOSE_P
| {!this._input.LT(1).getText().equalsIgnoreCase("IS") && | {this._input.LT(1).text.toUpperCase() !== "IS" &&
!this._input.LT(1).getText().equalsIgnoreCase("AS") && this._input.LT(1).text.toUpperCase() !== "AS" &&
!(this._input.LT(1).getText().equalsIgnoreCase("DYNAMIC") && this._input.LT(2).getText().equalsIgnoreCase("RESULT")) !(this._input.LT(1).text.toUpperCase() ==="DYNAMIC" && this._input.LT(2).text.toUpperCase() === "RESULT")
}? }?
create_routine_param_item (T_COMMA create_routine_param_item)* create_routine_param_item (T_COMMA create_routine_param_item)*
; ;
@ -520,7 +524,7 @@ if_tsql_stmt :
; ;
// IF statement introduced by a leading dot: `.` T_IF bool_expr T_THEN single_block_stmt.
// The new side references the dot through the T_DOT token instead of the '.' literal.
if_bteq_stmt : if_bteq_stmt :
'.' T_IF bool_expr T_THEN single_block_stmt T_DOT T_IF bool_expr T_THEN single_block_stmt
; ;
elseif_block : elseif_block :
@ -675,7 +679,7 @@ print_stmt : // PRINT statement
; ;
// QUIT statement: optional leading dot, T_QUIT, then an optional expr.
// The new side references the dot through the T_DOT token instead of the '.' literal.
quit_stmt : quit_stmt :
'.'? T_QUIT expr? T_DOT? T_QUIT expr?
; ;
raise_stmt : raise_stmt :
@ -811,7 +815,7 @@ select_list_item :
; ;
// Alias of a select-list item. First alternative: optional T_AS followed by ident, guarded
// by a predicate that rejects it when the next token's text is INTO or FROM
// (case-insensitive). Second alternative: `( T_TITLE L_S_STRING )`.
select_list_alias : select_list_alias :
{!this._input.LT(1).getText().equalsIgnoreCase("INTO") && !this._input.LT(1).getText().equalsIgnoreCase("FROM")}? T_AS? ident {this._input.LT(1).text.toUpperCase() !== "INTO" && this._input.LT(1).text.toUpperCase() !== "FROM"}? T_AS? ident
| T_OPEN_P T_TITLE L_S_STRING T_CLOSE_P | T_OPEN_P T_TITLE L_S_STRING T_CLOSE_P
; ;
@ -861,14 +865,15 @@ from_table_values_row:
; ;
// Alias of a FROM item: optional T_AS, ident, then an optional parenthesized list of L_ID.
// The predicate rejects the rule when the next token's text (case-insensitive) is one of
// EXEC, EXECUTE, INNER, LEFT, GROUP, ORDER, LIMIT, WITH; the new side additionally
// excludes JOIN.
from_alias_clause : from_alias_clause :
{!this._input.LT(1).getText().equalsIgnoreCase("EXEC") && {this._input.LT(1).text.toUpperCase() !== "EXEC" &&
!this._input.LT(1).getText().equalsIgnoreCase("EXECUTE") && this._input.LT(1).text.toUpperCase() !== "EXECUTE" &&
!this._input.LT(1).getText().equalsIgnoreCase("INNER") && this._input.LT(1).text.toUpperCase() !== "INNER" &&
!this._input.LT(1).getText().equalsIgnoreCase("LEFT") && this._input.LT(1).text.toUpperCase() !== "LEFT" &&
!this._input.LT(1).getText().equalsIgnoreCase("GROUP") && this._input.LT(1).text.toUpperCase() !== "GROUP" &&
!this._input.LT(1).getText().equalsIgnoreCase("ORDER") && this._input.LT(1).text.toUpperCase() !== "ORDER" &&
!this._input.LT(1).getText().equalsIgnoreCase("LIMIT") && this._input.LT(1).text.toUpperCase() !== "LIMIT" &&
!this._input.LT(1).getText().equalsIgnoreCase("WITH")}? this._input.LT(1).text.toUpperCase() !== "WITH" &&
this._input.LT(1).text.toUpperCase() !== "JOIN"}?
T_AS? ident (T_OPEN_P L_ID (T_COMMA L_ID)* T_CLOSE_P)? T_AS? ident (T_OPEN_P L_ID (T_COMMA L_ID)* T_CLOSE_P)?
; ;
@ -945,7 +950,7 @@ delete_stmt :
; ;
// Alias in a DELETE statement: optional T_AS followed by ident. The predicate rejects the
// rule when the next token's text is ALL (case-insensitive).
delete_alias : delete_alias :
{!this._input.LT(1).getText().equalsIgnoreCase("ALL")}? {this._input.LT(1).text.toUpperCase() !== "ALL"}?
T_AS? ident T_AS? ident
; ;
@ -1070,7 +1075,7 @@ expr_case_searched :
; ;
// Cursor attribute: ident, a percent sign, then one of T_ISOPEN, T_FOUND, T_NOTFOUND.
// The new side references the percent sign through the T_PRECENT token instead of '%'.
expr_cursor_attribute : expr_cursor_attribute :
ident '%' (T_ISOPEN | T_FOUND | T_NOTFOUND) ident T_PRECENT (T_ISOPEN | T_FOUND | T_NOTFOUND)
; ;
expr_agg_window_func : expr_agg_window_func :
@ -1136,7 +1141,7 @@ expr_func_params :
; ;
// Function argument: an expr, optionally prefixed by `ident =` or `ident =>`
// (T_EQUAL with an optional T_GREATER). The predicate rejects the rule when the next
// token's text is INTO (case-insensitive).
func_param : func_param :
{!this._input.LT(1).getText().equalsIgnoreCase("INTO")}? (ident T_EQUAL T_GREATER?)? expr {this._input.LT(1).text.toUpperCase() !== "INTO"}? (ident T_EQUAL T_GREATER?)? expr
; ;
expr_select : expr_select :
@ -1160,7 +1165,7 @@ hive_item :
; ;
// Host command: `!` host_cmd `;`, or a host_stmt.
// The new side references the exclamation mark through the T_NOTE token; the terminating
// ';' remains an inline literal on both sides.
host : host :
'!' host_cmd ';' // OS command T_NOTE host_cmd ';' // OS command
| host_stmt | host_stmt
; ;
@ -1173,7 +1178,7 @@ host_stmt :
; ;
// File name: an L_FILE token, or a path built from ident segments separated by '/',
// optionally starting with '/' or './'.
// NOTE(review): the new side replaces '.' with T_DOT but keeps '/' as an inline literal;
// consider referencing the slash by token name as well for consistency.
file_name : file_name :
L_FILE | ('/' | '.' '/')? ident ('/' ident)* L_FILE | ('/' | T_DOT '/')? ident ('/' ident)*
; ;
date_literal : // DATE 'YYYY-MM-DD' literal date_literal : // DATE 'YYYY-MM-DD' literal
@ -1185,7 +1190,7 @@ timestamp_literal : // TIMESTAMP 'YYYY-MM-DD HH:MI:SS.FFF'
; ;
// Identifier: one or more dot-separated parts, each an L_ID or a non_reserved_words entry.
// NOTE(review): the old side allowed an optional leading '-' that the new side drops;
// confirm that no caller relied on ident accepting the leading minus.
ident : ident :
'-'? (L_ID | non_reserved_words) ('.' (L_ID | non_reserved_words))* (L_ID | non_reserved_words) (T_DOT (L_ID | non_reserved_words))*
; ;
string : // String literal (single or double quoted) string : // String literal (single or double quoted)
@ -1538,3 +1543,4 @@ non_reserved_words : // Tokens that are not reserved words
| T_XML | T_XML
| T_YES | T_YES
; ;