feat: refactoring hive SQL lexer

This commit is contained in:
chenfeng 2020-11-25 14:49:33 +08:00
parent 654d96028c
commit 557e9a3246
2 changed files with 611 additions and 599 deletions

View File

@ -1,3 +1,4 @@
lexer grammar HiveSqlLexer;
// Lexer rules
@ -347,9 +348,12 @@ T_COLON : ':' ;
// Punctuation and operator tokens. Where one literal is a prefix of another
// (e.g. '.' and '..', '=' and '=='), both are declared as separate tokens.
T_COMMA : ',' ;
T_PIPE : '||' ; // two-character double pipe
T_DIV : '/' ;
T_DOT : '.' ;
T_DOT2 : '..' ;
T_EQUAL : '=' ;
T_EQUAL2 : '==' ;
T_SHARP : '#' ;
T_NOTE : '!' ; // single exclamation mark; the parser's host rule uses it to introduce an OS command
T_NOTEQUAL : '<>' ;
T_NOTEQUAL2 : '!=' ;
T_GREATER : '>' ;
@ -357,6 +361,8 @@ T_GREATEREQUAL : '>=' ;
// More single-character operator and bracket tokens.
T_LESS : '<' ;
T_LESSEQUAL : '<=' ;
T_MUL : '*' ;
T_PRECENT : '%' ; // percent sign; the name is spelled "PRECENT" and the parser grammar references it under that spelling
T_CALLS : '@' ; // at sign; accepted by the parser's semicolon_stmt rule
T_OPEN_B : '{' ;
T_OPEN_P : '(' ;
T_OPEN_SB : '[' ;

View File

@ -5,7 +5,9 @@
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -13,13 +15,16 @@
limitations under the License.
*/
// HPL/SQL Procedural SQL Extension Grammar
grammar HiveSql;
// Parser-only grammar: it declares no tokens of its own and takes its token
// vocabulary from the separate HiveSqlLexer grammar via the tokenVocab option.
parser grammar HiveSqlParser;
options {
tokenVocab=HiveSqlLexer;
}
// Keeps the token stream on this._input, which the semantic predicates in the
// rules below read through this._input.LT(n).
// NOTE(review): relies on a variable named `input` being in scope where the
// target's code generator places member code — confirm for the target in use.
@parser::members {
this._input = input;
}
// Entry rule: one block followed by end of input.
program : block EOF;
// A block is one or more BEGIN/END blocks or statements, each optionally followed by T_GO.
block : ((begin_end_block | stmt) T_GO?)+ ; // Multiple consecutive blocks/statements
@ -34,7 +39,7 @@ single_block_stmt : // Single BEGIN END blo
;
// Terminator of a BEGIN ... END block. The predicate inspects the token after
// T_END (lookahead 2) and rejects the match when its text is "TRANSACTION"
// (case-insensitive), so that "END TRANSACTION" is not taken as a block end.
// The JavaScript form must use `!==`: writing `!text === "TRANSACTION"` negates
// the string first and then compares a boolean with a string, which is always
// false and would make T_END unmatchable here.
block_end :
{!this._input.LT(2).getText().equalsIgnoreCase("TRANSACTION")}? T_END
{this._input.LT(2).text.toUpperCase() !== "TRANSACTION"}? T_END
;
proc_block :
@ -108,7 +113,7 @@ stmt :
// Statement separator: a semicolon, or one of the characters '@', '#', '/'.
// The named-token form uses T_CALLS ('@'), T_SHARP ('#') and T_DIV ('/') from the lexer.
semicolon_stmt :
T_SEMICOLON
| '@' | '#' | '/'
| T_CALLS | T_SHARP | T_DIV
;
exception_block : // Exception block
@ -124,7 +129,7 @@ null_stmt : // NULL statement (no operation)
;
// An expression used as a statement. The semantic predicate disables this rule
// when the next token's text is "GO" (compared case-insensitively).
expr_stmt : // Standalone expression
{!this._input.LT(1).getText().equalsIgnoreCase("GO")}? expr
{this._input.LT(1).text.toUpperCase() !== "GO"}? expr
;
assignment_stmt : // Assignment statement
@ -229,7 +234,7 @@ create_local_temp_table_stmt :
;
// Body of a CREATE TABLE statement: a (possibly parenthesized) SELECT with an
// optional leading T_AS, or a parenthesized column list; optional table options follow.
// NOTE(review): the second form below no longer has the `T_LIKE table_name`
// alternative that the first form has — confirm that dropping it is intended.
create_table_definition :
(T_AS? T_OPEN_P select_stmt T_CLOSE_P | T_AS? select_stmt | T_OPEN_P create_table_columns T_CLOSE_P | T_LIKE table_name) create_table_options?
(T_AS? T_OPEN_P select_stmt T_CLOSE_P | T_AS? select_stmt | T_OPEN_P create_table_columns T_CLOSE_P) create_table_options?
;
create_table_columns :
@ -365,7 +370,6 @@ alter_table_add_constraint_item :
dtype : // Data types
T_CHAR
| T_CHARACTER
| T_BIGINT
| T_BINARY_DOUBLE
| T_BINARY_FLOAT
@ -401,7 +405,7 @@ dtype : // Data types
| T_VARCHAR
| T_VARCHAR2
| T_XML
| ident ('%' (T_TYPE | T_ROWTYPE))? // User-defined or derived data type
| ident (T_PRECENT (T_TYPE | T_ROWTYPE))? // User-defined or derived data type
;
dtype_len : // Data type length or size specification
@ -471,9 +475,9 @@ create_procedure_stmt :
// Parameter list of a routine. Three shapes are accepted:
//   1. empty parentheses;
//   2. a parenthesized, comma-separated list of parameter items;
//   3. a comma-separated list without parentheses.
// Shape 3 is guarded by a predicate: it is only tried when the next token's
// text is neither "IS" nor "AS", and is not "DYNAMIC" followed by "RESULT"
// (all comparisons are case-insensitive).
create_routine_params :
T_OPEN_P T_CLOSE_P
| T_OPEN_P create_routine_param_item (T_COMMA create_routine_param_item)* T_CLOSE_P
| {!this._input.LT(1).getText().equalsIgnoreCase("IS") &&
!this._input.LT(1).getText().equalsIgnoreCase("AS") &&
!(this._input.LT(1).getText().equalsIgnoreCase("DYNAMIC") && this._input.LT(2).getText().equalsIgnoreCase("RESULT"))
| {this._input.LT(1).text.toUpperCase() !== "IS" &&
this._input.LT(1).text.toUpperCase() !== "AS" &&
!(this._input.LT(1).text.toUpperCase() ==="DYNAMIC" && this._input.LT(2).text.toUpperCase() === "RESULT")
}?
create_routine_param_item (T_COMMA create_routine_param_item)*
;
@ -520,7 +524,7 @@ if_tsql_stmt :
;
// IF statement introduced by a leading dot: ". IF <bool_expr> THEN <single block or statement>".
if_bteq_stmt :
'.' T_IF bool_expr T_THEN single_block_stmt
T_DOT T_IF bool_expr T_THEN single_block_stmt
;
elseif_block :
@ -675,7 +679,7 @@ print_stmt : // PRINT statement
;
// QUIT statement: an optional leading dot, the T_QUIT keyword, and an optional expression.
quit_stmt :
'.'? T_QUIT expr?
T_DOT? T_QUIT expr?
;
raise_stmt :
@ -811,7 +815,7 @@ select_list_item :
;
// Alias of a select-list item: either "[AS] ident", or "(TITLE 'string')".
// The predicate disables the first form when the next token's text is "INTO"
// or "FROM" (case-insensitive).
select_list_alias :
{!this._input.LT(1).getText().equalsIgnoreCase("INTO") && !this._input.LT(1).getText().equalsIgnoreCase("FROM")}? T_AS? ident
{this._input.LT(1).text.toUpperCase() !== "INTO" && this._input.LT(1).text.toUpperCase() !== "FROM"}? T_AS? ident
| T_OPEN_P T_TITLE L_S_STRING T_CLOSE_P
;
@ -861,14 +865,15 @@ from_table_values_row:
;
// Alias of a FROM-clause item: "[AS] ident", optionally followed by a
// parenthesized, comma-separated list of L_ID column names.
// The predicate disables the rule when the next token's text (case-insensitive)
// is one of EXEC, EXECUTE, INNER, LEFT, GROUP, ORDER, LIMIT, WITH.
// The `.text.toUpperCase()` form of the predicate additionally excludes JOIN.
from_alias_clause :
{!this._input.LT(1).getText().equalsIgnoreCase("EXEC") &&
!this._input.LT(1).getText().equalsIgnoreCase("EXECUTE") &&
!this._input.LT(1).getText().equalsIgnoreCase("INNER") &&
!this._input.LT(1).getText().equalsIgnoreCase("LEFT") &&
!this._input.LT(1).getText().equalsIgnoreCase("GROUP") &&
!this._input.LT(1).getText().equalsIgnoreCase("ORDER") &&
!this._input.LT(1).getText().equalsIgnoreCase("LIMIT") &&
!this._input.LT(1).getText().equalsIgnoreCase("WITH")}?
{this._input.LT(1).text.toUpperCase() !== "EXEC" &&
this._input.LT(1).text.toUpperCase() !== "EXECUTE" &&
this._input.LT(1).text.toUpperCase() !== "INNER" &&
this._input.LT(1).text.toUpperCase() !== "LEFT" &&
this._input.LT(1).text.toUpperCase() !== "GROUP" &&
this._input.LT(1).text.toUpperCase() !== "ORDER" &&
this._input.LT(1).text.toUpperCase() !== "LIMIT" &&
this._input.LT(1).text.toUpperCase() !== "WITH" &&
this._input.LT(1).text.toUpperCase() !== "JOIN"}?
T_AS? ident (T_OPEN_P L_ID (T_COMMA L_ID)* T_CLOSE_P)?
;
@ -945,7 +950,7 @@ delete_stmt :
;
// Table alias in a DELETE statement: "[AS] ident". The predicate disables the
// rule when the next token's text is "ALL" (case-insensitive).
delete_alias :
{!this._input.LT(1).getText().equalsIgnoreCase("ALL")}?
{this._input.LT(1).text.toUpperCase() !== "ALL"}?
T_AS? ident
;
@ -1070,7 +1075,7 @@ expr_case_searched :
;
// Cursor attribute reference: an identifier, a percent sign, then one of
// T_ISOPEN, T_FOUND or T_NOTFOUND.
expr_cursor_attribute :
ident '%' (T_ISOPEN | T_FOUND | T_NOTFOUND)
ident T_PRECENT (T_ISOPEN | T_FOUND | T_NOTFOUND)
;
expr_agg_window_func :
@ -1136,7 +1141,7 @@ expr_func_params :
;
// One function-call argument: an expression, optionally prefixed by a parameter
// name followed by '=' or '=' '>' (T_EQUAL with an optional T_GREATER).
// The predicate disables the rule when the next token's text is "INTO" (case-insensitive).
func_param :
{!this._input.LT(1).getText().equalsIgnoreCase("INTO")}? (ident T_EQUAL T_GREATER?)? expr
{this._input.LT(1).text.toUpperCase() !== "INTO"}? (ident T_EQUAL T_GREATER?)? expr
;
expr_select :
@ -1160,7 +1165,7 @@ hive_item :
;
// Host (operating system) command: either '!' followed by host_cmd and a
// terminating ';', or a host_stmt.
// NOTE(review): the terminating ';' is still a string literal while the leading
// '!' became T_NOTE; presumably the lexer's semicolon token could be used here — confirm.
host :
'!' host_cmd ';' // OS command
T_NOTE host_cmd ';' // OS command
| host_stmt
;
@ -1173,7 +1178,7 @@ host_stmt :
;
// File name: either a single L_FILE token, or a path made of identifiers
// separated by '/', optionally starting with '/' or './'.
// The named-token form uses T_DIV ('/') and T_DOT ('.') from the lexer instead
// of string literals, matching how semicolon_stmt refers to the same tokens.
file_name :
L_FILE | ('/' | '.' '/')? ident ('/' ident)*
L_FILE | (T_DIV | T_DOT T_DIV)? ident (T_DIV ident)*
;
date_literal : // DATE 'YYYY-MM-DD' literal
@ -1185,7 +1190,7 @@ timestamp_literal : // TIMESTAMP 'YYYY-MM-DD HH:MI:SS.FFF'
;
// Identifier: one or more name parts separated by dots, where each part is an
// L_ID token or a non-reserved keyword.
// The T_DOT form below does not accept the optional leading '-' that the
// string-literal form accepts.
ident :
'-'? (L_ID | non_reserved_words) ('.' (L_ID | non_reserved_words))*
(L_ID | non_reserved_words) (T_DOT (L_ID | non_reserved_words))*
;
string : // String literal (single or double quoted)
@ -1538,3 +1543,4 @@ non_reserved_words : // Tokens that are not reserved words
| T_XML
| T_YES
;