feat: refactoring hive SQL lexer
This commit is contained in:
		@ -1,3 +1,4 @@
 | 
			
		||||
 | 
			
		||||
lexer grammar HiveSqlLexer;
 | 
			
		||||
 | 
			
		||||
// Lexer rules
 | 
			
		||||
@ -347,9 +348,12 @@ T_COLON        : ':' ;
 | 
			
		||||
T_COMMA        : ',' ;
 | 
			
		||||
T_PIPE         : '||' ;
 | 
			
		||||
T_DIV          : '/' ;
 | 
			
		||||
T_DOT          : '.' ;
 | 
			
		||||
T_DOT2         : '..' ;
 | 
			
		||||
T_EQUAL        : '=' ;
 | 
			
		||||
T_EQUAL2       : '==' ;
 | 
			
		||||
T_SHARP        : '#'  ;
 | 
			
		||||
T_NOTE         : '!'  ;
 | 
			
		||||
T_NOTEQUAL     : '<>' ;
 | 
			
		||||
T_NOTEQUAL2    : '!=' ;
 | 
			
		||||
T_GREATER      : '>' ;
 | 
			
		||||
@ -357,6 +361,8 @@ T_GREATEREQUAL : '>=' ;
 | 
			
		||||
T_LESS         : '<' ;
 | 
			
		||||
T_LESSEQUAL    : '<=' ;
 | 
			
		||||
T_MUL          : '*' ;
 | 
			
		||||
T_PRECENT      : '%' ;
 | 
			
		||||
T_CALLS        : '@' ;
 | 
			
		||||
T_OPEN_B       : '{' ;
 | 
			
		||||
T_OPEN_P       : '(' ;
 | 
			
		||||
T_OPEN_SB      : '[' ;
 | 
			
		||||
 | 
			
		||||
@ -5,7 +5,9 @@
 | 
			
		||||
   The ASF licenses this file to You under the Apache License, Version 2.0
 | 
			
		||||
   (the "License"); you may not use this file except in compliance with
 | 
			
		||||
   the License.  You may obtain a copy of the License at
 | 
			
		||||
 | 
			
		||||
       http://www.apache.org/licenses/LICENSE-2.0
 | 
			
		||||
 | 
			
		||||
   Unless required by applicable law or agreed to in writing, software
 | 
			
		||||
   distributed under the License is distributed on an "AS IS" BASIS,
 | 
			
		||||
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
			
		||||
@ -13,13 +15,16 @@
 | 
			
		||||
   limitations under the License.
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
// HPL/SQL Procedural SQL Extension Grammar 
 | 
			
		||||
grammar HiveSql;
 | 
			
		||||
parser grammar HiveSqlParser;
 | 
			
		||||
 | 
			
		||||
options {
 | 
			
		||||
    tokenVocab=HiveSqlLexer;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@parser::members {
 | 
			
		||||
this._input = input;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
program : block EOF;
 | 
			
		||||
 | 
			
		||||
block : ((begin_end_block | stmt) T_GO?)+ ;               // Multiple consecutive blocks/statements
 | 
			
		||||
@ -34,7 +39,7 @@ single_block_stmt :                                      // Single BEGIN END blo
 | 
			
		||||
     ;
 | 
			
		||||
 | 
			
		||||
block_end :
 | 
			
		||||
       {!this._input.LT(2).getText().equalsIgnoreCase("TRANSACTION")}? T_END 
 | 
			
		||||
       // Match END only when the token after it is not TRANSACTION (case-insensitive),
       // so that "END TRANSACTION" is not consumed as a block terminator.
       // Note: `!x === y` would negate x first and always be false; use `!==`.
       {this._input.LT(2).text.toUpperCase() !== "TRANSACTION"}? T_END
 | 
			
		||||
     ;
 | 
			
		||||
 | 
			
		||||
proc_block :
 | 
			
		||||
@ -108,7 +113,7 @@ stmt :
 | 
			
		||||
 | 
			
		||||
semicolon_stmt :
 | 
			
		||||
       T_SEMICOLON
 | 
			
		||||
     | '@' | '#' | '/' 
 | 
			
		||||
     | T_CALLS | T_SHARP | T_DIV
 | 
			
		||||
     ;
 | 
			
		||||
 | 
			
		||||
exception_block :       // Exception block
 | 
			
		||||
@ -124,7 +129,7 @@ null_stmt :             // NULL statement (no operation)
 | 
			
		||||
     ;
 | 
			
		||||
 | 
			
		||||
expr_stmt :             // Standalone expression
 | 
			
		||||
       {!this._input.LT(1).getText().equalsIgnoreCase("GO")}? expr
 | 
			
		||||
       {this._input.LT(1).text.toUpperCase() !== "GO"}? expr
 | 
			
		||||
     ;
 | 
			
		||||
 | 
			
		||||
assignment_stmt :       // Assignment statement
 | 
			
		||||
@ -229,7 +234,7 @@ create_local_temp_table_stmt :
 | 
			
		||||
     ;
 | 
			
		||||
 | 
			
		||||
create_table_definition :
 | 
			
		||||
      (T_AS? T_OPEN_P select_stmt T_CLOSE_P | T_AS? select_stmt | T_OPEN_P create_table_columns T_CLOSE_P | T_LIKE table_name) create_table_options?
 | 
			
		||||
      (T_AS? T_OPEN_P select_stmt T_CLOSE_P | T_AS? select_stmt | T_OPEN_P create_table_columns T_CLOSE_P) create_table_options?
 | 
			
		||||
     ;
 | 
			
		||||
 | 
			
		||||
create_table_columns :
 | 
			
		||||
@ -365,7 +370,6 @@ alter_table_add_constraint_item :
 | 
			
		||||
 | 
			
		||||
dtype :                  // Data types
 | 
			
		||||
       T_CHAR
 | 
			
		||||
     | T_CHARACTER
 | 
			
		||||
     | T_BIGINT
 | 
			
		||||
     | T_BINARY_DOUBLE
 | 
			
		||||
     | T_BINARY_FLOAT
 | 
			
		||||
@ -401,7 +405,7 @@ dtype :                  // Data types
 | 
			
		||||
     | T_VARCHAR
 | 
			
		||||
     | T_VARCHAR2
 | 
			
		||||
     | T_XML
 | 
			
		||||
     | ident ('%' (T_TYPE | T_ROWTYPE))?             // User-defined or derived data type
 | 
			
		||||
     | ident (T_PRECENT (T_TYPE | T_ROWTYPE))?             // User-defined or derived data type
 | 
			
		||||
     ;
 | 
			
		||||
 | 
			
		||||
dtype_len :             // Data type length or size specification
 | 
			
		||||
@ -471,9 +475,9 @@ create_procedure_stmt :
 | 
			
		||||
create_routine_params :
 | 
			
		||||
       T_OPEN_P T_CLOSE_P
 | 
			
		||||
     | T_OPEN_P create_routine_param_item (T_COMMA create_routine_param_item)* T_CLOSE_P
 | 
			
		||||
     | {!this._input.LT(1).getText().equalsIgnoreCase("IS") &&
 | 
			
		||||
        !this._input.LT(1).getText().equalsIgnoreCase("AS") &&
 | 
			
		||||
        !(this._input.LT(1).getText().equalsIgnoreCase("DYNAMIC") && this._input.LT(2).getText().equalsIgnoreCase("RESULT"))
 | 
			
		||||
     | {this._input.LT(1).text.toUpperCase() !== "IS" &&
 | 
			
		||||
        this._input.LT(1).text.toUpperCase() !== "AS" &&
 | 
			
		||||
        !(this._input.LT(1).text.toUpperCase() ==="DYNAMIC" && this._input.LT(2).text.toUpperCase() === "RESULT")
 | 
			
		||||
        }?
 | 
			
		||||
       create_routine_param_item (T_COMMA create_routine_param_item)*
 | 
			
		||||
     ;
 | 
			
		||||
@ -520,7 +524,7 @@ if_tsql_stmt :
 | 
			
		||||
     ;
 | 
			
		||||
 | 
			
		||||
if_bteq_stmt :
 | 
			
		||||
       '.' T_IF bool_expr T_THEN single_block_stmt 
 | 
			
		||||
       T_DOT T_IF bool_expr T_THEN single_block_stmt
 | 
			
		||||
     ;
 | 
			
		||||
 | 
			
		||||
elseif_block :
 | 
			
		||||
@ -675,7 +679,7 @@ print_stmt :            // PRINT statement
 | 
			
		||||
     ;
 | 
			
		||||
 | 
			
		||||
quit_stmt :
 | 
			
		||||
       '.'? T_QUIT expr?
 | 
			
		||||
       T_DOT? T_QUIT expr?
 | 
			
		||||
     ;
 | 
			
		||||
 | 
			
		||||
raise_stmt :
 | 
			
		||||
@ -811,7 +815,7 @@ select_list_item :
 | 
			
		||||
     ;
 | 
			
		||||
 | 
			
		||||
select_list_alias :
 | 
			
		||||
       {!this._input.LT(1).getText().equalsIgnoreCase("INTO") && !this._input.LT(1).getText().equalsIgnoreCase("FROM")}? T_AS? ident
 | 
			
		||||
       {this._input.LT(1).text.toUpperCase() !== "INTO" && this._input.LT(1).text.toUpperCase() !== "FROM"}? T_AS? ident
 | 
			
		||||
     | T_OPEN_P T_TITLE L_S_STRING T_CLOSE_P
 | 
			
		||||
     ;
 | 
			
		||||
 | 
			
		||||
@ -861,14 +865,15 @@ from_table_values_row:
 | 
			
		||||
     ;
 | 
			
		||||
 | 
			
		||||
from_alias_clause :
 | 
			
		||||
       {!this._input.LT(1).getText().equalsIgnoreCase("EXEC") &&
 | 
			
		||||
        !this._input.LT(1).getText().equalsIgnoreCase("EXECUTE") && 
 | 
			
		||||
        !this._input.LT(1).getText().equalsIgnoreCase("INNER") &&
 | 
			
		||||
        !this._input.LT(1).getText().equalsIgnoreCase("LEFT") &&
 | 
			
		||||
        !this._input.LT(1).getText().equalsIgnoreCase("GROUP") &&
 | 
			
		||||
        !this._input.LT(1).getText().equalsIgnoreCase("ORDER") &&
 | 
			
		||||
        !this._input.LT(1).getText().equalsIgnoreCase("LIMIT") &&
 | 
			
		||||
        !this._input.LT(1).getText().equalsIgnoreCase("WITH")}?
 | 
			
		||||
       {this._input.LT(1).text.toUpperCase() !== "EXEC" &&
 | 
			
		||||
        this._input.LT(1).text.toUpperCase() !== "EXECUTE" &&
 | 
			
		||||
        this._input.LT(1).text.toUpperCase() !== "INNER" &&
 | 
			
		||||
        this._input.LT(1).text.toUpperCase() !== "LEFT" &&
 | 
			
		||||
        this._input.LT(1).text.toUpperCase() !== "GROUP" &&
 | 
			
		||||
        this._input.LT(1).text.toUpperCase() !== "ORDER" &&
 | 
			
		||||
        this._input.LT(1).text.toUpperCase() !== "LIMIT" &&
 | 
			
		||||
        this._input.LT(1).text.toUpperCase() !== "WITH" &&
 | 
			
		||||
        this._input.LT(1).text.toUpperCase() !== "JOIN"}?
 | 
			
		||||
       T_AS? ident (T_OPEN_P L_ID (T_COMMA L_ID)* T_CLOSE_P)?
 | 
			
		||||
     ;
 | 
			
		||||
 | 
			
		||||
@ -945,7 +950,7 @@ delete_stmt :
 | 
			
		||||
     ;
 | 
			
		||||
 | 
			
		||||
delete_alias :
 | 
			
		||||
       {!this._input.LT(1).getText().equalsIgnoreCase("ALL")}?
 | 
			
		||||
       {this._input.LT(1).text.toUpperCase() !== "ALL"}?
 | 
			
		||||
       T_AS? ident
 | 
			
		||||
     ;
 | 
			
		||||
 | 
			
		||||
@ -1070,7 +1075,7 @@ expr_case_searched :
 | 
			
		||||
     ;
 | 
			
		||||
 | 
			
		||||
expr_cursor_attribute :
 | 
			
		||||
      ident '%' (T_ISOPEN | T_FOUND | T_NOTFOUND)
 | 
			
		||||
      ident T_PRECENT (T_ISOPEN | T_FOUND | T_NOTFOUND)
 | 
			
		||||
    ;
 | 
			
		||||
 | 
			
		||||
expr_agg_window_func :
 | 
			
		||||
@ -1136,7 +1141,7 @@ expr_func_params :
 | 
			
		||||
     ;
 | 
			
		||||
 | 
			
		||||
func_param :
 | 
			
		||||
       {!this._input.LT(1).getText().equalsIgnoreCase("INTO")}? (ident T_EQUAL T_GREATER?)? expr  
 | 
			
		||||
       {this._input.LT(1).text.toUpperCase() !== "INTO"}? (ident T_EQUAL T_GREATER?)? expr
 | 
			
		||||
     ;
 | 
			
		||||
 | 
			
		||||
expr_select :
 | 
			
		||||
@ -1160,7 +1165,7 @@ hive_item :
 | 
			
		||||
     ;
 | 
			
		||||
 | 
			
		||||
host :
 | 
			
		||||
       '!' host_cmd  ';'                   // OS command
 | 
			
		||||
       T_NOTE host_cmd T_SEMICOLON            // OS command: '!' <command text> ';' (named tokens instead of inline literals)
 | 
			
		||||
     | host_stmt
 | 
			
		||||
     ;
 | 
			
		||||
 | 
			
		||||
@ -1173,7 +1178,7 @@ host_stmt :
 | 
			
		||||
     ;
 | 
			
		||||
 | 
			
		||||
file_name :
 | 
			
		||||
       L_FILE | ('/' | '.' '/')? ident ('/' ident)*
 | 
			
		||||
       // Either a lexer-recognized file token, or a path assembled from identifiers:
       // optional leading "/" or "./", then ident segments separated by "/".
       L_FILE | (T_DIV | T_DOT T_DIV)? ident (T_DIV ident)*
 | 
			
		||||
     ;
 | 
			
		||||
 | 
			
		||||
date_literal :                             // DATE 'YYYY-MM-DD' literal
 | 
			
		||||
@ -1185,7 +1190,7 @@ timestamp_literal :                       // TIMESTAMP 'YYYY-MM-DD HH:MI:SS.FFF'
 | 
			
		||||
     ;
 | 
			
		||||
 | 
			
		||||
ident :
 | 
			
		||||
       '-'? (L_ID | non_reserved_words) ('.' (L_ID | non_reserved_words))*
 | 
			
		||||
       (L_ID | non_reserved_words) (T_DOT (L_ID | non_reserved_words))*
 | 
			
		||||
     ;
 | 
			
		||||
 | 
			
		||||
string :                                   // String literal (single or double quoted)
 | 
			
		||||
@ -1538,3 +1543,4 @@ non_reserved_words :                      // Tokens that are not reserved words
 | 
			
		||||
     | T_XML
 | 
			
		||||
     | T_YES
 | 
			
		||||
     ;
 | 
			
		||||
 | 
			
		||||
		Reference in New Issue
	
	Block a user