feat: refactoring hive SQL lexer
This commit is contained in:
parent
654d96028c
commit
557e9a3246
@ -1,3 +1,4 @@
|
|||||||
|
|
||||||
lexer grammar HiveSqlLexer;
|
lexer grammar HiveSqlLexer;
|
||||||
|
|
||||||
// Lexer rules
|
// Lexer rules
|
||||||
@ -347,9 +348,12 @@ T_COLON : ':' ;
|
|||||||
T_COMMA : ',' ;
|
T_COMMA : ',' ;
|
||||||
T_PIPE : '||' ;
|
T_PIPE : '||' ;
|
||||||
T_DIV : '/' ;
|
T_DIV : '/' ;
|
||||||
|
T_DOT : '.' ;
|
||||||
T_DOT2 : '..' ;
|
T_DOT2 : '..' ;
|
||||||
T_EQUAL : '=' ;
|
T_EQUAL : '=' ;
|
||||||
T_EQUAL2 : '==' ;
|
T_EQUAL2 : '==' ;
|
||||||
|
T_SHARP : '#' ;
|
||||||
|
T_NOTE : '!' ;
|
||||||
T_NOTEQUAL : '<>' ;
|
T_NOTEQUAL : '<>' ;
|
||||||
T_NOTEQUAL2 : '!=' ;
|
T_NOTEQUAL2 : '!=' ;
|
||||||
T_GREATER : '>' ;
|
T_GREATER : '>' ;
|
||||||
@ -357,6 +361,8 @@ T_GREATEREQUAL : '>=' ;
|
|||||||
T_LESS : '<' ;
|
T_LESS : '<' ;
|
||||||
T_LESSEQUAL : '<=' ;
|
T_LESSEQUAL : '<=' ;
|
||||||
T_MUL : '*' ;
|
T_MUL : '*' ;
|
||||||
|
T_PRECENT : '%' ;
|
||||||
|
T_CALLS : '@' ;
|
||||||
T_OPEN_B : '{' ;
|
T_OPEN_B : '{' ;
|
||||||
T_OPEN_P : '(' ;
|
T_OPEN_P : '(' ;
|
||||||
T_OPEN_SB : '[' ;
|
T_OPEN_SB : '[' ;
|
||||||
|
@ -5,7 +5,9 @@
|
|||||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
(the "License"); you may not use this file except in compliance with
|
(the "License"); you may not use this file except in compliance with
|
||||||
the License. You may obtain a copy of the License at
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
Unless required by applicable law or agreed to in writing, software
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
@ -13,14 +15,17 @@
|
|||||||
limitations under the License.
|
limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// HPL/SQL Procedural SQL Extension Grammar
|
parser grammar HiveSqlParser;
|
||||||
grammar HiveSql;
|
|
||||||
|
|
||||||
options {
|
options {
|
||||||
tokenVocab=HiveSqlLexer;
|
tokenVocab=HiveSqlLexer;
|
||||||
}
|
}
|
||||||
|
|
||||||
program: block EOF;
|
@parser::members {
|
||||||
|
this._input = input;
|
||||||
|
}
|
||||||
|
|
||||||
|
program : block EOF;
|
||||||
|
|
||||||
block : ((begin_end_block | stmt) T_GO?)+ ; // Multiple consecutive blocks/statements
|
block : ((begin_end_block | stmt) T_GO?)+ ; // Multiple consecutive blocks/statements
|
||||||
|
|
||||||
@ -34,7 +39,7 @@ single_block_stmt : // Single BEGIN END blo
|
|||||||
;
|
;
|
||||||
|
|
||||||
block_end :
|
block_end :
|
||||||
{!this._input.LT(2).getText().equalsIgnoreCase("TRANSACTION")}? T_END
|
{this._input.LT(2).text.toUpperCase() !== "TRANSACTION"}? T_END  // END directly followed by TRANSACTION is not a block end; use !== because a leading "!" binds tighter than ===
|
||||||
;
|
;
|
||||||
|
|
||||||
proc_block :
|
proc_block :
|
||||||
@ -108,7 +113,7 @@ stmt :
|
|||||||
|
|
||||||
semicolon_stmt :
|
semicolon_stmt :
|
||||||
T_SEMICOLON
|
T_SEMICOLON
|
||||||
| '@' | '#' | '/'
|
| T_CALLS | T_SHARP | T_DIV
|
||||||
;
|
;
|
||||||
|
|
||||||
exception_block : // Exception block
|
exception_block : // Exception block
|
||||||
@ -124,7 +129,7 @@ null_stmt : // NULL statement (no operation)
|
|||||||
;
|
;
|
||||||
|
|
||||||
expr_stmt : // Standalone expression
|
expr_stmt : // Standalone expression
|
||||||
{!this._input.LT(1).getText().equalsIgnoreCase("GO")}? expr
|
{this._input.LT(1).text.toUpperCase() !== "GO"}? expr
|
||||||
;
|
;
|
||||||
|
|
||||||
assignment_stmt : // Assignment statement
|
assignment_stmt : // Assignment statement
|
||||||
@ -229,7 +234,7 @@ create_local_temp_table_stmt :
|
|||||||
;
|
;
|
||||||
|
|
||||||
create_table_definition :
|
create_table_definition :
|
||||||
(T_AS? T_OPEN_P select_stmt T_CLOSE_P | T_AS? select_stmt | T_OPEN_P create_table_columns T_CLOSE_P | T_LIKE table_name) create_table_options?
|
(T_AS? T_OPEN_P select_stmt T_CLOSE_P | T_AS? select_stmt | T_OPEN_P create_table_columns T_CLOSE_P) create_table_options?
|
||||||
;
|
;
|
||||||
|
|
||||||
create_table_columns :
|
create_table_columns :
|
||||||
@ -365,7 +370,6 @@ alter_table_add_constraint_item :
|
|||||||
|
|
||||||
dtype : // Data types
|
dtype : // Data types
|
||||||
T_CHAR
|
T_CHAR
|
||||||
| T_CHARACTER
|
|
||||||
| T_BIGINT
|
| T_BIGINT
|
||||||
| T_BINARY_DOUBLE
|
| T_BINARY_DOUBLE
|
||||||
| T_BINARY_FLOAT
|
| T_BINARY_FLOAT
|
||||||
@ -401,7 +405,7 @@ dtype : // Data types
|
|||||||
| T_VARCHAR
|
| T_VARCHAR
|
||||||
| T_VARCHAR2
|
| T_VARCHAR2
|
||||||
| T_XML
|
| T_XML
|
||||||
| ident ('%' (T_TYPE | T_ROWTYPE))? // User-defined or derived data type
|
| ident (T_PRECENT (T_TYPE | T_ROWTYPE))? // User-defined or derived data type
|
||||||
;
|
;
|
||||||
|
|
||||||
dtype_len : // Data type length or size specification
|
dtype_len : // Data type length or size specification
|
||||||
@ -471,9 +475,9 @@ create_procedure_stmt :
|
|||||||
create_routine_params :
|
create_routine_params :
|
||||||
T_OPEN_P T_CLOSE_P
|
T_OPEN_P T_CLOSE_P
|
||||||
| T_OPEN_P create_routine_param_item (T_COMMA create_routine_param_item)* T_CLOSE_P
|
| T_OPEN_P create_routine_param_item (T_COMMA create_routine_param_item)* T_CLOSE_P
|
||||||
| {!this._input.LT(1).getText().equalsIgnoreCase("IS") &&
|
| {this._input.LT(1).text.toUpperCase() !== "IS" &&
|
||||||
!this._input.LT(1).getText().equalsIgnoreCase("AS") &&
|
this._input.LT(1).text.toUpperCase() !== "AS" &&
|
||||||
!(this._input.LT(1).getText().equalsIgnoreCase("DYNAMIC") && this._input.LT(2).getText().equalsIgnoreCase("RESULT"))
|
!(this._input.LT(1).text.toUpperCase() ==="DYNAMIC" && this._input.LT(2).text.toUpperCase() === "RESULT")
|
||||||
}?
|
}?
|
||||||
create_routine_param_item (T_COMMA create_routine_param_item)*
|
create_routine_param_item (T_COMMA create_routine_param_item)*
|
||||||
;
|
;
|
||||||
@ -520,7 +524,7 @@ if_tsql_stmt :
|
|||||||
;
|
;
|
||||||
|
|
||||||
if_bteq_stmt :
|
if_bteq_stmt :
|
||||||
'.' T_IF bool_expr T_THEN single_block_stmt
|
T_DOT T_IF bool_expr T_THEN single_block_stmt
|
||||||
;
|
;
|
||||||
|
|
||||||
elseif_block :
|
elseif_block :
|
||||||
@ -675,7 +679,7 @@ print_stmt : // PRINT statement
|
|||||||
;
|
;
|
||||||
|
|
||||||
quit_stmt :
|
quit_stmt :
|
||||||
'.'? T_QUIT expr?
|
T_DOT? T_QUIT expr?
|
||||||
;
|
;
|
||||||
|
|
||||||
raise_stmt :
|
raise_stmt :
|
||||||
@ -811,7 +815,7 @@ select_list_item :
|
|||||||
;
|
;
|
||||||
|
|
||||||
select_list_alias :
|
select_list_alias :
|
||||||
{!this._input.LT(1).getText().equalsIgnoreCase("INTO") && !this._input.LT(1).getText().equalsIgnoreCase("FROM")}? T_AS? ident
|
{this._input.LT(1).text.toUpperCase() !== "INTO" && this._input.LT(1).text.toUpperCase() !== "FROM"}? T_AS? ident
|
||||||
| T_OPEN_P T_TITLE L_S_STRING T_CLOSE_P
|
| T_OPEN_P T_TITLE L_S_STRING T_CLOSE_P
|
||||||
;
|
;
|
||||||
|
|
||||||
@ -861,14 +865,15 @@ from_table_values_row:
|
|||||||
;
|
;
|
||||||
|
|
||||||
from_alias_clause :
|
from_alias_clause :
|
||||||
{!this._input.LT(1).getText().equalsIgnoreCase("EXEC") &&
|
{this._input.LT(1).text.toUpperCase() !== "EXEC" &&
|
||||||
!this._input.LT(1).getText().equalsIgnoreCase("EXECUTE") &&
|
this._input.LT(1).text.toUpperCase() !== "EXECUTE" &&
|
||||||
!this._input.LT(1).getText().equalsIgnoreCase("INNER") &&
|
this._input.LT(1).text.toUpperCase() !== "INNER" &&
|
||||||
!this._input.LT(1).getText().equalsIgnoreCase("LEFT") &&
|
this._input.LT(1).text.toUpperCase() !== "LEFT" &&
|
||||||
!this._input.LT(1).getText().equalsIgnoreCase("GROUP") &&
|
this._input.LT(1).text.toUpperCase() !== "GROUP" &&
|
||||||
!this._input.LT(1).getText().equalsIgnoreCase("ORDER") &&
|
this._input.LT(1).text.toUpperCase() !== "ORDER" &&
|
||||||
!this._input.LT(1).getText().equalsIgnoreCase("LIMIT") &&
|
this._input.LT(1).text.toUpperCase() !== "LIMIT" &&
|
||||||
!this._input.LT(1).getText().equalsIgnoreCase("WITH")}?
|
this._input.LT(1).text.toUpperCase() !== "WITH" &&
|
||||||
|
this._input.LT(1).text.toUpperCase() !== "JOIN"}?
|
||||||
T_AS? ident (T_OPEN_P L_ID (T_COMMA L_ID)* T_CLOSE_P)?
|
T_AS? ident (T_OPEN_P L_ID (T_COMMA L_ID)* T_CLOSE_P)?
|
||||||
;
|
;
|
||||||
|
|
||||||
@ -945,7 +950,7 @@ delete_stmt :
|
|||||||
;
|
;
|
||||||
|
|
||||||
delete_alias :
|
delete_alias :
|
||||||
{!this._input.LT(1).getText().equalsIgnoreCase("ALL")}?
|
{this._input.LT(1).text.toUpperCase() !== "ALL"}?
|
||||||
T_AS? ident
|
T_AS? ident
|
||||||
;
|
;
|
||||||
|
|
||||||
@ -1070,7 +1075,7 @@ expr_case_searched :
|
|||||||
;
|
;
|
||||||
|
|
||||||
expr_cursor_attribute :
|
expr_cursor_attribute :
|
||||||
ident '%' (T_ISOPEN | T_FOUND | T_NOTFOUND)
|
ident T_PRECENT (T_ISOPEN | T_FOUND | T_NOTFOUND)
|
||||||
;
|
;
|
||||||
|
|
||||||
expr_agg_window_func :
|
expr_agg_window_func :
|
||||||
@ -1136,7 +1141,7 @@ expr_func_params :
|
|||||||
;
|
;
|
||||||
|
|
||||||
func_param :
|
func_param :
|
||||||
{!this._input.LT(1).getText().equalsIgnoreCase("INTO")}? (ident T_EQUAL T_GREATER?)? expr
|
{this._input.LT(1).text.toUpperCase() !== "INTO"}? (ident T_EQUAL T_GREATER?)? expr
|
||||||
;
|
;
|
||||||
|
|
||||||
expr_select :
|
expr_select :
|
||||||
@ -1160,7 +1165,7 @@ hive_item :
|
|||||||
;
|
;
|
||||||
|
|
||||||
host :
|
host :
|
||||||
'!' host_cmd ';' // OS command
|
T_NOTE host_cmd ';' // OS command
|
||||||
| host_stmt
|
| host_stmt
|
||||||
;
|
;
|
||||||
|
|
||||||
@ -1173,7 +1178,7 @@ host_stmt :
|
|||||||
;
|
;
|
||||||
|
|
||||||
file_name :
|
file_name :
|
||||||
L_FILE | ('/' | '.' '/')? ident ('/' ident)*
|
L_FILE | (T_DIV | T_DOT T_DIV)? ident (T_DIV ident)*  // optional leading "/" or "./", then "/"-separated identifiers; T_DIV is the lexer's '/' token
|
||||||
;
|
;
|
||||||
|
|
||||||
date_literal : // DATE 'YYYY-MM-DD' literal
|
date_literal : // DATE 'YYYY-MM-DD' literal
|
||||||
@ -1185,7 +1190,7 @@ timestamp_literal : // TIMESTAMP 'YYYY-MM-DD HH:MI:SS.FFF'
|
|||||||
;
|
;
|
||||||
|
|
||||||
ident :
|
ident :
|
||||||
'-'? (L_ID | non_reserved_words) ('.' (L_ID | non_reserved_words))*
|
(L_ID | non_reserved_words) (T_DOT (L_ID | non_reserved_words))*
|
||||||
;
|
;
|
||||||
|
|
||||||
string : // String literal (single or double quoted)
|
string : // String literal (single or double quoted)
|
||||||
@ -1538,3 +1543,4 @@ non_reserved_words : // Tokens that are not reserved words
|
|||||||
| T_XML
|
| T_XML
|
||||||
| T_YES
|
| T_YES
|
||||||
;
|
;
|
||||||
|
|
Loading…
Reference in New Issue
Block a user