feat: migrate to antlr4ng (#267)

* feat: replace antlr4ts with antlr4ng

* feat: switch caseInsensitive option on

* feat: recompile all g4 files

* feat: update parser to fit antlr4ng

* test: update tests to fit antlr4ng
This commit is contained in:
Hayden
2024-02-26 20:25:09 +08:00
committed by GitHub
parent 5ce89cb421
commit 195878da9b
112 changed files with 648433 additions and 659067 deletions

View File

@ -5,6 +5,10 @@
lexer grammar FlinkSqlLexer;
options {
caseInsensitive= true;
}
// SKIP
SPACE : [ \t\r\n]+ -> channel(HIDDEN);
@ -585,9 +589,9 @@ ID_LITERAL : ID_LITERAL_FRAG;
fragment JAR_FILE_PARTTARN : '`' ( '\\' . | '``' | ~('`' | '\\'))* '`';
fragment EXPONENT_NUM_PART : 'E' [-+]? DEC_DIGIT+;
fragment ID_LITERAL_FRAG : [A-Z_0-9a-z]*? [A-Z_a-z]+? [A-Z_0-9a-z]*;
fragment ID_LITERAL_FRAG : [A-Z_0-9]*? [A-Z_]+? [A-Z_0-9]*;
fragment DEC_DIGIT : [0-9];
fragment DEC_LETTER : [A-Za-z];
fragment DEC_LETTER : [A-Z];
fragment DQUOTA_STRING : '"' ( '\\' . | '""' | ~('"' | '\\'))* '"';
fragment SQUOTA_STRING : '\'' ('\\' . | '\'\'' | ~('\'' | '\\'))* '\'';
fragment BIT_STRING_L : 'B' '\'' [01]+ '\'';

View File

@ -6,6 +6,7 @@ parser grammar FlinkSqlParser;
options {
tokenVocab=FlinkSqlLexer;
caseInsensitive= true;
}
program

View File

@ -27,8 +27,9 @@
lexer grammar HiveSqlLexer;
// unsupported option caseInsensitive in antlr4@4.9
// options { caseInsensitive = true; }
options {
caseInsensitive= true;
}
// Keywords
KW_ABORT : 'ABORT';
@ -502,7 +503,7 @@ Identifier: (Letter | Digit) (Letter | Digit | '_')* | QuotedIdentifier | '`' Re
fragment QuotedIdentifier: '`' ('``' | ~'`')* '`';
fragment Letter: 'A' ..'Z' | 'a' ..'z';
fragment Letter: 'A' ..'Z';
fragment HexDigit: 'A' ..'F';

View File

@ -29,6 +29,7 @@ parser grammar HiveSqlParser;
options
{
tokenVocab=HiveSqlLexer;
caseInsensitive= true;
}
program

View File

@ -27,7 +27,7 @@ THE SOFTWARE.
lexer grammar ImpalaSqlLexer;
options {
caseInsensitive=true;
caseInsensitive= true;
}
KW_ADD : 'ADD';

View File

@ -21,6 +21,7 @@ parser grammar ImpalaSqlParser;
options
{
tokenVocab=ImpalaSqlLexer;
caseInsensitive= true;
}
program
@ -873,7 +874,7 @@ booleanExpression
| left=booleanExpression operator=KW_OR right=booleanExpression # logicalBinary
;
predicate[ParserRuleContext value]
predicate[antlr.ParserRuleContext value]
: comparisonOperator right=valueExpression # comparison
| comparisonOperator comparisonQuantifier subQueryRelation # quantifiedComparison
| KW_NOT? KW_BETWEEN lower=valueExpression KW_AND upper=valueExpression # between

View File

@ -35,6 +35,7 @@ parser grammar MySqlParser;
options {
tokenVocab= MySqlLexer;
caseInsensitive= true;
}
// Top Level Description

View File

@ -36,6 +36,10 @@
lexer grammar PostgreSQLLexer;
options {
caseInsensitive= true;
}
/**
* Reference Doc: https://www.postgresql.org/docs/16.1/sql-commands.html
*/
@ -673,9 +677,9 @@ KW_BUFFER_USAGE_LIMIT : 'BUFFER_USAGE_LIMIT';
Identifier: IdentifierStartChar IdentifierChar*;
fragment IdentifierStartChar: // these are the valid identifier start characters below 0x7F
[a-zA-Z_]
[A-Z_]
| // these are the valid characters from 0x80 to 0xFF
[\u00AA\u00B5\u00BA\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF]
[\u00AA\u00B5\u00BA\u00C0-\u00D6\u00F8-\u00FF]
| // these are the letters above 0xFF which only need a single UTF-16 code unit
[\u0100-\uD7FF\uE000-\uFFFF]
| // letters which require multiple UTF-16 code units
@ -771,7 +775,7 @@ InvalidUnterminatedBinaryStringConstant: 'B' UnterminatedStringConstant;
HexadecimalStringConstant: UnterminatedHexadecimalStringConstant '\'';
UnterminatedHexadecimalStringConstant: 'X' '\'' [0-9a-fA-F]*;
UnterminatedHexadecimalStringConstant: 'X' '\'' [0-9A-F]*;
InvalidHexadecimalStringConstant: InvalidUnterminatedHexadecimalStringConstant '\'';
@ -791,7 +795,7 @@ Numeric:
fragment Digits: [0-9]+;
PLSQLVARIABLENAME: ':' [a-zA-Z_] [a-zA-Z_0-9$]*;
PLSQLVARIABLENAME: ':' [A-Z_] [A-Z_0-9$]*;
PLSQLIDENTIFIER: ':"' ('\\' . | '""' | ~ ('"' | '\\'))* '"';
//
@ -861,13 +865,13 @@ fragment EscapeStringText: (
'\'\''
| '\\' (
// two-digit hex escapes are still valid when treated as single-digit escapes
'x' [0-9a-fA-F]
| 'u' [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]
| 'U' [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]
'x' [0-9A-F]
| 'u' [0-9A-F] [0-9A-F] [0-9A-F] [0-9A-F]
| 'U' [0-9A-F] [0-9A-F] [0-9A-F] [0-9A-F] [0-9A-F] [0-9A-F] [0-9A-F] [0-9A-F]
|
// Any character other than the Unicode escapes can follow a backslash. Some have
// special meaning, but that doesn't affect the syntax.
~ [xuU]
~ [xu]
)
| ~ ['\\]
)*;

View File

@ -41,6 +41,7 @@ parser grammar PostgreSQLParser;
options {
tokenVocab= PostgreSQLLexer;
caseInsensitive= true;
}
program

View File

@ -2366,7 +2366,7 @@ BIT_STRING_LIT: 'B' ('\'' [01]* '\'')+;
// Rule #284 <HEX_STRING_LIT> - subtoken typecast in <REGULAR_ID>
// Lowercase 'x' is a usual addition to the standard
HEX_STRING_LIT : 'X' ('\'' [A-Fa-f0-9]* '\'')+;
HEX_STRING_LIT : 'X' ('\'' [A-F0-9]* '\'')+;
DOUBLE_PERIOD : '..';
PERIOD : '.';
@ -2468,10 +2468,8 @@ REMARK_COMMENT:
PROMPT_MESSAGE: 'PRO' {this.IsNewlineAtPos(-4)}? 'MPT'? (' ' ~('\r' | '\n')*)? NEWLINE_EOF;
// TODO: should start with a newline
START_CMD
//: 'STA' 'RT'? SPACE ~('\r' | '\n')* NEWLINE_EOF
: // https://docs.oracle.com/cd/B19306_01/server.102/b14357/ch12002.htm
'@' {this.IsNewlineAtPos(-2)}? '@'? ~('\r' | '\n')* NEWLINE_EOF; // https://docs.oracle.com/cd/B19306_01/server.102/b14357/ch12003.htm
START_CMD: // https://docs.oracle.com/cd/B19306_01/server.102/b14357/ch12002.htm
'@' {this.IsNewlineAtPos(-2)}? '@'? ~('\r' | '\n')* NEWLINE_EOF; // https://docs.oracle.com/cd/B19306_01/server.102/b14357/ch12003.htm
REGULAR_ID: SIMPLE_LETTER (SIMPLE_LETTER | '$' | '_' | '#' | [0-9])*;
@ -2481,7 +2479,7 @@ SPACES: [ \t\r\n]+ -> channel(HIDDEN);
fragment NEWLINE_EOF : NEWLINE | EOF;
fragment QUESTION_MARK : '?';
fragment SIMPLE_LETTER : [a-zA-Z];
fragment SIMPLE_LETTER : [A-Z];
fragment FLOAT_FRAGMENT : UNSIGNED_INTEGER* '.'? UNSIGNED_INTEGER+;
fragment NEWLINE : '\r'? '\n';
fragment SPACE : [ \t];

View File

@ -32,6 +32,7 @@ parser grammar PlSqlParser;
options {
tokenVocab=PlSqlLexer;
superClass=PlSqlBaseParser;
caseInsensitive= true;
}
@parser::header {

View File

@ -25,6 +25,10 @@
lexer grammar SparkSqlLexer;
options {
caseInsensitive= true;
}
@members {
/**
* When true, parser should throw ParseException for unclosed bracketed comment.
@ -469,7 +473,7 @@ fragment EXPONENT: 'E' [+-]? DIGIT+;
fragment DIGIT: [0-9];
fragment LETTER: [A-Za-z];
fragment LETTER: [A-Z];
SIMPLE_COMMENT: '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN);

View File

@ -26,6 +26,7 @@ parser grammar SparkSqlParser;
options {
tokenVocab=SparkSqlLexer;
caseInsensitive= true;
}
program

View File

@ -23,6 +23,10 @@
grammar TrinoSql;
options {
caseInsensitive= true;
}
tokens {
DELIMITER
}
@ -419,7 +423,7 @@ booleanExpression
;
// workaround for https://github.com/antlr/antlr4/issues/780
predicate[ParserRuleContext value]
predicate[antlr.ParserRuleContext value]
: comparisonOperator right= valueExpression # comparison
| comparisonOperator comparisonQuantifier '(' query ')' # quantifiedComparison
| KW_NOT? KW_BETWEEN lower= valueExpression KW_AND upper= valueExpression # between
@ -1231,7 +1235,7 @@ fragment EXPONENT: 'E' [+-]? DIGIT+;
fragment DIGIT: [0-9];
fragment LETTER: [A-Za-z];
fragment LETTER: [A-Z];
SIMPLE_COMMENT: '--' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN);