feat: migrate to antlr4ng (#267)
* feat: replace antlr4ts with antlr4ng * feat: switch the caseInsensitive option on * feat: recompile all g4 files * feat: update parsers to fit antlr4ng * test: update tests to fit antlr4ng
This commit is contained in:
@ -5,6 +5,10 @@
|
||||
|
||||
lexer grammar FlinkSqlLexer;
|
||||
|
||||
options {
|
||||
caseInsensitive= true;
|
||||
}
|
||||
|
||||
// SKIP
|
||||
|
||||
SPACE : [ \t\r\n]+ -> channel(HIDDEN);
|
||||
@ -585,9 +589,9 @@ ID_LITERAL : ID_LITERAL_FRAG;
|
||||
|
||||
fragment JAR_FILE_PARTTARN : '`' ( '\\' . | '``' | ~('`' | '\\'))* '`';
|
||||
fragment EXPONENT_NUM_PART : 'E' [-+]? DEC_DIGIT+;
|
||||
fragment ID_LITERAL_FRAG : [A-Z_0-9a-z]*? [A-Z_a-z]+? [A-Z_0-9a-z]*;
|
||||
fragment ID_LITERAL_FRAG : [A-Z_0-9]*? [A-Z_]+? [A-Z_0-9]*;
|
||||
fragment DEC_DIGIT : [0-9];
|
||||
fragment DEC_LETTER : [A-Za-z];
|
||||
fragment DEC_LETTER : [A-Z];
|
||||
fragment DQUOTA_STRING : '"' ( '\\' . | '""' | ~('"' | '\\'))* '"';
|
||||
fragment SQUOTA_STRING : '\'' ('\\' . | '\'\'' | ~('\'' | '\\'))* '\'';
|
||||
fragment BIT_STRING_L : 'B' '\'' [01]+ '\'';
|
||||
|
@ -6,6 +6,7 @@ parser grammar FlinkSqlParser;
|
||||
|
||||
options {
|
||||
tokenVocab=FlinkSqlLexer;
|
||||
caseInsensitive= true;
|
||||
}
|
||||
|
||||
program
|
||||
|
@ -27,8 +27,9 @@
|
||||
|
||||
lexer grammar HiveSqlLexer;
|
||||
|
||||
// unsupported option caseInsensitive in antlr4@4.9
|
||||
// options { caseInsensitive = true; }
|
||||
options {
|
||||
caseInsensitive= true;
|
||||
}
|
||||
|
||||
// Keywords
|
||||
KW_ABORT : 'ABORT';
|
||||
@ -502,7 +503,7 @@ Identifier: (Letter | Digit) (Letter | Digit | '_')* | QuotedIdentifier | '`' Re
|
||||
|
||||
fragment QuotedIdentifier: '`' ('``' | ~'`')* '`';
|
||||
|
||||
fragment Letter: 'A' ..'Z' | 'a' ..'z';
|
||||
fragment Letter: 'A' ..'Z';
|
||||
|
||||
fragment HexDigit: 'A' ..'F';
|
||||
|
||||
|
@ -29,6 +29,7 @@ parser grammar HiveSqlParser;
|
||||
options
|
||||
{
|
||||
tokenVocab=HiveSqlLexer;
|
||||
caseInsensitive= true;
|
||||
}
|
||||
|
||||
program
|
||||
|
@ -27,7 +27,7 @@ THE SOFTWARE.
|
||||
lexer grammar ImpalaSqlLexer;
|
||||
|
||||
options {
|
||||
caseInsensitive=true;
|
||||
caseInsensitive= true;
|
||||
}
|
||||
|
||||
KW_ADD : 'ADD';
|
||||
|
@ -21,6 +21,7 @@ parser grammar ImpalaSqlParser;
|
||||
options
|
||||
{
|
||||
tokenVocab=ImpalaSqlLexer;
|
||||
caseInsensitive= true;
|
||||
}
|
||||
|
||||
program
|
||||
@ -873,7 +874,7 @@ booleanExpression
|
||||
| left=booleanExpression operator=KW_OR right=booleanExpression # logicalBinary
|
||||
;
|
||||
|
||||
predicate[ParserRuleContext value]
|
||||
predicate[antlr.ParserRuleContext value]
|
||||
: comparisonOperator right=valueExpression # comparison
|
||||
| comparisonOperator comparisonQuantifier subQueryRelation # quantifiedComparison
|
||||
| KW_NOT? KW_BETWEEN lower=valueExpression KW_AND upper=valueExpression # between
|
||||
|
@ -35,6 +35,7 @@ parser grammar MySqlParser;
|
||||
|
||||
options {
|
||||
tokenVocab= MySqlLexer;
|
||||
caseInsensitive= true;
|
||||
}
|
||||
|
||||
// Top Level Description
|
||||
|
@ -36,6 +36,10 @@
|
||||
|
||||
lexer grammar PostgreSQLLexer;
|
||||
|
||||
options {
|
||||
caseInsensitive= true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reference Doc: https://www.postgresql.org/docs/16.1/sql-commands.html
|
||||
*/
|
||||
@ -673,9 +677,9 @@ KW_BUFFER_USAGE_LIMIT : 'BUFFER_USAGE_LIMIT';
|
||||
Identifier: IdentifierStartChar IdentifierChar*;
|
||||
|
||||
fragment IdentifierStartChar: // these are the valid identifier start characters below 0x7F
|
||||
[a-zA-Z_]
|
||||
[A-Z_]
|
||||
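| // these are the valid characters from 0x80 to 0xFF. NOTE(review): with caseInsensitive the set below relies on case folding to cover \u00D8-\u00DE and \u00E0-\u00F6; \u00DF (sharp s) has no single-character case pair and looks no longer covered -- confirm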
|
||||
[\u00AA\u00B5\u00BA\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF]
|
||||
[\u00AA\u00B5\u00BA\u00C0-\u00D6\u00F8-\u00FF]
|
||||
| // these are the letters above 0xFF which only need a single UTF-16 code unit
|
||||
[\u0100-\uD7FF\uE000-\uFFFF]
|
||||
| // letters which require multiple UTF-16 code units
|
||||
@ -771,7 +775,7 @@ InvalidUnterminatedBinaryStringConstant: 'B' UnterminatedStringConstant;
|
||||
|
||||
HexadecimalStringConstant: UnterminatedHexadecimalStringConstant '\'';
|
||||
|
||||
UnterminatedHexadecimalStringConstant: 'X' '\'' [0-9a-fA-F]*;
|
||||
UnterminatedHexadecimalStringConstant: 'X' '\'' [0-9A-F]*;
|
||||
|
||||
InvalidHexadecimalStringConstant: InvalidUnterminatedHexadecimalStringConstant '\'';
|
||||
|
||||
@ -791,7 +795,7 @@ Numeric:
|
||||
|
||||
fragment Digits: [0-9]+;
|
||||
|
||||
PLSQLVARIABLENAME: ':' [a-zA-Z_] [a-zA-Z_0-9$]*;
|
||||
PLSQLVARIABLENAME: ':' [A-Z_] [A-Z_0-9$]*;
|
||||
|
||||
PLSQLIDENTIFIER: ':"' ('\\' . | '""' | ~ ('"' | '\\'))* '"';
|
||||
//
|
||||
@ -861,13 +865,13 @@ fragment EscapeStringText: (
|
||||
'\'\''
|
||||
| '\\' (
|
||||
// two-digit hex escapes are still valid when treated as single-digit escapes
|
||||
'x' [0-9a-fA-F]
|
||||
| 'u' [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]
|
||||
| 'U' [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]
|
||||
'x' [0-9A-F]
|
||||
| 'u' [0-9A-F] [0-9A-F] [0-9A-F] [0-9A-F]
|
||||
| 'U' [0-9A-F] [0-9A-F] [0-9A-F] [0-9A-F] [0-9A-F] [0-9A-F] [0-9A-F] [0-9A-F]
|
||||
|
|
||||
// Any character other than the Unicode escapes can follow a backslash. Some have
|
||||
// special meaning, but that doesn't affect the syntax.
|
||||
~ [xuU]
|
||||
~ [xu]
|
||||
)
|
||||
| ~ ['\\]
|
||||
)*;
|
||||
|
@ -41,6 +41,7 @@ parser grammar PostgreSQLParser;
|
||||
|
||||
options {
|
||||
tokenVocab= PostgreSQLLexer;
|
||||
caseInsensitive= true;
|
||||
}
|
||||
|
||||
program
|
||||
|
@ -2366,7 +2366,7 @@ BIT_STRING_LIT: 'B' ('\'' [01]* '\'')+;
|
||||
// Rule #284 <HEX_STRING_LIT> - subtoken typecast in <REGULAR_ID>
|
||||
// Lowercase 'x' is a usual addition to the standard
|
||||
|
||||
HEX_STRING_LIT : 'X' ('\'' [A-Fa-f0-9]* '\'')+;
|
||||
HEX_STRING_LIT : 'X' ('\'' [A-F0-9]* '\'')+;
|
||||
DOUBLE_PERIOD : '..';
|
||||
PERIOD : '.';
|
||||
|
||||
@ -2468,10 +2468,8 @@ REMARK_COMMENT:
|
||||
PROMPT_MESSAGE: 'PRO' {this.IsNewlineAtPos(-4)}? 'MPT'? (' ' ~('\r' | '\n')*)? NEWLINE_EOF;
|
||||
|
||||
// TODO: should start with a newline
|
||||
START_CMD
|
||||
//: 'STA' 'RT'? SPACE ~('\r' | '\n')* NEWLINE_EOF
|
||||
: // https://docs.oracle.com/cd/B19306_01/server.102/b14357/ch12002.htm
|
||||
'@' {this.IsNewlineAtPos(-2)}? '@'? ~('\r' | '\n')* NEWLINE_EOF; // https://docs.oracle.com/cd/B19306_01/server.102/b14357/ch12003.htm
|
||||
START_CMD: // https://docs.oracle.com/cd/B19306_01/server.102/b14357/ch12002.htm
|
||||
'@' {this.IsNewlineAtPos(-2)}? '@'? ~('\r' | '\n')* NEWLINE_EOF; // https://docs.oracle.com/cd/B19306_01/server.102/b14357/ch12003.htm
|
||||
|
||||
REGULAR_ID: SIMPLE_LETTER (SIMPLE_LETTER | '$' | '_' | '#' | [0-9])*;
|
||||
|
||||
@ -2481,7 +2479,7 @@ SPACES: [ \t\r\n]+ -> channel(HIDDEN);
|
||||
|
||||
fragment NEWLINE_EOF : NEWLINE | EOF;
|
||||
fragment QUESTION_MARK : '?';
|
||||
fragment SIMPLE_LETTER : [a-zA-Z];
|
||||
fragment SIMPLE_LETTER : [A-Z];
|
||||
fragment FLOAT_FRAGMENT : UNSIGNED_INTEGER* '.'? UNSIGNED_INTEGER+;
|
||||
fragment NEWLINE : '\r'? '\n';
|
||||
fragment SPACE : [ \t];
|
@ -32,6 +32,7 @@ parser grammar PlSqlParser;
|
||||
options {
|
||||
tokenVocab=PlSqlLexer;
|
||||
superClass=PlSqlBaseParser;
|
||||
caseInsensitive= true;
|
||||
}
|
||||
|
||||
@parser::header {
|
||||
|
@ -25,6 +25,10 @@
|
||||
|
||||
lexer grammar SparkSqlLexer;
|
||||
|
||||
options {
|
||||
caseInsensitive= true;
|
||||
}
|
||||
|
||||
@members {
|
||||
/**
|
||||
* When true, parser should throw ParseException for unclosed bracketed comment.
|
||||
@ -469,7 +473,7 @@ fragment EXPONENT: 'E' [+-]? DIGIT+;
|
||||
|
||||
fragment DIGIT: [0-9];
|
||||
|
||||
fragment LETTER: [A-Za-z];
|
||||
fragment LETTER: [A-Z];
|
||||
|
||||
SIMPLE_COMMENT: '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN);
|
||||
|
||||
|
@ -26,6 +26,7 @@ parser grammar SparkSqlParser;
|
||||
|
||||
options {
|
||||
tokenVocab=SparkSqlLexer;
|
||||
caseInsensitive= true;
|
||||
}
|
||||
|
||||
program
|
||||
|
@ -23,6 +23,10 @@
|
||||
|
||||
grammar TrinoSql;
|
||||
|
||||
options {
|
||||
caseInsensitive= true;
|
||||
}
|
||||
|
||||
tokens {
|
||||
DELIMITER
|
||||
}
|
||||
@ -419,7 +423,7 @@ booleanExpression
|
||||
;
|
||||
|
||||
// workaround for https://github.com/antlr/antlr4/issues/780
|
||||
predicate[ParserRuleContext value]
|
||||
predicate[antlr.ParserRuleContext value]
|
||||
: comparisonOperator right= valueExpression # comparison
|
||||
| comparisonOperator comparisonQuantifier '(' query ')' # quantifiedComparison
|
||||
| KW_NOT? KW_BETWEEN lower= valueExpression KW_AND upper= valueExpression # between
|
||||
@ -1231,7 +1235,7 @@ fragment EXPONENT: 'E' [+-]? DIGIT+;
|
||||
|
||||
fragment DIGIT: [0-9];
|
||||
|
||||
fragment LETTER: [A-Za-z];
|
||||
fragment LETTER: [A-Z];
|
||||
|
||||
SIMPLE_COMMENT: '--' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN);
|
||||
|
||||
|
Reference in New Issue
Block a user