diff --git a/src/index.ts b/src/index.ts
index 9104a84..68ca30c 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -1,5 +1,4 @@
 export * from './parser';
-export * from './utils';
 export * from './lib/flinksql/FlinkSqlParserListener';
 export * from './lib/flinksql/FlinkSqlParserVisitor';
 export * from './lib/mysql/MySqlParserVisitor';
@@ -20,3 +19,8 @@ export { SyntaxContextType } from './parser/common/basic-parser-types';
 export type * from './parser/common/basic-parser-types';
 export type { SyntaxError, ParseError, ErrorHandler } from './parser/common/parseErrorListener';
 
+
+/**
+ * @deprecated legacy, will be removed.
+ */
+export * from './utils';
diff --git a/src/utils/index.ts b/src/utils/index.ts
index 12bfb1d..9ee2a6c 100644
--- a/src/utils/index.ts
+++ b/src/utils/index.ts
@@ -1,20 +1,19 @@
-import { TokenType, Token, TokenReg } from './token';
+import { Legacy_TokenType, Legacy_Token, Legacy_TokenReg } from './token';
 
 /**
- * Extract lexical information such as comments and delimiters
  * @param {String} sql
+ * @deprecated use parser.createLexer() instead.
  */
-function lexer(input: string): Token[] {
-    // Track the position of the current character
+function legacy_lexer(input: string): Legacy_Token[] {
     let current = 0;
     let line = 1;
-    // The final list of resulting tokens
-    const tokens: Token[] = [];
+    const tokens: Legacy_Token[] = [];
 
-    /**
-     * Extract a token of the given TokenType
-     */
-    const extract = (currentChar: string, validator: RegExp, TokenType: TokenType): Token => {
+    const extract = (
+        currentChar: string,
+        validator: RegExp,
+        TokenType: Legacy_TokenType
+    ): Legacy_Token => {
         let value = '';
         const start = current;
         while (validator.test(currentChar)) {
@@ -30,9 +29,6 @@ function lexer(input: string): Token[] {
         };
     };
 
-    /**
-     * Skip over the content of a function call
-     */
     const matchFunction = () => {
         const bracketNum = [current];
         for (let i = current + 1; i < input.length; i++) {
@@ -40,16 +36,16 @@ const currentChar = input[i];
             if (currentChar === '\n') {
                 line++;
             }
-            if (TokenReg.LeftSmallBracket.test(currentChar)) {
+            if (Legacy_TokenReg.LeftSmallBracket.test(currentChar)) {
                 bracketNum.push(i);
             }
-            if (TokenReg.RightSmallBracket.test(currentChar)) {
+            if (Legacy_TokenReg.RightSmallBracket.test(currentChar)) {
                 const start = bracketNum.pop();
                 const end = i + 1;
                 if (bracketNum.length === 0) {
                     current = end;
                     tokens.push({
-                        type: TokenType.FunctionArguments,
+                        type: Legacy_TokenType.FunctionArguments,
                         value: input.slice(start, end),
                         start,
                         lineNumber: line,
@@ -61,10 +57,11 @@
         }
     };
 
-    /**
-     * Extract the content inside quotation marks
-     */
-    const matchQuotation = (currentChar: string, validator: RegExp, TokenType: TokenType) => {
+    const matchQuotation = (
+        currentChar: string,
+        validator: RegExp,
+        TokenType: Legacy_TokenType
+    ) => {
         do {
             if (currentChar === '\n') {
                 line++;
@@ -78,36 +75,32 @@ function lexer(input: string): Token[] {
     while (current < input.length) {
         let char = input[current];
 
-        // Handle, in this order: bracketed functions, newlines, backquotes, single quotes, double quotes, comments, semicolons.
-        // Quoted content may itself contain comment markers and semicolons, so handle quotes first to rule out false matches.
-
         if (char === '\n') {
             line++;
             current++;
             continue;
         }
 
-        if (TokenReg.LeftSmallBracket.test(char)) {
+        if (Legacy_TokenReg.LeftSmallBracket.test(char)) {
             matchFunction();
             continue;
         }
 
-        if (TokenReg.BackQuotation.test(char)) {
-            matchQuotation(char, TokenReg.BackQuotation, TokenType.BackQuotation);
+        if (Legacy_TokenReg.BackQuotation.test(char)) {
+            matchQuotation(char, Legacy_TokenReg.BackQuotation, Legacy_TokenType.BackQuotation);
             continue;
         }
 
-        if (TokenReg.SingleQuotation.test(char)) {
-            matchQuotation(char, TokenReg.SingleQuotation, TokenType.SingleQuotation);
+        if (Legacy_TokenReg.SingleQuotation.test(char)) {
+            matchQuotation(char, Legacy_TokenReg.SingleQuotation, Legacy_TokenType.SingleQuotation);
             continue;
         }
 
-        if (TokenReg.DoubleQuotation.test(char)) {
-            matchQuotation(char, TokenReg.DoubleQuotation, TokenType.DoubleQuotation);
+        if (Legacy_TokenReg.DoubleQuotation.test(char)) {
+            matchQuotation(char, Legacy_TokenReg.DoubleQuotation, Legacy_TokenType.DoubleQuotation);
             continue;
         }
 
-        // Handle single-line comments, which start with -- and end with \n
        if (char === '-' && input[current + 1] === '-') {
             let value = '';
             const start = current;
@@ -117,7 +110,7 @@ function lexer(input: string): Token[] {
                 char = input[++current];
             }
             tokens.push({
-                type: TokenType.Comment,
+                type: Legacy_TokenType.Comment,
                 value,
                 start: start,
                 lineNumber: line,
@@ -126,7 +119,6 @@ function lexer(input: string): Token[] {
             continue;
         }
 
-        // Handle multi-line comments, which start with /* and end with */
         if (char === '/' && input[current + 1] === '*') {
             let value = '';
             const start = current;
@@ -143,7 +135,7 @@ function lexer(input: string): Token[] {
             ++current;
 
             tokens.push({
-                type: TokenType.Comment,
+                type: Legacy_TokenType.Comment,
                 value,
                 start: start,
                 lineNumber: startLine,
@@ -152,12 +144,11 @@ function lexer(input: string): Token[] {
             continue;
         }
 
-        // Handle the statement terminator ;
-        if (TokenReg.StatementTerminator.test(char)) {
+        if (Legacy_TokenReg.StatementTerminator.test(char)) {
             const newToken = extract(
                 char,
-                TokenReg.StatementTerminator,
-                TokenType.StatementTerminator
+                Legacy_TokenReg.StatementTerminator,
+                Legacy_TokenType.StatementTerminator
             );
             tokens.push(newToken);
             continue;
@@ -169,15 +160,16 @@
 }
 
 /**
- * 分割sql
+ * split sql
  * @param {String} sql
+ * @deprecated use parser.splitSQLByStatement() instead.
  */
-function splitSql(sql: string) {
-    const tokens = lexer(sql);
+function legacy_splitSql(sql: string) {
+    const tokens = legacy_lexer(sql);
     const sqlArr = [];
     let startIndex = 0;
-    tokens.forEach((ele: Token) => {
-        if (ele.type === TokenType.StatementTerminator) {
+    tokens.forEach((ele: Legacy_Token) => {
+        if (ele.type === Legacy_TokenType.StatementTerminator) {
             sqlArr.push(sql.slice(startIndex, ele.end));
             startIndex = ele.end;
         }
@@ -189,16 +181,17 @@
 }
 
 /**
- * 清除注释和前后空格
+ * clean comment
  * @param {String} sql
+ * @deprecated will be removed in future.
  */
-function cleanSql(sql: string) {
-    sql = sql.trim(); // trim leading/trailing whitespace
-    const tokens = lexer(sql);
+function legacy_cleanSql(sql: string) {
+    sql = sql.trim();
+    const tokens = legacy_lexer(sql);
     let resultSql = '';
     let startIndex = 0;
-    tokens.forEach((ele: Token) => {
-        if (ele.type === TokenType.Comment) {
+    tokens.forEach((ele: Legacy_Token) => {
+        if (ele.type === Legacy_TokenType.Comment) {
             resultSql += sql.slice(startIndex, ele.start);
             startIndex = ele.end + 1;
         }
@@ -206,4 +199,4 @@
     resultSql += sql.slice(startIndex);
     return resultSql;
 }
-export { cleanSql, splitSql, lexer };
+export { legacy_cleanSql, legacy_splitSql, legacy_lexer };
diff --git a/src/utils/token.ts b/src/utils/token.ts
index 43045bc..9c6fc83 100755
--- a/src/utils/token.ts
+++ b/src/utils/token.ts
@@ -1,4 +1,7 @@
-export enum TokenType {
+/**
+ * @deprecated will be removed in future.
+ */
+export enum Legacy_TokenType {
     /**
      * Enclosed in single/double/back quotation, `` Symbol
      * 'abc', "abc", `abc`
@@ -32,10 +35,11 @@
 }
 
 /**
+ * @deprecated will be removed in future.
  * Token object
  */
-export interface Token {
-    type: TokenType;
+export interface Legacy_Token {
+    type: Legacy_TokenType;
     value: string;
     start: number;
     end: number;
@@ -44,14 +48,15 @@
 }
 
 /**
- * Token recognition rules
+ * @deprecated will be removed in future.
+ * Token recognition rules
  */
-export const TokenReg = {
-    [TokenType.StatementTerminator]: /[;]/,
-    [TokenType.SingleQuotation]: /['|\']/,
-    [TokenType.DoubleQuotation]: /["]/,
-    [TokenType.BackQuotation]: /[`]/,
-    [TokenType.LeftSmallBracket]: /[(]/,
-    [TokenType.RightSmallBracket]: /[)]/,
-    [TokenType.Comma]: /[,]/,
+export const Legacy_TokenReg = {
+    [Legacy_TokenType.StatementTerminator]: /[;]/,
+    [Legacy_TokenType.SingleQuotation]: /['|\']/,
+    [Legacy_TokenType.DoubleQuotation]: /["]/,
+    [Legacy_TokenType.BackQuotation]: /[`]/,
+    [Legacy_TokenType.LeftSmallBracket]: /[(]/,
+    [Legacy_TokenType.RightSmallBracket]: /[)]/,
+    [Legacy_TokenType.Comma]: /[,]/,
 };
diff --git a/test/utils/index.test.ts b/test/utils/index.test.ts
index 0bad874..c57c97e 100644
--- a/test/utils/index.test.ts
+++ b/test/utils/index.test.ts
@@ -1,10 +1,10 @@
-import { lexer, splitSql, cleanSql } from '../../src/utils';
-import { TokenType } from '../../src/utils/token';
+import { legacy_lexer, legacy_splitSql, legacy_cleanSql } from '../../src/utils';
+import { Legacy_TokenType } from '../../src/utils/token';
 
 describe('utils', () => {
     test('split single sql', () => {
         const sql = 'select id,name from user';
-        const result = splitSql(sql);
+        const result = legacy_splitSql(sql);
 
         expect(result.length).toEqual(1);
     });
@@ -16,14 +16,14 @@ describe('utils', () => {
         xxx
         */
         select user from b`;
-        const result = splitSql(sql);
+        const result = legacy_splitSql(sql);
 
         expect(result.length).toEqual(2);
     });
 
     test('split special quotation sql', () => {
         const sql = `select regexp_replace('a', 'bc', 'xfe'feefe', '233');
         select regexp_replace('abc', "fe", '233');`;
-        const result = splitSql(sql);
+        const result = legacy_splitSql(sql);
         expect(result.length).toEqual(2);
     });
@@ -45,7 +45,7 @@ describe('utils', () => {
         FROM cte_sales_amounts
         WHERE year = 2018;
         SELECT * FROM table;`;
-        const result = splitSql(sql);
+        const result = legacy_splitSql(sql);
 
         expect(result.length).toEqual(2);
     });
@@ -57,14 +57,14 @@ describe('utils', () => {
         xxx
         */
         select user from b;`;
-        const result = lexer(sql);
+        const result = legacy_lexer(sql);
         expect(result.length).toEqual(4);
     });
     test('lexer for comments', () => {
         const sql = `select * from a;--comments`;
         const expected = `--comments`;
-        const result = lexer(sql);
-        const comments = result.find((token) => token.type === TokenType.Comment);
+        const result = legacy_lexer(sql);
+        const comments = result.find((token) => token.type === Legacy_TokenType.Comment);
         expect(comments?.value).toEqual(expected);
     });
     test('cleanSql', () => {
@@ -75,7 +75,7 @@ describe('utils', () => {
         xxx
         */
         select user from b`;
-        const result = cleanSql(sql);
+        const result = legacy_cleanSql(sql);
 
         expect(result.indexOf('xxx')).toEqual(-1);
     });
@@ -83,7 +83,7 @@ describe('utils', () => {
         const sql = ` select * from a; `;
         const expected = 'select * from a;';
-        const result = cleanSql(sql);
+        const result = legacy_cleanSql(sql);
 
         expect(result).toEqual(expected);
     });
 });
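Migration sketch for consumers of the deprecated exports. Only the two method names, splitSQLByStatement() and createLexer(), come from the @deprecated tags in this diff; the package name dt-sql-parser, the FlinkSQL dialect parser class, and the return shapes are assumptions based on the surrounding file layout, not confirmed by the diff itself.

// Hypothetical usage; `FlinkSQL` is assumed to be one of the dialect parsers
// re-exported through src/index.ts.
import { FlinkSQL } from 'dt-sql-parser';

const sql = `select id from a; select name from b;`;
const parser = new FlinkSQL();

// Replaces legacy_splitSql(sql): split the input into individual statements.
const statements = parser.splitSQLByStatement(sql);

// Replaces legacy_lexer(sql): build a real ANTLR lexer over the input;
// getAllTokens() is the standard ANTLR call for draining it.
const lexer = parser.createLexer(sql);
const tokens = lexer.getAllTokens();

The parser-based replacements go through the grammar's own token definitions, so quoted strings, comments, and terminators are recognized by the generated lexer rather than by the hand-rolled regex scanning being deprecated here.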