feat: deprecate legacy util (#223)
* feat: deprecate legacy util
* test: legacy utils unit test

@@ -1,5 +1,4 @@
 export * from './parser';
-export * from './utils';
 export * from './lib/flinksql/FlinkSqlParserListener';
 export * from './lib/flinksql/FlinkSqlParserVisitor';
 export * from './lib/mysql/MySqlParserVisitor';
@@ -20,3 +19,8 @@ export { SyntaxContextType } from './parser/common/basic-parser-types';
 
 export type * from './parser/common/basic-parser-types';
 export type { SyntaxError, ParseError, ErrorHandler } from './parser/common/parseErrorListener';
+
+/**
+ * @deprecated legacy, will be removed.
+ */
+export * from './utils';
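
The entry point keeps re-exporting './utils', now behind a @deprecated JSDoc tag, so existing imports keep resolving while editors flag them as deprecated. A minimal consumer-side sketch of the effect (the published package name 'dt-sql-parser' is an assumption here):

// Assumed package name; any import path resolving to this entry point behaves the same.
import { legacy_splitSql } from 'dt-sql-parser';

// Compiles and runs as before, but JSDoc-aware editors render the name
// struck through because the re-export carries the @deprecated tag.
const statements = legacy_splitSql('select 1; select 2');
console.log(statements.length); // 2
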
@@ -1,20 +1,19 @@
-import { TokenType, Token, TokenReg } from './token';
+import { Legacy_TokenType, Legacy_Token, Legacy_TokenReg } from './token';
 
 /**
- * Extract lexical information such as comments and delimiters
  * @param {String} sql
+ * @deprecated use parser.createLexer() instead.
  */
-function lexer(input: string): Token[] {
-    // track the position of the current character
+function legacy_lexer(input: string): Legacy_Token[] {
     let current = 0;
     let line = 1;
-    // the final TokenType results
-    const tokens: Token[] = [];
+    const tokens: Legacy_Token[] = [];
 
-    /**
-     * Extract a token of the given TokenType
-     */
-    const extract = (currentChar: string, validator: RegExp, TokenType: TokenType): Token => {
+    const extract = (
+        currentChar: string,
+        validator: RegExp,
+        TokenType: Legacy_TokenType
+    ): Legacy_Token => {
         let value = '';
         const start = current;
         while (validator.test(currentChar)) {
@@ -30,9 +29,6 @@ function lexer(input: string): Token[] {
         };
     };
 
-    /**
-     * Filter out function contents
-     */
     const matchFunction = () => {
         const bracketNum = [current];
         for (let i = current + 1; i < input.length; i++) {
@@ -40,16 +36,16 @@ function lexer(input: string): Token[] {
             if (currentChar === '\n') {
                 line++;
             }
-            if (TokenReg.LeftSmallBracket.test(currentChar)) {
+            if (Legacy_TokenReg.LeftSmallBracket.test(currentChar)) {
                 bracketNum.push(i);
             }
-            if (TokenReg.RightSmallBracket.test(currentChar)) {
+            if (Legacy_TokenReg.RightSmallBracket.test(currentChar)) {
                 const start = bracketNum.pop();
                 const end = i + 1;
                 if (bracketNum.length === 0) {
                     current = end;
                     tokens.push({
-                        type: TokenType.FunctionArguments,
+                        type: Legacy_TokenType.FunctionArguments,
                         value: input.slice(start, end),
                         start,
                         lineNumber: line,
@@ -61,10 +57,11 @@ function lexer(input: string): Token[] {
         }
     };
 
-    /**
-     * Filter (extract) the content inside quotes
-     */
-    const matchQuotation = (currentChar: string, validator: RegExp, TokenType: TokenType) => {
+    const matchQuotation = (
+        currentChar: string,
+        validator: RegExp,
+        TokenType: Legacy_TokenType
+    ) => {
        do {
            if (currentChar === '\n') {
                line++;
@@ -78,36 +75,32 @@ function lexer(input: string): Token[] {
     while (current < input.length) {
         let char = input[current];
 
-        // Handle, in order: bracketed functions, newlines, backquotes, single quotes, double quotes, comments, semicolons
-        // Quoted strings may contain comment markers and semicolons, so handle quoted content first to rule out that interference
-
         if (char === '\n') {
             line++;
             current++;
             continue;
         }
 
-        if (TokenReg.LeftSmallBracket.test(char)) {
+        if (Legacy_TokenReg.LeftSmallBracket.test(char)) {
            matchFunction();
            continue;
        }
 
-        if (TokenReg.BackQuotation.test(char)) {
-            matchQuotation(char, TokenReg.BackQuotation, TokenType.BackQuotation);
+        if (Legacy_TokenReg.BackQuotation.test(char)) {
+            matchQuotation(char, Legacy_TokenReg.BackQuotation, Legacy_TokenType.BackQuotation);
             continue;
         }
 
-        if (TokenReg.SingleQuotation.test(char)) {
-            matchQuotation(char, TokenReg.SingleQuotation, TokenType.SingleQuotation);
+        if (Legacy_TokenReg.SingleQuotation.test(char)) {
+            matchQuotation(char, Legacy_TokenReg.SingleQuotation, Legacy_TokenType.SingleQuotation);
             continue;
         }
 
-        if (TokenReg.DoubleQuotation.test(char)) {
-            matchQuotation(char, TokenReg.DoubleQuotation, TokenType.DoubleQuotation);
+        if (Legacy_TokenReg.DoubleQuotation.test(char)) {
+            matchQuotation(char, Legacy_TokenReg.DoubleQuotation, Legacy_TokenType.DoubleQuotation);
             continue;
         }
 
-        // Handle single-line comments, starting with -- and ending with \n
         if (char === '-' && input[current + 1] === '-') {
             let value = '';
             const start = current;
@@ -117,7 +110,7 @@ function lexer(input: string): Token[] {
                 char = input[++current];
             }
             tokens.push({
-                type: TokenType.Comment,
+                type: Legacy_TokenType.Comment,
                 value,
                 start: start,
                 lineNumber: line,
@@ -126,7 +119,6 @@ function lexer(input: string): Token[] {
             continue;
         }
 
-        // Handle multi-line comments, starting with /* and ending with */
         if (char === '/' && input[current + 1] === '*') {
             let value = '';
             const start = current;
@@ -143,7 +135,7 @@ function lexer(input: string): Token[] {
             ++current;
 
             tokens.push({
-                type: TokenType.Comment,
+                type: Legacy_TokenType.Comment,
                 value,
                 start: start,
                 lineNumber: startLine,
@@ -152,12 +144,11 @@ function lexer(input: string): Token[] {
         };
     };
 
-        // Handle the statement terminator ;
-        if (TokenReg.StatementTerminator.test(char)) {
+        if (Legacy_TokenReg.StatementTerminator.test(char)) {
            const newToken = extract(
                char,
-                TokenReg.StatementTerminator,
-                TokenType.StatementTerminator
+                Legacy_TokenReg.StatementTerminator,
+                Legacy_TokenType.StatementTerminator
            );
            tokens.push(newToken);
            continue;
@@ -169,15 +160,16 @@ function lexer(input: string): Token[] {
 }
 
 /**
- * Split sql
+ * split sql
  * @param {String} sql
+ * @deprecated use parser.splitSQLByStatement() instead.
  */
-function splitSql(sql: string) {
-    const tokens = lexer(sql);
+function legacy_splitSql(sql: string) {
+    const tokens = legacy_lexer(sql);
     const sqlArr = [];
     let startIndex = 0;
-    tokens.forEach((ele: Token) => {
-        if (ele.type === TokenType.StatementTerminator) {
+    tokens.forEach((ele: Legacy_Token) => {
+        if (ele.type === Legacy_TokenType.StatementTerminator) {
             sqlArr.push(sql.slice(startIndex, ele.end));
             startIndex = ele.end;
         }
@@ -189,16 +181,17 @@ function splitSql(sql: string) {
 }
 
 /**
- * Remove comments and leading/trailing whitespace
+ * clean comment
  * @param {String} sql
+ * @deprecated will be removed in future.
  */
-function cleanSql(sql: string) {
-    sql = sql.trim(); // remove leading/trailing whitespace
-    const tokens = lexer(sql);
+function legacy_cleanSql(sql: string) {
+    sql = sql.trim();
+    const tokens = legacy_lexer(sql);
     let resultSql = '';
     let startIndex = 0;
-    tokens.forEach((ele: Token) => {
-        if (ele.type === TokenType.Comment) {
+    tokens.forEach((ele: Legacy_Token) => {
+        if (ele.type === Legacy_TokenType.Comment) {
             resultSql += sql.slice(startIndex, ele.start);
             startIndex = ele.end + 1;
         }
@@ -206,4 +199,4 @@ function cleanSql(sql: string) {
     resultSql += sql.slice(startIndex);
     return resultSql;
 }
-export { cleanSql, splitSql, lexer };
+export { legacy_cleanSql, legacy_splitSql, legacy_lexer };
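
The deprecation notices above point at the maintained parser API: legacy_lexer gives way to parser.createLexer() and legacy_splitSql to parser.splitSQLByStatement(). A migration sketch, assuming a concrete parser class such as FlinkSQL is exported from './parser' and exposes the two methods named in the @deprecated tags:

// FlinkSQL as the concrete parser class is an assumption; substitute
// whichever dialect class the project exports from './parser'.
import { FlinkSQL } from './parser';

const sql = 'select a from t1; select b from t2;';
const parser = new FlinkSQL();

// Replacement for legacy_lexer(sql): a real ANTLR lexer over the input.
const lexer = parser.createLexer(sql);

// Replacement for legacy_splitSql(sql): statement-aware splitting.
const statements = parser.splitSQLByStatement(sql);
console.log(statements?.length); // expected: 2
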
@@ -1,4 +1,7 @@
-export enum TokenType {
+/**
+ * @deprecated will be removed in future.
+ */
+export enum Legacy_TokenType {
     /**
      * Enclosed in single/double/back quotation, `` Symbol
      * 'abc', "abc", `abc`
@@ -32,10 +35,11 @@ export enum TokenType {
 }
 
 /**
+ * @deprecated will be removed in future.
  * Token object
  */
-export interface Token {
-    type: TokenType;
+export interface Legacy_Token {
+    type: Legacy_TokenType;
     value: string;
     start: number;
     end: number;
@@ -44,14 +48,15 @@ export interface Token {
 }
 
 /**
- *  Token recognition rules
+ * @deprecated will be removed in future.
+ * Token recognition rules
  */
-export const TokenReg = {
-    [TokenType.StatementTerminator]: /[;]/,
-    [TokenType.SingleQuotation]: /['|\']/,
-    [TokenType.DoubleQuotation]: /["]/,
-    [TokenType.BackQuotation]: /[`]/,
-    [TokenType.LeftSmallBracket]: /[(]/,
-    [TokenType.RightSmallBracket]: /[)]/,
-    [TokenType.Comma]: /[,]/,
+export const Legacy_TokenReg = {
+    [Legacy_TokenType.StatementTerminator]: /[;]/,
+    [Legacy_TokenType.SingleQuotation]: /['|\']/,
+    [Legacy_TokenType.DoubleQuotation]: /["]/,
+    [Legacy_TokenType.BackQuotation]: /[`]/,
+    [Legacy_TokenType.LeftSmallBracket]: /[(]/,
+    [Legacy_TokenType.RightSmallBracket]: /[)]/,
+    [Legacy_TokenType.Comma]: /[,]/,
 };
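
Legacy_TokenReg uses computed keys, so each rule is stored and looked up under the same enum member the lexer passes around; renaming the enum therefore re-keys the table automatically. A standalone sketch of the lookup, assuming the module lives at src/utils/token:

import { Legacy_TokenType, Legacy_TokenReg } from './src/utils/token';

// The enum member doubles as the object key, keeping the rule table
// and the token type in sync.
const isTerminator = Legacy_TokenReg[Legacy_TokenType.StatementTerminator].test(';');
console.log(isTerminator); // true

One quirk the rename leaves untouched: inside a character class '|' is a literal, so /['|\']/ matches a pipe character as well as a single quote.
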
@@ -1,10 +1,10 @@
-import { lexer, splitSql, cleanSql } from '../../src/utils';
-import { TokenType } from '../../src/utils/token';
+import { legacy_lexer, legacy_splitSql, legacy_cleanSql } from '../../src/utils';
+import { Legacy_TokenType } from '../../src/utils/token';
 
 describe('utils', () => {
     test('split single sql', () => {
         const sql = 'select id,name from user';
-        const result = splitSql(sql);
+        const result = legacy_splitSql(sql);
         expect(result.length).toEqual(1);
     });
 
@@ -16,14 +16,14 @@ describe('utils', () => {
             xxx
         */
         select user from b`;
-        const result = splitSql(sql);
+        const result = legacy_splitSql(sql);
         expect(result.length).toEqual(2);
     });
 
     test('split special quotation sql', () => {
         const sql = `select regexp_replace('a', 'bc', 'xfe'feefe', '233'); 
            select regexp_replace('abc', "fe", '233');`;
-        const result = splitSql(sql);
+        const result = legacy_splitSql(sql);
         expect(result.length).toEqual(2);
     });
 
@@ -45,7 +45,7 @@ describe('utils', () => {
         FROM cte_sales_amounts
         WHERE year = 2018;
         SELECT * FROM table;`;
-        const result = splitSql(sql);
+        const result = legacy_splitSql(sql);
         expect(result.length).toEqual(2);
     });
 
@@ -57,14 +57,14 @@ describe('utils', () => {
             xxx
         */
         select user from b;`;
-        const result = lexer(sql);
+        const result = legacy_lexer(sql);
         expect(result.length).toEqual(4);
     });
     test('lexer for comments', () => {
         const sql = `select * from a;--comments`;
         const expected = `--comments`;
-        const result = lexer(sql);
-        const comments = result.find((token) => token.type === TokenType.Comment);
+        const result = legacy_lexer(sql);
+        const comments = result.find((token) => token.type === Legacy_TokenType.Comment);
         expect(comments?.value).toEqual(expected);
     });
     test('cleanSql', () => {
@@ -75,7 +75,7 @@ describe('utils', () => {
             xxx
         */
         select user from b`;
-        const result = cleanSql(sql);
+        const result = legacy_cleanSql(sql);
         expect(result.indexOf('xxx')).toEqual(-1);
     });
 
@@ -83,7 +83,7 @@ describe('utils', () => {
         const sql = `   
         select * from a;    `;
         const expected = 'select * from a;';
-        const result = cleanSql(sql);
+        const result = legacy_cleanSql(sql);
         expect(result).toEqual(expected);
     });
 });