From 340ccae7d702c2fd479799e3ddd7b15ae37a6bd7 Mon Sep 17 00:00:00 2001
From: Hayden <hayden9653@gmail.com>
Date: Wed, 29 Nov 2023 14:58:24 +0800
Subject: [PATCH] feat: deprecate legacy util (#223)

* feat: deprecate legacy util

* test: legacy utils unit test
---
 src/index.ts             |  6 ++-
 src/utils/index.ts       | 93 +++++++++++++++++++---------------------
 src/utils/token.ts       | 29 +++++++------
 test/utils/index.test.ts | 22 +++++-----
 4 files changed, 76 insertions(+), 74 deletions(-)

diff --git a/src/index.ts b/src/index.ts
index 9104a84..68ca30c 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -1,5 +1,4 @@
 export * from './parser';
-export * from './utils';
 export * from './lib/flinksql/FlinkSqlParserListener';
 export * from './lib/flinksql/FlinkSqlParserVisitor';
 export * from './lib/mysql/MySqlParserVisitor';
@@ -20,3 +19,8 @@ export { SyntaxContextType } from './parser/common/basic-parser-types';
 
 export type * from './parser/common/basic-parser-types';
 export type { SyntaxError, ParseError, ErrorHandler } from './parser/common/parseErrorListener';
+
+/**
+ * @deprecated Legacy utilities, will be removed. Prefer the parser APIs (e.g. parser.createLexer(), parser.splitSQLByStatement()).
+ */
+export * from './utils';
diff --git a/src/utils/index.ts b/src/utils/index.ts
index 12bfb1d..9ee2a6c 100644
--- a/src/utils/index.ts
+++ b/src/utils/index.ts
@@ -1,20 +1,19 @@
-import { TokenType, Token, TokenReg } from './token';
+import { Legacy_TokenType, Legacy_Token, Legacy_TokenReg } from './token';
 
 /**
- * 获取 注释 以及 分隔符 等词法信息
  * @param {String} sql
+ * @deprecated use parser.createLexer() instead.
  */
-function lexer(input: string): Token[] {
-    // 记录当前字符的位置
+function legacy_lexer(input: string): Legacy_Token[] {
     let current = 0;
     let line = 1;
-    // 最终的 TokenTypes 结果
-    const tokens: Token[] = [];
+    const tokens: Legacy_Token[] = [];
 
-    /**
-     * 提取 TokenType
-     */
-    const extract = (currentChar: string, validator: RegExp, TokenType: TokenType): Token => {
+    const extract = (
+        currentChar: string,
+        validator: RegExp,
+        TokenType: Legacy_TokenType
+    ): Legacy_Token => {
         let value = '';
         const start = current;
         while (validator.test(currentChar)) {
@@ -30,9 +29,6 @@ function lexer(input: string): Token[] {
         };
     };
 
-    /**
-     * 过滤函数内容
-     */
     const matchFunction = () => {
         const bracketNum = [current];
         for (let i = current + 1; i < input.length; i++) {
@@ -40,16 +36,16 @@ function lexer(input: string): Token[] {
             if (currentChar === '\n') {
                 line++;
             }
-            if (TokenReg.LeftSmallBracket.test(currentChar)) {
+            if (Legacy_TokenReg.LeftSmallBracket.test(currentChar)) {
                 bracketNum.push(i);
             }
-            if (TokenReg.RightSmallBracket.test(currentChar)) {
+            if (Legacy_TokenReg.RightSmallBracket.test(currentChar)) {
                 const start = bracketNum.pop();
                 const end = i + 1;
                 if (bracketNum.length === 0) {
                     current = end;
                     tokens.push({
-                        type: TokenType.FunctionArguments,
+                        type: Legacy_TokenType.FunctionArguments,
                         value: input.slice(start, end),
                         start,
                         lineNumber: line,
@@ -61,10 +57,11 @@ function lexer(input: string): Token[] {
         }
     };
 
-    /**
-     * 过滤（提取） 引号中的内容
-     */
-    const matchQuotation = (currentChar: string, validator: RegExp, TokenType: TokenType) => {
+    const matchQuotation = (
+        currentChar: string,
+        validator: RegExp,
+        TokenType: Legacy_TokenType
+    ) => {
         do {
             if (currentChar === '\n') {
                 line++;
@@ -78,36 +75,32 @@ function lexer(input: string): Token[] {
     while (current < input.length) {
         let char = input[current];
 
-        // 按顺序处理 括号函数 换行符 反引号 单引号 双引号 注释 分号
-        // 引号内 可能包含注释包含的符号以及分号 所以优先处理引号里面的内容 去除干扰信息
-
         if (char === '\n') {
             line++;
             current++;
             continue;
         }
 
-        if (TokenReg.LeftSmallBracket.test(char)) {
+        if (Legacy_TokenReg.LeftSmallBracket.test(char)) {
             matchFunction();
             continue;
         }
 
-        if (TokenReg.BackQuotation.test(char)) {
-            matchQuotation(char, TokenReg.BackQuotation, TokenType.BackQuotation);
+        if (Legacy_TokenReg.BackQuotation.test(char)) {
+            matchQuotation(char, Legacy_TokenReg.BackQuotation, Legacy_TokenType.BackQuotation);
             continue;
         }
 
-        if (TokenReg.SingleQuotation.test(char)) {
-            matchQuotation(char, TokenReg.SingleQuotation, TokenType.SingleQuotation);
+        if (Legacy_TokenReg.SingleQuotation.test(char)) {
+            matchQuotation(char, Legacy_TokenReg.SingleQuotation, Legacy_TokenType.SingleQuotation);
             continue;
         }
 
-        if (TokenReg.DoubleQuotation.test(char)) {
-            matchQuotation(char, TokenReg.DoubleQuotation, TokenType.DoubleQuotation);
+        if (Legacy_TokenReg.DoubleQuotation.test(char)) {
+            matchQuotation(char, Legacy_TokenReg.DoubleQuotation, Legacy_TokenType.DoubleQuotation);
             continue;
         }
 
-        // 处理单行注释，以--开始，\n 结束
         if (char === '-' && input[current + 1] === '-') {
             let value = '';
             const start = current;
@@ -117,7 +110,7 @@ function lexer(input: string): Token[] {
                 char = input[++current];
             }
             tokens.push({
-                type: TokenType.Comment,
+                type: Legacy_TokenType.Comment,
                 value,
                 start: start,
                 lineNumber: line,
@@ -126,7 +119,6 @@ function lexer(input: string): Token[] {
             continue;
         }
 
-        // 处理多行注释，以 /* 开始， */结束
         if (char === '/' && input[current + 1] === '*') {
             let value = '';
             const start = current;
@@ -143,7 +135,7 @@ function lexer(input: string): Token[] {
             ++current;
 
             tokens.push({
-                type: TokenType.Comment,
+                type: Legacy_TokenType.Comment,
                 value,
                 start: start,
                 lineNumber: startLine,
@@ -152,12 +144,11 @@ function lexer(input: string): Token[] {
             continue;
         }
 
-        // 处理结束符 ;
-        if (TokenReg.StatementTerminator.test(char)) {
+        if (Legacy_TokenReg.StatementTerminator.test(char)) {
             const newToken = extract(
                 char,
-                TokenReg.StatementTerminator,
-                TokenType.StatementTerminator
+                Legacy_TokenReg.StatementTerminator,
+                Legacy_TokenType.StatementTerminator
             );
             tokens.push(newToken);
             continue;
@@ -169,15 +160,16 @@ function lexer(input: string): Token[] {
 }
 
 /**
- * 分割sql
+ * split sql
  * @param {String} sql
+ * @deprecated use parser.splitSQLByStatement() instead.
  */
-function splitSql(sql: string) {
-    const tokens = lexer(sql);
+function legacy_splitSql(sql: string) {
+    const tokens = legacy_lexer(sql);
     const sqlArr = [];
     let startIndex = 0;
-    tokens.forEach((ele: Token) => {
-        if (ele.type === TokenType.StatementTerminator) {
+    tokens.forEach((ele: Legacy_Token) => {
+        if (ele.type === Legacy_TokenType.StatementTerminator) {
             sqlArr.push(sql.slice(startIndex, ele.end));
             startIndex = ele.end;
         }
@@ -189,16 +181,17 @@ function splitSql(sql: string) {
 }
 
 /**
- * 清除注释和前后空格
+ * remove comments and trim leading/trailing whitespace
  * @param {String} sql
+ * @deprecated will be removed in future.
  */
-function cleanSql(sql: string) {
-    sql = sql.trim(); // 删除前后空格
-    const tokens = lexer(sql);
+function legacy_cleanSql(sql: string) {
+    sql = sql.trim();
+    const tokens = legacy_lexer(sql);
     let resultSql = '';
     let startIndex = 0;
-    tokens.forEach((ele: Token) => {
-        if (ele.type === TokenType.Comment) {
+    tokens.forEach((ele: Legacy_Token) => {
+        if (ele.type === Legacy_TokenType.Comment) {
             resultSql += sql.slice(startIndex, ele.start);
             startIndex = ele.end + 1;
         }
@@ -206,4 +199,4 @@ function cleanSql(sql: string) {
     resultSql += sql.slice(startIndex);
     return resultSql;
 }
-export { cleanSql, splitSql, lexer };
+export { legacy_cleanSql, legacy_splitSql, legacy_lexer };
diff --git a/src/utils/token.ts b/src/utils/token.ts
index 43045bc..9c6fc83 100755
--- a/src/utils/token.ts
+++ b/src/utils/token.ts
@@ -1,4 +1,7 @@
-export enum TokenType {
+/**
+ * @deprecated will be removed in future.
+ */
+export enum Legacy_TokenType {
     /**
      * Enclosed in single/double/back quotation, `` Symbol
      * 'abc', "abc", `abc`
@@ -32,10 +35,11 @@ export enum TokenType {
 }
 
 /**
  * Token object
+ * @deprecated will be removed in the future.
  */
-export interface Token {
-    type: TokenType;
+export interface Legacy_Token {
+    type: Legacy_TokenType;
     value: string;
     start: number;
     end: number;
@@ -44,14 +48,15 @@ export interface Token {
 }
 
 /**
- *  Token recognition rules
+ * Token recognition rules
+ * @deprecated will be removed in the future.
  */
-export const TokenReg = {
-    [TokenType.StatementTerminator]: /[;]/,
-    [TokenType.SingleQuotation]: /['|\']/,
-    [TokenType.DoubleQuotation]: /["]/,
-    [TokenType.BackQuotation]: /[`]/,
-    [TokenType.LeftSmallBracket]: /[(]/,
-    [TokenType.RightSmallBracket]: /[)]/,
-    [TokenType.Comma]: /[,]/,
+export const Legacy_TokenReg = {
+    [Legacy_TokenType.StatementTerminator]: /[;]/,
+    [Legacy_TokenType.SingleQuotation]: /['|\']/,
+    [Legacy_TokenType.DoubleQuotation]: /["]/,
+    [Legacy_TokenType.BackQuotation]: /[`]/,
+    [Legacy_TokenType.LeftSmallBracket]: /[(]/,
+    [Legacy_TokenType.RightSmallBracket]: /[)]/,
+    [Legacy_TokenType.Comma]: /[,]/,
 };
diff --git a/test/utils/index.test.ts b/test/utils/index.test.ts
index 0bad874..c57c97e 100644
--- a/test/utils/index.test.ts
+++ b/test/utils/index.test.ts
@@ -1,10 +1,10 @@
-import { lexer, splitSql, cleanSql } from '../../src/utils';
-import { TokenType } from '../../src/utils/token';
+import { legacy_lexer, legacy_splitSql, legacy_cleanSql } from '../../src/utils';
+import { Legacy_TokenType } from '../../src/utils/token';
 
 describe('utils', () => {
     test('split single sql', () => {
         const sql = 'select id,name from user';
-        const result = splitSql(sql);
+        const result = legacy_splitSql(sql);
         expect(result.length).toEqual(1);
     });
 
@@ -16,14 +16,14 @@ describe('utils', () => {
             xxx
         */
         select user from b`;
-        const result = splitSql(sql);
+        const result = legacy_splitSql(sql);
         expect(result.length).toEqual(2);
     });
 
     test('split special quotation sql', () => {
         const sql = `select regexp_replace('a', 'bc', 'xfe'feefe', '233'); 
             select regexp_replace('abc', "fe", '233');`;
-        const result = splitSql(sql);
+        const result = legacy_splitSql(sql);
         expect(result.length).toEqual(2);
     });
 
@@ -45,7 +45,7 @@ describe('utils', () => {
         FROM cte_sales_amounts
         WHERE year = 2018;
         SELECT * FROM table;`;
-        const result = splitSql(sql);
+        const result = legacy_splitSql(sql);
         expect(result.length).toEqual(2);
     });
 
@@ -57,14 +57,14 @@ describe('utils', () => {
             xxx
         */
         select user from b;`;
-        const result = lexer(sql);
+        const result = legacy_lexer(sql);
         expect(result.length).toEqual(4);
     });
     test('lexer for comments', () => {
         const sql = `select * from a;--comments`;
         const expected = `--comments`;
-        const result = lexer(sql);
-        const comments = result.find((token) => token.type === TokenType.Comment);
+        const result = legacy_lexer(sql);
+        const comments = result.find((token) => token.type === Legacy_TokenType.Comment);
         expect(comments?.value).toEqual(expected);
     });
     test('cleanSql', () => {
@@ -75,7 +75,7 @@ describe('utils', () => {
             xxx
         */
         select user from b`;
-        const result = cleanSql(sql);
+        const result = legacy_cleanSql(sql);
         expect(result.indexOf('xxx')).toEqual(-1);
     });
 
@@ -83,7 +83,7 @@ describe('utils', () => {
         const sql = `   
         select * from a;    `;
         const expected = 'select * from a;';
-        const result = cleanSql(sql);
+        const result = legacy_cleanSql(sql);
         expect(result).toEqual(expected);
     });
 });