/*
 * Resulting source of the patch
 *   "feat(utils): add cleanSql、splitSql、lexer func and test"
 *   (commit 7d6c753d824cfb8e3808132356a2c78bda81795c, Erindcl, 9 Dec 2020),
 * written out as one formatted module: the token definitions from
 * src/utils/token.ts followed by the helpers from src/utils/index.ts.
 * The jest changes the patch makes to test/utils/index.test.ts are not
 * reproduced here because they need a test runner.
 */

// ---------------------------------------------------------------------------
// src/utils/token.ts
// ---------------------------------------------------------------------------

/**
 * Kinds of lexical element the lexer distinguishes.
 */
export enum TokenType {
    /**
     * Text enclosed in single, double or back quotation marks:
     * 'abc', "abc", `abc`
     */
    SingleQuotation = 'SingleQuotation',
    DoubleQuotation = 'DoubleQuotation',
    BackQuotation = 'BackQuotation',

    /**
     * Language element type
     */
    Comment = 'Comment',

    /**
     * Statement
     */
    StatementTerminator = 'StatementTerminator',

    /**
     * Others
     */
    Error = 'Error',
}

/**
 * Token object.
 *
 * `start` is the index of the token's first character in the input and `end`
 * is the index just past its last character, so
 * `input.slice(start, end) === value`.
 */
export interface Token {
    type: TokenType;
    value: string;
    start: number;
    end: number;
    lineNumber: number;
    message?: string;
}

/**
 * Token recognition rules: one single-character matcher per delimiter.
 */
export const TokenReg = {
    [TokenType.StatementTerminator]: /[;]/,
    [TokenType.SingleQuotation]: /[']/,
    [TokenType.DoubleQuotation]: /["]/,
    [TokenType.BackQuotation]: /[`]/,
};

// ---------------------------------------------------------------------------
// src/utils/index.ts
// ---------------------------------------------------------------------------

/**
 * Scan SQL text and collect its comments and statement terminators.
 *
 * Quoted sections (back, single and double quotes) are skipped without
 * producing tokens, so comment markers and `;` inside them are ignored.
 * Escape sequences inside quotes are not interpreted. An unterminated quote
 * or comment extends to the end of the input.
 *
 * @param input SQL text to scan
 * @returns Comment and StatementTerminator tokens in source order
 */
function lexer(input: string): Token[] {
    const length = input.length;
    const tokens: Token[] = [];
    // Index of the character currently being examined.
    let current = 0;
    // 1-based line number of `current`.
    let line = 1;

    /**
     * Skip a quoted section whose opening quote is at `current`, keeping the
     * line counter in sync. Stops at the end of input if the quote is never
     * closed (the previous implementation looped forever in that case).
     */
    const skipQuoted = (closer: RegExp): void => {
        current++; // step over the opening quote
        while (current < length && !closer.test(input.charAt(current))) {
            if (input.charAt(current) === '\n') {
                line++;
            }
            current++;
        }
        if (current < length) {
            current++; // step over the closing quote
        }
    };

    while (current < length) {
        const char = input.charAt(current);

        // Order matters: newline, back quote, single quote, double quote,
        // comments, terminator. Quotes are handled before comments and `;`
        // because quoted text may contain those characters.

        if (char === '\n') {
            line++;
            current++;
            continue;
        }

        if (TokenReg.BackQuotation.test(char)) {
            skipQuoted(TokenReg.BackQuotation);
            continue;
        }

        if (TokenReg.SingleQuotation.test(char)) {
            skipQuoted(TokenReg.SingleQuotation);
            continue;
        }

        if (TokenReg.DoubleQuotation.test(char)) {
            skipQuoted(TokenReg.DoubleQuotation);
            continue;
        }

        // Single-line comment: starts with `--`, ends before the next '\n'
        // or at the end of input when there is no trailing newline.
        if (input.startsWith('--', current)) {
            const start = current;
            const newlineAt = input.indexOf('\n', start);
            const end = newlineAt === -1 ? length : newlineAt;
            tokens.push({
                type: TokenType.Comment,
                value: input.slice(start, end),
                start,
                end,
                lineNumber: line,
            });
            current = end;
            continue;
        }

        // Multi-line comment: starts with `/*`, ends with `*/`. The closer is
        // searched for after the opener so that `/*/` is not mistaken for a
        // complete comment.
        if (input.startsWith('/*', current)) {
            const start = current;
            const closerAt = input.indexOf('*/', start + 2);
            const end = closerAt === -1 ? length : closerAt + 2;
            const value = input.slice(start, end);
            tokens.push({
                type: TokenType.Comment,
                value,
                start,
                end,
                lineNumber: line, // line on which the comment starts
            });
            line += value.split('\n').length - 1;
            current = end;
            continue;
        }

        // Statement terminator: a run of consecutive `;` forms one token.
        if (TokenReg.StatementTerminator.test(char)) {
            const start = current;
            while (
                current < length &&
                TokenReg.StatementTerminator.test(input.charAt(current))
            ) {
                current++;
            }
            tokens.push({
                type: TokenType.StatementTerminator,
                value: input.slice(start, current),
                start,
                end: current,
                lineNumber: line,
            });
            continue;
        }

        current++;
    }

    return tokens;
}

/**
 * Split SQL text into statements.
 *
 * Each piece ends with its terminator; text after the last terminator (if
 * any) is returned as a final piece. Concatenating the pieces yields the
 * original input.
 *
 * @param sql SQL text, possibly containing several statements
 * @returns the statements in source order
 */
function splitSql(sql: string): string[] {
    const statements: string[] = [];
    let startIndex = 0;
    for (const token of lexer(sql)) {
        if (token.type === TokenType.StatementTerminator) {
            statements.push(sql.slice(startIndex, token.end));
            startIndex = token.end;
        }
    }
    if (startIndex < sql.length) {
        statements.push(sql.slice(startIndex));
    }
    return statements;
}

/**
 * Remove comments and leading/trailing whitespace from SQL text.
 *
 * @param sql SQL text
 * @returns the text with every comment token cut out, then trimmed
 */
function cleanSql(sql: string): string {
    let resultSql = '';
    let startIndex = 0;
    for (const token of lexer(sql)) {
        if (token.type !== TokenType.Comment) {
            continue;
        }
        resultSql += sql.slice(startIndex, token.start);
        // `end` is exclusive, so resume exactly there; skipping one more
        // character would drop the character that follows the comment.
        startIndex = token.end;
    }
    resultSql += sql.slice(startIndex);
    // Strings are immutable: trim() returns a new string that must be used.
    return resultSql.trim();
}

export {
    cleanSql,
    splitSql,
    lexer,
};