// lava-oushudb-dt-sql-parser/src/utils/index.ts

import { TokenType, Token, TokenReg } from './token';
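// Illustrative example of the tokens lexer() produces (offsets are hand-computed
// for this input and assume TokenReg.StatementTerminator matches a lone ';'):
//
//   lexer('SELECT 1; -- done\n')
//   => [
//        { type: TokenType.StatementTerminator, value: ';', start: 8, end: 9, lineNumber: 1 },
//        { type: TokenType.Comment, value: '-- done', start: 10, end: 17, lineNumber: 1 },
//      ]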
/**
 * Collect lexical information such as comments and statement terminators.
 * @param {String} input
 */
function lexer(input: string): Token[] {
    // Position of the character currently being examined
    let current = 0;
    let line = 1;
    // The final list of tokens
    const tokens: Token[] = [];
    /**
     * Extract a token of the given type, consuming characters while they match the validator.
     */
    // eslint-disable-next-line
    const extract = (currentChar: string, validator: RegExp, tokenType: TokenType): Token => {
        let value = '';
        const start = current;
        while (validator.test(currentChar)) {
            value += currentChar;
            currentChar = input[++current];
        }
        return {
            type: tokenType,
            start: start,
            end: current,
            lineNumber: line,
            value: value,
        };
    };
    /**
     * Skip over the content of a quoted section; its body may contain comment
     * markers or semicolons that must not be treated as tokens.
     */
    // eslint-disable-next-line
    const matchQuotation = (currentChar: string, validator: RegExp, tokenType: TokenType) => {
        do {
            if (currentChar === '\n') {
                line++;
            }
            currentChar = input[++current];
            // Stop at the closing quote, or at end of input if the quote is unterminated
        } while (current < input.length && !validator.test(currentChar));
        ++current;
    };
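    // For example, given the input "select 'a;b'" with current at the opening quote,
    // matchQuotation advances current to just past the closing quote, so the ';' inside
    // the string literal is never treated as a statement terminator (illustrative;
    // the exact quote patterns live in './token').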
    while (current < input.length) {
        let char = input[current];
        // Handle, in this order: newlines, back quotes, single quotes, double quotes, comments, semicolons.
        // Quoted content may itself contain comment markers or semicolons, so the inside of
        // quotes is processed first to filter out that noise.
        if (char === '\n') {
            line++;
            current++;
            continue;
        }
        if (TokenReg.BackQuotation.test(char)) {
            // eslint-disable-next-line
            matchQuotation(char, TokenReg.BackQuotation, TokenType.BackQuotation);
            continue;
        }
        if (TokenReg.SingleQuotation.test(char)) {
            // eslint-disable-next-line
            matchQuotation(char, TokenReg.SingleQuotation, TokenType.SingleQuotation);
            continue;
        }
        if (TokenReg.DoubleQuotation.test(char)) {
            // eslint-disable-next-line
            matchQuotation(char, TokenReg.DoubleQuotation, TokenType.DoubleQuotation);
            continue;
        }
        // Handle single-line comments: they start with -- and end at a newline
        if (char === '-' && input[current + 1] === '-') {
            let value = '';
            const start = current;
            while (char !== '\n' && current < input.length) {
                value += char;
                char = input[++current];
            }
            tokens.push({
                type: TokenType.Comment,
                value,
                start: start,
                lineNumber: line,
                end: current,
            });
            continue;
        }
        // Handle multi-line comments: they start with /* and end with */
        if (char === '/' && input[current + 1] === '*') {
            let value = '';
            const start = current;
            const startLine = line;
            // Stop after the closing */, or at end of input if the comment is unterminated
            while (current < input.length && !(char === '/' && input[current - 1] === '*')) {
                if (char === '\n') {
                    line++;
                }
                value += char;
                char = input[++current];
            }
            if (current < input.length) {
                value += char;
            }
            ++current;
            tokens.push({
                type: TokenType.Comment,
                value,
                start: start,
                lineNumber: startLine,
                end: current,
            });
            continue;
        }
        // Handle the statement terminator ;
        if (TokenReg.StatementTerminator.test(char)) {
            const newToken = extract(
                char,
                TokenReg.StatementTerminator,
                TokenType.StatementTerminator,
            );
            tokens.push(newToken);
            continue;
        }
        current++;
    }
    return tokens;
}
/**
 * Split a SQL string into individual statements.
 * @param {String} sql
 */
function splitSql(sql: string): string[] {
    const tokens = lexer(sql);
    const sqlArr: string[] = [];
    let startIndex = 0;
    tokens.forEach((ele: Token) => {
        if (ele.type === TokenType.StatementTerminator) {
            sqlArr.push(sql.slice(startIndex, ele.end));
            startIndex = ele.end;
        }
    });
    if (startIndex < sql.length) {
        sqlArr.push(sql.slice(startIndex));
    }
    return sqlArr;
}
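// Illustrative example (hand-checked against the logic above):
//   splitSql('select 1;select 2;')
//   => ['select 1;', 'select 2;']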
/**
 * Remove comments and leading/trailing whitespace.
 * @param {String} sql
 */
function cleanSql(sql: string): string {
    sql = sql.trim(); // Remove leading and trailing whitespace
    const tokens = lexer(sql);
    let resultSql = '';
    let startIndex = 0;
    tokens.forEach((ele: Token) => {
        if (ele.type === TokenType.Comment) {
            resultSql += sql.slice(startIndex, ele.start);
            // ele.end already points just past the comment, so resume copying from there
            startIndex = ele.end;
        }
    });
    resultSql += sql.slice(startIndex);
    return resultSql;
}
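// Illustrative example (hand-checked against the logic above; the trailing
// comment is removed while the statement text is kept):
//   cleanSql('select 1; -- trailing comment')
//   => 'select 1; '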
export {
    cleanSql,
    splitSql,
    lexer,
};