From 89bd5b172d2472bf9edde7a5f50d60f0e0ea95b6 Mon Sep 17 00:00:00 2001 From: xigua Date: Tue, 15 Dec 2020 15:04:46 +0800 Subject: [PATCH 01/13] update README.md, add README-zh_CN.md --- README-zh_CN.md | 216 +++++++++++++++++++++++++++++++++++++++++ README.md | 249 ++++++++++++++++++++++++++++++++++-------------- 2 files changed, 392 insertions(+), 73 deletions(-) create mode 100644 README-zh_CN.md diff --git a/README-zh_CN.md b/README-zh_CN.md new file mode 100644 index 0000000..487c28a --- /dev/null +++ b/README-zh_CN.md @@ -0,0 +1,216 @@ +# dt-sql-parser + +[![NPM version][npm-image]][npm-url] + +[npm-image]: https://img.shields.io/npm/v/dt-sql-parser.svg?style=flat-square +[npm-url]: https://www.npmjs.com/package/dt-sql-parser + +此项目是基于 [ANTLR4](https://github.com/antlr/antlr4) 开发的 SQL 语言解析器。主要用于大数据开发中,对各类 SQL 的解析。目前支持的 SQL: + +- MySQL +- Flink SQL +- Spark SQL +- Hive SQL +- PL/SQL + +每种 SQL 都提供了对应基础类、Visitor 类和 Listener 类,包含了生成 token、生成 AST、语法校验、visitor 和 listener 模式遍历 AST 指定节点等功能。 + +此外,为了方便解析,还提供了几个辅助方法可以在解析前对 SQL 进行格式处理。主要作用是清除 SQL 语句中的 '--' 和 '/**/' 两种类型的注释,以及拆分大段 SQL。 + +提示:项目中的 grammar 文件也可以通过 [ANTLR4](https://github.com/antlr/antlr4) 编译成其他语言 + +[English](./README.md) | 简体中文 + +## 安装 + +``` +// use npm +npm i dt-sql-parser --save + +// use yarn +yarn add dt-sql-parser +``` + +## 示例 + +### Clean + +清除注释和前后空格 + +```javascript +import { cleanSql } from 'dt-sql-parser'; + +const sql = `-- comment comment +select id,name from user1; ` +const cleanedSql = cleanSql(sql) +console.log(cleanedSql) + +/* +select id,name from user1; +*/ +``` + +### Split + +分割 sql + +```javascript +import { splitSql } from 'dt-sql-parser'; + +const sql = `select id,name from user1; +select id,name from user2;` +const sqlList = splitSql(sql) +console.log(sqlList) + +/* +["select id,name from user1;", "\nselect id,name from user2;"] +*/ +``` + +### Tokens + +对 sql 语句进行词法分析,生成 token + +```javascript +import { GenericSQL } from 'dt-sql-parser'; + +const parser = new GenericSQL() +const 
sql = 'select id,name,sex from user1;' +const tokens = parser.getAllTokens(sql) +console.log(tokens) +/* +[ + { + channel: 0 + column: 0 + line: 1 + source: [SqlLexer, InputStream] + start: 0 + stop: 5 + tokenIndex: -1 + type: 137 + _text: null + text: "SELECT" + }, + ... +] +*/ +``` + +### Syntax validation + +validate 方法对 sql 语句的语法正确性进行校验,返回一个由 error 组成的数组 + +```javascript +import { GenericSQL } from 'dt-sql-parser'; + +const validate = (sql) => { + const parser = new GenericSQL() + const errors = parser.validate(sql) + console.log(errors) +} +``` +语法正确的 sql: +```javascript +const correctSql = 'select id,name from user1;' +validate(correctSql) +/* +[] +*/ +``` +包含错误语法的 sql: +```javascript +const incorrectSql = 'selec id,name from user1;' +validate(incorrectSql) +/* +[ + { + endCol: 5, + endLine: 1, + startCol: 0, + startLine: 1, + message: "mismatched input 'SELEC' expecting {, 'ALTER', 'ANALYZE', 'CALL', 'CHANGE', 'CHECK', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DROP', 'EXPLAIN', 'GET', 'GRANT', 'INSERT', 'KILL', 'LOAD', 'LOCK', 'OPTIMIZE', 'PURGE', 'RELEASE', 'RENAME', 'REPLACE', 'RESIGNAL', 'REVOKE', 'SELECT', 'SET', 'SHOW', 'SIGNAL', 'UNLOCK', 'UPDATE', 'USE', 'BEGIN', 'BINLOG', 'CACHE', 'CHECKSUM', 'COMMIT', 'DEALLOCATE', 'DO', 'FLUSH', 'HANDLER', 'HELP', 'INSTALL', 'PREPARE', 'REPAIR', 'RESET', 'ROLLBACK', 'SAVEPOINT', 'START', 'STOP', 'TRUNCATE', 'UNINSTALL', 'XA', 'EXECUTE', 'SHUTDOWN', '--', '(', ';'}" + } +] +*/ +``` + +### Visitor + +使用 visitor 模式访问 AST 中的指定节点 + +```javascript +import { GenericSQL, SqlParserVisitor } from 'dt-sql-parser'; + +const parser = new GenericSQL() +const sql = `select id,name from user1;` +// parseTree +const tree = parser.parse(sql) +class MyVisitor extends SqlParserVisitor { + // 重写 visitTableName 方法 + visitTableName(ctx) { + let tableName = ctx.getText().toLowerCase() + console.log('TableName', tableName) + } + // 重写 visitSelectElements 方法 + visitSelectElements(ctx) { + let selectElements = ctx.getText().toLowerCase() + 
console.log('SelectElements', selectElements) + } +} +const visitor = new MyVisitor() +visitor.visit(tree) + +/* +SelectElements id,name +TableName user1 +*/ + +``` +提示:使用 Visitor 模式时,节点的方法名称可以在对应 SQL 目录下的 Visitor 文件中查找 + +### Listener + +listener 模式,利用 [ANTLR4](https://github.com/antlr/antlr4) 提供的 ParseTreeWalker 对象遍历 AST,进入各个节点时调用对应的方法。 + +```javascript +import { GenericSQL, SqlParserListener } from 'dt-sql-parser'; + +const parser = new GenericSQL(); +const sql = 'select id,name from user1;' +// parseTree +const tree = parser.parse(sql) +class MyListener extends SqlParserListener { + enterTableName(ctx) { + let tableName = ctx.getText().toLowerCase() + console.log('TableName', tableName) + } + enterSelectElements(ctx) { + let selectElements = ctx.getText().toLowerCase() + log('SelectElements', selectElements) + } +} +const listenTableName = new MyListener(); +parser.listen(listenTableName, tree); + +/* +SelectElements id,name +TableName user1 +*/ + +``` + +提示:使用 Listener 模式时,节点的方法名称可以在对应 SQL 目录下的 Listener 文件中查找 + +### 其他 + +- parserTreeToString 将 SQL 解析成 AST,再转成 string 形式 + +## 路线图 + +- Auto-complete +- Impala SQL + +## 许可证 + +[MIT](./LICENSE) \ No newline at end of file diff --git a/README.md b/README.md index 9212497..487c28a 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,3 @@ -> 如果你只想单纯的解析(SQL/SparkSQL),请使用 [cuopyue](https://github.com/HSunboy/cuopyue) - # dt-sql-parser [![NPM version][npm-image]][npm-url] @@ -7,107 +5,212 @@ [npm-image]: https://img.shields.io/npm/v/dt-sql-parser.svg?style=flat-square [npm-url]: https://www.npmjs.com/package/dt-sql-parser -本项目用于处理SQL,目前含有功能 +此项目是基于 [ANTLR4](https://github.com/antlr/antlr4) 开发的 SQL 语言解析器。主要用于大数据开发中,对各类 SQL 的解析。目前支持的 SQL: -1. 校验SQL,hive SQL,impala SQL,flinkSQL 等语法,并给予错误信息与建议提示 -2. SQL分割,根据`;`将sql分割为数组 -3. 
去除SQL中的的注释(目前支持`--`,`/**/`类型注释) +- MySQL +- Flink SQL +- Spark SQL +- Hive SQL +- PL/SQL -## 用法 +每种 SQL 都提供了对应基础类、Visitor 类和 Listener 类,包含了生成 token、生成 AST、语法校验、visitor 和 listener 模式遍历 AST 指定节点等功能。 -### 过滤注释 / SQL分割 +此外,为了方便解析,还提供了几个辅助方法可以在解析前对 SQL 进行格式处理。主要作用是清除 SQL 语句中的 '--' 和 '/**/' 两种类型的注释,以及拆分大段 SQL。 -``` javascript -const dtFilter=require("dt-sql-parser").filter; -const sql=` -/*sttttttttart*/create table /*hhhhhhhh -hhhhhh -aaaaaa*/ sql_task_comment_test(id int comment 'id') comment 'sql test'; - --eeeeeeeend -` -console.log(dtFilter.filterComments(sql))//过滤注释 -console.log(dtFilter.splitSql(sql));//分割sql +提示:项目中的 grammar 文件也可以通过 [ANTLR4](https://github.com/antlr/antlr4) 编译成其他语言 + +[English](./README.md) | 简体中文 + +## 安装 + +``` +// use npm +npm i dt-sql-parser --save + +// use yarn +yarn add dt-sql-parser ``` -### 校验hive sql语法 +## 示例 -``` javascript -const dtSqlParser=require("dt-sql-parser").parser; +### Clean -console.log(dtSqlParser.parseSyntax("selet * form",'hive')); +清除注释和前后空格 + +```javascript +import { cleanSql } from 'dt-sql-parser'; + +const sql = `-- comment comment +select id,name from user1; ` +const cleanedSql = cleanSql(sql) +console.log(cleanedSql) /* -{ - "text": "selet",//错误部分 - "token": "REGULAR_IDENTIFIER",//类型 - "line": 0, - "loc": {//错误位置信息 - "first_line": 1, - "last_line": 1, - "first_column": 0, - "last_column": 5 - }, - "ruleId": "0", - "expected": [//建议输入内容 - { - "text": "select",//建议内容 - "distance": 1//建议优先级 - }, - { - "text": "delete", - "distance": 2 - } - ], - "recoverable": false, - "incompleteStatement": true -} +select id,name from user1; */ ``` -## API +### Split -### filter +分割 sql -#### function filterComments(sql:string):string +```javascript +import { splitSql } from 'dt-sql-parser'; -过滤 `sql` 注释(支持`/*`和`--`) +const sql = `select id,name from user1; +select id,name from user2;` +const sqlList = splitSql(sql) +console.log(sqlList) -#### function splitSql(sql:string):Array +/* +["select id,name from user1;", "\nselect id,name 
from user2;"] +*/ +``` -自动去除注释,并且提取出各个 `sql` +### Tokens -### parser +对 sql 语句进行词法分析,生成 token -#### function parseSyntax(sql:string|Array, type?:string):Object|boolean +```javascript +import { GenericSQL } from 'dt-sql-parser'; -校验 `sql` 语法,如果没错误,则返回 `false`,否则返回错误详细信息 +const parser = new GenericSQL() +const sql = 'select id,name,sex from user1;' +const tokens = parser.getAllTokens(sql) +console.log(tokens) +/* +[ + { + channel: 0 + column: 0 + line: 1 + source: [SqlLexer, InputStream] + start: 0 + stop: 5 + tokenIndex: -1 + type: 137 + _text: null + text: "SELECT" + }, + ... +] +*/ +``` -可以提供一个含有两个字符串的数组,代表被光标分割的两个 `sql片段` +### Syntax validation -#### function parserSql(sql:string|Array, type?:string):Object +validate 方法对 sql 语句的语法正确性进行校验,返回一个由 error 组成的数组 -解析 `sql` 语法,根据上下文提示补全字段与其它辅助信息 +```javascript +import { GenericSQL } from 'dt-sql-parser'; -可以提供一个含有两个字符串的数组,代表被光标分割的两个sql片段 +const validate = (sql) => { + const parser = new GenericSQL() + const errors = parser.validate(sql) + console.log(errors) +} +``` +语法正确的 sql: +```javascript +const correctSql = 'select id,name from user1;' +validate(correctSql) +/* +[] +*/ +``` +包含错误语法的 sql: +```javascript +const incorrectSql = 'selec id,name from user1;' +validate(incorrectSql) +/* +[ + { + endCol: 5, + endLine: 1, + startCol: 0, + startLine: 1, + message: "mismatched input 'SELEC' expecting {, 'ALTER', 'ANALYZE', 'CALL', 'CHANGE', 'CHECK', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DROP', 'EXPLAIN', 'GET', 'GRANT', 'INSERT', 'KILL', 'LOAD', 'LOCK', 'OPTIMIZE', 'PURGE', 'RELEASE', 'RENAME', 'REPLACE', 'RESIGNAL', 'REVOKE', 'SELECT', 'SET', 'SHOW', 'SIGNAL', 'UNLOCK', 'UPDATE', 'USE', 'BEGIN', 'BINLOG', 'CACHE', 'CHECKSUM', 'COMMIT', 'DEALLOCATE', 'DO', 'FLUSH', 'HANDLER', 'HELP', 'INSTALL', 'PREPARE', 'REPAIR', 'RESET', 'ROLLBACK', 'SAVEPOINT', 'START', 'STOP', 'TRUNCATE', 'UNINSTALL', 'XA', 'EXECUTE', 'SHUTDOWN', '--', '(', ';'}" + } +] +*/ +``` -### flinksqlParser +### Visitor -#### function flinksqlParser (sql: sql): 
SyntaxError +使用 visitor 模式访问 AST 中的指定节点 -校验 `flinksql` 语法。 +```javascript +import { GenericSQL, SqlParserVisitor } from 'dt-sql-parser'; ->本项目文档不是很详细,也不准确(暂时没精力写),项目功能可以满足 hivesql,sql,impala,flinksql 的语法检查和提示功能。 -具体使用方式可以参照代码中的 ts 类型。 ----- +const parser = new GenericSQL() +const sql = `select id,name from user1;` +// parseTree +const tree = parser.parse(sql) +class MyVisitor extends SqlParserVisitor { + // 重写 visitTableName 方法 + visitTableName(ctx) { + let tableName = ctx.getText().toLowerCase() + console.log('TableName', tableName) + } + // 重写 visitSelectElements 方法 + visitSelectElements(ctx) { + let selectElements = ctx.getText().toLowerCase() + console.log('SelectElements', selectElements) + } +} +const visitor = new MyVisitor() +visitor.visit(tree) -hive,impala语法解析文件来自[Hue](https://github.com/cloudera/hue) +/* +SelectElements id,name +TableName user1 +*/ ----- +``` +提示:使用 Visitor 模式时,节点的方法名称可以在对应 SQL 目录下的 Visitor 文件中查找 -### ChangeLog +### Listener -- 1.1.8 添加转义字符支持 -- 1.1.9 添加函数的中括号语法支持[ split(nameList](0) ) -- 1.2.0 添加 ts,添加测试 -- 2.0.0 添加flinksql语法检查 -- 3.0.0 拆分hive,impala,集成最新 `HUE` 方案 +listener 模式,利用 [ANTLR4](https://github.com/antlr/antlr4) 提供的 ParseTreeWalker 对象遍历 AST,进入各个节点时调用对应的方法。 + +```javascript +import { GenericSQL, SqlParserListener } from 'dt-sql-parser'; + +const parser = new GenericSQL(); +const sql = 'select id,name from user1;' +// parseTree +const tree = parser.parse(sql) +class MyListener extends SqlParserListener { + enterTableName(ctx) { + let tableName = ctx.getText().toLowerCase() + console.log('TableName', tableName) + } + enterSelectElements(ctx) { + let selectElements = ctx.getText().toLowerCase() + log('SelectElements', selectElements) + } +} +const listenTableName = new MyListener(); +parser.listen(listenTableName, tree); + +/* +SelectElements id,name +TableName user1 +*/ + +``` + +提示:使用 Listener 模式时,节点的方法名称可以在对应 SQL 目录下的 Listener 文件中查找 + +### 其他 + +- parserTreeToString 将 SQL 解析成 AST,再转成 string 形式 + +## 路线图 + +- Auto-complete +- 
Impala SQL + +## 许可证 + +[MIT](./LICENSE) \ No newline at end of file From 6bfc3b279416455fc714eb48f52ef4ef171408d9 Mon Sep 17 00:00:00 2001 From: xigua Date: Tue, 15 Dec 2020 15:24:37 +0800 Subject: [PATCH 02/13] update docs --- README-zh_CN.md | 2 +- README.md | 44 ++++++++++++++++++++++---------------------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/README-zh_CN.md b/README-zh_CN.md index 487c28a..afe4dc5 100644 --- a/README-zh_CN.md +++ b/README-zh_CN.md @@ -204,7 +204,7 @@ TableName user1 ### 其他 -- parserTreeToString 将 SQL 解析成 AST,再转成 string 形式 +- parserTreeToString (将 SQL 解析成 AST,再转成 string 形式) ## 路线图 diff --git a/README.md b/README.md index 487c28a..02f5287 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [npm-image]: https://img.shields.io/npm/v/dt-sql-parser.svg?style=flat-square [npm-url]: https://www.npmjs.com/package/dt-sql-parser -此项目是基于 [ANTLR4](https://github.com/antlr/antlr4) 开发的 SQL 语言解析器。主要用于大数据开发中,对各类 SQL 的解析。目前支持的 SQL: +dt-sql-parser is a SQL parser built on [ANTLR4](https://github.com/antlr/antlr4) .It's mainly used for analyzing all kinds of SQL in the development of big data. Supported SQL: - MySQL - Flink SQL @@ -13,15 +13,15 @@ - Hive SQL - PL/SQL -每种 SQL 都提供了对应基础类、Visitor 类和 Listener 类,包含了生成 token、生成 AST、语法校验、visitor 和 listener 模式遍历 AST 指定节点等功能。 +It provides the basic class, Visitor class, and Listener class. These class including the ability to generate tokens, generate parse tree, syntax validation, and Visitor & Listener patterns to traverse the AST. -此外,为了方便解析,还提供了几个辅助方法可以在解析前对 SQL 进行格式处理。主要作用是清除 SQL 语句中的 '--' 和 '/**/' 两种类型的注释,以及拆分大段 SQL。 +In addition, several helper methods are provided to format the SQL before parsing. 
The main effect is to clear the '--' and '/**/' types of comments in SQL statements, and to split large chunks of SQL -提示:项目中的 grammar 文件也可以通过 [ANTLR4](https://github.com/antlr/antlr4) 编译成其他语言 +tips: The Grammar file can also be compiled into other languages with [ANTLR4](https://github.com/antlr/antlr4) . [English](./README.md) | 简体中文 -## 安装 +## Installation ``` // use npm @@ -31,11 +31,11 @@ npm i dt-sql-parser --save yarn add dt-sql-parser ``` -## 示例 +## Usage ### Clean -清除注释和前后空格 +clear comments and Spaces before and after ```javascript import { cleanSql } from 'dt-sql-parser'; @@ -52,7 +52,7 @@ select id,name from user1; ### Split -分割 sql +split sql ```javascript import { splitSql } from 'dt-sql-parser'; @@ -69,7 +69,7 @@ console.log(sqlList) ### Tokens -对 sql 语句进行词法分析,生成 token +lexical analysis, generate token ```javascript import { GenericSQL } from 'dt-sql-parser'; @@ -99,7 +99,7 @@ console.log(tokens) ### Syntax validation -validate 方法对 sql 语句的语法正确性进行校验,返回一个由 error 组成的数组 +verifies the syntax correctness of the SQL statement and returns an array of errors ```javascript import { GenericSQL } from 'dt-sql-parser'; @@ -110,7 +110,7 @@ const validate = (sql) => { console.log(errors) } ``` -语法正确的 sql: +correct sql: ```javascript const correctSql = 'select id,name from user1;' validate(correctSql) @@ -118,7 +118,7 @@ validate(correctSql) [] */ ``` -包含错误语法的 sql: +incorrect sql: ```javascript const incorrectSql = 'selec id,name from user1;' validate(incorrectSql) @@ -137,7 +137,7 @@ validate(incorrectSql) ### Visitor -使用 visitor 模式访问 AST 中的指定节点 +access the specified node in the AST by Visitor pattern ```javascript import { GenericSQL, SqlParserVisitor } from 'dt-sql-parser'; @@ -147,12 +147,12 @@ const sql = `select id,name from user1;` // parseTree const tree = parser.parse(sql) class MyVisitor extends SqlParserVisitor { - // 重写 visitTableName 方法 + // overwrite visitTableName visitTableName(ctx) { let tableName = ctx.getText().toLowerCase() 
console.log('TableName', tableName) } - // 重写 visitSelectElements 方法 + // overwrite visitSelectElements visitSelectElements(ctx) { let selectElements = ctx.getText().toLowerCase() console.log('SelectElements', selectElements) @@ -167,11 +167,11 @@ TableName user1 */ ``` -提示:使用 Visitor 模式时,节点的方法名称可以在对应 SQL 目录下的 Visitor 文件中查找 +tips: The node's method name can be found in the Visitor file under the corresponding SQL directory ### Listener -listener 模式,利用 [ANTLR4](https://github.com/antlr/antlr4) 提供的 ParseTreeWalker 对象遍历 AST,进入各个节点时调用对应的方法。 +access the specified node in the AST by Listener pattern ```javascript import { GenericSQL, SqlParserListener } from 'dt-sql-parser'; @@ -200,17 +200,17 @@ TableName user1 ``` -提示:使用 Listener 模式时,节点的方法名称可以在对应 SQL 目录下的 Listener 文件中查找 +tips: The node's method name can be found in the Listener file under the corresponding SQL directory -### 其他 +### Other -- parserTreeToString 将 SQL 解析成 AST,再转成 string 形式 +- parserTreeToString (parse the SQL into AST and turn it into a String) -## 路线图 +## Roadmap - Auto-complete - Impala SQL -## 许可证 +## License [MIT](./LICENSE) \ No newline at end of file From 828808808e2a5c907234dfdb00b7cf4781d25637 Mon Sep 17 00:00:00 2001 From: xigua Date: Tue, 15 Dec 2020 15:29:41 +0800 Subject: [PATCH 03/13] update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 02f5287..8908030 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ In addition, several helper methods are provided to format the SQL before parsin tips: The Grammar file can also be compiled into other languages with [ANTLR4](https://github.com/antlr/antlr4) . 
-[English](./README.md) | 简体中文 +English | [简体中文](./README-zh_CN.md) ## Installation From 096813cdee861e4515c637d8d49d2770eeb55d65 Mon Sep 17 00:00:00 2001 From: xigua Date: Thu, 17 Dec 2020 10:12:05 +0800 Subject: [PATCH 04/13] update example code format --- README-zh_CN.md | 8 ++++---- README.md | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/README-zh_CN.md b/README-zh_CN.md index afe4dc5..1708bfb 100644 --- a/README-zh_CN.md +++ b/README-zh_CN.md @@ -80,8 +80,8 @@ const tokens = parser.getAllTokens(sql) console.log(tokens) /* [ - { - channel: 0 + { + channel: 0 column: 0 line: 1 source: [SqlLexer, InputStream] @@ -91,8 +91,8 @@ console.log(tokens) type: 137 _text: null text: "SELECT" - }, - ... + }, + ... ] */ ``` diff --git a/README.md b/README.md index 8908030..bf60df8 100644 --- a/README.md +++ b/README.md @@ -80,8 +80,8 @@ const tokens = parser.getAllTokens(sql) console.log(tokens) /* [ - { - channel: 0 + { + channel: 0 column: 0 line: 1 source: [SqlLexer, InputStream] @@ -91,8 +91,8 @@ console.log(tokens) type: 137 _text: null text: "SELECT" - }, - ... + }, + ... 
] */ ``` From 3aa21f51c08e525efd6ccd26b27bc18a96358848 Mon Sep 17 00:00:00 2001 From: xigua Date: Thu, 17 Dec 2020 11:17:14 +0800 Subject: [PATCH 05/13] update docs --- README-zh_CN.md | 18 ++++++++++-------- README.md | 18 ++++++++++-------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/README-zh_CN.md b/README-zh_CN.md index 1708bfb..0d47677 100644 --- a/README-zh_CN.md +++ b/README-zh_CN.md @@ -5,13 +5,9 @@ [npm-image]: https://img.shields.io/npm/v/dt-sql-parser.svg?style=flat-square [npm-url]: https://www.npmjs.com/package/dt-sql-parser -此项目是基于 [ANTLR4](https://github.com/antlr/antlr4) 开发的 SQL 语言解析器。主要用于大数据开发中,对各类 SQL 的解析。目前支持的 SQL: +[English](./README.md) | 简体中文 -- MySQL -- Flink SQL -- Spark SQL -- Hive SQL -- PL/SQL +dt-sql-parser 是一个基于 [ANTLR4](https://github.com/antlr/antlr4) 开发的 SQL 解析器集合。主要用于大数据开发中,对各类 SQL 的解析。 每种 SQL 都提供了对应基础类、Visitor 类和 Listener 类,包含了生成 token、生成 AST、语法校验、visitor 和 listener 模式遍历 AST 指定节点等功能。 @@ -19,7 +15,13 @@ 提示:项目中的 grammar 文件也可以通过 [ANTLR4](https://github.com/antlr/antlr4) 编译成其他语言 -[English](./README.md) | 简体中文 +目前支持的 SQL: + +- MySQL +- Flink SQL +- Spark SQL +- Hive SQL +- PL/SQL ## 安装 @@ -209,7 +211,7 @@ TableName user1 ## 路线图 - Auto-complete -- Impala SQL +- Format code ## 许可证 diff --git a/README.md b/README.md index bf60df8..4bc3339 100644 --- a/README.md +++ b/README.md @@ -5,13 +5,9 @@ [npm-image]: https://img.shields.io/npm/v/dt-sql-parser.svg?style=flat-square [npm-url]: https://www.npmjs.com/package/dt-sql-parser -dt-sql-parser is a SQL parser built on [ANTLR4](https://github.com/antlr/antlr4) .It's mainly used for analyzing all kinds of SQL in the development of big data. Supported SQL: +English | [简体中文](./README-zh_CN.md) -- MySQL -- Flink SQL -- Spark SQL -- Hive SQL -- PL/SQL +dt-sql-parser is a collection of SQL parsers developed based on [ANTLR4](https://github.com/antlr/antlr4) .It's mainly used for parsing all kinds of SQL in the development of big data. 
It provides the basic class, Visitor class, and Listener class. These class including the ability to generate tokens, generate parse tree, syntax validation, and Visitor & Listener patterns to traverse the AST. @@ -19,7 +15,13 @@ In addition, several helper methods are provided to format the SQL before parsin tips: The Grammar file can also be compiled into other languages with [ANTLR4](https://github.com/antlr/antlr4) . -English | [简体中文](./README-zh_CN.md) +Supported SQL: + +- MySQL +- Flink SQL +- Spark SQL +- Hive SQL +- PL/SQL ## Installation @@ -209,7 +211,7 @@ tips: The node's method name can be found in the Listener file under the corresp ## Roadmap - Auto-complete -- Impala SQL +- Format code ## License From fa5fed2c2aa8eddf1080078d48efbcac38cf396c Mon Sep 17 00:00:00 2001 From: xiaowei Date: Thu, 17 Dec 2020 16:46:42 +0800 Subject: [PATCH 06/13] docs: update README --- NeREADME.md | 33 ---------- README-zh_CN.md | 162 +++++++++++++++++++++++++----------------------- README.md | 157 +++++++++++++++++++++++++--------------------- 3 files changed, 172 insertions(+), 180 deletions(-) delete mode 100644 NeREADME.md diff --git a/NeREADME.md b/NeREADME.md deleted file mode 100644 index 0492d02..0000000 --- a/NeREADME.md +++ /dev/null @@ -1,33 +0,0 @@ -# dt-sql-parser - -[![NPM version][npm-image]][npm-url] - -[npm-image]: https://img.shields.io/npm/v/dt-sql-parser.svg?style=flat-square -[npm-url]: https://www.npmjs.com/package/dt-sql-parser - -## Installation - -## Usage - -### Basic - -### Syntax validation - -### Visitor - -### Listener - -## Example - -## Roadmap - -- Unify parser generate to Antlr4 -- Generic SQL -- Flink SQL -- Libra SQL -- TiDB - MySQL Compatible Syntax - -## Contributing - -## License diff --git a/README-zh_CN.md b/README-zh_CN.md index 0d47677..c883873 100644 --- a/README-zh_CN.md +++ b/README-zh_CN.md @@ -7,15 +7,9 @@ [English](./README.md) | 简体中文 -dt-sql-parser 是一个基于 [ANTLR4](https://github.com/antlr/antlr4) 开发的 SQL 解析器集合。主要用于大数据开发中,对各类 
SQL 的解析。 +dt-sql-parser 是一个基于 [ANTLR4](https://github.com/antlr/antlr4) 开发的, 针对大数据领域的 `SQL Parser` 项目。通过[ANTLR4](https://github.com/antlr/antlr4) 默认生成的 Parser、Visitor 和 Listener 对象,我们可以轻松的做到对 SQL 语句的`语法检查`(Syntax Validation)、`词法分析`(Tokenizer)、 `遍历 AST` 节点等功能。此外,还提供了几个辅助方法, 例如 SQL 切分(Split)、过滤 SQL 语句中的 `--` 和 `/**/` 等类型的注释。 -每种 SQL 都提供了对应基础类、Visitor 类和 Listener 类,包含了生成 token、生成 AST、语法校验、visitor 和 listener 模式遍历 AST 指定节点等功能。 - -此外,为了方便解析,还提供了几个辅助方法可以在解析前对 SQL 进行格式处理。主要作用是清除 SQL 语句中的 '--' 和 '/**/' 两种类型的注释,以及拆分大段 SQL。 - -提示:项目中的 grammar 文件也可以通过 [ANTLR4](https://github.com/antlr/antlr4) 编译成其他语言 - -目前支持的 SQL: +已支持的 SQL 类型: - MySQL - Flink SQL @@ -23,9 +17,11 @@ dt-sql-parser 是一个基于 [ANTLR4](https://github.com/antlr/antlr4) 开发 - Hive SQL - PL/SQL +> 提示:当前的 Parser 是 `Javascript` 语言版本,如果有必要,可以尝试编译 Grammar 文件到其他目标语言 + ## 安装 -``` +```bash // use npm npm i dt-sql-parser --save @@ -33,45 +29,60 @@ npm i dt-sql-parser --save yarn add dt-sql-parser ``` -## 示例 +## 使用 -### Clean +### 语法校验(Syntax Validation) -清除注释和前后空格 +首先需要声明想对应的 Parser 对象,不同的 SQL 类型需要引入不同的 Parser 对象处理,例如如果是 +真的 `Flink SQL`, 则需要单独引入 `FlinkSQL` 对象, 这里我们使用 `GenericSQL` 作为示例: ```javascript -import { cleanSql } from 'dt-sql-parser'; +import { GenericSQL } from 'dt-sql-parser'; -const sql = `-- comment comment -select id,name from user1; ` -const cleanedSql = cleanSql(sql) -console.log(cleanedSql) +const parser = new GenericSQL(); +const correctSql = 'select id,name from user1;'; +const errors = parser.validate(correctSql); +console.log(errors); +``` + +输出: + +```javascript /* -select id,name from user1; +[] */ ``` -### Split - -分割 sql +校验失败示例: ```javascript -import { splitSql } from 'dt-sql-parser'; +const incorrectSql = 'selec id,name from user1;' +const errors = parser.validate(incorrectSql); +console.log(errors); +``` -const sql = `select id,name from user1; -select id,name from user2;` -const sqlList = splitSql(sql) -console.log(sqlList) +输出: +```javascript /* -["select id,name from user1;", "\nselect id,name from 
user2;"] +[ + { + endCol: 5, + endLine: 1, + startCol: 0, + startLine: 1, + message: "mismatched input 'SELEC' expecting {, 'ALTER', 'ANALYZE', 'CALL', 'CHANGE', 'CHECK', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DROP', 'EXPLAIN', 'GET', 'GRANT', 'INSERT', 'KILL', 'LOAD', 'LOCK', 'OPTIMIZE', 'PURGE', 'RELEASE', 'RENAME', 'REPLACE', 'RESIGNAL', 'REVOKE', 'SELECT', 'SET', 'SHOW', 'SIGNAL', 'UNLOCK', 'UPDATE', 'USE', 'BEGIN', 'BINLOG', 'CACHE', 'CHECKSUM', 'COMMIT', 'DEALLOCATE', 'DO', 'FLUSH', 'HANDLER', 'HELP', 'INSTALL', 'PREPARE', 'REPAIR', 'RESET', 'ROLLBACK', 'SAVEPOINT', 'START', 'STOP', 'TRUNCATE', 'UNINSTALL', 'XA', 'EXECUTE', 'SHUTDOWN', '--', '(', ';'}" + } +] */ ``` -### Tokens +先实例化 Parser 对象,然后使用 `validate` 方法对 SQL 语句进行校验,如果校验失败,则返回一个包含 `Error` 信息的数组。 -对 sql 语句进行词法分析,生成 token +### 词法分析(Tokenizer) + +必要场景下,可单独对 SQL 语句进行词法分析,获取所有的 Tokens 对象: ```javascript import { GenericSQL } from 'dt-sql-parser'; @@ -99,47 +110,9 @@ console.log(tokens) */ ``` -### Syntax validation +### 访问者模式(Visitor) -validate 方法对 sql 语句的语法正确性进行校验,返回一个由 error 组成的数组 - -```javascript -import { GenericSQL } from 'dt-sql-parser'; - -const validate = (sql) => { - const parser = new GenericSQL() - const errors = parser.validate(sql) - console.log(errors) -} -``` -语法正确的 sql: -```javascript -const correctSql = 'select id,name from user1;' -validate(correctSql) -/* -[] -*/ -``` -包含错误语法的 sql: -```javascript -const incorrectSql = 'selec id,name from user1;' -validate(incorrectSql) -/* -[ - { - endCol: 5, - endLine: 1, - startCol: 0, - startLine: 1, - message: "mismatched input 'SELEC' expecting {, 'ALTER', 'ANALYZE', 'CALL', 'CHANGE', 'CHECK', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DROP', 'EXPLAIN', 'GET', 'GRANT', 'INSERT', 'KILL', 'LOAD', 'LOCK', 'OPTIMIZE', 'PURGE', 'RELEASE', 'RENAME', 'REPLACE', 'RESIGNAL', 'REVOKE', 'SELECT', 'SET', 'SHOW', 'SIGNAL', 'UNLOCK', 'UPDATE', 'USE', 'BEGIN', 'BINLOG', 'CACHE', 'CHECKSUM', 'COMMIT', 'DEALLOCATE', 'DO', 'FLUSH', 'HANDLER', 'HELP', 'INSTALL', 
'PREPARE', 'REPAIR', 'RESET', 'ROLLBACK', 'SAVEPOINT', 'START', 'STOP', 'TRUNCATE', 'UNINSTALL', 'XA', 'EXECUTE', 'SHUTDOWN', '--', '(', ';'}" - } -] -*/ -``` - -### Visitor - -使用 visitor 模式访问 AST 中的指定节点 +使用 Visitor 模式访问 AST 中的指定节点 ```javascript import { GenericSQL, SqlParserVisitor } from 'dt-sql-parser'; @@ -169,11 +142,12 @@ TableName user1 */ ``` -提示:使用 Visitor 模式时,节点的方法名称可以在对应 SQL 目录下的 Visitor 文件中查找 -### Listener +> 提示:使用 Visitor 模式时,节点的方法名称可以在对应 SQL 目录下的 Visitor 文件中查找 -listener 模式,利用 [ANTLR4](https://github.com/antlr/antlr4) 提供的 ParseTreeWalker 对象遍历 AST,进入各个节点时调用对应的方法。 +### 监听器(Listener) + +Listener 模式,利用 [ANTLR4](https://github.com/antlr/antlr4) 提供的 ParseTreeWalker 对象遍历 AST,进入各个节点时调用对应的方法。 ```javascript import { GenericSQL, SqlParserListener } from 'dt-sql-parser'; @@ -202,11 +176,47 @@ TableName user1 ``` -提示:使用 Listener 模式时,节点的方法名称可以在对应 SQL 目录下的 Listener 文件中查找 +> 提示:使用 Listener 模式时,节点的方法名称可以在对应 SQL 目录下的 Listener 文件中查找 -### 其他 +### 清理注释内容 -- parserTreeToString (将 SQL 解析成 AST,再转成 string 形式) +清除注释和前后空格 + +```javascript +import { cleanSql } from 'dt-sql-parser'; + +const sql = `-- comment comment +select id,name from user1; ` +const cleanedSql = cleanSql(sql) +console.log(cleanedSql) + +/* +select id,name from user1; +*/ +``` + +### 切割 SQL (Split) + +SQL 太大的情况下,我们可以先将SQL语句按 `;` 切割,然后逐句处理。 + +```javascript +import { splitSql } from 'dt-sql-parser'; + +const sql = `select id,name from user1; +select id,name from user2;` +const sqlList = splitSql(sql) +console.log(sqlList) + +/* +["select id,name from user1;", "\nselect id,name from user2;"] +*/ +``` + +### 其他 API + +- parserTreeToString (input: string) + +将 SQL 解析成 `List-like` 风格的树形字符串, 一般用于测试 ## 路线图 @@ -215,4 +225,4 @@ TableName user1 ## 许可证 -[MIT](./LICENSE) \ No newline at end of file +[MIT](./LICENSE) diff --git a/README.md b/README.md index 4bc3339..b801aa1 100644 --- a/README.md +++ b/README.md @@ -2,18 +2,16 @@ [![NPM version][npm-image]][npm-url] +English | [简体中文](./README-zh_CN.md) + [npm-image]: 
https://img.shields.io/npm/v/dt-sql-parser.svg?style=flat-square [npm-url]: https://www.npmjs.com/package/dt-sql-parser -English | [简体中文](./README-zh_CN.md) +dt-sql-parser is a `SQL Parser` project built with [ANTLR4](https://github.com/antlr/antlr4), and it's mainly for the `BigData` domain. The [ANTLR4](https://github.com/antlr/antlr4) generated the basic Parser, Visitor, and Listener, so it's easy to complete `validate`, `tokenize`, `traverse` the AST, and so on features. -dt-sql-parser is a collection of SQL parsers developed based on [ANTLR4](https://github.com/antlr/antlr4) .It's mainly used for parsing all kinds of SQL in the development of big data. +Besides, it' provides some helper methods, like `split` SQL, and filter the `--` and `/**/` types of comments in SQL. -It provides the basic class, Visitor class, and Listener class. These class including the ability to generate tokens, generate parse tree, syntax validation, and Visitor & Listener patterns to traverse the AST. - -In addition, several helper methods are provided to format the SQL before parsing. The main effect is to clear the '--' and '/**/' types of comments in SQL statements, and to split large chunks of SQL - -tips: The Grammar file can also be compiled into other languages with [ANTLR4](https://github.com/antlr/antlr4) . +> Tips: This project is the default for Javascript language, also you can try to compile it to other languages if you need. Supported SQL: @@ -25,7 +23,7 @@ Supported SQL: ## Installation -``` +```bash // use npm npm i dt-sql-parser --save @@ -35,43 +33,61 @@ yarn add dt-sql-parser ## Usage -### Clean +### Syntax Validation -clear comments and Spaces before and after +First, we need to import the `Parser` object from `dt-sql-parser`, the different language needs +different Parser, so if you need to handle the `Flink SQL`, you can import the `FlinkSQL Parser`. 
+ +Below is a `GenericSQL Parser` example: ```javascript -import { cleanSql } from 'dt-sql-parser'; +import { GenericSQL } from 'dt-sql-parser'; -const sql = `-- comment comment -select id,name from user1; ` -const cleanedSql = cleanSql(sql) -console.log(cleanedSql) +const parser = new GenericSQL(); +const correctSql = 'select id,name from user1;'; +const errors = parser.validate(correctSql); +console.log(errors); +``` + +Output: + +```javascript /* -select id,name from user1; +[] */ ``` -### Split - -split sql +Validation failure example: ```javascript -import { splitSql } from 'dt-sql-parser'; +const incorrectSql = 'selec id,name from user1;' +const errors = parser.validate(incorrectSql); +console.log(errors); +``` -const sql = `select id,name from user1; -select id,name from user2;` -const sqlList = splitSql(sql) -console.log(sqlList) +Output: +```javascript /* -["select id,name from user1;", "\nselect id,name from user2;"] +[ + { + endCol: 5, + endLine: 1, + startCol: 0, + startLine: 1, + message: "mismatched input 'SELEC' expecting {, 'ALTER', 'ANALYZE', 'CALL', 'CHANGE', 'CHECK', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DROP', 'EXPLAIN', 'GET', 'GRANT', 'INSERT', 'KILL', 'LOAD', 'LOCK', 'OPTIMIZE', 'PURGE', 'RELEASE', 'RENAME', 'REPLACE', 'RESIGNAL', 'REVOKE', 'SELECT', 'SET', 'SHOW', 'SIGNAL', 'UNLOCK', 'UPDATE', 'USE', 'BEGIN', 'BINLOG', 'CACHE', 'CHECKSUM', 'COMMIT', 'DEALLOCATE', 'DO', 'FLUSH', 'HANDLER', 'HELP', 'INSTALL', 'PREPARE', 'REPAIR', 'RESET', 'ROLLBACK', 'SAVEPOINT', 'START', 'STOP', 'TRUNCATE', 'UNINSTALL', 'XA', 'EXECUTE', 'SHUTDOWN', '--', '(', ';'}" + } +] */ ``` -### Tokens +We instantiate a Parser object and use the `validate` method to check the SQL syntax; if validation fails, +it returns an array containing the `error` messages.
-lexical analysis, generate token +### Tokenizer + +You can also get all the `tokens` from the Parser: ```javascript import { GenericSQL } from 'dt-sql-parser'; @@ -99,47 +115,9 @@ console.log(tokens) */ ``` -### Syntax validation - -verifies the syntax correctness of the SQL statement and returns an array of errors - -```javascript -import { GenericSQL } from 'dt-sql-parser'; - -const validate = (sql) => { - const parser = new GenericSQL() - const errors = parser.validate(sql) - console.log(errors) -} -``` -correct sql: -```javascript -const correctSql = 'select id,name from user1;' -validate(correctSql) -/* -[] -*/ -``` -incorrect sql: -```javascript -const incorrectSql = 'selec id,name from user1;' -validate(incorrectSql) -/* -[ - { - endCol: 5, - endLine: 1, - startCol: 0, - startLine: 1, - message: "mismatched input 'SELEC' expecting {, 'ALTER', 'ANALYZE', 'CALL', 'CHANGE', 'CHECK', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DROP', 'EXPLAIN', 'GET', 'GRANT', 'INSERT', 'KILL', 'LOAD', 'LOCK', 'OPTIMIZE', 'PURGE', 'RELEASE', 'RENAME', 'REPLACE', 'RESIGNAL', 'REVOKE', 'SELECT', 'SET', 'SHOW', 'SIGNAL', 'UNLOCK', 'UPDATE', 'USE', 'BEGIN', 'BINLOG', 'CACHE', 'CHECKSUM', 'COMMIT', 'DEALLOCATE', 'DO', 'FLUSH', 'HANDLER', 'HELP', 'INSTALL', 'PREPARE', 'REPAIR', 'RESET', 'ROLLBACK', 'SAVEPOINT', 'START', 'STOP', 'TRUNCATE', 'UNINSTALL', 'XA', 'EXECUTE', 'SHUTDOWN', '--', '(', ';'}" - } -] -*/ -``` - ### Visitor -access the specified node in the AST by Visitor pattern +Traverse the tree nodes with the Visitor: ```javascript import { GenericSQL, SqlParserVisitor } from 'dt-sql-parser'; @@ -169,7 +147,8 @@ TableName user1 */ ``` -tips: The node's method name can be found in the Visitor file under the corresponding SQL directory + +> Tips: The node's method name can be found in the Visitor file under the corresponding SQL directory ### Listener @@ -202,11 +181,47 @@ TableName user1 ``` -tips: The node's method name can be found in the Listener file under the corresponding SQL
directory +> Tips: The node's method name can be found in the Listener file under the corresponding SQL directory -### Other +### Clean -- parserTreeToString (parse the SQL into AST and turn it into a String) +Clear the `comments` and `spaces` before and after + +```javascript +import { cleanSql } from 'dt-sql-parser'; + +const sql = `-- comment comment +select id,name from user1; ` +const cleanedSql = cleanSql(sql) +console.log(cleanedSql) + +/* +select id,name from user1; +*/ +``` + +### Split SQL + +When the SQL text is very big, you can think about to split it by `;` , and handle each line. + +```javascript +import { splitSql } from 'dt-sql-parser'; + +const sql = `select id,name from user1; +select id,name from user2;` +const sqlList = splitSql(sql) +console.log(sqlList) + +/* +["select id,name from user1;", "\nselect id,name from user2;"] +*/ +``` + +### Other API + +- parserTreeToString(input: string) + +Parse the input and convert the AST to a `Lisp-like` tree string. ## Roadmap @@ -215,4 +230,4 @@ tips: The node's method name can be found in the Listener file under the corresp ## License -[MIT](./LICENSE) \ No newline at end of file +[MIT](./LICENSE) From 4586c7d63fe00c88c2301903d3b911f1edc1d153 Mon Sep 17 00:00:00 2001 From: xiaowei Date: Thu, 17 Dec 2020 16:51:51 +0800 Subject: [PATCH 07/13] docs: update incorrect words --- README-zh_CN.md | 4 ++-- README.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README-zh_CN.md b/README-zh_CN.md index c883873..84388d1 100644 --- a/README-zh_CN.md +++ b/README-zh_CN.md @@ -33,8 +33,8 @@ yarn add dt-sql-parser ### 语法校验(Syntax Validation) -首先需要声明想对应的 Parser 对象,不同的 SQL 类型需要引入不同的 Parser 对象处理,例如如果是 -真的 `Flink SQL`, 则需要单独引入 `FlinkSQL` 对象, 这里我们使用 `GenericSQL` 作为示例: +首先需要声明相应的 Parser 对象,不同的 SQL 类型需要引入不同的 Parser 对象处理,例如如果是 +针对 `Flink SQL`,则需要单独引入 `FlinkSQL` Parser,这里我们使用 `GenericSQL` 作为示例: ```javascript import { GenericSQL } from 'dt-sql-parser'; diff --git a/README.md b/README.md index
b801aa1..525f7b8 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ English | [简体中文](./README-zh_CN.md) [npm-image]: https://img.shields.io/npm/v/dt-sql-parser.svg?style=flat-square [npm-url]: https://www.npmjs.com/package/dt-sql-parser -dt-sql-parser is a `SQL Parser` project built with [ANTLR4](https://github.com/antlr/antlr4), and it's mainly for the `BigData` domain. The [ANTLR4](https://github.com/antlr/antlr4) generated the basic Parser, Visitor, and Listener, so it's easy to complete `validate`, `tokenize`, `traverse` the AST, and so on features. +dt-sql-parser is a `SQL Parser` project built with [ANTLR4](https://github.com/antlr/antlr4), and it's mainly for the `BigData` domain. The [ANTLR4](https://github.com/antlr/antlr4) generated the basic Parser, Visitor, and Listener, so it's easy to complete the `syntax validation`, `tokenizer`, `traverse` the AST, and so on features. Besides, it' provides some helper methods, like `split` SQL, and filter the `--` and `/**/` types of comments in SQL. 
From b1193785aa95bd4042cfb65ecc0fd2b16f5c9d46 Mon Sep 17 00:00:00 2001 From: xiaowei Date: Thu, 17 Dec 2020 17:15:46 +0800 Subject: [PATCH 08/13] docs: prettier md format --- README-zh_CN.md | 6 +++--- README.md | 22 +++++++++++----------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/README-zh_CN.md b/README-zh_CN.md index 84388d1..c30eee3 100644 --- a/README-zh_CN.md +++ b/README-zh_CN.md @@ -7,7 +7,7 @@ [English](./README.md) | 简体中文 -dt-sql-parser 是一个基于 [ANTLR4](https://github.com/antlr/antlr4) 开发的, 针对大数据领域的 `SQL Parser` 项目。通过[ANTLR4](https://github.com/antlr/antlr4) 默认生成的 Parser、Visitor 和 Listener 对象,我们可以轻松的做到对 SQL 语句的`语法检查`(Syntax Validation)、`词法分析`(Tokenizer)、 `遍历 AST` 节点等功能。此外,还提供了几个辅助方法, 例如 SQL 切分(Split)、过滤 SQL 语句中的 `--` 和 `/**/` 等类型的注释。 +dt-sql-parser 是一个基于 [ANTLR4](https://github.com/antlr/antlr4) 开发的, 针对大数据领域的 **SQL Parser** 项目。通过[ANTLR4](https://github.com/antlr/antlr4) 默认生成的 Parser、Visitor 和 Listener 对象,我们可以轻松的做到对 SQL 语句的**语法检查**(Syntax Validation)、**词法分析**(Tokenizer)、 **遍历 AST** 节点等功能。此外,还提供了几个辅助方法, 例如 SQL 切分(Split)、过滤 SQL 语句中的 `--` 和 `/**/` 等类型的注释。 已支持的 SQL 类型: @@ -34,7 +34,7 @@ yarn add dt-sql-parser ### 语法校验(Syntax Validation) 首先需要声明相应的 Parser 对象,不同的 SQL 类型需要引入不同的 Parser 对象处理,例如如果是 -针对 `Flink SQL`,则需要单独引入 `FlinkSQL` Parser,这里我们使用 `GenericSQL` 作为示例: +针对 **Flink SQL**,则需要单独引入 **FlinkSQL** Parser,这里我们使用 **GenericSQL** 作为示例: ```javascript import { GenericSQL } from 'dt-sql-parser'; @@ -78,7 +78,7 @@ console.log(errors); */ ``` -先实例化 Parser 对象,然后使用 `validate` 方法对 SQL 语句进行校验,如果校验失败,则返回一个包含 `Error` 信息的数组。 +先实例化 Parser 对象,然后使用 `validate` 方法对 SQL 语句进行校验,如果校验失败,则返回一个包含 `error` 信息的数组。 ### 词法分析(Tokenizer) diff --git a/README.md b/README.md index 525f7b8..ac7ea7f 100644 --- a/README.md +++ b/README.md @@ -7,9 +7,9 @@ English | [简体中文](./README-zh_CN.md) [npm-image]: https://img.shields.io/npm/v/dt-sql-parser.svg?style=flat-square [npm-url]: https://www.npmjs.com/package/dt-sql-parser -dt-sql-parser is a `SQL Parser` project built with 
[ANTLR4](https://github.com/antlr/antlr4), and it's mainly for the `BigData` domain. The [ANTLR4](https://github.com/antlr/antlr4) generated the basic Parser, Visitor, and Listener, so it's easy to complete the `syntax validation`, `tokenizer`, `traverse` the AST, and so on features. +dt-sql-parser is a **SQL Parser** project built with [ANTLR4](https://github.com/antlr/antlr4), and it's mainly for the **BigData** domain. The [ANTLR4](https://github.com/antlr/antlr4) generated the basic Parser, Visitor, and Listener, so it's easy to complete the **syntax validation**, **tokenizer**, **traverse** the AST, and so on features. -Besides, it' provides some helper methods, like `split` SQL, and filter the `--` and `/**/` types of comments in SQL. +Besides, it' provides some helper methods, like **split** SQL, and filter the `--` and `/**/` types of comments in SQL. > Tips: This project is the default for Javascript language, also you can try to compile it to other languages if you need. @@ -35,10 +35,10 @@ yarn add dt-sql-parser ### Syntax Validation -First, we need to import the `Parser` object from `dt-sql-parser`, the different language needs -different Parser, so if you need to handle the `Flink SQL`, you can import the `FlinkSQL Parser`. +First, we need to import the **Parser** object from `dt-sql-parser`, the different language needs +different Parser, so if you need to handle the **Flink SQL**, you can import the **FlinkSQL Parser**. -The below is a `GenericSQL Parser` example: +The below is a **GenericSQL Parser** example: ```javascript import { GenericSQL } from 'dt-sql-parser'; @@ -82,12 +82,12 @@ output: */ ``` -We instanced a Parser object, and use the `validate` method to check the SQL syntax, if failed -returns an array object includes `error` message. +We instanced a Parser object, and use the **validate** method to check the SQL syntax, if failed +returns an array object includes **error** message. 
### Tokenizer -You can also the all `tokens` by the Parser: +You can also the all **tokens** by the Parser: ```javascript import { GenericSQL } from 'dt-sql-parser'; @@ -152,7 +152,7 @@ TableName user1 ### Listener -access the specified node in the AST by Listener pattern +Access the specified node in the AST by the Listener ```javascript import { GenericSQL, SqlParserListener } from 'dt-sql-parser'; @@ -185,7 +185,7 @@ TableName user1 ### Clean -Clear the `comments` and `spaces` before and after +Clear the **comments** and **spaces** before and after ```javascript import { cleanSql } from 'dt-sql-parser'; @@ -202,7 +202,7 @@ select id,name from user1; ### Split SQL -When the SQL text is very big, you can think about to split it by `;` , and handle each line. +When the SQL text is very large, you can split it by `;` and handle each statement separately. ```javascript import { splitSql } from 'dt-sql-parser'; From 858eeb31c5d39d4b829908ca602be4e6d439ca2b Mon Sep 17 00:00:00 2001 From: xiaowei Date: Thu, 17 Dec 2020 17:18:45 +0800 Subject: [PATCH 09/13] docs: prettier md format --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ac7ea7f..4d91a79 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ const errors = parser.validate(correctSql); console.log(errors); ``` -output: +Output: ```javascript /* @@ -58,7 +58,7 @@ output: */ ``` -validate failed: +Validate failed: ```javascript const incorrectSql = 'selec id,name from user1;' @@ -66,7 +66,7 @@ const errors = parser.validate(incorrectSql); console.log(errors); ``` -output: +Output: ```javascript /* From dc21e98401496d534a4473fc1b00d7eb047a2e7f Mon Sep 17 00:00:00 2001 From: xiaowei Date: Thu, 17 Dec 2020 17:21:40 +0800 Subject: [PATCH 10/13] docs: move the tips to the introduction bottom --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4d91a79..aed6472 100644 --- a/README.md +++
b/README.md @@ -11,8 +11,6 @@ dt-sql-parser is a **SQL Parser** project built with [ANTLR4](https://github.com Besides, it' provides some helper methods, like **split** SQL, and filter the `--` and `/**/` types of comments in SQL. -> Tips: This project is the default for Javascript language, also you can try to compile it to other languages if you need. - Supported SQL: - MySQL @@ -21,6 +19,8 @@ Supported SQL: - Hive SQL - PL/SQL +> Tips: This project targets JavaScript by default, but you can also compile the grammar to other languages if you need to. + ## Installation ```bash From 5f6503ff6e5ad85b4c2a2d1d06e0d5c0dcd70a53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E5=A8=81?= Date: Thu, 17 Dec 2020 17:31:34 +0800 Subject: [PATCH 11/13] docs: Update CONTRIBUTING.md --- CONTRIBUTING.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 214e6cc..d9277b2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,15 +1,10 @@ # dt-sql-parser -summary - -## How to contribute ## Prerequisites -## Semantic Versioning ## Branch Organization -## Release Process ## Source Code Organization From 42c4cb947b8fe2465e8337bdccda5e944349dd2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E5=A8=81?= Date: Thu, 17 Dec 2020 17:32:40 +0800 Subject: [PATCH 12/13] docs: update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index aed6472..b3ebbb4 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,7 @@ returns an array object includes **error** message.
### Tokenizer -You can also the all **tokens** by the Parser: +Get all **tokens** by the Parser: ```javascript import { GenericSQL } from 'dt-sql-parser'; From 94275835fc9a04b63f8f188c8b475f93d6ba8fd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E5=A8=81?= Date: Thu, 17 Dec 2020 17:38:36 +0800 Subject: [PATCH 13/13] docs: unify the description for Split --- README-zh_CN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README-zh_CN.md b/README-zh_CN.md index c30eee3..afaa80a 100644 --- a/README-zh_CN.md +++ b/README-zh_CN.md @@ -7,7 +7,7 @@ [English](./README.md) | 简体中文 -dt-sql-parser 是一个基于 [ANTLR4](https://github.com/antlr/antlr4) 开发的, 针对大数据领域的 **SQL Parser** 项目。通过[ANTLR4](https://github.com/antlr/antlr4) 默认生成的 Parser、Visitor 和 Listener 对象,我们可以轻松的做到对 SQL 语句的**语法检查**(Syntax Validation)、**词法分析**(Tokenizer)、 **遍历 AST** 节点等功能。此外,还提供了几个辅助方法, 例如 SQL 切分(Split)、过滤 SQL 语句中的 `--` 和 `/**/` 等类型的注释。 +dt-sql-parser 是一个基于 [ANTLR4](https://github.com/antlr/antlr4) 开发的, 针对大数据领域的 **SQL Parser** 项目。通过[ANTLR4](https://github.com/antlr/antlr4) 默认生成的 Parser、Visitor 和 Listener 对象,我们可以轻松的做到对 SQL 语句的**语法检查**(Syntax Validation)、**词法分析**(Tokenizer)、 **遍历 AST** 节点等功能。此外,还提供了几个辅助方法, 例如 SQL 切割(Split)、过滤 SQL 语句中的 `--` 和 `/**/` 等类型的注释。 已支持的 SQL 类型: