You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by xx...@apache.org on 2021/03/11 03:48:34 UTC

[kylin] branch master updated: KYLIN-4879 Fix the function of remove sql comments

This is an automated email from the ASF dual-hosted git repository.

xxyu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kylin.git


The following commit(s) were added to refs/heads/master by this push:
     new 1b4716a  KYLIN-4879 Fix the function of remove sql comments
1b4716a is described below

commit 1b4716aa940779999f83832cf52c84de4a0edbbc
Author: yaqian.zhang <59...@qq.com>
AuthorDate: Wed Mar 10 16:58:10 2021 +0800

    KYLIN-4879 Fix the function of remove sql comments
---
 pom.xml                                            |   6 +
 query/pom.xml                                      |  27 +++-
 .../javacc/org/apache/kylin/query/util/.gitignore  |  10 ++
 .../org/apache/kylin/query/util/CommentParser.jj   | 161 +++++++++++++++++++++
 .../org/apache/kylin/query/util/QueryUtil.java     |  17 +--
 .../org/apache/kylin/query/util/QueryUtilTest.java |  73 ++++++++--
 6 files changed, 269 insertions(+), 25 deletions(-)

diff --git a/pom.xml b/pom.xml
index 1b05daf..ed14a73 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1493,6 +1493,12 @@
                 <bundledSignature>jdk-deprecated</bundledSignature>
                 <!--<bundledSignature>jdk-non-portable</bundledSignature>-->
               </bundledSignatures>
+              <excludes>
+                <exclude>**/ParseException.class</exclude>
+                <exclude>**/SimpleCharStream.class</exclude>
+                <exclude>**/*TokenManager.class</exclude>
+                <exclude>**/TokenMgrError.class</exclude>
+              </excludes>
               <signaturesFiles>
                 <signaturesFile>
                   ${user.dir}/dev-support/signatures.txt
diff --git a/query/pom.xml b/query/pom.xml
index ce338f2..a9f9358 100644
--- a/query/pom.xml
+++ b/query/pom.xml
@@ -17,7 +17,8 @@
  limitations under the License.
 -->
 
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
     <modelVersion>4.0.0</modelVersion>
 
     <artifactId>kylin-query</artifactId>
@@ -92,4 +93,28 @@
             <version>${mockito.version}</version>
         </dependency>
     </dependencies>
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>javacc-maven-plugin</artifactId>
+                <version>2.6</version>
+                <executions>
+                    <execution>
+                        <id>javacc</id>
+                        <goals>
+                            <goal>javacc</goal>
+                        </goals>
+                        <configuration>
+                            <sourceDirectory>src/main/codegen/javacc/org/apache/kylin/query/util</sourceDirectory>
+                            <includes>
+                                <include>*.jj</include>
+                            </includes>
+                            <isStatic>false</isStatic>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
 </project>
diff --git a/query/src/main/codegen/javacc/org/apache/kylin/query/util/.gitignore b/query/src/main/codegen/javacc/org/apache/kylin/query/util/.gitignore
new file mode 100644
index 0000000..b978ba1
--- /dev/null
+++ b/query/src/main/codegen/javacc/org/apache/kylin/query/util/.gitignore
@@ -0,0 +1,10 @@
+/EscapeTransformer.java
+/EscapeTransformerConstants.java
+/EscapeTransformerTokenManager.java
+/ParseException.java
+/SimpleCharStream.java
+/Token.java
+/TokenMgrError.java
+/EscapeParser.java
+/EscapeParserConstants.java
+/EscapeParserTokenManager.java
diff --git a/query/src/main/codegen/javacc/org/apache/kylin/query/util/CommentParser.jj b/query/src/main/codegen/javacc/org/apache/kylin/query/util/CommentParser.jj
new file mode 100644
index 0000000..53dcab1
--- /dev/null
+++ b/query/src/main/codegen/javacc/org/apache/kylin/query/util/CommentParser.jj
@@ -0,0 +1,161 @@
+options {
+    IGNORE_CASE = true;
+    STATIC = false;
+    UNICODE_INPUT=true;
+}
+
+PARSER_BEGIN(CommentParser)
+package org.apache.kylin.query.util;
+import java.io.StringReader;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Scanner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class CommentParser {
+
+    private final static Logger logger = LoggerFactory.getLogger(CommentParser.class);
+
+
+    public static void main(String args[]) throws ParseException {
+        System.out.println("Input SQL:");
+        Scanner reader = new Scanner(System.in, "UTF-8");
+        String sql = reader.nextLine();
+        reader.close();
+        CommentParser parser = new CommentParser(new StringReader(sql));
+        String parseResult = parser.Input();
+        System.out.println("Translated SQL:");
+        System.out.println(parseResult);
+    }
+
+    public CommentParser(String sql) {
+        this(new StringReader(sql));
+    }
+}
+
+PARSER_END(CommentParser)
+
+//< DEFAULT, ESCAPE, FUNCTION >
+//SKIP :
+//{
+//  "\t"
+//| "\n"
+//| "\r"
+//}
+
+<DEFAULT>
+SKIP : {
+    "/*" : WITHINCOMMENT
+    | <"--" (~["\n","\r"])*>
+}
+
+< DEFAULT >
+TOKEN :
+{
+ < REMAIN_TOKEN : [" ", "\t","\n", "\r", ",", "/", "-" ] >
+| < QUOTE: "'" >
+| < QUOTED_STRING: <QUOTE> ( (~["'"]) | ("''"))* <QUOTE> >
+| < DOUBLE_QUOTE: "\"" >
+| < DOUBLE_QUOTE_STRING: <DOUBLE_QUOTE> ( (~["\""]) | ("\"\""))* <DOUBLE_QUOTE> >
+| < ANY : (~[" ", ",", "\t", "\n", "\r", "/", "-" ])+ >
+}
+
+<WITHINCOMMENT>
+SKIP :
+{
+  "*/" : DEFAULT
+}
+
+<WITHINCOMMENT>
+MORE :
+{
+  < ~[] >
+}
+
+/** Root production. */
+String Input() :
+{
+    String innerString;
+    StringBuilder transformedStr = new StringBuilder();
+}
+{
+    (
+    LOOKAHEAD(2)
+    innerString = Expression()
+    {
+        transformedStr.append(innerString);
+    }
+    )+
+    <EOF>
+    {
+        return transformedStr.toString();
+    }
+}
+
+/** Brace counting production. */
+String Expression() :
+{
+    String innerString = "";
+    String nextString = "";
+}
+{
+    {
+        if (Thread.currentThread().isInterrupted()) {
+            throw new ParseException("CommentParser is interrupted");
+        }
+    }
+    (
+     innerString = QuotedString()
+    | innerString = DoubleQuoteString()
+    | innerString = RemainToken()
+    | innerString = Any()
+    )
+    {
+        return innerString + nextString;
+    }
+}
+
+String RemainToken() :
+{}
+{
+    < REMAIN_TOKEN >
+    {
+        logger.trace("meet token <REMAIN_TOKEN>");
+        return getToken(0).image;
+    }
+}
+
+String QuotedString() :
+{
+    String s;
+}
+{
+    <QUOTED_STRING>
+    {
+        logger.trace("meet token in <QUOTED_STRING>: " + getToken(0).image);
+        return getToken(0).image;
+    }
+}
+
+String DoubleQuoteString() :
+{
+    String s;
+}
+{
+    <DOUBLE_QUOTE_STRING>
+    {
+        logger.trace("meet token in <QUOTED2_STRING>: " + getToken(0).image);
+        return getToken(0).image;
+    }
+}
+
+String Any() :
+{}
+{
+    < ANY >
+    {
+        logger.trace("meet token in <ANY>: " + getToken(0).image);
+        return getToken(0).image;
+    }
+}
\ No newline at end of file
diff --git a/query/src/main/java/org/apache/kylin/query/util/QueryUtil.java b/query/src/main/java/org/apache/kylin/query/util/QueryUtil.java
index 551a907..8841427 100644
--- a/query/src/main/java/org/apache/kylin/query/util/QueryUtil.java
+++ b/query/src/main/java/org/apache/kylin/query/util/QueryUtil.java
@@ -30,7 +30,6 @@ import org.apache.kylin.metadata.project.ProjectInstance;
 import org.apache.kylin.metadata.project.ProjectManager;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-
 import com.google.common.collect.Lists;
 
 /**
@@ -219,17 +218,15 @@ public class QueryUtil {
                 || (sql1.startsWith(KEYWORD_EXPLAIN) && sql1.contains(KEYWORD_SELECT));
     }
 
-    public static String removeCommentInSql(String sql1) {
-        // match two patterns, one is "-- comment", the other is "/* comment */"
-        final String[] commentPatterns = new String[] { "--(?!.*\\*/).*?[\r\n]", "/\\*(.|\r|\n)*?\\*/" };
 
-        for (int i = 0; i < commentPatterns.length; i++) {
-            sql1 = sql1.replaceAll(commentPatterns[i], "");
+    public static String removeCommentInSql(String sql) {
+        // match two patterns, one is "-- comment", the other is "/* comment */"
+        try {
+            return new CommentParser(sql).Input().trim();
+        } catch (ParseException e) {
+            logger.error("Failed to parse sql: {}", sql, e);
+            return sql;
         }
-
-        sql1 = sql1.trim();
-
-        return sql1;
     }
 
     public interface IQueryTransformer {
diff --git a/query/src/test/java/org/apache/kylin/query/util/QueryUtilTest.java b/query/src/test/java/org/apache/kylin/query/util/QueryUtilTest.java
index 9b769b8..fb9a431 100644
--- a/query/src/test/java/org/apache/kylin/query/util/QueryUtilTest.java
+++ b/query/src/test/java/org/apache/kylin/query/util/QueryUtilTest.java
@@ -200,8 +200,8 @@ public class QueryUtilTest extends LocalFileMetadataTestCase {
     @Test
     public void testRemoveCommentInSql() {
 
-        String originSql = "select count(*) from test_kylin_fact where price > 10.0";
-        String originSql2 = "select count(*) from test_kylin_fact where TEST_COLUMN != 'not--a comment'";
+        final String originSql = "select count(*) from test_kylin_fact where price > 10.0";
+        final String originSql2 = "select count(*) from test_kylin_fact where TEST_COLUMN != 'not--a comment'";
 
         {
             String sqlWithComment = "-- comment \n" + originSql;
@@ -294,18 +294,63 @@ public class QueryUtilTest extends LocalFileMetadataTestCase {
             Assert.assertEquals(originSql2, QueryUtil.removeCommentInSql(sqlWithComment2));
         }
 
-        String content = "        --  One-line comment and /**range\n" +
-                "/*\n" +
-                "Multi-line comment\r\n" +
-                "--  Multi-line comment*/\n" +
-                "select price as " +
-                "/*\n" +
-                "Multi-line comment\r\n" +
-                "--  Multi-line comment*/\n" +
-                "revenue from /*One-line comment-- One-line comment*/ v_lineitem;";
-        String expectedContent = "select price as revenue from  v_lineitem;";
-        String trimmedContent = QueryUtil.removeCommentInSql(content).replaceAll("\n", "").trim();
-        Assert.assertEquals(trimmedContent, expectedContent);
+        {
+            String content = "        --  One-line comment and /**range\n" +
+                    "/*\n" +
+                    "Multi-line comment\r\n" +
+                    "--  Multi-line comment*/\n" +
+                    "select price as " +
+                    "/*\n" +
+                    "Multi-line comment\r\n" +
+                    "--  Multi-line comment*/\n" +
+                    "revenue from /*One-line comment-- One-line comment*/ v_lineitem;";
+            String expectedContent = "select price as revenue from  v_lineitem;";
+            String trimmedContent = QueryUtil.removeCommentInSql(content).replaceAll("\n", "").trim();
+            Assert.assertEquals(trimmedContent, expectedContent);
+        }
+
+        {
+            String sqlWithComment = "select count(*) from test_kylin_fact WHERE column_name = '--this is not comment'\n " +
+                    "LIMIT 100 offset 0";
+            Assert.assertEquals(sqlWithComment, QueryUtil.removeCommentInSql(sqlWithComment));
+        }
+
+        {
+            String sqlWithComment = "select count(*) from test_kylin_fact WHERE column_name = '--this is not comment'";
+            Assert.assertEquals(sqlWithComment, QueryUtil.removeCommentInSql(sqlWithComment));
+        }
+
+        {
+            String sqlWithComment = "select count(*) from test_kylin_fact WHERE column_name = '/*--this is not comment*/'";
+            Assert.assertEquals(sqlWithComment, QueryUtil.removeCommentInSql(sqlWithComment));
+        }
+
+        {
+            String sqlWithComment = "-- comment \n" + originSql + "/* comment */;" + "-- comment \n" + originSql + "/* comment */";
+            Assert.assertEquals("select count(*) from test_kylin_fact where price > 10.0;\n" +
+                    "select count(*) from test_kylin_fact where price > 10.0", QueryUtil.removeCommentInSql(sqlWithComment));
+        }
+
+        {
+            String sqlWithComment = "select count(*) from test_kylin_fact /* this is test*/  where price > 10.0 -- comment \n" +
+                    ";" + "insert into test_kylin_fact(id) values(?); -- comment \n";
+            Assert.assertEquals("select count(*) from test_kylin_fact   where price > 10.0 \n" +
+                    ";insert into test_kylin_fact(id) values(?);", QueryUtil.removeCommentInSql(sqlWithComment));
+        }
+
+        {
+            String sqlWithoutComment = "select * from test_kylin_fact where price=\"/* this is not comment */\"";
+            Assert.assertEquals(sqlWithoutComment, QueryUtil.removeCommentInSql(sqlWithoutComment));
+        }
+
+        {
+            String sqlWithoutComment = "select count(*) from test_kylin_fact -- 'comment'\n";
+            Assert.assertEquals("select count(*) from test_kylin_fact", QueryUtil.removeCommentInSql(sqlWithoutComment));
+        }
+        {
+            Assert.assertEquals("select count(*)",
+                    QueryUtil.removeCommentInSql("select count(*) -- , --\t --/ --"));
+        }
     }
 
     @Test