You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2017/06/05 05:23:28 UTC

[40/67] [abbrv] kylin git commit: KYLIN-2515 add IAdhocConverter to allow customized adhoc query converte

KYLIN-2515 add IAdhocConverter to allow customized adhoc query converte


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/6ce9983f
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/6ce9983f
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/6ce9983f

Branch: refs/heads/master
Commit: 6ce9983f70fb497a50b660289cd11a8714cc731e
Parents: 9fafd27
Author: Hongbin Ma <ma...@apache.org>
Authored: Fri May 26 16:50:49 2017 +0800
Committer: Roger Shi <ro...@gmail.com>
Committed: Sat May 27 21:11:03 2017 +0800

----------------------------------------------------------------------
 .../apache/kylin/common/KylinConfigBase.java    |   4 +
 .../kylin/storage/adhoc/AdHocRunnerBase.java    |   6 +-
 .../kylin/storage/adhoc/HiveAdhocConverter.java | 180 +++++++++++++++++++
 .../kylin/storage/adhoc/IAdhocConverter.java    |  25 +++
 .../storage/adhoc/HiveAdhocConverterTest.java   |  62 +++++++
 .../org/apache/kylin/rest/util/AdHocUtil.java   |  35 ++--
 6 files changed, 297 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/6ce9983f/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 05be701..7f366d8 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -968,6 +968,10 @@ abstract public class KylinConfigBase implements Serializable {
     public String getAdHocRunnerClassName() {
         return getOptional("kylin.query.ad-hoc.runner.class-name", "");
     }
+    
+    public String getAdHocConverterClassName() {
+        return getOptional("kylin.query.ad-hoc.converter.class-name", "org.apache.kylin.storage.adhoc.HiveAdhocConverter");
+    }
 
     public String getJdbcUrl() {
         return getOptional("kylin.query.ad-hoc.jdbc.url", "");

http://git-wip-us.apache.org/repos/asf/kylin/blob/6ce9983f/core-storage/src/main/java/org/apache/kylin/storage/adhoc/AdHocRunnerBase.java
----------------------------------------------------------------------
diff --git a/core-storage/src/main/java/org/apache/kylin/storage/adhoc/AdHocRunnerBase.java b/core-storage/src/main/java/org/apache/kylin/storage/adhoc/AdHocRunnerBase.java
index 7e811a4..7b870c6 100644
--- a/core-storage/src/main/java/org/apache/kylin/storage/adhoc/AdHocRunnerBase.java
+++ b/core-storage/src/main/java/org/apache/kylin/storage/adhoc/AdHocRunnerBase.java
@@ -18,13 +18,13 @@
 
 package org.apache.kylin.storage.adhoc;
 
+import java.util.List;
+
 import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.metadata.querymeta.SelectedColumnMeta;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.util.List;
-
 public abstract class AdHocRunnerBase {
 
     private static final Logger logger = LoggerFactory.getLogger(AdHocRunnerBase.class);
@@ -45,4 +45,4 @@ public abstract class AdHocRunnerBase {
     public abstract void init();
 
     public abstract void executeQuery(String query, List<List<String>> results, List<SelectedColumnMeta> columnMetas) throws Exception;
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/6ce9983f/core-storage/src/main/java/org/apache/kylin/storage/adhoc/HiveAdhocConverter.java
----------------------------------------------------------------------
diff --git a/core-storage/src/main/java/org/apache/kylin/storage/adhoc/HiveAdhocConverter.java b/core-storage/src/main/java/org/apache/kylin/storage/adhoc/HiveAdhocConverter.java
new file mode 100644
index 0000000..1a43557
--- /dev/null
+++ b/core-storage/src/main/java/org/apache/kylin/storage/adhoc/HiveAdhocConverter.java
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+package org.apache.kylin.storage.adhoc;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Stack;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+//TODO: Some workaround ways to make sql readable by hive parser, should replaced it with a more well-designed way
+public class HiveAdhocConverter implements IAdhocConverter {
+
+    private static final Logger logger = LoggerFactory.getLogger(HiveAdhocConverter.class);
+
+    private static final Pattern EXTRACT_PATTERN = Pattern.compile("\\s+extract\\s*(\\()\\s*(.*?)\\s*from(\\s+)", Pattern.CASE_INSENSITIVE);
+    private static final Pattern FROM_PATTERN = Pattern.compile("\\s+from\\s+(\\()\\s*select\\s", Pattern.CASE_INSENSITIVE);
+    private static final Pattern CAST_PATTERN = Pattern.compile("CAST\\((.*?) (?i)AS\\s*(.*?)\\s*\\)", Pattern.CASE_INSENSITIVE);
+    private static final Pattern CONCAT_PATTERN = Pattern.compile("(['_a-z0-9A-Z]+)\\|\\|(['_a-z0-9A-Z]+)", Pattern.CASE_INSENSITIVE);
+
+    public static String replaceString(String originString, String fromString, String toString) {
+        return originString.replace(fromString, toString);
+    }
+
+    public static String extractReplace(String originString) {
+        Matcher extractMatcher = EXTRACT_PATTERN.matcher(originString);
+        String replacedString = originString;
+        Map<Integer, Integer> parenthesesPairs = null;
+
+        while (extractMatcher.find()) {
+            if (parenthesesPairs == null) {
+                parenthesesPairs = findParenthesesPairs(originString);
+            }
+
+            String functionStr = extractMatcher.group(2);
+            int startIdx = extractMatcher.end(3);
+            int endIdx = parenthesesPairs.get(extractMatcher.start(1));
+            String extractInner = originString.substring(startIdx, endIdx);
+            int originStart = extractMatcher.start(0) + 1;
+            int originEnd = endIdx + 1;
+
+            replacedString = replaceString(replacedString, originString.substring(originStart, originEnd), functionStr + "(" + extractInner + ")");
+        }
+
+        return replacedString;
+    }
+
+    public static String castRepalce(String originString) {
+        Matcher castMatcher = CAST_PATTERN.matcher(originString);
+        String replacedString = originString;
+
+        while (castMatcher.find()) {
+            String castStr = castMatcher.group();
+            String type = castMatcher.group(2);
+            String supportedType = "";
+            switch (type.toUpperCase()) {
+            case "INTEGER":
+                supportedType = "int";
+                break;
+            case "SHORT":
+                supportedType = "smallint";
+                break;
+            case "LONG":
+                supportedType = "bigint";
+                break;
+            default:
+                supportedType = type;
+            }
+
+            if (!supportedType.equals(type)) {
+                String replacedCastStr = castStr.replace(type, supportedType);
+                replacedString = replaceString(replacedString, castStr, replacedCastStr);
+            }
+        }
+
+        return replacedString;
+    }
+
+    public static String subqueryRepalce(String originString) {
+        Matcher subqueryMatcher = FROM_PATTERN.matcher(originString);
+        String replacedString = originString;
+        Map<Integer, Integer> parenthesesPairs = null;
+
+        while (subqueryMatcher.find()) {
+            if (parenthesesPairs == null) {
+                parenthesesPairs = findParenthesesPairs(originString);
+            }
+
+            int startIdx = subqueryMatcher.start(1);
+            int endIdx = parenthesesPairs.get(startIdx) + 1;
+
+            replacedString = replaceString(replacedString, originString.substring(startIdx, endIdx), originString.substring(startIdx, endIdx) + " as alias");
+        }
+
+        return replacedString;
+    }
+
+    public static String concatReplace(String originString) {
+        Matcher concatMatcher = CONCAT_PATTERN.matcher(originString);
+        String replacedString = originString;
+
+        while (concatMatcher.find()) {
+            String leftString = concatMatcher.group(1);
+            String rightString = concatMatcher.group(2);
+            replacedString = replaceString(replacedString, leftString + "||" + rightString, "concat(" + leftString + "," + rightString + ")");
+        }
+
+        return replacedString;
+    }
+
+    public static String doConvert(String originStr) {
+        // Step1.Replace " with `
+        String convertedSql = replaceString(originStr, "\"", "`");
+
+        // Step2.Replace extract functions
+        convertedSql = extractReplace(convertedSql);
+
+        // Step3.Replace cast type string
+        convertedSql = castRepalce(convertedSql);
+
+        // Step4.Replace sub query
+        convertedSql = subqueryRepalce(convertedSql);
+
+        // Step5.Replace char_length with length
+        convertedSql = replaceString(convertedSql, "char_length", "length");
+
+        // Step6.Replace "||" with concat
+        convertedSql = concatReplace(convertedSql);
+
+        return convertedSql;
+    }
+
+    private static Map<Integer, Integer> findParenthesesPairs(String sql) {
+        Map<Integer, Integer> result = new HashMap<>();
+        if (sql.length() > 1) {
+            Stack<Integer> lStack = new Stack<>();
+            boolean inStrVal = false;
+            for (int i = 0; i < sql.length(); i++) {
+                switch (sql.charAt(i)) {
+                case '(':
+                    if (!inStrVal) {
+                        lStack.push(i);
+                    }
+                    break;
+                case ')':
+                    if (!inStrVal && !lStack.empty()) {
+                        result.put(lStack.pop(), i);
+                    }
+                    break;
+                default:
+                    break;
+                }
+            }
+        }
+        return result;
+    }
+
+    @Override
+    public String convert(String originSql) {
+        return doConvert(originSql);
+    }
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/6ce9983f/core-storage/src/main/java/org/apache/kylin/storage/adhoc/IAdhocConverter.java
----------------------------------------------------------------------
diff --git a/core-storage/src/main/java/org/apache/kylin/storage/adhoc/IAdhocConverter.java b/core-storage/src/main/java/org/apache/kylin/storage/adhoc/IAdhocConverter.java
new file mode 100644
index 0000000..d5815bb
--- /dev/null
+++ b/core-storage/src/main/java/org/apache/kylin/storage/adhoc/IAdhocConverter.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+package org.apache.kylin.storage.adhoc;
+
+/**
+ * convert the query to satisfy the parser of adhoc query engine
+ */
+public interface IAdhocConverter {
+    String convert(String originSql);
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/6ce9983f/core-storage/src/test/java/org/apache/kylin/storage/adhoc/HiveAdhocConverterTest.java
----------------------------------------------------------------------
diff --git a/core-storage/src/test/java/org/apache/kylin/storage/adhoc/HiveAdhocConverterTest.java b/core-storage/src/test/java/org/apache/kylin/storage/adhoc/HiveAdhocConverterTest.java
new file mode 100644
index 0000000..62f6792
--- /dev/null
+++ b/core-storage/src/test/java/org/apache/kylin/storage/adhoc/HiveAdhocConverterTest.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.storage.adhoc;
+
+import junit.framework.TestCase;
+import org.junit.Test;
+
+
+public class HiveAdhocConverterTest extends TestCase {
+    @Test
+    public void testSringReplace() {
+        String originString = "select count(*) as cnt from test_kylin_fact where char_length(lstg_format_name) < 10";
+        String replacedString = HiveAdhocConverter
+            .replaceString(originString, "char_length", "length");
+        assertEquals(replacedString, "select count(*) as cnt from test_kylin_fact where length(lstg_format_name) < 10");
+    }
+
+    @Test
+    public void testExtractReplace() {
+        String originString = "ignore EXTRACT(YEAR FROM KYLIN_CAL_DT.CAL_DT) ignore";
+        String replacedString = HiveAdhocConverter.extractReplace(originString);
+        assertEquals(replacedString, "ignore YEAR(KYLIN_CAL_DT.CAL_DT) ignore");
+    }
+
+    @Test
+    public void testCastReplace() {
+        String originString = "ignore EXTRACT(YEAR FROM CAST(KYLIN_CAL_DT.CAL_DT AS INTEGER)) ignore";
+        String replacedString = HiveAdhocConverter.castRepalce(originString);
+        assertEquals(replacedString, "ignore EXTRACT(YEAR FROM CAST(KYLIN_CAL_DT.CAL_DT AS int)) ignore");
+    }
+
+    @Test
+    public void testSubqueryReplace() {
+        String originString = "select seller_id,lstg_format_name,sum(price) from (select * from test_kylin_fact where (lstg_format_name='FP-GTC') limit 20) group by seller_id,lstg_format_name";
+        String replacedString = HiveAdhocConverter.subqueryRepalce(originString);
+        assertEquals(replacedString, "select seller_id,lstg_format_name,sum(price) from (select * from test_kylin_fact where (lstg_format_name='FP-GTC') limit 20) as alias group by seller_id,lstg_format_name");
+    }
+
+    @Test
+    public void testConcatReplace() {
+        String originString = "select count(*) as cnt from test_kylin_fact where lstg_format_name||'a'='ABINa'";
+        String replacedString = HiveAdhocConverter.concatReplace(originString);
+        assertEquals(replacedString, "select count(*) as cnt from test_kylin_fact where concat(lstg_format_name,'a')='ABINa'");
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/6ce9983f/server-base/src/main/java/org/apache/kylin/rest/util/AdHocUtil.java
----------------------------------------------------------------------
diff --git a/server-base/src/main/java/org/apache/kylin/rest/util/AdHocUtil.java b/server-base/src/main/java/org/apache/kylin/rest/util/AdHocUtil.java
index f6b3496..648ef91 100644
--- a/server-base/src/main/java/org/apache/kylin/rest/util/AdHocUtil.java
+++ b/server-base/src/main/java/org/apache/kylin/rest/util/AdHocUtil.java
@@ -18,18 +18,19 @@
 
 package org.apache.kylin.rest.util;
 
+import java.sql.SQLException;
+import java.util.List;
+
 import org.apache.commons.lang3.exception.ExceptionUtils;
 import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.metadata.querymeta.SelectedColumnMeta;
 import org.apache.kylin.query.routing.NoRealizationFoundException;
-import org.apache.kylin.storage.adhoc.AdHocRunnerBase;
 import org.apache.kylin.rest.exception.InternalErrorException;
-import org.apache.kylin.metadata.querymeta.SelectedColumnMeta;
+import org.apache.kylin.storage.adhoc.AdHocRunnerBase;
+import org.apache.kylin.storage.adhoc.IAdhocConverter;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.sql.SQLException;
-import java.util.List;
-
 public class AdHocUtil {
     private static final Logger logger = LoggerFactory.getLogger(AdHocUtil.class);
 
@@ -40,22 +41,33 @@ public class AdHocUtil {
 
         if (isExpectedCause && kylinConfig.isAdhocEnabled()) {
             Class runnerClass = Class.forName(kylinConfig.getAdHocRunnerClassName());
-            Object instance = runnerClass.newInstance();
+            Class converterClass = Class.forName(kylinConfig.getAdHocConverterClassName());
+            Object runnerObj = runnerClass.newInstance();
+            Object converterObj = converterClass.newInstance();
 
-            if (!(instance instanceof AdHocRunnerBase)) {
-                throw new InternalErrorException("Ad-hoc runner class should be sub-class of AdHocRunnerBase.");
+            if (!(runnerObj instanceof AdHocRunnerBase)) {
+                throw new InternalErrorException("Ad-hoc runner class should be sub-class of AdHocRunnerBase");
             }
 
-            AdHocRunnerBase runner = (AdHocRunnerBase) instance;
+            if (!(converterObj instanceof IAdhocConverter)) {
+                throw new InternalErrorException("Ad-hoc converter class should implement of IAdhocConverter");
+            }
+
+            AdHocRunnerBase runner = (AdHocRunnerBase) runnerObj;
+            IAdhocConverter converter = (IAdhocConverter) converterObj;
             runner.setConfig(kylinConfig);
 
             logger.debug("Ad-hoc query enabled for Kylin");
-            // running query to ad-hoc jdbc
 
             runner.init();
 
             try {
-                runner.executeQuery(sql, results, columnMetas);
+                String adhocSql = converter.convert(sql);
+                if (!sql.equals(adhocSql)) {
+                    logger.info("the original query is converted to {} before delegating to ", adhocSql);
+                }
+
+                runner.executeQuery(adhocSql, results, columnMetas);
                 isAdHoc = true;
             } catch (Exception exception) {
                 throw exception;
@@ -67,4 +79,3 @@ public class AdHocUtil {
         return isAdHoc;
     }
 }
-