You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2017/12/31 13:19:46 UTC

[29/50] [abbrv] kylin git commit: KYLIN-3125 Support SparkSql in Cube building step 'Create Intermediate Flat Hive Table'

KYLIN-3125 Support SparkSql in Cube building step 'Create Intermediate Flat Hive Table'


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/acc9d0c8
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/acc9d0c8
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/acc9d0c8

Branch: refs/heads/master
Commit: acc9d0c834bf48ede6c57e2cc7aee73b76531fa2
Parents: bcda327
Author: Li Yang <li...@apache.org>
Authored: Thu Dec 21 16:39:56 2017 +0800
Committer: 成 <ch...@kyligence.io>
Committed: Sat Dec 23 22:22:21 2017 +0800

----------------------------------------------------------------------
 build/bin/find-hive-dependency.sh               |  3 ++-
 .../apache/kylin/common/KylinConfigBase.java    | 16 ++++++++++++++
 .../kylin/common/util/HiveCmdBuilder.java       | 19 ++++++++++++----
 .../main/resources/kylin-defaults.properties    |  9 ++++++++
 .../kylin/common/util/HiveCmdBuilderTest.java   | 23 +++++++++++++++++++-
 .../apache/kylin/source/hive/HiveTableMeta.java |  4 ++--
 6 files changed, 66 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/acc9d0c8/build/bin/find-hive-dependency.sh
----------------------------------------------------------------------
diff --git a/build/bin/find-hive-dependency.sh b/build/bin/find-hive-dependency.sh
index cb9c8f5..653db83 100755
--- a/build/bin/find-hive-dependency.sh
+++ b/build/bin/find-hive-dependency.sh
@@ -29,8 +29,9 @@ hive_env=
 
 if [ "${client_mode}" == "beeline" ]
 then
+    beeline_shell=`$KYLIN_HOME/bin/get-properties.sh kylin.source.hive.beeline-shell`
     beeline_params=`bash ${KYLIN_HOME}/bin/get-properties.sh kylin.source.hive.beeline-params`
-    hive_env=`beeline ${hive_conf_properties} ${beeline_params} --outputformat=dsv -e set 2>&1 | grep 'env:CLASSPATH' `
+    hive_env=`${beeline_shell} ${hive_conf_properties} ${beeline_params} --outputformat=dsv -e set 2>&1 | grep 'env:CLASSPATH' `
 else
     hive_env=`hive ${hive_conf_properties} -e set 2>&1 | grep 'env:CLASSPATH'`
 fi

http://git-wip-us.apache.org/repos/asf/kylin/blob/acc9d0c8/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 5620ac0..93fbc23 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -691,10 +691,26 @@ abstract public class KylinConfigBase implements Serializable {
         return getOptional("kylin.source.hive.client", "cli");
     }
 
+    public String getHiveBeelineShell() {
+        return getOptional("kylin.source.hive.beeline-shell", "beeline");
+    }
+    
     public String getHiveBeelineParams() {
         return getOptional("kylin.source.hive.beeline-params", "");
     }
 
+    public boolean getEnableSparkSqlForTableOps() {
+        return Boolean.parseBoolean(getOptional("kylin.source.hive.enable-sparksql-for-table-ops", "false"));
+    }
+    
+    public String getSparkSqlBeelineShell() {
+        return getOptional("kylin.source.hive.sparksql-beeline-shell", "");
+    }
+    
+    public String getSparkSqlBeelineParams() {
+        return getOptional("kylin.source.hive.sparksql-beeline-params", "");
+    }
+
     public String getFlatHiveTableClusterByDictColumn() {
         return getOptional("kylin.source.hive.flat-table-cluster-by-dict-column");
     }

http://git-wip-us.apache.org/repos/asf/kylin/blob/acc9d0c8/core-common/src/main/java/org/apache/kylin/common/util/HiveCmdBuilder.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/util/HiveCmdBuilder.java b/core-common/src/main/java/org/apache/kylin/common/util/HiveCmdBuilder.java
index 77dc579..add53db 100644
--- a/core-common/src/main/java/org/apache/kylin/common/util/HiveCmdBuilder.java
+++ b/core-common/src/main/java/org/apache/kylin/common/util/HiveCmdBuilder.java
@@ -50,18 +50,28 @@ public class HiveCmdBuilder {
         CLI, BEELINE
     }
 
-    private HiveClientMode clientMode;
     private KylinConfig kylinConfig;
     final private Map<String, String> hiveConfProps = new HashMap<>();
     final private ArrayList<String> statements = Lists.newArrayList();
 
     public HiveCmdBuilder() {
         kylinConfig = KylinConfig.getInstanceFromEnv();
-        clientMode = HiveClientMode.valueOf(kylinConfig.getHiveClientMode().toUpperCase());
         loadHiveConfiguration();
     }
 
     public String build() {
+        HiveClientMode clientMode = HiveClientMode.valueOf(kylinConfig.getHiveClientMode().toUpperCase());
+        String beelineShell = kylinConfig.getHiveBeelineShell();
+        String beelineParams = kylinConfig.getHiveBeelineParams();
+        if (kylinConfig.getEnableSparkSqlForTableOps()) {
+            clientMode = HiveClientMode.BEELINE;
+            beelineShell = kylinConfig.getSparkSqlBeelineShell();
+            beelineParams = kylinConfig.getSparkSqlBeelineParams();
+            if (StringUtils.isBlank(beelineShell)) {
+                throw new IllegalStateException("Missing config 'kylin.source.hive.sparksql-beeline-shell', please check kylin.properties");
+            }
+        }
+        
         StringBuffer buf = new StringBuffer();
 
         switch (clientMode) {
@@ -83,8 +93,9 @@ public class HiveCmdBuilder {
                     bw.write(statement);
                     bw.newLine();
                 }
-                buf.append("beeline ");
-                buf.append(kylinConfig.getHiveBeelineParams());
+                buf.append(beelineShell);
+                buf.append(" ");
+                buf.append(beelineParams);
                 buf.append(parseProps());
                 buf.append(" -f ");
                 buf.append(tmpHql.getAbsolutePath());

http://git-wip-us.apache.org/repos/asf/kylin/blob/acc9d0c8/core-common/src/main/resources/kylin-defaults.properties
----------------------------------------------------------------------
diff --git a/core-common/src/main/resources/kylin-defaults.properties b/core-common/src/main/resources/kylin-defaults.properties
index e19ff2e..d8b2d6a 100644
--- a/core-common/src/main/resources/kylin-defaults.properties
+++ b/core-common/src/main/resources/kylin-defaults.properties
@@ -61,9 +61,18 @@ kylin.restclient.connection.max-total=200
 # Hive client, valid value [cli, beeline]
 kylin.source.hive.client=cli
 
+# Beeline executable: a command name on PATH (default "beeline") or an absolute path, e.g. to use Spark's beeline instead of Hive's
+#kylin.source.hive.beeline-shell=beeline
+
 # Parameters for beeline client, only necessary if hive client is beeline
 #kylin.source.hive.beeline-params=-n root --hiveconf hive.security.authorization.sqlstd.confwhitelist.append='mapreduce.job.*|dfs.*' -u jdbc:hive2://localhost:10000
 
+# The Hive client settings above are used to read Hive table metadata.
+# When enabled below, table operations go through a separate SparkSQL command line instead; SparkSQL must connect to the same Hive metastore.
+kylin.source.hive.enable-sparksql-for-table-ops=false
+#kylin.source.hive.sparksql-beeline-shell=/path/to/spark-client/bin/beeline
+#kylin.source.hive.sparksql-beeline-params=-n root --hiveconf hive.security.authorization.sqlstd.confwhitelist.append='mapreduce.job.*|dfs.*' -u jdbc:hive2://localhost:10000
+
 kylin.source.hive.keep-flat-table=false
 
 # Hive database name for putting the intermediate flat tables

http://git-wip-us.apache.org/repos/asf/kylin/blob/acc9d0c8/core-common/src/test/java/org/apache/kylin/common/util/HiveCmdBuilderTest.java
----------------------------------------------------------------------
diff --git a/core-common/src/test/java/org/apache/kylin/common/util/HiveCmdBuilderTest.java b/core-common/src/test/java/org/apache/kylin/common/util/HiveCmdBuilderTest.java
index d69d4d2..4262722 100644
--- a/core-common/src/test/java/org/apache/kylin/common/util/HiveCmdBuilderTest.java
+++ b/core-common/src/test/java/org/apache/kylin/common/util/HiveCmdBuilderTest.java
@@ -43,7 +43,12 @@ public class HiveCmdBuilderTest {
     @After
     public void after() throws Exception {
         System.clearProperty("kylin.source.hive.client");
+        System.clearProperty("kylin.source.hive.beeline-shell");
         System.clearProperty("kylin.source.hive.beeline-params");
+        
+        System.clearProperty("kylin.source.hive.enable-sparksql-for-table-ops");
+        System.clearProperty("kylin.source.hive.sparksql-beeline-shell");
+        System.clearProperty("kylin.source.hive.sparksql-beeline-params");
     }
 
     @Test
@@ -67,6 +72,7 @@ public class HiveCmdBuilderTest {
     public void testBeeline() throws IOException {
         String lineSeparator = java.security.AccessController.doPrivileged(new sun.security.action.GetPropertyAction("line.separator"));
         System.setProperty("kylin.source.hive.client", "beeline");
+        System.setProperty("kylin.source.hive.beeline-shell", "/spark-client/bin/beeline");
         System.setProperty("kylin.source.hive.beeline-params", "-u jdbc_url");
 
         HiveCmdBuilder hiveCmdBuilder = new HiveCmdBuilder();
@@ -75,7 +81,7 @@ public class HiveCmdBuilderTest {
         hiveCmdBuilder.addStatement("SHOW\n TABLES;");
 
         String cmd = hiveCmdBuilder.build();
-        assertTrue(cmd.startsWith("beeline -u jdbc_url"));
+        assertTrue(cmd.startsWith("/spark-client/bin/beeline -u jdbc_url"));
 
         String hqlFile = cmd.substring(cmd.lastIndexOf("-f ") + 3).trim();
         hqlFile = hqlFile.substring(0, hqlFile.length() - ";exit $ret_code".length());
@@ -85,4 +91,19 @@ public class HiveCmdBuilderTest {
 
         FileUtils.forceDelete(new File(hqlFile));
     }
+
+    @Test
+    public void testSparkSqlForTableOps() throws IOException {
+        System.setProperty("kylin.source.hive.enable-sparksql-for-table-ops", "true");
+        System.setProperty("kylin.source.hive.sparksql-beeline-shell", "/spark-client/bin/beeline");
+        System.setProperty("kylin.source.hive.sparksql-beeline-params", "-u jdbc_url");
+
+        HiveCmdBuilder hiveCmdBuilder = new HiveCmdBuilder();
+        hiveCmdBuilder.addStatement("USE default;");
+        hiveCmdBuilder.addStatement("DROP TABLE test;");
+        hiveCmdBuilder.addStatement("SHOW\n TABLES;");
+
+        String cmd = hiveCmdBuilder.build();
+        assertTrue(cmd.startsWith("/spark-client/bin/beeline -u jdbc_url"));
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/kylin/blob/acc9d0c8/source-hive/src/main/java/org/apache/kylin/source/hive/HiveTableMeta.java
----------------------------------------------------------------------
diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveTableMeta.java b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveTableMeta.java
index fa9eb29..9a26c14 100644
--- a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveTableMeta.java
+++ b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveTableMeta.java
@@ -20,8 +20,8 @@ package org.apache.kylin.source.hive;
 
 import java.util.List;
 
-class HiveTableMeta {
-    static class HiveTableColumnMeta {
+public class HiveTableMeta {
+    public static class HiveTableColumnMeta {
         String name;
         String dataType;
         String comment;