You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2017/02/24 07:07:15 UTC

[2/2] kylin git commit: KYLIN-2331 refine HADOOP_CONF_DIR search logic

KYLIN-2331 refine HADOOP_CONF_DIR search logic


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/2c438602
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/2c438602
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/2c438602

Branch: refs/heads/master
Commit: 2c43860240b1c80cef1f14d0baaba989a49aa79d
Parents: 32cce58
Author: Hongbin Ma <ma...@apache.org>
Authored: Fri Feb 24 15:05:37 2017 +0800
Committer: Hongbin Ma <ma...@apache.org>
Committed: Fri Feb 24 15:07:08 2017 +0800

----------------------------------------------------------------------
 build/bin/find-hadoop-conf-dir.sh               | 77 ++++++++++++++++++++
 build/bin/find-hbase-dependency.sh              |  2 +
 build/bin/find-hive-dependency.sh               |  3 +-
 build/bin/find-kafka-dependency.sh              | 12 +--
 build/bin/find-spark-dependency.sh              |  3 +-
 build/bin/kylin.sh                              | 14 ++--
 build/conf/kylin.properties                     |  2 +-
 .../apache/kylin/common/KylinConfigBase.java    |  7 +-
 .../kylin/engine/spark/SparkExecutable.java     | 26 ++++---
 9 files changed, 112 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/2c438602/build/bin/find-hadoop-conf-dir.sh
----------------------------------------------------------------------
diff --git a/build/bin/find-hadoop-conf-dir.sh b/build/bin/find-hadoop-conf-dir.sh
new file mode 100644
index 0000000..5334b8a
--- /dev/null
+++ b/build/bin/find-hadoop-conf-dir.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/header.sh
+
+echo Retrieving hadoop conf dir...
+
+override_hadoop_conf_dir=`bash ${KYLIN_HOME}/bin/get-properties.sh kylin.env.hadoop-conf-dir`
+
+if [ -n "$override_hadoop_conf_dir" ]; then
+    echo "$override_hadoop_conf_dir is override as the kylin_hadoop_conf_dir"
+    export kylin_hadoop_conf_dir=${override_hadoop_conf_dir}
+    return
+fi
+
+hbase_classpath=`hbase classpath`
+
+arr=(`echo $hbase_classpath | cut -d ":" -f 1- | sed 's/:/ /g'`)
+kylin_hadoop_conf_dir=
+
+for data in ${arr[@]}
+do
+    result=`echo $data | grep -v -E ".*jar"`
+    if [ $result ]
+    then
+        valid_conf_dir=true
+        
+        if [ ! -f $result/yarn-site.xml ]
+        then
+            verbose "$result is not valid hadoop dir conf because yarn-site.xml is missing"
+            valid_conf_dir=false
+            continue
+        fi
+        
+        if [ ! -f $result/mapred-site.xml ]
+        then
+            verbose "$result is not valid hadoop dir conf because mapred-site.xml is missing"
+            valid_conf_dir=false
+            continue
+        fi
+        
+        if [ ! -f $result/hdfs-site.xml ]
+        then
+            verbose "$result is not valid hadoop dir conf because hdfs-site.xml is missing"
+            valid_conf_dir=false
+            continue
+        fi
+        
+        if [ ! -f $result/core-site.xml ]
+        then
+            verbose "$result is not valid hadoop dir conf because core-site.xml is missing"
+            valid_conf_dir=false
+            continue
+        fi
+        
+        verbose "$result is chosen as the kylin_hadoop_conf_dir"
+        export kylin_hadoop_conf_dir=$result
+        return
+    fi
+done
+

http://git-wip-us.apache.org/repos/asf/kylin/blob/2c438602/build/bin/find-hbase-dependency.sh
----------------------------------------------------------------------
diff --git a/build/bin/find-hbase-dependency.sh b/build/bin/find-hbase-dependency.sh
index 7dbb53b..14dde3b 100644
--- a/build/bin/find-hbase-dependency.sh
+++ b/build/bin/find-hbase-dependency.sh
@@ -19,6 +19,8 @@
 
 source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/header.sh
 
+echo Retrieving hbase dependency...
+
 hbase_classpath=`hbase classpath`
 
 # special handling for Amazon EMR, to prevent re-init of hbase-setenv

http://git-wip-us.apache.org/repos/asf/kylin/blob/2c438602/build/bin/find-hive-dependency.sh
----------------------------------------------------------------------
diff --git a/build/bin/find-hive-dependency.sh b/build/bin/find-hive-dependency.sh
index 453a35a..aa39da0 100644
--- a/build/bin/find-hive-dependency.sh
+++ b/build/bin/find-hive-dependency.sh
@@ -19,10 +19,11 @@
 
 source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/header.sh
 
+echo Retrieving hive dependency...
+
 client_mode=`bash ${KYLIN_HOME}/bin/get-properties.sh kylin.source.hive.client`
 hive_env=
 
-echo Retrieving hive dependency...
 if [ "${client_mode}" == "beeline" ]
 then
     beeline_params=`bash ${KYLIN_HOME}/bin/get-properties.sh kylin.source.hive.beeline-params`

http://git-wip-us.apache.org/repos/asf/kylin/blob/2c438602/build/bin/find-kafka-dependency.sh
----------------------------------------------------------------------
diff --git a/build/bin/find-kafka-dependency.sh b/build/bin/find-kafka-dependency.sh
index d3219e7..999face 100644
--- a/build/bin/find-kafka-dependency.sh
+++ b/build/bin/find-kafka-dependency.sh
@@ -22,15 +22,17 @@ source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/header.sh
 kafka_home=
 
 echo Retrieving kafka dependency...
-if [ -n "$KAFKA_HOME" ]
+
+if [ -z "$KAFKA_HOME" ]
 then
-    verbose "KAFKA_HOME is set to: $KAFKA_HOME, use it to locate kafka dependencies."
-    kafka_home=$KAFKA_HOME
+    verbose "Couldn't find kafka home. If you want to enable streaming processing, Please set KAFKA_HOME to the path which contains kafka dependencies."
+    return
 fi
 
-if [ -z "$KAFKA_HOME" ]
+if [ -n "$KAFKA_HOME" ]
 then
-    quit "Couldn't find kafka home. Please set KAFKA_HOME to the path which contains kafka dependencies."
+    verbose "KAFKA_HOME is set to: $KAFKA_HOME, use it to locate kafka dependencies."
+    kafka_home=$KAFKA_HOME
 fi
 
 # works for kafka 9+

http://git-wip-us.apache.org/repos/asf/kylin/blob/2c438602/build/bin/find-spark-dependency.sh
----------------------------------------------------------------------
diff --git a/build/bin/find-spark-dependency.sh b/build/bin/find-spark-dependency.sh
index 6f74d8a..4ea5c3e 100644
--- a/build/bin/find-spark-dependency.sh
+++ b/build/bin/find-spark-dependency.sh
@@ -19,9 +19,10 @@
 
 source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/header.sh
 
+echo Retrieving Spark dependency...
+
 spark_home=
 
-verbose Retrieving Spark dependency...
 if [ -n "$SPARK_HOME" ]
 then
     verbose "SPARK_HOME is set to: $SPARK_HOME, use it to locate Spark dependencies."

http://git-wip-us.apache.org/repos/asf/kylin/blob/2c438602/build/bin/kylin.sh
----------------------------------------------------------------------
diff --git a/build/bin/kylin.sh b/build/bin/kylin.sh
index 15e1aed..a87fa78 100644
--- a/build/bin/kylin.sh
+++ b/build/bin/kylin.sh
@@ -32,6 +32,9 @@ function retrieveDependency() {
     #retrieve $hive_dependency and $hbase_dependency
     source ${dir}/find-hive-dependency.sh
     source ${dir}/find-hbase-dependency.sh
+    source ${dir}/find-hadoop-conf-dir.sh
+    source ${dir}/find-kafka-dependency.sh
+    source ${dir}/find-spark-dependency.sh
 
     #retrieve $KYLIN_EXTRA_START_OPTS
     if [ -f "${dir}/setenv.sh" ]; then
@@ -39,15 +42,7 @@ function retrieveDependency() {
     fi
 
     export HBASE_CLASSPATH_PREFIX=${KYLIN_HOME}/conf:${KYLIN_HOME}/lib/*:${KYLIN_HOME}/ext/*:${HBASE_CLASSPATH_PREFIX}
-    export HBASE_CLASSPATH=${HBASE_CLASSPATH}:${hive_dependency}
-    if [ -n "$KAFKA_HOME" ]
-    then
-        source ${dir}/find-kafka-dependency.sh
-        export HBASE_CLASSPATH=${HBASE_CLASSPATH}:${kafka_dependency}
-    fi
-
-    source ${dir}/find-spark-dependency.sh
-    export HBASE_CLASSPATH=${HBASE_CLASSPATH}:${spark_dependency}
+    export HBASE_CLASSPATH=${HBASE_CLASSPATH}:${hive_dependency}:${kafka_dependency}:${spark_dependency}
 
     verbose "HBASE_CLASSPATH: ${HBASE_CLASSPATH}"
 }
@@ -114,6 +109,7 @@ then
     -Dkylin.hbase.dependency=${hbase_dependency} \
     -Dkylin.kafka.dependency=${kafka_dependency} \
     -Dkylin.spark.dependency=${spark_dependency} \
+    -Dkylin.hadoop.conf.dir=${kylin_hadoop_conf_dir} \
     -Dspring.profiles.active=${spring_profile} \
     org.apache.hadoop.util.RunJar ${tomcat_root}/bin/bootstrap.jar  org.apache.catalina.startup.Bootstrap start >> ${KYLIN_HOME}/logs/kylin.out 2>&1 & echo $! > ${KYLIN_HOME}/pid &
     

http://git-wip-us.apache.org/repos/asf/kylin/blob/2c438602/build/conf/kylin.properties
----------------------------------------------------------------------
diff --git a/build/conf/kylin.properties b/build/conf/kylin.properties
index 12c4c9c..d7ba0b3 100644
--- a/build/conf/kylin.properties
+++ b/build/conf/kylin.properties
@@ -204,7 +204,7 @@ kylin.security.saml.context-path=/kylin
 ### Spark Engine Configs ###
 
 # Hadoop conf folder; it will be exported as "HADOOP_CONF_DIR" before running spark-submit
-#kylin.engine.spark.env.hadoop-conf-dir=/etc/hive/conf
+#kylin.env.hadoop-conf-dir=/etc/hive/conf
 
 # Estimate the RDD partition numbers
 kylin.engine.spark.rdd-partition-cut-mb=10

http://git-wip-us.apache.org/repos/asf/kylin/blob/2c438602/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 1c26c63..5317a39 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -773,12 +773,9 @@ abstract public class KylinConfigBase implements Serializable {
     // ENGINE.SPARK
     // ============================================================================
 
-    public String getHadoopConfDir() {
-        return getOptional("kylin.engine.spark.env.hadoop-conf-dir", "");
-    }
 
-    public void setHadoopConfDir(String hadoopConfDir) {
-        setProperty("kylin.engine.spark.env.hadoop-conf-dir", hadoopConfDir);
+    public String getHadoopConfDir() {
+        return getOptional("kylin.env.hadoop-conf-dir", "");
     }
 
     public String getSparkAdditionalJars() {

http://git-wip-us.apache.org/repos/asf/kylin/blob/2c438602/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkExecutable.java
----------------------------------------------------------------------
diff --git a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkExecutable.java b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkExecutable.java
index c671a91..cf7438c 100644
--- a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkExecutable.java
+++ b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkExecutable.java
@@ -79,19 +79,22 @@ public class SparkExecutable extends AbstractExecutable {
         }
         String jars = this.getParam(JARS);
 
-        String hadoopConf = "/etc/hadoop/conf";
-        if (StringUtils.isNotEmpty(config.getHadoopConfDir())) {
-            hadoopConf = config.getHadoopConfDir();
-        } else {
-            String hiveConf = ClassLoader.getSystemClassLoader().getResource("hive-site.xml").getFile().toString();
-            File hiveConfFile = new File(hiveConf);
-            if (hiveConfFile.exists() == true) {
-                logger.info("Locate hive-site.xml in " + hiveConfFile);
-                hadoopConf = hiveConfFile.getParent();
-            }
+        //hadoop conf dir
+        String hadoopConf = null;
+        hadoopConf = System.getProperty("kylin.hadoop.conf.dir");
+
+        if (StringUtils.isEmpty(hadoopConf)) {
+            throw new RuntimeException("kylin_hadoop_conf_dir is empty, check if there's error in the output of 'kylin.sh start'");
+        }
+
+        File hiveConfFile = new File(hadoopConf, "hive-site.xml");
+        if (!hiveConfFile.exists()) {
+            throw new RuntimeException("Cannot find hive-site.xml in kylin_hadoop_conf_dir: " + hadoopConf + //
+                    ". In order to enable spark cubing, you must set kylin.env.hadoop-conf-dir to a dir which contains at least core-site.xml, hdfs-site.xml, hive-site.xml, mapred-site.xml, yarn-site.xml");
         }
         logger.info("Using " + hadoopConf + " as HADOOP_CONF_DIR");
 
+        //hbase-site.xml
         String hbaseConf = ClassLoader.getSystemClassLoader().getResource("hbase-site.xml").getFile().toString();
         logger.info("Get hbase-site.xml location from classpath: " + hbaseConf);
         File hbaseConfFile = new File(hbaseConf);
@@ -114,8 +117,7 @@ public class SparkExecutable extends AbstractExecutable {
 
         stringBuilder.append("--files %s --jars %s %s %s");
         try {
-            String cmd = String.format(stringBuilder.toString(),
-                    hadoopConf, config.getSparkHome(), hbaseConfFile.getAbsolutePath(), jars, jobJar, formatArgs());
+            String cmd = String.format(stringBuilder.toString(), hadoopConf, config.getSparkHome(), hbaseConfFile.getAbsolutePath(), jars, jobJar, formatArgs());
             logger.info("cmd:" + cmd);
             final StringBuilder output = new StringBuilder();
             CliCommandExecutor exec = new CliCommandExecutor();