You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2017/02/24 07:07:15 UTC
[2/2] kylin git commit: KYLIN-2331 refine HADOOP_CONF_DIR search logic
KYLIN-2331 refine HADOOP_CONF_DIR search logic
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/2c438602
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/2c438602
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/2c438602
Branch: refs/heads/master
Commit: 2c43860240b1c80cef1f14d0baaba989a49aa79d
Parents: 32cce58
Author: Hongbin Ma <ma...@apache.org>
Authored: Fri Feb 24 15:05:37 2017 +0800
Committer: Hongbin Ma <ma...@apache.org>
Committed: Fri Feb 24 15:07:08 2017 +0800
----------------------------------------------------------------------
build/bin/find-hadoop-conf-dir.sh | 77 ++++++++++++++++++++
build/bin/find-hbase-dependency.sh | 2 +
build/bin/find-hive-dependency.sh | 3 +-
build/bin/find-kafka-dependency.sh | 12 +--
build/bin/find-spark-dependency.sh | 3 +-
build/bin/kylin.sh | 14 ++--
build/conf/kylin.properties | 2 +-
.../apache/kylin/common/KylinConfigBase.java | 7 +-
.../kylin/engine/spark/SparkExecutable.java | 26 ++++---
9 files changed, 112 insertions(+), 34 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/2c438602/build/bin/find-hadoop-conf-dir.sh
----------------------------------------------------------------------
diff --git a/build/bin/find-hadoop-conf-dir.sh b/build/bin/find-hadoop-conf-dir.sh
new file mode 100644
index 0000000..5334b8a
--- /dev/null
+++ b/build/bin/find-hadoop-conf-dir.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/header.sh
+
+echo Retrieving hadoop conf dir...
+ # A non-empty kylin.env.hadoop-conf-dir value (as printed by get-properties.sh) is exported as-is, skipping the classpath scan below.
+override_hadoop_conf_dir=`bash ${KYLIN_HOME}/bin/get-properties.sh kylin.env.hadoop-conf-dir`
+
+if [ -n "$override_hadoop_conf_dir" ]; then
+ echo "$override_hadoop_conf_dir is override as the kylin_hadoop_conf_dir"
+ export kylin_hadoop_conf_dir=${override_hadoop_conf_dir}
+ return
+fi
+ # No override configured: look for a usable conf dir among the entries printed by 'hbase classpath'.
+hbase_classpath=`hbase classpath`
+ # Turn the colon-separated classpath into an array; 'cut -f 1-' keeps every field, so sed does the actual splitting. Relies on unquoted word splitting.
+arr=(`echo $hbase_classpath | cut -d ":" -f 1- | sed 's/:/ /g'`)
+kylin_hadoop_conf_dir=
+ # Walk entries in classpath order. grep -v drops any entry whose text contains "jar"; the first remaining entry holding all four site files is chosen.
+for data in ${arr[@]}
+do
+ result=`echo $data | grep -v -E ".*jar"`
+ if [ $result ]
+ then
+ valid_conf_dir=true
+ # NOTE(review): valid_conf_dir is assigned here and in the branches below but is never read within this script - confirm whether any caller uses it.
+ if [ ! -f $result/yarn-site.xml ]
+ then
+ verbose "$result is not valid hadoop dir conf because yarn-site.xml is missing"
+ valid_conf_dir=false
+ continue
+ fi
+
+ if [ ! -f $result/mapred-site.xml ]
+ then
+ verbose "$result is not valid hadoop dir conf because mapred-site.xml is missing"
+ valid_conf_dir=false
+ continue
+ fi
+
+ if [ ! -f $result/hdfs-site.xml ]
+ then
+ verbose "$result is not valid hadoop dir conf because hdfs-site.xml is missing"
+ valid_conf_dir=false
+ continue
+ fi
+
+ if [ ! -f $result/core-site.xml ]
+ then
+ verbose "$result is not valid hadoop dir conf because core-site.xml is missing"
+ valid_conf_dir=false
+ continue
+ fi
+ # yarn-site, mapred-site, hdfs-site and core-site all exist here: export the dir and stop. 'return' at top level assumes this file is sourced, not executed.
+ verbose "$result is chosen as the kylin_hadoop_conf_dir"
+ export kylin_hadoop_conf_dir=$result
+ return
+ fi
+done
+
http://git-wip-us.apache.org/repos/asf/kylin/blob/2c438602/build/bin/find-hbase-dependency.sh
----------------------------------------------------------------------
diff --git a/build/bin/find-hbase-dependency.sh b/build/bin/find-hbase-dependency.sh
index 7dbb53b..14dde3b 100644
--- a/build/bin/find-hbase-dependency.sh
+++ b/build/bin/find-hbase-dependency.sh
@@ -19,6 +19,8 @@
source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/header.sh
+echo Retrieving hbase dependency...
+
hbase_classpath=`hbase classpath`
# special handling for Amazon EMR, to prevent re-init of hbase-setenv
http://git-wip-us.apache.org/repos/asf/kylin/blob/2c438602/build/bin/find-hive-dependency.sh
----------------------------------------------------------------------
diff --git a/build/bin/find-hive-dependency.sh b/build/bin/find-hive-dependency.sh
index 453a35a..aa39da0 100644
--- a/build/bin/find-hive-dependency.sh
+++ b/build/bin/find-hive-dependency.sh
@@ -19,10 +19,11 @@
source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/header.sh
+echo Retrieving hive dependency...
+
client_mode=`bash ${KYLIN_HOME}/bin/get-properties.sh kylin.source.hive.client`
hive_env=
-echo Retrieving hive dependency...
if [ "${client_mode}" == "beeline" ]
then
beeline_params=`bash ${KYLIN_HOME}/bin/get-properties.sh kylin.source.hive.beeline-params`
http://git-wip-us.apache.org/repos/asf/kylin/blob/2c438602/build/bin/find-kafka-dependency.sh
----------------------------------------------------------------------
diff --git a/build/bin/find-kafka-dependency.sh b/build/bin/find-kafka-dependency.sh
index d3219e7..999face 100644
--- a/build/bin/find-kafka-dependency.sh
+++ b/build/bin/find-kafka-dependency.sh
@@ -22,15 +22,17 @@ source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/header.sh
kafka_home=
echo Retrieving kafka dependency...
-if [ -n "$KAFKA_HOME" ]
+
+if [ -z "$KAFKA_HOME" ]
then
- verbose "KAFKA_HOME is set to: $KAFKA_HOME, use it to locate kafka dependencies."
- kafka_home=$KAFKA_HOME
+ verbose "Couldn't find kafka home. If you want to enable streaming processing, Please set KAFKA_HOME to the path which contains kafka dependencies."
+ return
fi
-if [ -z "$KAFKA_HOME" ]
+if [ -n "$KAFKA_HOME" ]
then
- quit "Couldn't find kafka home. Please set KAFKA_HOME to the path which contains kafka dependencies."
+ verbose "KAFKA_HOME is set to: $KAFKA_HOME, use it to locate kafka dependencies."
+ kafka_home=$KAFKA_HOME
fi
# works for kafka 9+
http://git-wip-us.apache.org/repos/asf/kylin/blob/2c438602/build/bin/find-spark-dependency.sh
----------------------------------------------------------------------
diff --git a/build/bin/find-spark-dependency.sh b/build/bin/find-spark-dependency.sh
index 6f74d8a..4ea5c3e 100644
--- a/build/bin/find-spark-dependency.sh
+++ b/build/bin/find-spark-dependency.sh
@@ -19,9 +19,10 @@
source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/header.sh
+echo Retrieving Spark dependency...
+
spark_home=
-verbose Retrieving Spark dependency...
if [ -n "$SPARK_HOME" ]
then
verbose "SPARK_HOME is set to: $SPARK_HOME, use it to locate Spark dependencies."
http://git-wip-us.apache.org/repos/asf/kylin/blob/2c438602/build/bin/kylin.sh
----------------------------------------------------------------------
diff --git a/build/bin/kylin.sh b/build/bin/kylin.sh
index 15e1aed..a87fa78 100644
--- a/build/bin/kylin.sh
+++ b/build/bin/kylin.sh
@@ -32,6 +32,9 @@ function retrieveDependency() {
#retrive $hive_dependency and $hbase_dependency
source ${dir}/find-hive-dependency.sh
source ${dir}/find-hbase-dependency.sh
+ source ${dir}/find-hadoop-conf-dir.sh
+ source ${dir}/find-kafka-dependency.sh
+ source ${dir}/find-spark-dependency.sh
#retrive $KYLIN_EXTRA_START_OPTS
if [ -f "${dir}/setenv.sh" ]; then
@@ -39,15 +42,7 @@ function retrieveDependency() {
fi
export HBASE_CLASSPATH_PREFIX=${KYLIN_HOME}/conf:${KYLIN_HOME}/lib/*:${KYLIN_HOME}/ext/*:${HBASE_CLASSPATH_PREFIX}
- export HBASE_CLASSPATH=${HBASE_CLASSPATH}:${hive_dependency}
- if [ -n "$KAFKA_HOME" ]
- then
- source ${dir}/find-kafka-dependency.sh
- export HBASE_CLASSPATH=${HBASE_CLASSPATH}:${kafka_dependency}
- fi
-
- source ${dir}/find-spark-dependency.sh
- export HBASE_CLASSPATH=${HBASE_CLASSPATH}:${spark_dependency}
+ export HBASE_CLASSPATH=${HBASE_CLASSPATH}:${hive_dependency}:${kafka_dependency}:${spark_dependency}
verbose "HBASE_CLASSPATH: ${HBASE_CLASSPATH}"
}
@@ -114,6 +109,7 @@ then
-Dkylin.hbase.dependency=${hbase_dependency} \
-Dkylin.kafka.dependency=${kafka_dependency} \
-Dkylin.spark.dependency=${spark_dependency} \
+ -Dkylin.hadoop.conf.dir=${kylin_hadoop_conf_dir} \
-Dspring.profiles.active=${spring_profile} \
org.apache.hadoop.util.RunJar ${tomcat_root}/bin/bootstrap.jar org.apache.catalina.startup.Bootstrap start >> ${KYLIN_HOME}/logs/kylin.out 2>&1 & echo $! > ${KYLIN_HOME}/pid &
http://git-wip-us.apache.org/repos/asf/kylin/blob/2c438602/build/conf/kylin.properties
----------------------------------------------------------------------
diff --git a/build/conf/kylin.properties b/build/conf/kylin.properties
index 12c4c9c..d7ba0b3 100644
--- a/build/conf/kylin.properties
+++ b/build/conf/kylin.properties
@@ -204,7 +204,7 @@ kylin.security.saml.context-path=/kylin
### Spark Engine Configs ###
# Hadoop conf folder, will export this as "HADOOP_CONF_DIR" before run spark-submit
-#kylin.engine.spark.env.hadoop-conf-dir=/etc/hive/conf
+#kylin.env.hadoop-conf-dir=/etc/hive/conf
# Estimate the RDD partition numbers
kylin.engine.spark.rdd-partition-cut-mb=10
http://git-wip-us.apache.org/repos/asf/kylin/blob/2c438602/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 1c26c63..5317a39 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -773,12 +773,9 @@ abstract public class KylinConfigBase implements Serializable {
// ENGINE.SPARK
// ============================================================================
- public String getHadoopConfDir() {
- return getOptional("kylin.engine.spark.env.hadoop-conf-dir", "");
- }
- public void setHadoopConfDir(String hadoopConfDir) {
- setProperty("kylin.engine.spark.env.hadoop-conf-dir", hadoopConfDir);
+ public String getHadoopConfDir() {
+ return getOptional("kylin.env.hadoop-conf-dir", "");
}
public String getSparkAdditionalJars() {
http://git-wip-us.apache.org/repos/asf/kylin/blob/2c438602/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkExecutable.java
----------------------------------------------------------------------
diff --git a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkExecutable.java b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkExecutable.java
index c671a91..cf7438c 100644
--- a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkExecutable.java
+++ b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkExecutable.java
@@ -79,19 +79,22 @@ public class SparkExecutable extends AbstractExecutable {
}
String jars = this.getParam(JARS);
- String hadoopConf = "/etc/hadoop/conf";
- if (StringUtils.isNotEmpty(config.getHadoopConfDir())) {
- hadoopConf = config.getHadoopConfDir();
- } else {
- String hiveConf = ClassLoader.getSystemClassLoader().getResource("hive-site.xml").getFile().toString();
- File hiveConfFile = new File(hiveConf);
- if (hiveConfFile.exists() == true) {
- logger.info("Locate hive-site.xml in " + hiveConfFile);
- hadoopConf = hiveConfFile.getParent();
- }
+ //hadoop conf dir
+ String hadoopConf = null;
+ hadoopConf = System.getProperty("kylin.hadoop.conf.dir");
+
+ if (StringUtils.isEmpty(hadoopConf)) {
+ throw new RuntimeException("kylin_hadoop_conf_dir is empty, check if there's error in the output of 'kylin.sh start'");
+ }
+
+ File hiveConfFile = new File(hadoopConf, "hive-site.xml");
+ if (!hiveConfFile.exists()) {
+ throw new RuntimeException("Cannot find hive-site.xml in kylin_hadoop_conf_dir: " + hadoopConf + //
+ ". In order to enable spark cubing, you must set kylin.env.hadoop-conf-dir to a dir which contains at least core-site.xml, hdfs-site.xml, hive-site.xml, mapred-site.xml, yarn-site.xml");
}
logger.info("Using " + hadoopConf + " as HADOOP_CONF_DIR");
+ //hbase-site.xml
String hbaseConf = ClassLoader.getSystemClassLoader().getResource("hbase-site.xml").getFile().toString();
logger.info("Get hbase-site.xml location from classpath: " + hbaseConf);
File hbaseConfFile = new File(hbaseConf);
@@ -114,8 +117,7 @@ public class SparkExecutable extends AbstractExecutable {
stringBuilder.append("--files %s --jars %s %s %s");
try {
- String cmd = String.format(stringBuilder.toString(),
- hadoopConf, config.getSparkHome(), hbaseConfFile.getAbsolutePath(), jars, jobJar, formatArgs());
+ String cmd = String.format(stringBuilder.toString(), hadoopConf, config.getSparkHome(), hbaseConfFile.getAbsolutePath(), jars, jobJar, formatArgs());
logger.info("cmd:" + cmd);
final StringBuilder output = new StringBuilder();
CliCommandExecutor exec = new CliCommandExecutor();