You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by sh...@apache.org on 2017/01/03 06:42:50 UTC
[14/15] kylin git commit: KYLIN-2344 Package spark into Kylin binary package
KYLIN-2344 Package spark into Kylin binary package
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/b261d551
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/b261d551
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/b261d551
Branch: refs/heads/sparkcubing-rebase
Commit: b261d55138b33263be898d2dcc526d284e2c2d41
Parents: 5a8bc05
Author: shaofengshi <sh...@apache.org>
Authored: Tue Jan 3 10:28:38 2017 +0800
Committer: shaofengshi <sh...@apache.org>
Committed: Tue Jan 3 14:40:24 2017 +0800
----------------------------------------------------------------------
build/bin/check-env.sh | 6 ++
build/bin/find-spark-dependency.sh | 45 +++++++++++++++
build/bin/kylin.sh | 6 ++
build/conf/kylin-spark-conf.properties | 2 +-
build/conf/kylin.properties | 3 -
build/script/compress.sh | 5 +-
build/script/download-spark.sh | 52 +++++++++++++++++
build/script/functions.sh | 60 ++++++++++++++++++++
build/script/package.sh | 1 +
.../org/apache/kylin/common/KylinConfig.java | 14 +++++
.../apache/kylin/common/KylinConfigBase.java | 27 ++++-----
.../spark/SparkBatchCubingJobBuilder2.java | 4 +-
12 files changed, 200 insertions(+), 25 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/b261d551/build/bin/check-env.sh
----------------------------------------------------------------------
diff --git a/build/bin/check-env.sh b/build/bin/check-env.sh
index 9cd8a64..8904557 100644
--- a/build/bin/check-env.sh
+++ b/build/bin/check-env.sh
@@ -47,3 +47,9 @@ if [ $? != 0 ]
then
quit "Failed to create $WORKING_DIR. Please make sure the user has right to access $WORKING_DIR"
fi
+
+hadoop fs -mkdir -p $WORKING_DIR/spark-history
+if [ $? != 0 ]
+then
+ quit "Failed to create $WORKING_DIR/spark-history. Please make sure the user has right to access $WORKING_DIR"
+fi
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/kylin/blob/b261d551/build/bin/find-spark-dependency.sh
----------------------------------------------------------------------
diff --git a/build/bin/find-spark-dependency.sh b/build/bin/find-spark-dependency.sh
new file mode 100644
index 0000000..6f74d8a
--- /dev/null
+++ b/build/bin/find-spark-dependency.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/header.sh
+
+spark_home=
+
+verbose Retrieving Spark dependency...
+if [ -n "$SPARK_HOME" ]
+then
+ verbose "SPARK_HOME is set to: $SPARK_HOME, use it to locate Spark dependencies."
+ spark_home=$SPARK_HOME
+fi
+
+if [ -z "$SPARK_HOME" ]
+then
+ verbose "SPARK_HOME wasn't set, use $KYLIN_HOME/spark"
+ spark_home=$KYLIN_HOME/spark
+fi
+
+spark_dependency=`find -L $spark_home -name 'spark-assembly-[a-z0-9A-Z\.-]*.jar' ! -name '*doc*' ! -name '*test*' ! -name '*sources*' -printf '%p:' | sed 's/:$//'`
+if [ -z "$spark_dependency" ]
+then
+ quit "spark assembly lib not found"
+else
+ verbose "spark dependency: $spark_dependency"
+ export spark_dependency
+fi
+
http://git-wip-us.apache.org/repos/asf/kylin/blob/b261d551/build/bin/kylin.sh
----------------------------------------------------------------------
diff --git a/build/bin/kylin.sh b/build/bin/kylin.sh
index fca1e5c..824fb8b 100644
--- a/build/bin/kylin.sh
+++ b/build/bin/kylin.sh
@@ -45,6 +45,11 @@ function retrieveDependency() {
source ${dir}/find-kafka-dependency.sh
export HBASE_CLASSPATH=${HBASE_CLASSPATH}:${kafka_dependency}
fi
+
+ source ${dir}/find-spark-dependency.sh
+ export HBASE_CLASSPATH=${HBASE_CLASSPATH}:${spark_dependency}
+
+ verbose "HBASE_CLASSPATH: ${HBASE_CLASSPATH}"
}
# start command
@@ -112,6 +117,7 @@ then
-Dkylin.hive.dependency=${hive_dependency} \
-Dkylin.hbase.dependency=${hbase_dependency} \
-Dkylin.kafka.dependency=${kafka_dependency} \
+ -Dkylin.spark.dependency=${spark_dependency} \
-Dspring.profiles.active=${spring_profile} \
org.apache.hadoop.util.RunJar ${tomcat_root}/bin/bootstrap.jar org.apache.catalina.startup.Bootstrap start >> ${KYLIN_HOME}/logs/kylin.out 2>&1 & echo $! > ${KYLIN_HOME}/pid &
http://git-wip-us.apache.org/repos/asf/kylin/blob/b261d551/build/conf/kylin-spark-conf.properties
----------------------------------------------------------------------
diff --git a/build/conf/kylin-spark-conf.properties b/build/conf/kylin-spark-conf.properties
index 81567bb..5e6dafe 100644
--- a/build/conf/kylin-spark-conf.properties
+++ b/build/conf/kylin-spark-conf.properties
@@ -20,7 +20,7 @@ spark.executor.cores=4
spark.executor.instances=8
spark.history.kerberos.keytab=none
spark.history.kerberos.principal=none
-#spark.yarn.jar=hdfs://sandbox.hortonworks.com:8020/apps/spark/spark-assembly-1.6.3-hadoop2.6.0.jar
+#spark.yarn.jar=hdfs://namenode:8020/apps/spark/spark-assembly-1.6.3-hadoop2.6.0.jar
spark.driver.extraJavaOptions=-Dhdp.version=current
spark.yarn.am.extraJavaOptions=-Dhdp.version=current
spark.executor.extraJavaOptions=-Dhdp.version=current
http://git-wip-us.apache.org/repos/asf/kylin/blob/b261d551/build/conf/kylin.properties
----------------------------------------------------------------------
diff --git a/build/conf/kylin.properties b/build/conf/kylin.properties
index 98b66cb..bd0bbd4 100644
--- a/build/conf/kylin.properties
+++ b/build/conf/kylin.properties
@@ -133,9 +133,6 @@ kylin.engine.mr.mapper-input-rows=1000000
# Hadoop conf folder, will export this as "HADOOP_CONF_DIR" before run spark-submit
kylin.engine.spark.env.hadoop-conf-dir=/etc/hadoop/conf
-# Spark install home, default be $KYLIN_HOME/spark/
-#kylin.engine.spark.spark-home=
-
# Spark job submission properties file, default be $KYLIN_HOME/conf/kylin-spark-conf.properties
#kylin.engine.spark.properties-file=
http://git-wip-us.apache.org/repos/asf/kylin/blob/b261d551/build/script/compress.sh
----------------------------------------------------------------------
diff --git a/build/script/compress.sh b/build/script/compress.sh
index 4e3592e..39e429c 100755
--- a/build/script/compress.sh
+++ b/build/script/compress.sh
@@ -34,11 +34,12 @@ package_name=apache-kylin-${version}-bin
cd build/
rm -rf ${package_name}
mkdir ${package_name}
-cp -r lib tool bin conf tomcat ../examples/sample_cube commit_SHA1 ${package_name}
-rm -rf lib tomcat commit_SHA1
+cp -r lib tool bin conf tomcat spark ../examples/sample_cube commit_SHA1 ${package_name}
+rm -rf lib tomcat spark commit_SHA1
find ${package_name} -type d -exec chmod 755 {} \;
find ${package_name} -type f -exec chmod 644 {} \;
find ${package_name} -type f -name "*.sh" -exec chmod 755 {} \;
+find ${package_name}/spark/bin/ -type f -exec chmod +x {} \;
mkdir -p ../dist
tar -cvzf ../dist/${package_name}.tar.gz ${package_name}
rm -rf ${package_name}
http://git-wip-us.apache.org/repos/asf/kylin/blob/b261d551/build/script/download-spark.sh
----------------------------------------------------------------------
diff --git a/build/script/download-spark.sh b/build/script/download-spark.sh
new file mode 100755
index 0000000..dcbcbe7
--- /dev/null
+++ b/build/script/download-spark.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+dir=$(dirname ${0})
+cd ${dir}/../..
+
+source build/script/functions.sh
+
+rm -rf build/spark
+
+spark_version="1.6.3"
+spark_pkg_md5="ce8a2e7529aac0f0175194061769dbd4"
+
+if [ ! -f "build/spark-${spark_version}-bin-hadoop2.6.tgz" ]
+then
+ echo "no binary file found"
+ wget --directory-prefix=build/ http://archive.apache.org/dist/spark/spark-${spark_version}/spark-${spark_version}-bin-hadoop2.6.tgz || echo "Download spark failed"
+else
+ if [ `calMd5 build/spark-${spark_version}-bin-hadoop2.6.tgz | awk '{print $1}'` != "${spark_pkg_md5}" ]
+ then
+ echo "md5 check failed"
+ rm build/spark-${spark_version}-bin-hadoop2.6.tgz
+ wget --directory-prefix=build/ http://archive.apache.org/dist/spark/spark-${spark_version}/spark-${spark_version}-bin-hadoop2.6.tgz || echo "Download spark failed"
+
+ fi
+fi
+
+tar -zxvf build/spark-${spark_version}-bin-hadoop2.6.tgz -C build/ || { exit 1; }
+mv build/spark-${spark_version}-bin-hadoop2.6 build/spark
+
+# Remove unused components in Spark
+rm -rf build/spark/lib/spark-examples-*
+rm -rf build/spark/examples
+rm -rf build/spark/data
+rm -rf build/spark/python
+rm -rf build/spark/R
http://git-wip-us.apache.org/repos/asf/kylin/blob/b261d551/build/script/functions.sh
----------------------------------------------------------------------
diff --git a/build/script/functions.sh b/build/script/functions.sh
new file mode 100755
index 0000000..2eed617
--- /dev/null
+++ b/build/script/functions.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+function checkCommandExits() {
+ echo "Checking ${1}..."
+ if [ -z "$(command -v ${1})" ]
+ then
+ echo "Please install ${1} first so that Kylin packaging can proceed"
+ exit 1
+ else
+ echo "${1} check passed"
+ fi
+}
+
+function exportProjectVersions() {
+ if [ -z "${kylin_version}" ]; then
+ export kylin_version=`mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version -f kylin | grep -Ev '(^\[|Download\w+:)'`
+ echo "Apache Kylin Version: ${kylin_version}"
+ fi
+ if [ -z "${release_version}" ]; then
+ export release_version=$kylin_version
+ fi
+}
+
+function detectOSType() {
+ OS_TYPE="linux"
+ if [[ `uname -a` =~ "Darwin" ]]; then
+ OS_TYPE="mac"
+ elif [[ `uname -a` =~ "Cygwin" ]]; then
+ OS_TYPE="windows"
+ fi
+ echo $OS_TYPE
+}
+
+function calMd5() {
+ OS_TYPE=`detectOSType`
+ if [[ "$OS_TYPE" == "mac" ]]; then
+ md5 -q $1
+ elif [[ "$OS_TYPE" == "windows" ]]; then
+ md5sum $1
+ else
+ md5sum $1
+ fi
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/kylin/blob/b261d551/build/script/package.sh
----------------------------------------------------------------------
diff --git a/build/script/package.sh b/build/script/package.sh
index 1f9fbbd..c850ec3 100755
--- a/build/script/package.sh
+++ b/build/script/package.sh
@@ -76,6 +76,7 @@ git rev-parse HEAD >> build/commit_SHA1
sh build/script/build.sh || { exit 1; }
sh build/script/download-tomcat.sh || { exit 1; }
+sh build/script/download-spark.sh || { exit 1; }
sh build/script/prepare.sh || { exit 1; }
sh build/script/compress.sh || { exit 1; }
http://git-wip-us.apache.org/repos/asf/kylin/blob/b261d551/core-common/src/main/java/org/apache/kylin/common/KylinConfig.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfig.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfig.java
index f169142..4eac92a 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfig.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfig.java
@@ -187,6 +187,20 @@ public class KylinConfig extends KylinConfigBase {
return kylinConfig;
}
+ public static String getKylinConfPath() {
+ String kylinConfHome = System.getProperty(KYLIN_CONF);
+ if (!StringUtils.isEmpty(kylinConfHome)) {
+ logger.info("Use KYLIN_CONF=" + kylinConfHome);
+ return kylinConfHome;
+ }
+
+ String kylinHome = getKylinHome();
+ if (StringUtils.isEmpty(kylinHome))
+ throw new KylinConfigCannotInitException("Didn't find KYLIN_CONF or KYLIN_HOME, please set one of them");
+
+ return kylinHome + File.separator + "conf";
+ }
+
static File getKylinPropertiesFile() {
String kylinConfHome = System.getProperty(KYLIN_CONF);
if (!StringUtils.isEmpty(kylinConfHome)) {
http://git-wip-us.apache.org/repos/asf/kylin/blob/b261d551/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index b0b18ce..1ec0aab 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -61,6 +61,16 @@ abstract public class KylinConfigBase implements Serializable {
return kylinHome;
}
+ public static String getSparkHome() {
+ String sparkHome = System.getenv("SPARK_HOME");
+ if (StringUtils.isNotEmpty(sparkHome)) {
+ logger.info("SPARK_HOME was set to " + sparkHome);
+ return sparkHome;
+ }
+
+ return getKylinHome() + File.separator + "spark";
+ }
+
// backward compatibility check happens when properties is loaded or updated
static BackwardCompatibilityConfig BCC = new BackwardCompatibilityConfig();
@@ -717,23 +727,6 @@ abstract public class KylinConfigBase implements Serializable {
// ENGINE.SPARK
// ============================================================================
- public String getSparkHome() {
- String sparkHome = getOptional("kylin.engine.spark.spark-home", "spark");
- File f = new File(sparkHome);
- if (f.exists()) {
- return f.getAbsolutePath();
- } else {
- String home = getKylinHome();
- f = new File(home, sparkHome);
- if (f.exists()) {
- return f.getAbsolutePath();
- }
- }
-
- throw new IllegalArgumentException("Spark home '" + sparkHome + "' does not exist, check 'kylin.engine.spark.spark-home' in kylin.properties");
-
- }
-
public String getSparkHadoopConfDir() {
return getRequired("kylin.engine.spark.env.hadoop-conf-dir");
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/b261d551/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkBatchCubingJobBuilder2.java
----------------------------------------------------------------------
diff --git a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkBatchCubingJobBuilder2.java b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkBatchCubingJobBuilder2.java
index 9532d31..c5d47e7 100644
--- a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkBatchCubingJobBuilder2.java
+++ b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkBatchCubingJobBuilder2.java
@@ -19,6 +19,7 @@
package org.apache.kylin.engine.spark;
import org.apache.hadoop.util.ClassUtil;
+import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.StringUtil;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.engine.EngineFactory;
@@ -52,7 +53,7 @@ public class SparkBatchCubingJobBuilder2 extends BatchCubingJobBuilder2 {
sparkExecutable.setParam(SparkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
sparkExecutable.setParam(SparkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_PATH.getOpt(), flatTableDesc.getTableName());
- sparkExecutable.setParam(SparkCubingByLayer.OPTION_CONF_PATH.getOpt(), "/Users/shishaofeng/workspace/kylin-15/examples/test_case_data/sandbox/"); //FIXME
+ sparkExecutable.setParam(SparkCubingByLayer.OPTION_CONF_PATH.getOpt(), KylinConfig.getKylinConfPath());
sparkExecutable.setParam(SparkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
StringBuilder jars = new StringBuilder();
@@ -65,7 +66,6 @@ public class SparkBatchCubingJobBuilder2 extends BatchCubingJobBuilder2 {
StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
sparkExecutable.setJars(jars.toString());
- // sparkExecutable.setJars("/Users/shishaofeng/.m2/repository/org/cloudera/htrace/htrace-core/2.01/htrace-core-2.01.jar,/Users/shishaofeng/.m2/repository/org/apache/hbase/hbase-protocol/0.98.8-hadoop2/hbase-protocol-0.98.8-hadoop2.jar,/Users/shishaofeng/.m2/repository/org/apache/hbase/hbase-common/0.98.8-hadoop2/hbase-common-0.98.8-hadoop2.jar,/Users/shishaofeng/.m2//repository/org/apache/hbase/hbase-client/0.98.8-hadoop2/hbase-client-0.98.8-hadoop2.jar");
sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_IN_MEM_CUBE + " with Spark");
return sparkExecutable;