You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by sh...@apache.org on 2017/01/09 09:14:57 UTC

[6/8] kylin git commit: KYLIN-2344 Package spark into Kylin binary package

KYLIN-2344 Package spark into Kylin binary package


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/e864cd3b
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/e864cd3b
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/e864cd3b

Branch: refs/heads/master
Commit: e864cd3b1c06700b1a1054d1f520eaabedc25d82
Parents: b7d87bb
Author: shaofengshi <sh...@apache.org>
Authored: Tue Jan 3 10:28:38 2017 +0800
Committer: shaofengshi <sh...@apache.org>
Committed: Mon Jan 9 16:58:11 2017 +0800

----------------------------------------------------------------------
 build/bin/check-env.sh                          |  6 ++
 build/bin/find-spark-dependency.sh              | 45 +++++++++++++++
 build/bin/kylin.sh                              |  6 ++
 build/conf/kylin-spark-conf.properties          |  2 +-
 build/conf/kylin.properties                     |  3 -
 build/script/compress.sh                        |  5 +-
 build/script/download-spark.sh                  | 52 +++++++++++++++++
 build/script/functions.sh                       | 60 ++++++++++++++++++++
 build/script/package.sh                         |  1 +
 .../org/apache/kylin/common/KylinConfig.java    | 14 +++++
 .../apache/kylin/common/KylinConfigBase.java    | 27 ++++-----
 .../spark/SparkBatchCubingJobBuilder2.java      |  4 +-
 12 files changed, 200 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/e864cd3b/build/bin/check-env.sh
----------------------------------------------------------------------
diff --git a/build/bin/check-env.sh b/build/bin/check-env.sh
index a4003c9..e446d66 100644
--- a/build/bin/check-env.sh
+++ b/build/bin/check-env.sh
@@ -47,3 +47,9 @@ if [ $? != 0 ]
 then
     quit "Failed to create $WORKING_DIR. Please make sure the user has right to access $WORKING_DIR"
 fi
+
+hadoop fs -mkdir -p $WORKING_DIR/spark-history
+if [ $? != 0 ]
+then
+    quit "Failed to create $WORKING_DIR/spark-history. Please make sure the user has right to access $WORKING_DIR"
+fi
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/kylin/blob/e864cd3b/build/bin/find-spark-dependency.sh
----------------------------------------------------------------------
diff --git a/build/bin/find-spark-dependency.sh b/build/bin/find-spark-dependency.sh
new file mode 100644
index 0000000..6f74d8a
--- /dev/null
+++ b/build/bin/find-spark-dependency.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/header.sh
+
+spark_home=
+
+verbose Retrieving Spark dependency...
+if [ -n "$SPARK_HOME" ]
+then
+    verbose "SPARK_HOME is set to: $SPARK_HOME, use it to locate Spark dependencies."
+    spark_home=$SPARK_HOME
+fi
+
+if [ -z "$SPARK_HOME" ]
+then
+    verbose "SPARK_HOME wasn't set, use $KYLIN_HOME/spark"
+    spark_home=$KYLIN_HOME/spark
+fi
+
+spark_dependency=`find -L $spark_home -name 'spark-assembly-[a-z0-9A-Z\.-]*.jar' ! -name '*doc*' ! -name '*test*' ! -name '*sources*' ''-printf '%p:' | sed 's/:$//'`
+if [ -z "$spark_dependency" ]
+then
+    quit "spark assembly lib not found"
+else
+    verbose "spark dependency: $spark_dependency"
+    export spark_dependency
+fi
+

http://git-wip-us.apache.org/repos/asf/kylin/blob/e864cd3b/build/bin/kylin.sh
----------------------------------------------------------------------
diff --git a/build/bin/kylin.sh b/build/bin/kylin.sh
index 0cdbbc6..7813b79 100644
--- a/build/bin/kylin.sh
+++ b/build/bin/kylin.sh
@@ -45,6 +45,11 @@ function retrieveDependency() {
         source ${dir}/find-kafka-dependency.sh
         export HBASE_CLASSPATH=${HBASE_CLASSPATH}:${kafka_dependency}
     fi
+
+    source ${dir}/find-spark-dependency.sh
+    export HBASE_CLASSPATH=${HBASE_CLASSPATH}:${spark_dependency}
+
+    verbose "HBASE_CLASSPATH: ${HBASE_CLASSPATH}"
 }
 
 # start command
@@ -112,6 +117,7 @@ then
     -Dkylin.hive.dependency=${hive_dependency} \
     -Dkylin.hbase.dependency=${hbase_dependency} \
     -Dkylin.kafka.dependency=${kafka_dependency} \
+    -Dkylin.spark.dependency=${spark_dependency} \
     -Dspring.profiles.active=${spring_profile} \
     org.apache.hadoop.util.RunJar ${tomcat_root}/bin/bootstrap.jar  org.apache.catalina.startup.Bootstrap start >> ${KYLIN_HOME}/logs/kylin.out 2>&1 & echo $! > ${KYLIN_HOME}/pid &
     

http://git-wip-us.apache.org/repos/asf/kylin/blob/e864cd3b/build/conf/kylin-spark-conf.properties
----------------------------------------------------------------------
diff --git a/build/conf/kylin-spark-conf.properties b/build/conf/kylin-spark-conf.properties
index 81567bb..5e6dafe 100644
--- a/build/conf/kylin-spark-conf.properties
+++ b/build/conf/kylin-spark-conf.properties
@@ -20,7 +20,7 @@ spark.executor.cores=4
 spark.executor.instances=8
 spark.history.kerberos.keytab=none
 spark.history.kerberos.principal=none
-#spark.yarn.jar=hdfs://sandbox.hortonworks.com:8020/apps/spark/spark-assembly-1.6.3-hadoop2.6.0.jar
+#spark.yarn.jar=hdfs://namenode:8020/apps/spark/spark-assembly-1.6.3-hadoop2.6.0.jar
 spark.driver.extraJavaOptions=-Dhdp.version=current
 spark.yarn.am.extraJavaOptions=-Dhdp.version=current
 spark.executor.extraJavaOptions=-Dhdp.version=current

http://git-wip-us.apache.org/repos/asf/kylin/blob/e864cd3b/build/conf/kylin.properties
----------------------------------------------------------------------
diff --git a/build/conf/kylin.properties b/build/conf/kylin.properties
index 98b66cb..bd0bbd4 100644
--- a/build/conf/kylin.properties
+++ b/build/conf/kylin.properties
@@ -133,9 +133,6 @@ kylin.engine.mr.mapper-input-rows=1000000
 # Hadoop conf folder, will export this as "HADOOP_CONF_DIR" before run spark-submit
 kylin.engine.spark.env.hadoop-conf-dir=/etc/hadoop/conf
 
-# Spark install home, default be $KYLIN_HOME/spark/
-#kylin.engine.spark.spark-home=
-
 # Spark job submission properties file, default be $KYLIN_HOME/conf/kylin-spark-conf.properties
 #kylin.engine.spark.properties-file=
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/e864cd3b/build/script/compress.sh
----------------------------------------------------------------------
diff --git a/build/script/compress.sh b/build/script/compress.sh
index 4e3592e..39e429c 100755
--- a/build/script/compress.sh
+++ b/build/script/compress.sh
@@ -34,11 +34,12 @@ package_name=apache-kylin-${version}-bin
 cd build/
 rm -rf ${package_name}
 mkdir ${package_name}
-cp -r lib tool bin conf tomcat ../examples/sample_cube commit_SHA1 ${package_name}
-rm -rf lib tomcat commit_SHA1
+cp -r lib tool bin conf tomcat spark ../examples/sample_cube commit_SHA1 ${package_name}
+rm -rf lib tomcat spark commit_SHA1
 find ${package_name} -type d -exec chmod 755 {} \;
 find ${package_name} -type f -exec chmod 644 {} \;
 find ${package_name} -type f -name "*.sh" -exec chmod 755 {} \;
+find ${package_name}/spark/bin/ -type f -exec chmod +x {} \;
 mkdir -p ../dist
 tar -cvzf ../dist/${package_name}.tar.gz ${package_name}
 rm -rf ${package_name}

http://git-wip-us.apache.org/repos/asf/kylin/blob/e864cd3b/build/script/download-spark.sh
----------------------------------------------------------------------
diff --git a/build/script/download-spark.sh b/build/script/download-spark.sh
new file mode 100755
index 0000000..dcbcbe7
--- /dev/null
+++ b/build/script/download-spark.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Run from the repository root so that the build/ paths below resolve.
+dir=$(dirname "${0}")
+cd "${dir}/../.." || exit 1
+
+source build/script/functions.sh
+
+rm -rf build/spark
+
+spark_version="1.6.3"
+spark_pkg_md5="ce8a2e7529aac0f0175194061769dbd4"
+spark_pkg_name="spark-${spark_version}-bin-hadoop2.6"
+spark_pkg_file="build/${spark_pkg_name}.tgz"
+spark_pkg_url="http://archive.apache.org/dist/spark/spark-${spark_version}/${spark_pkg_name}.tgz"
+
+# Reuse a previously downloaded tarball only when its checksum matches.
+need_download="false"
+if [ ! -f "${spark_pkg_file}" ]
+then
+    echo "no binary file found"
+    need_download="true"
+elif [ "$(calMd5 "${spark_pkg_file}" | awk '{print $1}')" != "${spark_pkg_md5}" ]
+then
+    echo "md5 check failed"
+    rm -f "${spark_pkg_file}"
+    need_download="true"
+fi
+
+if [ "${need_download}" = "true" ]
+then
+    # Stop right away on a failed download instead of letting tar fail later.
+    wget --directory-prefix=build/ "${spark_pkg_url}" || { echo "Download spark failed"; exit 1; }
+
+    # A fresh download must match the expected checksum as well.
+    if [ "$(calMd5 "${spark_pkg_file}" | awk '{print $1}')" != "${spark_pkg_md5}" ]
+    then
+        echo "md5 check failed"
+        exit 1
+    fi
+fi
+
+tar -zxvf "${spark_pkg_file}" -C build/ || { exit 1; }
+mv "build/${spark_pkg_name}" build/spark || { exit 1; }
+
+# Remove unused components in Spark
+rm -rf build/spark/lib/spark-examples-*
+rm -rf build/spark/examples
+rm -rf build/spark/data
+rm -rf build/spark/python
+rm -rf build/spark/R

http://git-wip-us.apache.org/repos/asf/kylin/blob/e864cd3b/build/script/functions.sh
----------------------------------------------------------------------
diff --git a/build/script/functions.sh b/build/script/functions.sh
new file mode 100755
index 0000000..2eed617
--- /dev/null
+++ b/build/script/functions.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Verify that a command needed for packaging is available.
+#   $1 - name of the command to look up
+# Prints the outcome and exits the current shell with status 1 when the
+# command cannot be found.
+function checkCommandExits() {
+    echo "Checking ${1}..."
+    # Rely on the exit status of "command -v"; the argument is quoted so an
+    # empty or multi-word name is passed through as a single word.
+    if ! command -v "${1}" > /dev/null 2>&1
+    then
+        echo "Please install ${1} first so that Kylin packaging can proceed"
+        exit 1
+    else
+        echo "${1} check passed"
+    fi
+}
+
+# Export kylin_version and release_version unless the caller already set them.
+# kylin_version is resolved by asking Maven for project.version;
+# release_version falls back to kylin_version.
+function exportProjectVersions() {
+    # The guard must test kylin_version itself; a misspelled name is always
+    # empty and would re-run Maven even when the version is already known.
+    if [ -z "${kylin_version}" ]; then
+        # NOTE(review): "-f kylin" points Maven at a path named "kylin" - confirm it exists in this repo layout.
+        export kylin_version=`mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version -f kylin | grep -Ev '(^\[|Download\w+:)'`
+        echo "Apache Kylin Version: ${kylin_version}"
+    fi
+    if [ -z "${release_version}" ]; then
+        # kap_version is never defined in this file; use the version resolved above.
+        export release_version=$kylin_version
+    fi
+}
+
+# Print the OS family derived from "uname -a": "mac" when the output contains
+# "Darwin", "windows" when it contains "Cygwin", otherwise "linux".
+# The result is also left in the OS_TYPE variable.
+function detectOSType() {
+    case "`uname -a`" in
+        *Darwin*) OS_TYPE="mac" ;;
+        *Cygwin*) OS_TYPE="windows" ;;
+        *)        OS_TYPE="linux" ;;
+    esac
+    echo $OS_TYPE
+}
+
+# Print the MD5 checksum of a file using the tool available on this OS.
+#   $1 - path of the file to hash
+# On mac "md5 -q" is used; everywhere else "md5sum" is used, so callers that
+# need only the digest should take the first field of the output.
+function calMd5() {
+    OS_TYPE=`detectOSType`
+    # The path is quoted so file names containing spaces are hashed correctly.
+    if [[ "$OS_TYPE" == "mac" ]]; then
+        md5 -q "$1"
+    else
+        md5sum "$1"
+    fi
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/kylin/blob/e864cd3b/build/script/package.sh
----------------------------------------------------------------------
diff --git a/build/script/package.sh b/build/script/package.sh
index 1f9fbbd..c850ec3 100755
--- a/build/script/package.sh
+++ b/build/script/package.sh
@@ -76,6 +76,7 @@ git rev-parse HEAD >> build/commit_SHA1
 
 sh build/script/build.sh || { exit 1; }
 sh build/script/download-tomcat.sh || { exit 1; }
+sh build/script/download-spark.sh || { exit 1; }
 sh build/script/prepare.sh || { exit 1; }
 sh build/script/compress.sh || { exit 1; }
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/e864cd3b/core-common/src/main/java/org/apache/kylin/common/KylinConfig.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfig.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfig.java
index f169142..4eac92a 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfig.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfig.java
@@ -187,6 +187,20 @@ public class KylinConfig extends KylinConfigBase {
         return kylinConfig;
     }
 
+    /**
+     * Resolves the directory that holds the Kylin configuration.
+     *
+     * <p>The {@code KYLIN_CONF} system property is used when it is non-empty;
+     * otherwise the {@code conf} folder under the Kylin home is returned.
+     *
+     * @return the configuration directory path
+     * @throws KylinConfigCannotInitException if neither location is available
+     */
+    public static String getKylinConfPath() {
+        final String confOverride = System.getProperty(KYLIN_CONF);
+        if (StringUtils.isEmpty(confOverride)) {
+            final String home = getKylinHome();
+            if (StringUtils.isEmpty(home)) {
+                throw new KylinConfigCannotInitException("Didn't find KYLIN_CONF or KYLIN_HOME, please set one of them");
+            }
+            return home + File.separator + "conf";
+        }
+
+        logger.info("Use KYLIN_CONF=" + confOverride);
+        return confOverride;
+    }
+
     static File getKylinPropertiesFile() {
         String kylinConfHome = System.getProperty(KYLIN_CONF);
         if (!StringUtils.isEmpty(kylinConfHome)) {

http://git-wip-us.apache.org/repos/asf/kylin/blob/e864cd3b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 77b1e1c..7d6ac2b 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -61,6 +61,16 @@ abstract public class KylinConfigBase implements Serializable {
         return kylinHome;
     }
 
+    /**
+     * Resolves the Spark installation directory.
+     *
+     * <p>The {@code SPARK_HOME} environment variable is used when it is
+     * non-empty; otherwise the {@code spark} folder under the Kylin home is
+     * returned.
+     *
+     * @return the Spark home path
+     */
+    public static String getSparkHome() {
+        final String envSparkHome = System.getenv("SPARK_HOME");
+        if (!StringUtils.isNotEmpty(envSparkHome)) {
+            // NOTE(review): the Kylin home is concatenated without a null/empty check - confirm callers always have it set.
+            return getKylinHome() + File.separator + "spark";
+        }
+
+        logger.info("SPARK_HOME was set to " + envSparkHome);
+        return envSparkHome;
+    }
+
     // backward compatibility check happens when properties is loaded or updated
     static BackwardCompatibilityConfig BCC = new BackwardCompatibilityConfig();
 
@@ -729,23 +739,6 @@ abstract public class KylinConfigBase implements Serializable {
     // ENGINE.SPARK
     // ============================================================================
 
-    public String getSparkHome() {
-        String sparkHome = getOptional("kylin.engine.spark.spark-home", "spark");
-        File f = new File(sparkHome);
-        if (f.exists()) {
-            return f.getAbsolutePath();
-        } else {
-            String home = getKylinHome();
-            f = new File(home, sparkHome);
-            if (f.exists()) {
-                return f.getAbsolutePath();
-            }
-        }
-
-        throw new IllegalArgumentException("Spark home '" + sparkHome + "' does not exist, check 'kylin.engine.spark.spark-home' in kylin.properties");
-
-    }
-
     public String getSparkHadoopConfDir() {
         return getRequired("kylin.engine.spark.env.hadoop-conf-dir");
     }

http://git-wip-us.apache.org/repos/asf/kylin/blob/e864cd3b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkBatchCubingJobBuilder2.java
----------------------------------------------------------------------
diff --git a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkBatchCubingJobBuilder2.java b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkBatchCubingJobBuilder2.java
index 9532d31..c5d47e7 100644
--- a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkBatchCubingJobBuilder2.java
+++ b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkBatchCubingJobBuilder2.java
@@ -19,6 +19,7 @@
 package org.apache.kylin.engine.spark;
 
 import org.apache.hadoop.util.ClassUtil;
+import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.util.StringUtil;
 import org.apache.kylin.cube.CubeSegment;
 import org.apache.kylin.engine.EngineFactory;
@@ -52,7 +53,7 @@ public class SparkBatchCubingJobBuilder2 extends BatchCubingJobBuilder2 {
         sparkExecutable.setParam(SparkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
         sparkExecutable.setParam(SparkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
         sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_PATH.getOpt(), flatTableDesc.getTableName());
-        sparkExecutable.setParam(SparkCubingByLayer.OPTION_CONF_PATH.getOpt(), "/Users/shishaofeng/workspace/kylin-15/examples/test_case_data/sandbox/"); //FIXME
+        sparkExecutable.setParam(SparkCubingByLayer.OPTION_CONF_PATH.getOpt(), KylinConfig.getKylinConfPath());
         sparkExecutable.setParam(SparkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
 
         StringBuilder jars = new StringBuilder();
@@ -65,7 +66,6 @@ public class SparkBatchCubingJobBuilder2 extends BatchCubingJobBuilder2 {
 
         StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
         sparkExecutable.setJars(jars.toString());
-        //        sparkExecutable.setJars("/Users/shishaofeng/.m2/repository/org/cloudera/htrace/htrace-core/2.01/htrace-core-2.01.jar,/Users/shishaofeng/.m2/repository/org/apache/hbase/hbase-protocol/0.98.8-hadoop2/hbase-protocol-0.98.8-hadoop2.jar,/Users/shishaofeng/.m2/repository/org/apache/hbase/hbase-common/0.98.8-hadoop2/hbase-common-0.98.8-hadoop2.jar,/Users/shishaofeng/.m2//repository/org/apache/hbase/hbase-client/0.98.8-hadoop2/hbase-client-0.98.8-hadoop2.jar");
 
         sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_IN_MEM_CUBE + " with Spark");
         return sparkExecutable;