You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by gw...@apache.org on 2017/02/17 22:56:49 UTC
incubator-systemml git commit: [SYSTEMML-1246] Use correct jar name
in sparkDML.sh of -bin artifact
Repository: incubator-systemml
Updated Branches:
refs/heads/master 21b96855b -> cf92e8417
[SYSTEMML-1246] Use correct jar name in sparkDML.sh of -bin artifact
Added resource filtering to set jar file name used in sparkDML.sh for -bin
artifact only.
Closes #399.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/cf92e841
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/cf92e841
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/cf92e841
Branch: refs/heads/master
Commit: cf92e8417382812943d88ec3a55b3e4a9b9cc533
Parents: 21b9685
Author: Glenn Weidner <gw...@us.ibm.com>
Authored: Fri Feb 17 14:52:18 2017 -0800
Committer: Glenn Weidner <gw...@us.ibm.com>
Committed: Fri Feb 17 14:52:18 2017 -0800
----------------------------------------------------------------------
pom.xml | 21 +++++
scripts/sparkDML.sh | 2 +-
src/assembly/bin.xml | 9 +-
src/main/resources/scripts/sparkDML.sh | 123 ++++++++++++++++++++++++++++
4 files changed, 153 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/cf92e841/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 2f4abdc..30dbe28 100644
--- a/pom.xml
+++ b/pom.xml
@@ -282,6 +282,27 @@
<outputDirectory>${basedir}/target/lib/hadoop/bin</outputDirectory>
</configuration>
</execution>
+
+ <execution>
+ <id>copy-resources-filtered</id>
+ <phase>compile</phase>
+ <goals>
+ <goal>copy-resources</goal>
+ </goals>
+ <configuration>
+ <overwrite>true</overwrite>
+ <outputDirectory>${basedir}/target/scripts</outputDirectory>
+ <resources>
+ <resource>
+ <directory>${basedir}/src/main/resources/scripts</directory>
+ <includes>
+ <include>sparkDML.sh</include>
+ </includes>
+ <filtering>true</filtering>
+ </resource>
+ </resources>
+ </configuration>
+ </execution>
</executions>
</plugin>
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/cf92e841/scripts/sparkDML.sh
----------------------------------------------------------------------
diff --git a/scripts/sparkDML.sh b/scripts/sparkDML.sh
index a404fee..5548859 100755
--- a/scripts/sparkDML.sh
+++ b/scripts/sparkDML.sh
@@ -26,7 +26,7 @@
# Environment
# Following variables must be rewritten by your installation paths.
-DEFAULT_SPARK_HOME=/usr/local/spark-1.4.0/spark-1.4.0-SNAPSHOT-bin-hadoop2.4
+DEFAULT_SPARK_HOME=/usr/local/spark-2.1.0/spark-2.1.0-bin-hadoop2.6
DEFAULT_SYSTEMML_HOME=.
if [ -z ${SPARK_HOME} ]; then
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/cf92e841/src/assembly/bin.xml
----------------------------------------------------------------------
diff --git a/src/assembly/bin.xml b/src/assembly/bin.xml
index c1d5f8a..fb1fdf6 100644
--- a/src/assembly/bin.xml
+++ b/src/assembly/bin.xml
@@ -41,10 +41,17 @@
<exclude>perftest</exclude>
<exclude>staging/**/*</exclude>
<exclude>staging</exclude>
- <!-- <exclude>*.sh</exclude> --> <!-- applies to sparkDML.sh -->
+ <exclude>sparkDML.sh</exclude>
</excludes>
<outputDirectory>scripts</outputDirectory>
</fileSet>
+ <fileSet>
+ <directory>${basedir}/target/scripts</directory>
+ <includes>
+ <include>sparkDML.sh</include>
+ </includes>
+ <outputDirectory>scripts</outputDirectory>
+ </fileSet>
<fileSet>
<directory>${basedir}/src/main/standalone</directory>
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/cf92e841/src/main/resources/scripts/sparkDML.sh
----------------------------------------------------------------------
diff --git a/src/main/resources/scripts/sparkDML.sh b/src/main/resources/scripts/sparkDML.sh
new file mode 100644
index 0000000..cd57ae0
--- /dev/null
+++ b/src/main/resources/scripts/sparkDML.sh
@@ -0,0 +1,123 @@
+#!/bin/bash
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#set -x
+
+
+# Environment
+
+# Following variables must be rewritten by your installation paths.
+DEFAULT_SPARK_HOME=/usr/local/spark-2.1.0/spark-2.1.0-bin-hadoop2.6
+DEFAULT_SYSTEMML_HOME=.
+
+if [ -z ${SPARK_HOME} ]; then
+ SPARK_HOME=${DEFAULT_SPARK_HOME}
+fi
+
+if [ -z ${SYSTEMML_HOME} ]; then
+ SYSTEMML_HOME=${DEFAULT_SYSTEMML_HOME}
+fi
+
+# Default Values
+
+master="--master yarn-client"
+driver_memory="--driver-memory 20G"
+num_executors="--num-executors 5"
+executor_memory="--executor-memory 60G"
+executor_cores="--executor-cores 24"
+conf="--conf spark.driver.maxResultSize=0 --conf spark.akka.frameSize=128"
+
+
+# error help print
+
+printUsageExit()
+{
+cat <<EOF
+
+Usage: $0 [-h] [SPARK-SUBMIT OPTIONS] -f <dml-filename> [SYSTEMML OPTIONS]
+
+ Examples:
+ $0 -f genGNMF.dml --nvargs V=/tmp/V.mtx W=/tmp/W.mtx H=/tmp/H.mtx rows=100000 cols=800 k=50
+ $0 --driver-memory 5G -f GNMF.dml --explain2 hops --nvargs ...
+ $0 --master yarn-cluster -f hdfs:/user/GNMF.dml
+
+ -h Print this usage message and exit
+
+ SPARK-SUBMIT OPTIONS:
+ --conf <property>=<value> Configuration settings:
+ spark.driver.maxResultSize Default: 0
+ spark.akka.frameSize Default: 128
+ --driver-memory <num> Memory for driver (e.g. 512M) Default: 20G
+ --master <string> local | yarn-client | yarn-cluster Default: yarn-client
+ --num-executors <num> Number of executors to launch (e.g. 2) Default: 5
+ --executor-memory <num> Memory per executor (e.g. 1G) Default: 60G
+ --executor-cores <num> Number of cores per executor (e.g. 4) Default: 24
+
+ -f DML script file name, e.g. hdfs:/user/biadmin/test.dml
+
+ SYSTEMML OPTIONS:
+ --stats Monitor and report caching/recompilation statistics
+ --explain Explain plan (runtime)
+ --explain2 <string> Explain plan (hops, runtime, recompile_hops, recompile_runtime)
+ --nvargs <varName>=<value> List of attributeName-attributeValue pairs
+ --args <string> List of positional argument values
+EOF
+ exit 1
+}
+
+# command line parameter processing
+
+while true ; do
+ case "$1" in
+ -h) printUsageExit ; exit 1 ;;
+ --master) master="--master "$2 ; shift 2 ;;
+ --driver-memory) driver_memory="--driver-memory "$2 ; shift 2 ;;
+ --num-executors) num_executors="--num-executors "$2 ; shift 2 ;;
+ --executor-memory) executor_memory="--executor-memory "$2 ; shift 2 ;;
+ --executor-cores) executor_cores="--executor-cores "$2 ; shift 2 ;;
+ --conf) conf=${conf}' --conf '$2 ; shift 2 ;;
+ -f) if [ -z "$2" ]; then echo "Error: Wrong usage. Try -h" ; exit 1 ; else f=$2 ; shift 2 ; fi ;;
+ --stats) stats="-stats" ; shift 1 ;;
+ --explain) explain="-explain" ; shift 1 ;;
+ --explain2) explain="-explain "$2 ; shift 2 ;;
+ --nvargs) shift 1 ; nvargs="-nvargs "$@ ; break ;;
+ --args) shift 1 ; args="-args "$@ ; break ;;
+ *) if [ -z "$f" ]; then echo "Error: Wrong usage. Try -h" ; exit 1 ; else break ; fi ;;
+ esac
+done
+
+# SystemML Spark invocation
+
+$SPARK_HOME/bin/spark-submit \
+ ${master} \
+ ${driver_memory} \
+ ${num_executors} \
+ ${executor_memory} \
+ ${executor_cores} \
+ ${conf} \
+ ${SYSTEMML_HOME}/${project.artifactId}-${project.version}.jar \
+ -f ${f} \
+ -config=${SYSTEMML_HOME}/SystemML-config.xml \
+ -exec hybrid_spark \
+ $explain \
+ $stats \
+ $nvargs $args