Posted to commits@systemml.apache.org by gw...@apache.org on 2017/02/17 22:56:49 UTC

incubator-systemml git commit: [SYSTEMML-1246] Use correct jar name in sparkDML.sh of -bin artifact

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 21b96855b -> cf92e8417


[SYSTEMML-1246] Use correct jar name in sparkDML.sh of -bin artifact

Added resource filtering to set jar file name used in sparkDML.sh for -bin
artifact only.

Closes #399.
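For context, the copy of the script under src/main/resources/scripts references
the SystemML jar through Maven properties instead of a hard-coded name, so the
filtered copy ends up with the concrete jar name while ${SYSTEMML_HOME}, which
is a shell variable and not a Maven property, is left untouched. Roughly (the
resolved name below is illustrative; the actual value depends on the project
version at build time):

    # before filtering (src/main/resources/scripts/sparkDML.sh)
    ${SYSTEMML_HOME}/${project.artifactId}-${project.version}.jar

    # after filtering (target/scripts/sparkDML.sh), illustrative result
    ${SYSTEMML_HOME}/systemml-0.13.0-incubating-SNAPSHOT.jar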


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/cf92e841
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/cf92e841
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/cf92e841

Branch: refs/heads/master
Commit: cf92e8417382812943d88ec3a55b3e4a9b9cc533
Parents: 21b9685
Author: Glenn Weidner <gw...@us.ibm.com>
Authored: Fri Feb 17 14:52:18 2017 -0800
Committer: Glenn Weidner <gw...@us.ibm.com>
Committed: Fri Feb 17 14:52:18 2017 -0800

----------------------------------------------------------------------
 pom.xml                                |  21 +++++
 scripts/sparkDML.sh                    |   2 +-
 src/assembly/bin.xml                   |   9 +-
 src/main/resources/scripts/sparkDML.sh | 123 ++++++++++++++++++++++++++++
 4 files changed, 153 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/cf92e841/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 2f4abdc..30dbe28 100644
--- a/pom.xml
+++ b/pom.xml
@@ -282,6 +282,27 @@
 							<outputDirectory>${basedir}/target/lib/hadoop/bin</outputDirectory>
 						</configuration>
 					</execution>
+					
+					<execution>
+						<id>copy-resources-filtered</id>
+						<phase>compile</phase>
+						<goals>
+							<goal>copy-resources</goal>
+						</goals>
+						<configuration>
+							<overwrite>true</overwrite>
+							<outputDirectory>${basedir}/target/scripts</outputDirectory>
+							<resources>
+								<resource>
+									<directory>${basedir}/src/main/resources/scripts</directory>
+									<includes>
+										<include>sparkDML.sh</include>
+									</includes>
+									<filtering>true</filtering>
+								</resource>
+							</resources>
+						</configuration>
+					</execution>
 				</executions>
 			</plugin>
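Because this execution is bound to the compile phase with filtering enabled, the
substituted copy of sparkDML.sh is written to target/scripts during a normal
build. A minimal sanity check (commands are illustrative) could be:

    mvn compile
    grep '\.jar' target/scripts/sparkDML.sh
    # the ${project.artifactId}-${project.version}.jar placeholder should now
    # appear as a concrete systemml-<version>.jar name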
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/cf92e841/scripts/sparkDML.sh
----------------------------------------------------------------------
diff --git a/scripts/sparkDML.sh b/scripts/sparkDML.sh
index a404fee..5548859 100755
--- a/scripts/sparkDML.sh
+++ b/scripts/sparkDML.sh
@@ -26,7 +26,7 @@
 # Environment
 
 # The following variables must be set to match your installation paths.
-DEFAULT_SPARK_HOME=/usr/local/spark-1.4.0/spark-1.4.0-SNAPSHOT-bin-hadoop2.4
+DEFAULT_SPARK_HOME=/usr/local/spark-2.1.0/spark-2.1.0-bin-hadoop2.6
 DEFAULT_SYSTEMML_HOME=.
 
 if [ -z ${SPARK_HOME} ]; then
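Note that DEFAULT_SPARK_HOME (and DEFAULT_SYSTEMML_HOME) only act as fallbacks:
the script keeps SPARK_HOME and SYSTEMML_HOME when they are already set in the
environment. For example (paths and the DML file name are illustrative):

    export SPARK_HOME=/opt/spark-2.1.0-bin-hadoop2.7
    export SYSTEMML_HOME=/opt/systemml
    ./sparkDML.sh -f test.dml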

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/cf92e841/src/assembly/bin.xml
----------------------------------------------------------------------
diff --git a/src/assembly/bin.xml b/src/assembly/bin.xml
index c1d5f8a..fb1fdf6 100644
--- a/src/assembly/bin.xml
+++ b/src/assembly/bin.xml
@@ -41,10 +41,17 @@
 				<exclude>perftest</exclude>
 				<exclude>staging/**/*</exclude>
 				<exclude>staging</exclude>
-				<!-- <exclude>*.sh</exclude> --> <!-- applies to sparkDML.sh -->
+				<exclude>sparkDML.sh</exclude>
 			</excludes>
 			<outputDirectory>scripts</outputDirectory>
 		</fileSet>
+		<fileSet>
+			<directory>${basedir}/target/scripts</directory>
+			<includes>
+				<include>sparkDML.sh</include>
+			</includes>
+			<outputDirectory>scripts</outputDirectory>
+		</fileSet>
 
 		<fileSet>
 			<directory>${basedir}/src/main/standalone</directory>
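With the new fileSet above, the -bin assembly packages the filtered copy from
target/scripts rather than the unfiltered source script. Assuming the -bin
artifact is produced as a zip under target/ (the artifact name below is
illustrative), the packaged script could be spot-checked with:

    unzip -p target/systemml-*-bin.zip '*/scripts/sparkDML.sh' | grep '\.jar'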

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/cf92e841/src/main/resources/scripts/sparkDML.sh
----------------------------------------------------------------------
diff --git a/src/main/resources/scripts/sparkDML.sh b/src/main/resources/scripts/sparkDML.sh
new file mode 100644
index 0000000..cd57ae0
--- /dev/null
+++ b/src/main/resources/scripts/sparkDML.sh
@@ -0,0 +1,123 @@
+#!/bin/bash
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#set -x
+
+
+# Environment
+
+# The following variables must be set to match your installation paths.
+DEFAULT_SPARK_HOME=/usr/local/spark-2.1.0/spark-2.1.0-bin-hadoop2.6
+DEFAULT_SYSTEMML_HOME=.
+
+if [ -z ${SPARK_HOME} ]; then
+  SPARK_HOME=${DEFAULT_SPARK_HOME}
+fi
+
+if [ -z ${SYSTEMML_HOME} ]; then
+  SYSTEMML_HOME=${DEFAULT_SYSTEMML_HOME}
+fi
+
+# Default Values
+
+master="--master yarn-client"
+driver_memory="--driver-memory 20G"
+num_executors="--num-executors 5"
+executor_memory="--executor-memory 60G"
+executor_cores="--executor-cores 24"
+conf="--conf spark.driver.maxResultSize=0 --conf spark.akka.frameSize=128"
+
+
+# error help print
+
+printUsageExit()
+{
+cat <<EOF
+
+Usage: $0 [-h] [SPARK-SUBMIT OPTIONS] -f <dml-filename> [SYSTEMML OPTIONS]
+
+   Examples:
+      $0 -f genGNMF.dml --nvargs V=/tmp/V.mtx W=/tmp/W.mtx H=/tmp/H.mtx rows=100000 cols=800 k=50
+      $0 --driver-memory 5G -f GNMF.dml --explain2 hops --nvargs ...
+      $0 --master yarn-cluster -f hdfs:/user/GNMF.dml
+
+   -h | -?  Print this usage message and exit
+
+   SPARK-SUBMIT OPTIONS:
+   --conf <property>=<value>   Configuration settings:                  
+                                 spark.driver.maxResultSize            Default: 0
+                                 spark.akka.frameSize                  Default: 128
+   --driver-memory <num>       Memory for driver (e.g. 512M)           Default: 20G
+   --master <string>           local | yarn-client | yarn-cluster      Default: yarn-client
+   --num-executors <num>       Number of executors to launch (e.g. 2)  Default: 5
+   --executor-memory <num>     Memory per executor (e.g. 1G)           Default: 60G
+   --executor-cores <num>      Number of cores per executor (e.g. 2)   Default: 24
+
+   -f                          DML script file name, e.g. hdfs:/user/biadmin/test.dml
+
+   SYSTEMML OPTIONS:
+   --stats                     Monitor and report caching/recompilation statistics
+   --explain                   Explain plan (runtime)
+   --explain2 <string>         Explain plan (hops, runtime, recompile_hops, recompile_runtime)
+   --nvargs <varName>=<value>  List of attributeName-attributeValue pairs
+   --args <string>             List of positional argument values
+EOF
+  exit 1
+}
+
+# command line parameter processing
+
+while true ; do
+  case "$1" in
+    -h)                printUsageExit ; exit 1 ;;
+    --master)          master="--master "$2 ; shift 2 ;;
+    --driver-memory)   driver_memory="--driver-memory "$2 ; shift 2 ;;
+    --num-executors)   num_executors="--num-executors "$2 ; shift 2 ;;
+    --executor-memory) executor_memory="--executor-memory "$2 ; shift 2 ;;
+    --executor-cores)  executor_cores="--executor-cores "$2 ; shift 2 ;;
+    --conf)            conf=${conf}' --conf '$2 ; shift 2 ;;
+    -f)                if [ -z "$2" ]; then echo "Error: Wrong usage. Try -h" ; exit 1 ; else f=$2 ; shift 2 ; fi ;;
+    --stats)           stats="-stats" ; shift 1 ;;
+    --explain)         explain="-explain" ; shift 1 ;;
+    --explain2)        explain="-explain "$2 ; shift 2 ;;  
+    --nvargs)          shift 1 ; nvargs="-nvargs "$@ ; break ;;
+    --args)            shift 1 ; args="-args "$@ ; break ;; 
+    *)                 if [ -z "$f" ]; then echo "Error: Wrong usage. Try -h" ; exit 1 ; else break ; fi ;;
+  esac
+done
+
+# SystemML Spark invocation
+
+$SPARK_HOME/bin/spark-submit \
+     ${master} \
+     ${driver_memory} \
+     ${num_executors} \
+     ${executor_memory} \
+     ${executor_cores} \
+     ${conf} \
+     ${SYSTEMML_HOME}/${project.artifactId}-${project.version}.jar \
+         -f ${f} \
+         -config=${SYSTEMML_HOME}/SystemML-config.xml \
+         -exec hybrid_spark \
+         $explain \
+         $stats \
+         $nvargs $args
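Put together, a plain invocation such as "./sparkDML.sh -f test.dml" expands,
with the defaults above and an illustrative (filtered) jar name, to roughly:

    $SPARK_HOME/bin/spark-submit \
         --master yarn-client \
         --driver-memory 20G \
         --num-executors 5 \
         --executor-memory 60G \
         --executor-cores 24 \
         --conf spark.driver.maxResultSize=0 --conf spark.akka.frameSize=128 \
         ./systemml-0.13.0-incubating-SNAPSHOT.jar \
             -f test.dml \
             -config=./SystemML-config.xml \
             -exec hybrid_spark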