You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mrql.apache.org by fe...@apache.org on 2015/05/24 23:23:40 UTC

incubator-mrql git commit: [MRQL-73] Set the max number of tasks in Spark mode

Repository: incubator-mrql
Updated Branches:
  refs/heads/master a89375316 -> 5eb81d992


[MRQL-73] Set the max number of tasks in Spark mode


Project: http://git-wip-us.apache.org/repos/asf/incubator-mrql/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-mrql/commit/5eb81d99
Tree: http://git-wip-us.apache.org/repos/asf/incubator-mrql/tree/5eb81d99
Diff: http://git-wip-us.apache.org/repos/asf/incubator-mrql/diff/5eb81d99

Branch: refs/heads/master
Commit: 5eb81d992cec29084d2a97686f95b08cdc727809
Parents: a893753
Author: fegaras <fe...@cse.uta.edu>
Authored: Sun May 24 16:16:16 2015 -0500
Committer: fegaras <fe...@cse.uta.edu>
Committed: Sun May 24 16:16:16 2015 -0500

----------------------------------------------------------------------
 bin/mrql.spark   |  8 +++++++-
 conf/mrql-env.sh | 21 ++++++++++-----------
 2 files changed, 17 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/5eb81d99/bin/mrql.spark
----------------------------------------------------------------------
diff --git a/bin/mrql.spark b/bin/mrql.spark
index 5e966de..5c06943 100755
--- a/bin/mrql.spark
+++ b/bin/mrql.spark
@@ -38,8 +38,14 @@ MRQL_JAR=`ls "$MRQL_HOME"/lib/mrql-spark-*.jar`
 
 export SPARK_HOME FS_DEFAULT_NAME SPARK_MASTER SPARK_WORKER_INSTANCES SPARK_WORKER_CORES SPARK_WORKER_MEMORY SPARK_MASTER_MEMORY
 
-
 if [ "$1" == "-local" ] || [ "$1" == "-dist" ]; then
+   ARGS=($*)
+   for (( i = 0; i < $#; i++ )); do
+        if [ "${ARGS[i]}" = "-nodes" ]; then
+           # calculate the number of Yarn containers from the number of requested nodes
+           export SPARK_WORKER_INSTANCES=$(( ${ARGS[i+1]} / $SPARK_WORKER_CORES ))
+        fi
+   done
    $SPARK_HOME/bin/spark-submit --class org.apache.mrql.Main --jars $CUP_JAR,$JLINE_JAR,$GEN_JAR,$CORE_JAR --driver-class-path $CUP_JAR:$JLINE_JAR:$GEN_JAR:$CORE_JAR:$MRQL_JAR $MRQL_JAR -spark $*
 else
    $JAVA_HOME/bin/java -classpath $CUP_JAR:$JLINE_JAR:$GEN_JAR:$CORE_JAR:$MRQL_JAR:$SPARK_JARS:$HADOOP_JARS org.apache.mrql.Main -spark $*

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/5eb81d99/conf/mrql-env.sh
----------------------------------------------------------------------
diff --git a/conf/mrql-env.sh b/conf/mrql-env.sh
index 9d83e45..4b50858 100644
--- a/conf/mrql-env.sh
+++ b/conf/mrql-env.sh
@@ -70,7 +70,7 @@ HAMA_ZOOKEEPER_QUORUM=localhost
 BSP_SPLIT_INPUT=
 
 
-# Optional: Spark configuration. Supports versions 1.0.0, 1.0.2, 1.1.0, 1.1.1, and 1.2.0
+# Optional: Spark configuration. Supports versions 1.0.0, 1.0.2, 1.1.0, 1.1.1, 1.2.0, 1.3.0, and 1.3.1
 # (Spark versions 0.8.1, 0.9.0, and 0.9.1 are supported by MRQL 0.9.0)
 # You may use the Spark prebuilts bin-hadoop1 or bin-hadoop2 (Yarn)
 # For distributed mode, give write permission to /tmp: hadoop fs -chmod -R 777 /tmp
@@ -80,24 +80,23 @@ SPARK_HOME=${HOME}/spark-1.2.0-bin-hadoop2.3
 #   to run Spark on Standalone Mode, set it to spark://`hostname`:7077
 #   to run Spark on a YARN cluster, set it to "yarn-client"
 SPARK_MASTER=yarn-client
-# For a Yarn cluster set it to the number of workers to start on,
-#   for local/standalone set it to 1
-SPARK_WORKER_INSTANCES=1
-# Number of cores for the workers
-SPARK_WORKER_CORES=2
-# Memory per Worker (e.g. 1000M, 2G)
-SPARK_WORKER_MEMORY=1G
 # Memory for Master (e.g. 1000M, 2G)
 SPARK_MASTER_MEMORY=512M
+# For Spark 1.3.*, use _EXECUTOR_ instead of _WORKER_ (deprecated)
+# The default number of worker instances. For a Yarn cluster, set it to the number of available containers minus 1.
+#   For local/standalone mode, set it to 2. It can be changed with the MRQL parameter -nodes.
+SPARK_WORKER_INSTANCES=2
+# Number of cores for each worker. For Yarn, it is the number of cores per container.
+SPARK_WORKER_CORES=1
+# Memory per Worker (e.g. 1000M, 2G)
+SPARK_WORKER_MEMORY=1G
 
 
-# Optional: Flink configuration. Supports versions 0.6-incubating, 0.6.1-incubating, 0.7.0-incubating, and 0.8.0
+# Optional: Flink configuration. Supports versions 0.6-incubating, 0.6.1-incubating, 0.7.0-incubating, 0.8.0, and 0.8.1
 # Note: for yarn, set yarn.nodemanager.vmem-check-enabled to false in yarn-site.xml
 FLINK_VERSION=yarn-0.8.0
 # Flink installation directory
 FLINK_HOME=${HOME}/flink-${FLINK_VERSION}
-#   (use this for a Flink snapshot):
-#FLINK_HOME=${HOME}/flink-${FLINK_VERSION}/flink-dist/target/flink-${FLINK_VERSION}-bin/flink-${FLINK_VERSION}
 # Hadoop HDFS: needed for Sequence files in Flink mode
 HDFS_JAR=${HADOOP_HOME}/share/hadoop/hdfs/hadoop-hdfs-${HADOOP_VERSION}.jar
 # Flink JobManager (it is derived automatically on a yarn cluster)