You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mrql.apache.org by fe...@apache.org on 2014/02/09 17:00:08 UTC

git commit: MRQL-30: Improved run scripts and support for Spark on YARN

Updated Branches:
  refs/heads/master b03385829 -> 25fa7d378


MRQL-30: Improved run scripts and support for Spark on YARN


Project: http://git-wip-us.apache.org/repos/asf/incubator-mrql/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-mrql/commit/25fa7d37
Tree: http://git-wip-us.apache.org/repos/asf/incubator-mrql/tree/25fa7d37
Diff: http://git-wip-us.apache.org/repos/asf/incubator-mrql/diff/25fa7d37

Branch: refs/heads/master
Commit: 25fa7d3784f7badc559b6c6ec5e29eb480a447e7
Parents: b033858
Author: fegaras <fe...@cse.uta.edu>
Authored: Sun Feb 9 09:59:16 2014 -0600
Committer: fegaras <fe...@cse.uta.edu>
Committed: Sun Feb 9 09:59:16 2014 -0600

----------------------------------------------------------------------
 Spark/pom.xml                          |  2 +-
 bin/mrql                               |  6 +++---
 bin/mrql.bsp                           |  6 +++---
 bin/mrql.spark                         | 28 +++++++++++++++++++++-------
 conf/mrql-env.sh                       | 20 +++++++++++---------
 pom.xml                                |  1 +
 src/main/java/spark/SparkEvaluator.gen |  7 +++++++
 7 files changed, 47 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/25fa7d37/Spark/pom.xml
----------------------------------------------------------------------
diff --git a/Spark/pom.xml b/Spark/pom.xml
index 8a8efa4..a8dcb27 100644
--- a/Spark/pom.xml
+++ b/Spark/pom.xml
@@ -45,7 +45,7 @@
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-core_2.10</artifactId>
+      <artifactId>spark-core_${scala.version}</artifactId>
       <version>${spark.version}</version>
     </dependency>
   </dependencies>

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/25fa7d37/bin/mrql
----------------------------------------------------------------------
diff --git a/bin/mrql b/bin/mrql
index 11b8b89..319eb41 100755
--- a/bin/mrql
+++ b/bin/mrql
@@ -30,8 +30,8 @@ MRQL_HOME="$(cd `dirname $0`/..; pwd -P)"
 
 GEN_JAR=`ls "$MRQL_HOME"/lib/mrql-gen-*.jar`
 CORE_JAR=`ls "$MRQL_HOME"/lib/mrql-core-*.jar`
-MR_JAR=`ls "$MRQL_HOME"/lib/mrql-mr-*.jar`
-FULL_JAR="$MRQL_HOME/lib/mrql-mr-all.jar"
+MRQL_JAR=`ls "$MRQL_HOME"/lib/mrql-mr-*.jar`
+FULL_JAR="$MRQL_HOME/lib/mrql-all-mr.jar"
 
 export JAVA_HOME MAPRED_JOB_TRACKER FS_DEFAULT_NAME
 
@@ -43,7 +43,7 @@ if (! [ -a $FULL_JAR ]); then
    $JAVA_HOME/bin/jar xf $JLINE_JAR
    $JAVA_HOME/bin/jar xf $GEN_JAR
    $JAVA_HOME/bin/jar xf $CORE_JAR
-   $JAVA_HOME/bin/jar xf $MR_JAR
+   $JAVA_HOME/bin/jar xf $MRQL_JAR
    cd ..
    $JAVA_HOME/bin/jar cf $FULL_JAR -C classes/ .
    popd > /dev/null

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/25fa7d37/bin/mrql.bsp
----------------------------------------------------------------------
diff --git a/bin/mrql.bsp b/bin/mrql.bsp
index 651dbf5..2f4c8a9 100755
--- a/bin/mrql.bsp
+++ b/bin/mrql.bsp
@@ -30,8 +30,8 @@ MRQL_HOME="$(cd `dirname $0`/..; pwd -P)"
 
 GEN_JAR=`ls "$MRQL_HOME"/lib/mrql-gen-*.jar`
 CORE_JAR=`ls "$MRQL_HOME"/lib/mrql-core-*.jar`
-BSP_JAR=`ls "$MRQL_HOME"/lib/mrql-bsp-*.jar`
-FULL_JAR="$MRQL_HOME/lib/mrql-bsp-all.jar"
+MRQL_JAR=`ls "$MRQL_HOME"/lib/mrql-bsp-*.jar`
+FULL_JAR="$MRQL_HOME/lib/mrql-all-bsp.jar"
 
 export JAVA_HOME FS_DEFAULT_NAME BSP_MASTER_ADDRESS HAMA_ZOOKEEPER_QUORUM
 
@@ -43,7 +43,7 @@ if (! [ -a $FULL_JAR ]); then
    $JAVA_HOME/bin/jar xf $JLINE_JAR
    $JAVA_HOME/bin/jar xf $GEN_JAR
    $JAVA_HOME/bin/jar xf $CORE_JAR
-   $JAVA_HOME/bin/jar xf $BSP_JAR
+   $JAVA_HOME/bin/jar xf $MRQL_JAR
    cd ..
    $JAVA_HOME/bin/jar cf $FULL_JAR -C classes/ .
    popd > /dev/null

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/25fa7d37/bin/mrql.spark
----------------------------------------------------------------------
diff --git a/bin/mrql.spark b/bin/mrql.spark
index 529fe2f..a57fee6 100755
--- a/bin/mrql.spark
+++ b/bin/mrql.spark
@@ -30,12 +30,26 @@ MRQL_HOME="$(cd `dirname $0`/..; pwd -P)"
 
 GEN_JAR=`ls "$MRQL_HOME"/lib/mrql-gen-*.jar`
 CORE_JAR=`ls "$MRQL_HOME"/lib/mrql-core-*.jar`
-SPARK_JAR=`ls "$MRQL_HOME"/lib/mrql-spark-*.jar`
-
-
-export SPARK_HOME FS_DEFAULT_NAME SPARK_MASTER SPARK_MEM
-
-SPARK_CLASSPATH="$CUP_JAR:$JLINE_JAR:$GEN_JAR:$CORE_JAR:$SPARK_JAR:$SPARK_JARS:$HADOOP_JARS"
-
+MRQL_JAR=`ls "$MRQL_HOME"/lib/mrql-spark-*.jar`
+FULL_JAR="$MRQL_HOME/lib/mrql-all-spark.jar"
+
+export SPARK_HOME SPARK_JAR FS_DEFAULT_NAME SPARK_MASTER SPARK_MEM
+export SPARK_YARN_APP_JAR=$FULL_JAR
+
+if (! [ -a $FULL_JAR ]); then
+   rm -rf "$MRQL_HOME/tmp/classes"
+   mkdir -p "$MRQL_HOME/tmp/classes"
+   pushd $MRQL_HOME/tmp/classes > /dev/null
+   $JAVA_HOME/bin/jar xf $CUP_JAR
+   $JAVA_HOME/bin/jar xf $JLINE_JAR
+   $JAVA_HOME/bin/jar xf $GEN_JAR
+   $JAVA_HOME/bin/jar xf $CORE_JAR
+   $JAVA_HOME/bin/jar xf $MRQL_JAR
+   cd ..
+   $JAVA_HOME/bin/jar cf $FULL_JAR -C classes/ .
+   popd > /dev/null
+fi
+
+SPARK_CLASSPATH="$CUP_JAR:$JLINE_JAR:$GEN_JAR:$CORE_JAR:$MRQL_JAR:$SPARK_JAR:$HADOOP_JARS"
 
 $JAVA_HOME/bin/java -classpath $SPARK_CLASSPATH org.apache.mrql.Main -spark $*

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/25fa7d37/conf/mrql-env.sh
----------------------------------------------------------------------
diff --git a/conf/mrql-env.sh b/conf/mrql-env.sh
index 9b4dd97..d189cbd 100644
--- a/conf/mrql-env.sh
+++ b/conf/mrql-env.sh
@@ -48,18 +48,18 @@ CUP_JAR=${HOME}/.m2/repository/net/sf/squirrel-sql/thirdparty/non-maven/java-cup
 JLINE_JAR=${HOME}/.m2/repository/jline/jline/1.0/jline-1.0.jar
 
 
-# Required: Hadoop configuration. Supports versions 0.20.x, 1.x, 2.x
+# Required: Hadoop configuration. Supports versions 0.20.x, 1.x, and 2.x (YARN)
 HADOOP_VERSION=1.2.1
 # The Hadoop installation directory
 HADOOP_HOME=${HOME}/hadoop-${HADOOP_VERSION}
-# The Hadoop job trackeer (as defined in hdfs-site.xml)
+# The Hadoop job tracker (as defined in mapred-site.xml)
 MAPRED_JOB_TRACKER=localhost:9001
 # The HDFS namenode URI (as defined in core-site.xml)
 FS_DEFAULT_NAME=hdfs://localhost:9000/
 
 
-# Optional: Hama configuration
-HAMA_VERSION=0.6.2
+# Optional: Hama configuration. Supports versions 0.5.0, 0.6.0, 0.6.2, and 0.6.3
+HAMA_VERSION=0.6.3
 # The Hama installation directory
 HAMA_HOME=${HOME}/hama-${HAMA_VERSION}
 # The Hama configuration as defined in hama-site.xml
@@ -67,9 +67,9 @@ BSP_MASTER_ADDRESS=localhost:40000
 HAMA_ZOOKEEPER_QUORUM=localhost
 
 
-# Optional: Spark configuration. Supports 0.8.1 only
-SPARK_HOME=${HOME}/spark-0.8.1-incubating-bin-hadoop1
-# URI of the Spark master node
+# Optional: Spark configuration. Supports versions 0.8.1 and 0.9.0
+SPARK_HOME=${HOME}/spark-0.9.0-incubating-bin-hadoop1
+# URI of the Spark master node (to run Spark on a YARN cluster, set it to "yarn-client")
 SPARK_MASTER=spark://crete:7077
 # Spark memory per node
 SPARK_MEM="1g"
@@ -79,10 +79,12 @@ SPARK_MEM="1g"
 
 HAMA_JAR=${HAMA_HOME}/hama-core-${HAMA_VERSION}.jar
 
-SPARK_JARS=${SPARK_HOME}/assembly/target/scala-2.9.3/*
+SPARK_JAR=`ls ${SPARK_HOME}/assembly/target/scala-*/*`
 
-if [[ ${HADOOP_VERSION} =~ "^2.*$" ]]; then
+if [[ -f ${HADOOP_HOME}/share/hadoop/common/hadoop-common-${HADOOP_VERSION}.jar ]]; then
+   # hadoop 2.x (YARN)
    HADOOP_JARS=${HADOOP_HOME}/share/hadoop/common/hadoop-common-${HADOOP_VERSION}.jar:${HADOOP_HOME}/share/hadoop/mapreduce/hadoop-mapreduce-client-core-${HADOOP_VERSION}.jar:${HADOOP_HOME}/share/hadoop/hdfs/hadoop-hdfs-${HADOOP_VERSION}.jar:${HADOOP_HOME}/share/hadoop/common/lib/hadoop-annotations-${HADOOP_VERSION}.jar:${HADOOP_HOME}/share/hadoop/common/lib/log4j-1.2.17.jar:${HADOOP_HOME}/share/hadoop/common/lib/commons-cli-1.2.jar
 else
+   # hadoop 1.x or 0.20.x
    HADOOP_JARS=${HADOOP_HOME}/hadoop-core-${HADOOP_VERSION}.jar:${HADOOP_HOME}/lib/commons-logging-1.1.1.jar:${HADOOP_HOME}/lib/log4j-1.2.15.jar:${HADOOP_HOME}/lib/commons-cli-1.2.jar
 fi

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/25fa7d37/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index f477dbf..dd680aa 100644
--- a/pom.xml
+++ b/pom.xml
@@ -48,6 +48,7 @@
     <yarn.version>2.2.0</yarn.version>
     <hama.version>0.6.3</hama.version>
     <spark.version>0.9.0-incubating</spark.version>
+    <scala.version>2.10</scala.version>
   </properties>
 
   <modules>

http://git-wip-us.apache.org/repos/asf/incubator-mrql/blob/25fa7d37/src/main/java/spark/SparkEvaluator.gen
----------------------------------------------------------------------
diff --git a/src/main/java/spark/SparkEvaluator.gen b/src/main/java/spark/SparkEvaluator.gen
index beb1e2a..611eca2 100644
--- a/src/main/java/spark/SparkEvaluator.gen
+++ b/src/main/java/spark/SparkEvaluator.gen
@@ -25,6 +25,8 @@ import java.util.HashMap;
 import java.util.Hashtable;
 import java.util.Iterator;
 import java.io.*;
+import java.util.Enumeration;
+import org.apache.log4j.*;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.conf.Configuration;
@@ -74,6 +76,11 @@ final public class SparkEvaluator extends Evaluator implements Serializable {
                                                      env);
                 Plan.conf = spark_context.hadoopConfiguration();
                 FileSystem.setDefaultUri(Plan.conf,System.getenv("FS_DEFAULT_NAME"));
+            };
+            if (!Config.info) {
+                for ( Enumeration en = LogManager.getCurrentLoggers(); en.hasMoreElements(); )
+                    ((Logger)en.nextElement()).setLevel(Level.WARN);
+                LogManager.getRootLogger().setLevel(Level.WARN);
             }
         } catch (IOException ex) {
             throw new Error(ex);