You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@oozie.apache.org by rk...@apache.org on 2015/02/20 23:48:21 UTC

oozie git commit: OOZIE-2071 Add a Spark example (pavan kumar via rkanter)

Repository: oozie
Updated Branches:
  refs/heads/master ebb3ec6d0 -> 5228eb8fe


OOZIE-2071 Add a Spark example (pavan kumar via rkanter)


Project: http://git-wip-us.apache.org/repos/asf/oozie/repo
Commit: http://git-wip-us.apache.org/repos/asf/oozie/commit/5228eb8f
Tree: http://git-wip-us.apache.org/repos/asf/oozie/tree/5228eb8f
Diff: http://git-wip-us.apache.org/repos/asf/oozie/diff/5228eb8f

Branch: refs/heads/master
Commit: 5228eb8fef99146a066b9b609944f26a1b5a5828
Parents: ebb3ec6
Author: Robert Kanter <rk...@cloudera.com>
Authored: Fri Feb 20 14:43:29 2015 -0800
Committer: Robert Kanter <rk...@cloudera.com>
Committed: Fri Feb 20 14:43:29 2015 -0800

----------------------------------------------------------------------
 .../action/hadoop/SparkActionExecutor.java      |  6 ++-
 examples/pom.xml                                | 51 +++++++++++++++-----
 examples/src/main/apps/spark/job.properties     | 24 +++++++++
 examples/src/main/apps/spark/workflow.xml       | 42 ++++++++++++++++
 .../org/apache/oozie/example/SparkFileCopy.java | 41 ++++++++++++++++
 release-log.txt                                 |  1 +
 sharelib/spark/pom.xml                          | 51 +++-----------------
 .../action/hadoop/TestSparkActionExecutor.java  | 10 ++--
 .../oozie/action/hadoop/TestSparkMain.java      | 11 ++---
 src/main/assemblies/examples.xml                |  7 ++-
 10 files changed, 173 insertions(+), 71 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/oozie/blob/5228eb8f/core/src/main/java/org/apache/oozie/action/hadoop/SparkActionExecutor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/oozie/action/hadoop/SparkActionExecutor.java b/core/src/main/java/org/apache/oozie/action/hadoop/SparkActionExecutor.java
index 65f8d5a..732d5f0 100644
--- a/core/src/main/java/org/apache/oozie/action/hadoop/SparkActionExecutor.java
+++ b/core/src/main/java/org/apache/oozie/action/hadoop/SparkActionExecutor.java
@@ -32,7 +32,8 @@ import java.util.List;
 
 public class SparkActionExecutor extends JavaActionExecutor {
     public static final String SPARK_MAIN_CLASS_NAME = "org.apache.oozie.action.hadoop.SparkMain";
-    public static final String TASK_USER_PRECEDENCE = "mapreduce.task.classpath.user.precedence";
+    public static final String TASK_USER_PRECEDENCE = "mapreduce.task.classpath.user.precedence"; // hadoop-2
+    public static final String TASK_USER_CLASSPATH_PRECEDENCE = "mapreduce.user.classpath.first";  // hadoop-1
     public static final String SPARK_MASTER = "oozie.spark.master";
     public static final String SPARK_MODE = "oozie.spark.mode";
     public static final String SPARK_OPTS = "oozie.spark.spark-opts";
@@ -85,6 +86,9 @@ public class SparkActionExecutor extends JavaActionExecutor {
         if (launcherJobConf.get("oozie.launcher." + TASK_USER_PRECEDENCE) == null) {
             launcherJobConf.set(TASK_USER_PRECEDENCE, "true");
         }
+        if (launcherJobConf.get("oozie.launcher." + TASK_USER_CLASSPATH_PRECEDENCE) == null) {
+            launcherJobConf.set(TASK_USER_CLASSPATH_PRECEDENCE, "true");
+        }
         return launcherJobConf;
     }
 

http://git-wip-us.apache.org/repos/asf/oozie/blob/5228eb8f/examples/pom.xml
----------------------------------------------------------------------
diff --git a/examples/pom.xml b/examples/pom.xml
index c5af129..442a520 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -109,21 +109,50 @@
             <artifactId>hsqldb</artifactId>
             <scope>compile</scope>
         </dependency>
+
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-core_2.10</artifactId>
+            <version>${spark.version}</version>
+            <scope>compile</scope>
+            <exclusions>
+                <exclusion>
+                    <groupId>org.apache.mesos</groupId>
+                    <artifactId>mesos</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.hadoop</groupId>
+                    <artifactId>hadoop-client</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>com.google.guava</groupId>
+                    <artifactId>guava</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.slf4j</groupId>
+                    <artifactId>jul-to-slf4j</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.slf4j</groupId>
+                    <artifactId>jcl-over-slf4j</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
     </dependencies>
     <build>
         <plugins>
             <plugin>
-                 <groupId>org.apache.rat</groupId>
-                 <artifactId>apache-rat-plugin</artifactId>
-                 <configuration>
-                     <excludeSubProjects>false</excludeSubProjects>
-                     <excludes>
-                         <!-- excluding all as the root POM does the full check-->
-                         <exclude>**</exclude>
-                     </excludes>
-                 </configuration>
-             </plugin>
-             <plugin>
+                <groupId>org.apache.rat</groupId>
+                <artifactId>apache-rat-plugin</artifactId>
+                <configuration>
+                    <excludeSubProjects>false</excludeSubProjects>
+                    <excludes>
+                        <!-- excluding all as the root POM does the full check-->
+                        <exclude>**</exclude>
+                    </excludes>
+                </configuration>
+            </plugin>
+            <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-assembly-plugin</artifactId>
                 <configuration>

http://git-wip-us.apache.org/repos/asf/oozie/blob/5228eb8f/examples/src/main/apps/spark/job.properties
----------------------------------------------------------------------
diff --git a/examples/src/main/apps/spark/job.properties b/examples/src/main/apps/spark/job.properties
new file mode 100644
index 0000000..4bf67df
--- /dev/null
+++ b/examples/src/main/apps/spark/job.properties
@@ -0,0 +1,24 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+nameNode=hdfs://localhost:8020
+jobTracker=localhost:8021
+queueName=default
+examplesRoot=examples
+oozie.use.system.libpath=true
+oozie.wf.application.path=${nameNode}/user/${user.name}/${examplesRoot}/apps/spark
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/oozie/blob/5228eb8f/examples/src/main/apps/spark/workflow.xml
----------------------------------------------------------------------
diff --git a/examples/src/main/apps/spark/workflow.xml b/examples/src/main/apps/spark/workflow.xml
new file mode 100644
index 0000000..f5ac7f6
--- /dev/null
+++ b/examples/src/main/apps/spark/workflow.xml
@@ -0,0 +1,42 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<workflow-app xmlns='uri:oozie:workflow:0.5' name='SparkFileCopy'>
+    <start to='spark-node' />
+
+    <action name='spark-node'>
+        <spark xmlns="uri:oozie:spark-action:0.1">
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <master>local[*]</master>
+            <name>Spark-FileCopy</name>
+            <class>org.apache.oozie.example.SparkFileCopy</class>
+            <jar>${nameNode}/user/${wf:user()}/${examplesRoot}/apps/spark/lib/oozie-examples.jar</jar>
+            <arg>${nameNode}/user/${wf:user()}/${examplesRoot}/input-data/text/data.txt</arg>
+            <arg>${nameNode}/user/${wf:user()}/${examplesRoot}/output</arg>
+        </spark>
+        <ok to="end" />
+        <error to="fail" />
+    </action>
+
+    <kill name="fail">
+        <message>Workflow failed, error
+            message[${wf:errorMessage(wf:lastErrorNode())}]
+        </message>
+    </kill>
+    <end name='end' />
+</workflow-app>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/oozie/blob/5228eb8f/examples/src/main/java/org/apache/oozie/example/SparkFileCopy.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/oozie/example/SparkFileCopy.java b/examples/src/main/java/org/apache/oozie/example/SparkFileCopy.java
new file mode 100644
index 0000000..d17534d
--- /dev/null
+++ b/examples/src/main/java/org/apache/oozie/example/SparkFileCopy.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.oozie.example;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+
+public final class SparkFileCopy {
+
+    public static void main(String[] args) throws Exception {
+
+        if (args.length < 2) {
+            System.err.println("Usage: SparkFileCopy <file> <file>");
+            System.exit(1);
+        }
+
+        SparkConf sparkConf = new SparkConf().setAppName("SparkFileCopy");
+        JavaSparkContext ctx = new JavaSparkContext(sparkConf);
+        JavaRDD<String> lines = ctx.textFile(args[0]);
+        lines.saveAsTextFile(args[1]);
+        System.out.println("Copied file from " + args[0] + " to " + args[1]);
+        ctx.stop();
+    }
+}

http://git-wip-us.apache.org/repos/asf/oozie/blob/5228eb8f/release-log.txt
----------------------------------------------------------------------
diff --git a/release-log.txt b/release-log.txt
index d82e578..be11f2c 100644
--- a/release-log.txt
+++ b/release-log.txt
@@ -1,5 +1,6 @@
 -- Oozie 4.2.0 release (trunk - unreleased)
 
+OOZIE-2071 Add a Spark example (pavan kumar via rkanter)
 OOZIE-2145 ZooKeeper paths should start with a "/" (rkanter)
 OOZIE-2113 Oozie Command Line Utilities are failing as hadoop-auth jar not found (shwethags)
 OOZIE-1688 New configuration to specify server-server authentication type (puru)

http://git-wip-us.apache.org/repos/asf/oozie/blob/5228eb8f/sharelib/spark/pom.xml
----------------------------------------------------------------------
diff --git a/sharelib/spark/pom.xml b/sharelib/spark/pom.xml
index db46b8c..c532532 100644
--- a/sharelib/spark/pom.xml
+++ b/sharelib/spark/pom.xml
@@ -61,10 +61,6 @@
                     <artifactId>mesos</artifactId>
                 </exclusion>
                 <exclusion>
-                    <groupId>org.spark-project.protobuf</groupId>
-                    <artifactId>protobuf-java</artifactId>
-                </exclusion>
-                <exclusion>
                     <groupId>org.apache.hadoop</groupId>
                     <artifactId>hadoop-client</artifactId>
                 </exclusion>
@@ -92,40 +88,6 @@
 
         <dependency>
             <groupId>org.apache.spark</groupId>
-            <artifactId>spark-examples_2.10</artifactId>
-            <version>${spark.version}</version>
-            <scope>test</scope>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.apache.hadoop</groupId>
-                    <artifactId>hadoop-client</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.spark-project.protobuf</groupId>
-                    <artifactId>protobuf-java</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-
-        <dependency>
-            <groupId>org.apache.spark</groupId>
-            <artifactId>spark-mllib_2.10</artifactId>
-            <version>${spark.version}</version>
-            <scope>compile</scope>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.apache.hadoop</groupId>
-                    <artifactId>hadoop-client</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.spark-project.protobuf</groupId>
-                    <artifactId>protobuf-java</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-
-        <dependency>
-            <groupId>org.apache.spark</groupId>
             <artifactId>spark-graphx_2.10</artifactId>
             <version>${spark.version}</version>
             <scope>compile</scope>
@@ -133,13 +95,6 @@
 
         <dependency>
             <groupId>org.apache.spark</groupId>
-            <artifactId>spark-repl_2.10</artifactId>
-            <version>${spark.version}</version>
-            <scope>compile</scope>
-        </dependency>
-
-        <dependency>
-            <groupId>org.apache.spark</groupId>
             <artifactId>spark-streaming_2.10</artifactId>
             <version>${spark.version}</version>
             <scope>compile</scope>
@@ -167,6 +122,12 @@
             <artifactId>junit</artifactId>
             <scope>test</scope>
         </dependency>
+
+        <dependency>
+            <groupId>org.apache.oozie</groupId>
+            <artifactId>oozie-examples</artifactId>
+            <scope>test</scope>
+        </dependency>
     </dependencies>
 
     <build>

http://git-wip-us.apache.org/repos/asf/oozie/blob/5228eb8f/sharelib/spark/src/test/java/org/apache/oozie/action/hadoop/TestSparkActionExecutor.java
----------------------------------------------------------------------
diff --git a/sharelib/spark/src/test/java/org/apache/oozie/action/hadoop/TestSparkActionExecutor.java b/sharelib/spark/src/test/java/org/apache/oozie/action/hadoop/TestSparkActionExecutor.java
index b83be47..a15e76b 100644
--- a/sharelib/spark/src/test/java/org/apache/oozie/action/hadoop/TestSparkActionExecutor.java
+++ b/sharelib/spark/src/test/java/org/apache/oozie/action/hadoop/TestSparkActionExecutor.java
@@ -65,14 +65,11 @@ public class TestSparkActionExecutor extends ActionExecutorTestCase {
                 "<name-node>{1}</name-node>" +
                 "<master>local[*]</master>" +
                 "<mode>client</mode>" +
-                "<name>SparkRegression</name>" +
-                "<class>org.apache.spark.examples.mllib.JavaALS</class>" +
+                "<name>SparkFileCopy</name>" +
+                "<class>org.apache.oozie.example.SparkFileCopy</class>" +
                 "<jar>" + getAppPath() +"/lib/test.jar</jar>" +
                 "<arg>" + getAppPath() + "/" + SPARK_FILENAME + "</arg>" +
-                "<arg>1</arg>" +
-                "<arg>2</arg>" +
                 "<arg>" + getAppPath() + "/" + OUTPUT + "</arg>" +
-                "<arg>2</arg>" +
                 "</spark>";
         return MessageFormat.format(script, getJobTrackerUri(), getNameNodeUri());
     }
@@ -99,8 +96,7 @@ public class TestSparkActionExecutor extends ActionExecutorTestCase {
         SparkActionExecutor ae = new SparkActionExecutor();
         ae.check(context, context.getAction());
         assertEquals("SUCCEEDED", context.getAction().getExternalStatus());
-        assertTrue(fs.exists(new Path(getAppPath() + "/" + OUTPUT + "/userFeatures")));
-        assertTrue(fs.exists(new Path(getAppPath() + "/" + OUTPUT + "/productFeatures")));
+        assertTrue(fs.exists(new Path(getAppPath() + "/" + OUTPUT)));
         ae.end(context, context.getAction());
         assertEquals(WorkflowAction.Status.OK, context.getAction().getStatus());
 

http://git-wip-us.apache.org/repos/asf/oozie/blob/5228eb8f/sharelib/spark/src/test/java/org/apache/oozie/action/hadoop/TestSparkMain.java
----------------------------------------------------------------------
diff --git a/sharelib/spark/src/test/java/org/apache/oozie/action/hadoop/TestSparkMain.java b/sharelib/spark/src/test/java/org/apache/oozie/action/hadoop/TestSparkMain.java
index 1d41c6d..7707622 100644
--- a/sharelib/spark/src/test/java/org/apache/oozie/action/hadoop/TestSparkMain.java
+++ b/sharelib/spark/src/test/java/org/apache/oozie/action/hadoop/TestSparkMain.java
@@ -56,10 +56,10 @@ public class TestSparkMain extends MainTestCase {
 
         jobConf.set(SparkActionExecutor.SPARK_MASTER, "local[*]");
         jobConf.set(SparkActionExecutor.SPARK_MODE, "client");
-        jobConf.set(SparkActionExecutor.SPARK_CLASS, "org.apache.spark.examples.mllib.JavaALS");
-        jobConf.set(SparkActionExecutor.SPARK_JOB_NAME, "Spark ALS");
+        jobConf.set(SparkActionExecutor.SPARK_CLASS, "org.apache.oozie.example.SparkFileCopy");
+        jobConf.set(SparkActionExecutor.SPARK_JOB_NAME, "Spark Copy File");
         jobConf.set(SparkActionExecutor.SPARK_OPTS, "--driver-memory 1024M");
-        jobConf.set(SparkActionExecutor.SPARK_JAR, getFsTestCaseDir()+"/lib/test.jar");
+        jobConf.set(SparkActionExecutor.SPARK_JAR, getFsTestCaseDir() + "/lib/test.jar");
 
 
         File actionXml = new File(getTestCaseDir(), "action.xml");
@@ -76,10 +76,9 @@ public class TestSparkMain extends MainTestCase {
 
         String input  = getFsTestCaseDir() + "/" + INPUT;
         String output = getFsTestCaseDir() + "/" + OUTPUT;
-        String[] args = {input, "1", "2", output, "2"};
+        String[] args = {input, output};
         SparkMain.main(args);
-        assertTrue(getFileSystem().exists(new Path(getFsTestCaseDir() + "/" + OUTPUT + "/userFeatures")));
-        assertTrue(getFileSystem().exists(new Path(getFsTestCaseDir() + "/" + OUTPUT + "/productFeatures")));
+        assertTrue(getFileSystem().exists(new Path(getFsTestCaseDir() + "/" + OUTPUT)));
         return null;
     }
 }

http://git-wip-us.apache.org/repos/asf/oozie/blob/5228eb8f/src/main/assemblies/examples.xml
----------------------------------------------------------------------
diff --git a/src/main/assemblies/examples.xml b/src/main/assemblies/examples.xml
index 819545c..d70c280 100644
--- a/src/main/assemblies/examples.xml
+++ b/src/main/assemblies/examples.xml
@@ -114,7 +114,12 @@
         <file>
             <source>${basedir}/target/${artifact.artifactId}-${artifact.version}.jar</source>
             <outputDirectory>/examples/apps/map-reduce/lib</outputDirectory>
-        </file> 
+        </file>
+        <file>
+            <source>${basedir}/target/${artifact.artifactId}-${artifact.version}.jar</source>
+            <outputDirectory>/examples/apps/spark/lib</outputDirectory>
+            <destName>${artifact.artifactId}.jar</destName>
+        </file>
     </files>
 
 </assembly>