You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@oozie.apache.org by pb...@apache.org on 2017/03/22 11:23:48 UTC

[43/50] [abbrv] oozie git commit: OOZIE-2540 Create a PySpark example (abhishekbafna via rkanter)

OOZIE-2540 Create a PySpark example (abhishekbafna via rkanter)


Project: http://git-wip-us.apache.org/repos/asf/oozie/repo
Commit: http://git-wip-us.apache.org/repos/asf/oozie/commit/c8748d22
Tree: http://git-wip-us.apache.org/repos/asf/oozie/tree/c8748d22
Diff: http://git-wip-us.apache.org/repos/asf/oozie/diff/c8748d22

Branch: refs/heads/oya
Commit: c8748d221f8265c16e37383e8b3f1572bb613083
Parents: e62ffc3
Author: Robert Kanter <rk...@apache.org>
Authored: Thu Mar 9 16:16:24 2017 -0800
Committer: Robert Kanter <rk...@apache.org>
Committed: Thu Mar 9 16:16:24 2017 -0800

----------------------------------------------------------------------
 examples/src/main/apps/pyspark/job.properties | 26 ++++++++++++++
 examples/src/main/apps/pyspark/lib/pi.py      | 41 ++++++++++++++++++++++
 examples/src/main/apps/pyspark/workflow.xml   | 41 ++++++++++++++++++++++
 release-log.txt                               |  1 +
 4 files changed, 109 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/oozie/blob/c8748d22/examples/src/main/apps/pyspark/job.properties
----------------------------------------------------------------------
diff --git a/examples/src/main/apps/pyspark/job.properties b/examples/src/main/apps/pyspark/job.properties
new file mode 100644
index 0000000..72e09b9
--- /dev/null
+++ b/examples/src/main/apps/pyspark/job.properties
@@ -0,0 +1,26 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+nameNode=hdfs://localhost:8020
+jobTracker=localhost:8021
+queueName=default
+examplesRoot=examples
+oozie.use.system.libpath=true
+oozie.wf.application.path=${nameNode}/user/${user.name}/${examplesRoot}/apps/pyspark
+master=yarn-client
+

http://git-wip-us.apache.org/repos/asf/oozie/blob/c8748d22/examples/src/main/apps/pyspark/lib/pi.py
----------------------------------------------------------------------
diff --git a/examples/src/main/apps/pyspark/lib/pi.py b/examples/src/main/apps/pyspark/lib/pi.py
new file mode 100644
index 0000000..a74dc93
--- /dev/null
+++ b/examples/src/main/apps/pyspark/lib/pi.py
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+from random import random
+from operator import add
+
+from pyspark import SparkContext
+
+
+if __name__ == "__main__":  # script entry point: Monte Carlo estimate of pi on Spark
+    """
+        Usage: pi [partitions]
+    """
+    sc = SparkContext(appName="Python-Spark-Pi")
+    partitions = int(sys.argv[1]) if len(sys.argv) > 1 else 2  # optional first CLI argument; defaults to 2
+    n = 100000 * partitions  # total sample count: 100000 per partition
+
+    def f(_):  # ignores its argument; returns 1 if a random point falls inside the unit circle, else 0
+        x = random() * 2 - 1  # random() is in [0, 1), so x is in [-1, 1)
+        y = random() * 2 - 1  # same range as x
+        return 1 if x ** 2 + y ** 2 < 1 else 0
+
+    count = sc.parallelize(range(1, n + 1), partitions).map(f).reduce(add)  # number of samples inside the circle
+    print("Pi is roughly %f" % (4.0 * count / n))  # circle-to-square area ratio is pi/4, hence the factor 4.0
+
+    sc.stop()
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/oozie/blob/c8748d22/examples/src/main/apps/pyspark/workflow.xml
----------------------------------------------------------------------
diff --git a/examples/src/main/apps/pyspark/workflow.xml b/examples/src/main/apps/pyspark/workflow.xml
new file mode 100644
index 0000000..4768a6c
--- /dev/null
+++ b/examples/src/main/apps/pyspark/workflow.xml
@@ -0,0 +1,41 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<workflow-app xmlns='uri:oozie:workflow:0.5' name='SparkPythonPi'>
+
+    <start to='spark-node' />
+
+    <action name='spark-node'>
+        <spark xmlns="uri:oozie:spark-action:0.1">
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <master>${master}</master>
+            <name>Python-Spark-Pi</name>
+            <jar>pi.py</jar>
+        </spark>
+        <ok to="end" />
+        <error to="fail" />
+    </action>
+
+    <kill name="fail">
+        <message>Workflow failed, error message [${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+
+    <end name='end' />
+
+</workflow-app>

http://git-wip-us.apache.org/repos/asf/oozie/blob/c8748d22/release-log.txt
----------------------------------------------------------------------
diff --git a/release-log.txt b/release-log.txt
index 73cd8fa..5c55cad 100644
--- a/release-log.txt
+++ b/release-log.txt
@@ -1,5 +1,6 @@
 -- Oozie 4.4.0 release (trunk - unreleased)
 
+OOZIE-2540 Create a PySpark example (abhishekbafna via rkanter)
 OOZIE-807 Docs can be explicit about multiple sub-workflow definitions being possible (qwertymaniac via rkanter)
 OOZIE-2811 Add support for filtering out properties from SparkConfigurationService (gezapeti via rkanter)
 OOZIE-2802 Spark action failure on Spark 2.1.0 due to duplicate sharelibs (gezapeti via rkanter)