You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@oozie.apache.org by pb...@apache.org on 2017/03/22 11:23:48 UTC
[43/50] [abbrv] oozie git commit: OOZIE-2540 Create a PySpark example
(abhishekbafna via rkanter)
OOZIE-2540 Create a PySpark example (abhishekbafna via rkanter)
Project: http://git-wip-us.apache.org/repos/asf/oozie/repo
Commit: http://git-wip-us.apache.org/repos/asf/oozie/commit/c8748d22
Tree: http://git-wip-us.apache.org/repos/asf/oozie/tree/c8748d22
Diff: http://git-wip-us.apache.org/repos/asf/oozie/diff/c8748d22
Branch: refs/heads/oya
Commit: c8748d221f8265c16e37383e8b3f1572bb613083
Parents: e62ffc3
Author: Robert Kanter <rk...@apache.org>
Authored: Thu Mar 9 16:16:24 2017 -0800
Committer: Robert Kanter <rk...@apache.org>
Committed: Thu Mar 9 16:16:24 2017 -0800
----------------------------------------------------------------------
examples/src/main/apps/pyspark/job.properties | 26 ++++++++++++++
examples/src/main/apps/pyspark/lib/pi.py | 41 ++++++++++++++++++++++
examples/src/main/apps/pyspark/workflow.xml | 41 ++++++++++++++++++++++
release-log.txt | 1 +
4 files changed, 109 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/oozie/blob/c8748d22/examples/src/main/apps/pyspark/job.properties
----------------------------------------------------------------------
diff --git a/examples/src/main/apps/pyspark/job.properties b/examples/src/main/apps/pyspark/job.properties
new file mode 100644
index 0000000..72e09b9
--- /dev/null
+++ b/examples/src/main/apps/pyspark/job.properties
@@ -0,0 +1,26 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+nameNode=hdfs://localhost:8020
+jobTracker=localhost:8021
+queueName=default
+examplesRoot=examples
+oozie.use.system.libpath=true
+oozie.wf.application.path=${nameNode}/user/${user.name}/${examplesRoot}/apps/pyspark
+master=yarn-client
+
http://git-wip-us.apache.org/repos/asf/oozie/blob/c8748d22/examples/src/main/apps/pyspark/lib/pi.py
----------------------------------------------------------------------
diff --git a/examples/src/main/apps/pyspark/lib/pi.py b/examples/src/main/apps/pyspark/lib/pi.py
new file mode 100644
index 0000000..a74dc93
--- /dev/null
+++ b/examples/src/main/apps/pyspark/lib/pi.py
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+from random import random
+from operator import add
+
+from pyspark import SparkContext
+
+
+if __name__ == "__main__":  # script entry point: Monte Carlo estimate of pi computed through a SparkContext
+ """
+ Usage: pi [partitions]
+ """
+ sc = SparkContext(appName="Python-Spark-Pi")  # same name as the <name> element in this commit's workflow.xml
+ partitions = int(sys.argv[1]) if len(sys.argv) > 1 else 2  # optional first CLI argument; defaults to 2
+ n = 100000 * partitions  # total number of samples: 100000 per partition
+
+ def f(_):  # the element value is ignored; returns 1 for a hit inside the unit circle, otherwise 0
+ x = random() * 2 - 1  # random() is in [0, 1), so x is in [-1, 1)
+ y = random() * 2 - 1  # same range for y
+ return 1 if x ** 2 + y ** 2 < 1 else 0  # hit only when strictly inside the circle of radius 1
+
+ count = sc.parallelize(range(1, n + 1), partitions).map(f).reduce(add)  # sum of the 0/1 results over all n samples
+ print("Pi is roughly %f" % (4.0 * count / n))  # hit ratio approximates pi/4, hence the factor 4.0 (also keeps the division in floating point)
+
+ sc.stop()  # stop the SparkContext once the result has been printed
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/oozie/blob/c8748d22/examples/src/main/apps/pyspark/workflow.xml
----------------------------------------------------------------------
diff --git a/examples/src/main/apps/pyspark/workflow.xml b/examples/src/main/apps/pyspark/workflow.xml
new file mode 100644
index 0000000..4768a6c
--- /dev/null
+++ b/examples/src/main/apps/pyspark/workflow.xml
@@ -0,0 +1,41 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<workflow-app xmlns='uri:oozie:workflow:0.5' name='SparkPythonPi'>
+
+ <start to='spark-node' />
+
+ <action name='spark-node'>
+ <spark xmlns="uri:oozie:spark-action:0.1">
+ <job-tracker>${jobTracker}</job-tracker>
+ <name-node>${nameNode}</name-node>
+ <master>${master}</master>
+ <name>Python-Spark-Pi</name>
+ <jar>pi.py</jar>
+ </spark>
+ <ok to="end" />
+ <error to="fail" />
+ </action>
+
+ <kill name="fail">
+ <message>Workflow failed, error message [${wf:errorMessage(wf:lastErrorNode())}]</message>
+ </kill>
+
+ <end name='end' />
+
+</workflow-app>
http://git-wip-us.apache.org/repos/asf/oozie/blob/c8748d22/release-log.txt
----------------------------------------------------------------------
diff --git a/release-log.txt b/release-log.txt
index 73cd8fa..5c55cad 100644
--- a/release-log.txt
+++ b/release-log.txt
@@ -1,5 +1,6 @@
-- Oozie 4.4.0 release (trunk - unreleased)
+OOZIE-2540 Create a PySpark example (abhishekbafna via rkanter)
OOZIE-807 Docs can be explicit about multiple sub-workflow definitions being possible (qwertymaniac via rkanter)
OOZIE-2811 Add support for filtering out properties from SparkConfigurationService (gezapeti via rkanter)
OOZIE-2802 Spark action failure on Spark 2.1.0 due to duplicate sharelibs (gezapeti via rkanter)