You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@oozie.apache.org by an...@apache.org on 2018/08/08 09:18:49 UTC

oozie git commit: OOZIE-3321 PySpark example fails (daniel.becker via andras.piros)

Repository: oozie
Updated Branches:
  refs/heads/master 3d1fe2877 -> ae11fe7a9


OOZIE-3321 PySpark example fails (daniel.becker via andras.piros)


Project: http://git-wip-us.apache.org/repos/asf/oozie/repo
Commit: http://git-wip-us.apache.org/repos/asf/oozie/commit/ae11fe7a
Tree: http://git-wip-us.apache.org/repos/asf/oozie/tree/ae11fe7a
Diff: http://git-wip-us.apache.org/repos/asf/oozie/diff/ae11fe7a

Branch: refs/heads/master
Commit: ae11fe7a9f07bed71faaec3a91232514c67f8b0f
Parents: 3d1fe28
Author: Andras Piros <an...@cloudera.com>
Authored: Wed Aug 8 11:17:41 2018 +0200
Committer: Andras Piros <an...@cloudera.com>
Committed: Wed Aug 8 11:17:41 2018 +0200

----------------------------------------------------------------------
 examples/src/main/apps/pyspark/lib/pi.py        |  41 -------------------
 release-log.txt                                 |   1 +
 sharelib/pom.xml                                |  15 +++++++
 sharelib/spark/src/main/resources/pi.py         |  41 +++++++++++++++++++
 .../spark/src/main/resources/py4j-0.9-src.zip   | Bin 0 -> 44846 bytes
 sharelib/spark/src/main/resources/pyspark.zip   | Bin 0 -> 357051 bytes
 .../apache/oozie/action/hadoop/TestPyspark.java |   6 ---
 sharelib/spark/src/test/resources/pi.py         |  41 -------------------
 .../spark/src/test/resources/py4j-0.9-src.zip   | Bin 44846 -> 0 bytes
 sharelib/spark/src/test/resources/pyspark.zip   | Bin 357051 -> 0 bytes
 src/main/assemblies/examples.xml                |   4 ++
 11 files changed, 61 insertions(+), 88 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/oozie/blob/ae11fe7a/examples/src/main/apps/pyspark/lib/pi.py
----------------------------------------------------------------------
diff --git a/examples/src/main/apps/pyspark/lib/pi.py b/examples/src/main/apps/pyspark/lib/pi.py
deleted file mode 100644
index a74dc93..0000000
--- a/examples/src/main/apps/pyspark/lib/pi.py
+++ /dev/null
@@ -1,41 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import sys
-from random import random
-from operator import add
-
-from pyspark import SparkContext
-
-
-if __name__ == "__main__":
-    """
-        Usage: pi [partitions]
-    """
-    sc = SparkContext(appName="Python-Spark-Pi")
-    partitions = int(sys.argv[1]) if len(sys.argv) > 1 else 2
-    n = 100000 * partitions
-
-    def f(_):
-        x = random() * 2 - 1
-        y = random() * 2 - 1
-        return 1 if x ** 2 + y ** 2 < 1 else 0
-
-    count = sc.parallelize(range(1, n + 1), partitions).map(f).reduce(add)
-    print("Pi is roughly %f" % (4.0 * count / n))
-
-    sc.stop()
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/oozie/blob/ae11fe7a/release-log.txt
----------------------------------------------------------------------
diff --git a/release-log.txt b/release-log.txt
index 1979e6d..5ed9f79 100644
--- a/release-log.txt
+++ b/release-log.txt
@@ -1,5 +1,6 @@
 -- Oozie 5.1.0 release (trunk - unreleased)
 
+OOZIE-3321 PySpark example fails (daniel.becker via andras.piros)
 OOZIE-3315 DateList example fails (daniel.becker via andras.piros)
 OOZIE-3313 Hive example action fails (daniel.becker via gezapeti)
 OOZIE-3193 Applications are not killed when submitted via subworkflow (kmarton via gezapeti, andras.piros)

http://git-wip-us.apache.org/repos/asf/oozie/blob/ae11fe7a/sharelib/pom.xml
----------------------------------------------------------------------
diff --git a/sharelib/pom.xml b/sharelib/pom.xml
index 6a0864d..39cea25 100644
--- a/sharelib/pom.xml
+++ b/sharelib/pom.xml
@@ -61,6 +61,21 @@
                             <goal>resources</goal>
                         </goals>
                     </execution>
+                    <execution>
+                        <id>copy-resources</id>
+                        <phase>generate-test-resources</phase>
+                        <goals>
+                            <goal>copy-resources</goal>
+                        </goals>
+                        <configuration>
+                            <outputDirectory>${basedir}/spark/target/test-classes</outputDirectory>
+                            <resources>
+                                <resource>
+                                    <directory>spark/src/main/resources</directory>
+                                </resource>
+                            </resources>
+                        </configuration>
+                    </execution>
                 </executions>
             </plugin>
             <plugin>

http://git-wip-us.apache.org/repos/asf/oozie/blob/ae11fe7a/sharelib/spark/src/main/resources/pi.py
----------------------------------------------------------------------
diff --git a/sharelib/spark/src/main/resources/pi.py b/sharelib/spark/src/main/resources/pi.py
new file mode 100644
index 0000000..e9836b2
--- /dev/null
+++ b/sharelib/spark/src/main/resources/pi.py
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+from random import random
+from operator import add
+
+from pyspark import SparkContext
+
+
+if __name__ == "__main__":
+    """
+        Usage: pi [partitions]
+    """
+    sc = SparkContext(appName="PythonPi")
+    partitions = int(sys.argv[1]) if len(sys.argv) > 1 else 2
+    n = 100000 * partitions
+
+    def f(_):
+        x = random() * 2 - 1
+        y = random() * 2 - 1
+        return 1 if x ** 2 + y ** 2 < 1 else 0
+
+    count = sc.parallelize(range(1, n + 1), partitions).map(f).reduce(add)
+    print("Pi is roughly %f" % (4.0 * count / n))
+
+    sc.stop()
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/oozie/blob/ae11fe7a/sharelib/spark/src/main/resources/py4j-0.9-src.zip
----------------------------------------------------------------------
diff --git a/sharelib/spark/src/main/resources/py4j-0.9-src.zip b/sharelib/spark/src/main/resources/py4j-0.9-src.zip
new file mode 100644
index 0000000..dace2d0
Binary files /dev/null and b/sharelib/spark/src/main/resources/py4j-0.9-src.zip differ

http://git-wip-us.apache.org/repos/asf/oozie/blob/ae11fe7a/sharelib/spark/src/main/resources/pyspark.zip
----------------------------------------------------------------------
diff --git a/sharelib/spark/src/main/resources/pyspark.zip b/sharelib/spark/src/main/resources/pyspark.zip
new file mode 100644
index 0000000..9ff8bd8
Binary files /dev/null and b/sharelib/spark/src/main/resources/pyspark.zip differ

http://git-wip-us.apache.org/repos/asf/oozie/blob/ae11fe7a/sharelib/spark/src/test/java/org/apache/oozie/action/hadoop/TestPyspark.java
----------------------------------------------------------------------
diff --git a/sharelib/spark/src/test/java/org/apache/oozie/action/hadoop/TestPyspark.java b/sharelib/spark/src/test/java/org/apache/oozie/action/hadoop/TestPyspark.java
index 9d8d4aa..f83bbfe 100644
--- a/sharelib/spark/src/test/java/org/apache/oozie/action/hadoop/TestPyspark.java
+++ b/sharelib/spark/src/test/java/org/apache/oozie/action/hadoop/TestPyspark.java
@@ -24,15 +24,9 @@ import java.util.ArrayList;
 
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.JobID;
-import org.apache.hadoop.mapred.RunningJob;
 import org.apache.oozie.WorkflowActionBean;
 import org.apache.oozie.WorkflowJobBean;
 import org.apache.oozie.client.WorkflowAction;
-import org.apache.oozie.service.HadoopAccessorService;
-import org.apache.oozie.service.Services;
 import org.apache.oozie.service.WorkflowAppService;
 import org.apache.oozie.util.IOUtils;
 import org.apache.oozie.util.XConfiguration;

http://git-wip-us.apache.org/repos/asf/oozie/blob/ae11fe7a/sharelib/spark/src/test/resources/pi.py
----------------------------------------------------------------------
diff --git a/sharelib/spark/src/test/resources/pi.py b/sharelib/spark/src/test/resources/pi.py
deleted file mode 100644
index e9836b2..0000000
--- a/sharelib/spark/src/test/resources/pi.py
+++ /dev/null
@@ -1,41 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import sys
-from random import random
-from operator import add
-
-from pyspark import SparkContext
-
-
-if __name__ == "__main__":
-    """
-        Usage: pi [partitions]
-    """
-    sc = SparkContext(appName="PythonPi")
-    partitions = int(sys.argv[1]) if len(sys.argv) > 1 else 2
-    n = 100000 * partitions
-
-    def f(_):
-        x = random() * 2 - 1
-        y = random() * 2 - 1
-        return 1 if x ** 2 + y ** 2 < 1 else 0
-
-    count = sc.parallelize(range(1, n + 1), partitions).map(f).reduce(add)
-    print("Pi is roughly %f" % (4.0 * count / n))
-
-    sc.stop()
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/oozie/blob/ae11fe7a/sharelib/spark/src/test/resources/py4j-0.9-src.zip
----------------------------------------------------------------------
diff --git a/sharelib/spark/src/test/resources/py4j-0.9-src.zip b/sharelib/spark/src/test/resources/py4j-0.9-src.zip
deleted file mode 100644
index dace2d0..0000000
Binary files a/sharelib/spark/src/test/resources/py4j-0.9-src.zip and /dev/null differ

http://git-wip-us.apache.org/repos/asf/oozie/blob/ae11fe7a/sharelib/spark/src/test/resources/pyspark.zip
----------------------------------------------------------------------
diff --git a/sharelib/spark/src/test/resources/pyspark.zip b/sharelib/spark/src/test/resources/pyspark.zip
deleted file mode 100644
index 9ff8bd8..0000000
Binary files a/sharelib/spark/src/test/resources/pyspark.zip and /dev/null differ

http://git-wip-us.apache.org/repos/asf/oozie/blob/ae11fe7a/src/main/assemblies/examples.xml
----------------------------------------------------------------------
diff --git a/src/main/assemblies/examples.xml b/src/main/assemblies/examples.xml
index ee485dc..c365ccd 100644
--- a/src/main/assemblies/examples.xml
+++ b/src/main/assemblies/examples.xml
@@ -31,6 +31,10 @@
             <directory>${basedir}/src/main/apps</directory>
             <outputDirectory>/examples/apps</outputDirectory>
         </fileSet>
+        <fileSet>
+            <directory>${basedir}/../sharelib/spark/src/main/resources</directory>
+            <outputDirectory>/examples/apps/pyspark/lib</outputDirectory>
+        </fileSet>
     </fileSets>
 
     <files>