Posted to commits@spark.apache.org by an...@apache.org on 2015/05/12 10:39:25 UTC

spark git commit: [SPARK-7485] [BUILD] Remove pyspark files from assembly.

Repository: spark
Updated Branches:
  refs/heads/master 984787526 -> 82e890fb1


[SPARK-7485] [BUILD] Remove pyspark files from assembly.

The sbt part of the build is hacky: it tricks sbt into
generating the zip through a resource generator, but the
generator returns an empty list of generated files, so
nothing is actually added to the assembly.
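
For illustration, a minimal sbt sketch of that trick: a resource
generator that builds the zip as a side effect but reports an empty
file list, so the assembly picks nothing up. The object and value
names here are hypothetical, and sbt's IO.zip stands in for the
custom zip helper that SparkBuild.scala actually uses.

    import sbt._
    import Keys._

    // Hypothetical helper object; not Spark's actual PySparkAssembly.
    object PySparkZipSketch {
      lazy val settings = Seq(
        resourceGenerators in Compile += Def.task {
          // Assumed layout: project lives one level below the Spark home.
          val sparkHome = baseDirectory.value.getParentFile
          val src = sparkHome / "python" / "pyspark"
          val zipFile = sparkHome / "python" / "lib" / "pyspark.zip"
          IO.delete(zipFile)
          // sbt's built-in zip helper; Spark's build rolls its own zipRecursive.
          IO.zip(Path.allSubpaths(src), zipFile)
          // Report no generated files, so nothing lands in the assembly jar.
          Seq.empty[File]
        }.taskValue
      )
    }

Returning an empty Seq is what keeps the .py files out of the
assembly jar while still regenerating python/lib/pyspark.zip on
every build.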

Author: Marcelo Vanzin <va...@cloudera.com>

Closes #6022 from vanzin/SPARK-7485 and squashes the following commits:

22c1e04 [Marcelo Vanzin] Remove unneeded code.
4893622 [Marcelo Vanzin] [SPARK-7485] [build] Remove pyspark files from assembly.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/82e890fb
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/82e890fb
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/82e890fb

Branch: refs/heads/master
Commit: 82e890fb19d6fbaffa69856eecb4699f2f8a81eb
Parents: 9847875
Author: Marcelo Vanzin <va...@cloudera.com>
Authored: Tue May 12 01:39:21 2015 -0700
Committer: Andrew Or <an...@databricks.com>
Committed: Tue May 12 01:39:21 2015 -0700

----------------------------------------------------------------------
 core/pom.xml             | 47 -------------------------------------------
 mllib/pom.xml            | 11 ----------
 project/SparkBuild.scala | 44 +++-------------------------------------
 sql/core/pom.xml         |  8 --------
 streaming/pom.xml        |  8 --------
 5 files changed, 3 insertions(+), 115 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/82e890fb/core/pom.xml
----------------------------------------------------------------------
diff --git a/core/pom.xml b/core/pom.xml
index fc42f48..262a332 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -381,35 +381,6 @@
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
     <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
     <plugins>
-      <!-- Unzip py4j so we can include its files in the jar -->
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-antrun-plugin</artifactId>
-        <executions>
-          <execution>
-            <phase>generate-resources</phase>
-            <goals>
-              <goal>run</goal>
-            </goals>
-          </execution>
-        </executions>
-        <configuration>
-          <target>
-            <unzip src="../python/lib/py4j-0.8.2.1-src.zip" dest="../python/build" />
-          </target>
-        </configuration>
-      </plugin>
-      <plugin>
-        <artifactId>maven-clean-plugin</artifactId>
-        <configuration>
-          <filesets>
-            <fileset>
-              <directory>${basedir}/../python/build</directory>
-            </fileset>
-          </filesets>
-          <verbose>true</verbose>
-        </configuration>
-      </plugin>
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-dependency-plugin</artifactId>
@@ -438,24 +409,6 @@
         </executions>
       </plugin>
     </plugins>
-
-    <resources>
-      <resource>
-        <directory>src/main/resources</directory>
-      </resource>
-      <resource>
-        <directory>../python</directory>
-        <includes>
-          <include>pyspark/*.py</include>
-        </includes>
-      </resource>
-      <resource>
-        <directory>../python/build</directory>
-        <includes>
-          <include>py4j/*.py</include>
-        </includes>
-      </resource>
-    </resources>
   </build>
 
   <profiles>

http://git-wip-us.apache.org/repos/asf/spark/blob/82e890fb/mllib/pom.xml
----------------------------------------------------------------------
diff --git a/mllib/pom.xml b/mllib/pom.xml
index a3c57ae..0c07ca1 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -141,16 +141,5 @@
   <build>
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
     <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
-    <resources>
-      <resource>
-        <directory>../python</directory>
-        <includes>
-          <include>pyspark/mllib/*.py</include>
-          <include>pyspark/mllib/stat/*.py</include>
-          <include>pyspark/ml/*.py</include>
-          <include>pyspark/ml/param/*.py</include>
-        </includes>
-      </resource>
-    </resources>
   </build>
 </project>

http://git-wip-us.apache.org/repos/asf/spark/blob/82e890fb/project/SparkBuild.scala
----------------------------------------------------------------------
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 186345a..1b87e4e 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -168,7 +168,7 @@ object SparkBuild extends PomBuild {
   /* Enable Assembly for all assembly projects */
   assemblyProjects.foreach(enable(Assembly.settings))
 
-  /* Package pyspark artifacts in the main assembly. */
+  /* Package pyspark artifacts in a separate zip file for YARN. */
   enable(PySparkAssembly.settings)(assembly)
 
   /* Enable unidoc only for the root spark project */
@@ -373,22 +373,15 @@ object PySparkAssembly {
   import java.util.zip.{ZipOutputStream, ZipEntry}
 
   lazy val settings = Seq(
-    unmanagedJars in Compile += { BuildCommons.sparkHome / "python/lib/py4j-0.8.2.1-src.zip" },
     // Use a resource generator to copy all .py files from python/pyspark into a managed directory
     // to be included in the assembly. We can't just add "python/" to the assembly's resource dir
     // list since that will copy unneeded / unwanted files.
     resourceGenerators in Compile <+= resourceManaged in Compile map { outDir: File =>
       val src = new File(BuildCommons.sparkHome, "python/pyspark")
-
       val zipFile = new File(BuildCommons.sparkHome , "python/lib/pyspark.zip")
       zipFile.delete()
       zipRecursive(src, zipFile)
-
-      val dst = new File(outDir, "pyspark")
-      if (!dst.isDirectory()) {
-        require(dst.mkdirs())
-      }
-      copy(src, dst)
+      Seq[File]()
     }
   )
 
@@ -416,42 +409,11 @@ object PySparkAssembly {
           output.write(buf, 0, n)
         }
       }
+      output.closeEntry()
       in.close()
     }
   }
 
-  private def copy(src: File, dst: File): Seq[File] = {
-    src.listFiles().flatMap { f =>
-      val child = new File(dst, f.getName())
-      if (f.isDirectory()) {
-        child.mkdir()
-        copy(f, child)
-      } else if (f.getName().endsWith(".py")) {
-        var in: Option[FileInputStream] = None
-        var out: Option[FileOutputStream] = None
-        try {
-          in = Some(new FileInputStream(f))
-          out = Some(new FileOutputStream(child))
-
-          val bytes = new Array[Byte](1024)
-          var read = 0
-          while (read >= 0) {
-            read = in.get.read(bytes)
-            if (read > 0) {
-              out.get.write(bytes, 0, read)
-            }
-          }
-
-          Some(child)
-        } finally {
-          in.foreach(_.close())
-          out.foreach(_.close())
-        }
-      } else {
-        None
-      }
-    }
-  }
 }
 
 object Unidoc {
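
The one functional addition in the hunk above is the explicit
output.closeEntry() after each file's bytes are written. Below is a
minimal, self-contained sketch of a recursive zip writer in that
style; the structure is assumed for illustration and is not copied
verbatim from SparkBuild.scala.

    import java.io.{File, FileInputStream, FileOutputStream}
    import java.util.zip.{ZipEntry, ZipOutputStream}

    object ZipWriterSketch {
      def zipRecursive(source: File, destZipFile: File): Unit = {
        val output = new ZipOutputStream(new FileOutputStream(destZipFile))
        try addFilesToZipStream("", source, output) finally output.close()
      }

      private def addFilesToZipStream(parent: String, source: File,
          output: ZipOutputStream): Unit = {
        if (source.isDirectory) {
          // Recurse into subdirectories, prefixing entries with the dir name.
          source.listFiles().foreach { f =>
            addFilesToZipStream(parent + source.getName + "/", f, output)
          }
        } else {
          val in = new FileInputStream(source)
          try {
            output.putNextEntry(new ZipEntry(parent + source.getName))
            val buf = new Array[Byte](8192)
            var n = in.read(buf)
            while (n != -1) {
              output.write(buf, 0, n)
              n = in.read(buf)
            }
            // Close the current entry before the next file is added
            // (the fix the diff above introduces).
            output.closeEntry()
          } finally {
            in.close()
          }
        }
      }
    }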

http://git-wip-us.apache.org/repos/asf/spark/blob/82e890fb/sql/core/pom.xml
----------------------------------------------------------------------
diff --git a/sql/core/pom.xml b/sql/core/pom.xml
index 7d274a7..ffe95bb 100644
--- a/sql/core/pom.xml
+++ b/sql/core/pom.xml
@@ -103,13 +103,5 @@
   <build>
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
     <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
-    <resources>
-      <resource>
-        <directory>../../python</directory>
-        <includes>
-          <include>pyspark/sql/*.py</include>
-        </includes>
-      </resource>
-    </resources>
   </build>
 </project>

http://git-wip-us.apache.org/repos/asf/spark/blob/82e890fb/streaming/pom.xml
----------------------------------------------------------------------
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 5ca55a4..5ab7f44 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -105,13 +105,5 @@
         </configuration>
       </plugin>
     </plugins>
-    <resources>
-      <resource>
-        <directory>../python</directory>
-        <includes>
-          <include>pyspark/streaming/*.py</include>
-        </includes>
-      </resource>
-    </resources>
   </build>
 </project>

