You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by an...@apache.org on 2015/05/12 10:39:25 UTC
spark git commit: [SPARK-7485] [BUILD] Remove pyspark files from
assembly.
Repository: spark
Updated Branches:
refs/heads/master 984787526 -> 82e890fb1
[SPARK-7485] [BUILD] Remove pyspark files from assembly.
The sbt part of the build is hacky; it basically tricks sbt
into generating the zip by using a resource generator, but the
generator returns an empty list of files, so nothing is
actually added to the assembly.
Author: Marcelo Vanzin <va...@cloudera.com>
Closes #6022 from vanzin/SPARK-7485 and squashes the following commits:
22c1e04 [Marcelo Vanzin] Remove unneeded code.
4893622 [Marcelo Vanzin] [SPARK-7485] [build] Remove pyspark files from assembly.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/82e890fb
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/82e890fb
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/82e890fb
Branch: refs/heads/master
Commit: 82e890fb19d6fbaffa69856eecb4699f2f8a81eb
Parents: 9847875
Author: Marcelo Vanzin <va...@cloudera.com>
Authored: Tue May 12 01:39:21 2015 -0700
Committer: Andrew Or <an...@databricks.com>
Committed: Tue May 12 01:39:21 2015 -0700
----------------------------------------------------------------------
core/pom.xml | 47 -------------------------------------------
mllib/pom.xml | 11 ----------
project/SparkBuild.scala | 44 +++-------------------------------------
sql/core/pom.xml | 8 --------
streaming/pom.xml | 8 --------
5 files changed, 3 insertions(+), 115 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/82e890fb/core/pom.xml
----------------------------------------------------------------------
diff --git a/core/pom.xml b/core/pom.xml
index fc42f48..262a332 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -381,35 +381,6 @@
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
<testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
<plugins>
- <!-- Unzip py4j so we can include its files in the jar -->
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-antrun-plugin</artifactId>
- <executions>
- <execution>
- <phase>generate-resources</phase>
- <goals>
- <goal>run</goal>
- </goals>
- </execution>
- </executions>
- <configuration>
- <target>
- <unzip src="../python/lib/py4j-0.8.2.1-src.zip" dest="../python/build" />
- </target>
- </configuration>
- </plugin>
- <plugin>
- <artifactId>maven-clean-plugin</artifactId>
- <configuration>
- <filesets>
- <fileset>
- <directory>${basedir}/../python/build</directory>
- </fileset>
- </filesets>
- <verbose>true</verbose>
- </configuration>
- </plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
@@ -438,24 +409,6 @@
</executions>
</plugin>
</plugins>
-
- <resources>
- <resource>
- <directory>src/main/resources</directory>
- </resource>
- <resource>
- <directory>../python</directory>
- <includes>
- <include>pyspark/*.py</include>
- </includes>
- </resource>
- <resource>
- <directory>../python/build</directory>
- <includes>
- <include>py4j/*.py</include>
- </includes>
- </resource>
- </resources>
</build>
<profiles>
http://git-wip-us.apache.org/repos/asf/spark/blob/82e890fb/mllib/pom.xml
----------------------------------------------------------------------
diff --git a/mllib/pom.xml b/mllib/pom.xml
index a3c57ae..0c07ca1 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -141,16 +141,5 @@
<build>
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
<testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
- <resources>
- <resource>
- <directory>../python</directory>
- <includes>
- <include>pyspark/mllib/*.py</include>
- <include>pyspark/mllib/stat/*.py</include>
- <include>pyspark/ml/*.py</include>
- <include>pyspark/ml/param/*.py</include>
- </includes>
- </resource>
- </resources>
</build>
</project>
http://git-wip-us.apache.org/repos/asf/spark/blob/82e890fb/project/SparkBuild.scala
----------------------------------------------------------------------
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 186345a..1b87e4e 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -168,7 +168,7 @@ object SparkBuild extends PomBuild {
/* Enable Assembly for all assembly projects */
assemblyProjects.foreach(enable(Assembly.settings))
- /* Package pyspark artifacts in the main assembly. */
+ /* Package pyspark artifacts in a separate zip file for YARN. */
enable(PySparkAssembly.settings)(assembly)
/* Enable unidoc only for the root spark project */
@@ -373,22 +373,15 @@ object PySparkAssembly {
import java.util.zip.{ZipOutputStream, ZipEntry}
lazy val settings = Seq(
- unmanagedJars in Compile += { BuildCommons.sparkHome / "python/lib/py4j-0.8.2.1-src.zip" },
// Use a resource generator to copy all .py files from python/pyspark into a managed directory
// to be included in the assembly. We can't just add "python/" to the assembly's resource dir
// list since that will copy unneeded / unwanted files.
resourceGenerators in Compile <+= resourceManaged in Compile map { outDir: File =>
val src = new File(BuildCommons.sparkHome, "python/pyspark")
-
val zipFile = new File(BuildCommons.sparkHome , "python/lib/pyspark.zip")
zipFile.delete()
zipRecursive(src, zipFile)
-
- val dst = new File(outDir, "pyspark")
- if (!dst.isDirectory()) {
- require(dst.mkdirs())
- }
- copy(src, dst)
+ Seq[File]()
}
)
@@ -416,42 +409,11 @@ object PySparkAssembly {
output.write(buf, 0, n)
}
}
+ output.closeEntry()
in.close()
}
}
- private def copy(src: File, dst: File): Seq[File] = {
- src.listFiles().flatMap { f =>
- val child = new File(dst, f.getName())
- if (f.isDirectory()) {
- child.mkdir()
- copy(f, child)
- } else if (f.getName().endsWith(".py")) {
- var in: Option[FileInputStream] = None
- var out: Option[FileOutputStream] = None
- try {
- in = Some(new FileInputStream(f))
- out = Some(new FileOutputStream(child))
-
- val bytes = new Array[Byte](1024)
- var read = 0
- while (read >= 0) {
- read = in.get.read(bytes)
- if (read > 0) {
- out.get.write(bytes, 0, read)
- }
- }
-
- Some(child)
- } finally {
- in.foreach(_.close())
- out.foreach(_.close())
- }
- } else {
- None
- }
- }
- }
}
object Unidoc {
http://git-wip-us.apache.org/repos/asf/spark/blob/82e890fb/sql/core/pom.xml
----------------------------------------------------------------------
diff --git a/sql/core/pom.xml b/sql/core/pom.xml
index 7d274a7..ffe95bb 100644
--- a/sql/core/pom.xml
+++ b/sql/core/pom.xml
@@ -103,13 +103,5 @@
<build>
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
<testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
- <resources>
- <resource>
- <directory>../../python</directory>
- <includes>
- <include>pyspark/sql/*.py</include>
- </includes>
- </resource>
- </resources>
</build>
</project>
http://git-wip-us.apache.org/repos/asf/spark/blob/82e890fb/streaming/pom.xml
----------------------------------------------------------------------
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 5ca55a4..5ab7f44 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -105,13 +105,5 @@
</configuration>
</plugin>
</plugins>
- <resources>
- <resource>
- <directory>../python</directory>
- <includes>
- <include>pyspark/streaming/*.py</include>
- </includes>
- </resource>
- </resources>
</build>
</project>
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org