You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by an...@apache.org on 2015/04/14 22:41:44 UTC
spark git commit: [SPARK-5808] [build] Package pyspark files in sbt
assembly.
Repository: spark
Updated Branches:
refs/heads/master 6adb8bcbf -> 65774370a
[SPARK-5808] [build] Package pyspark files in sbt assembly.
This turned out to be more complicated than I wanted because the
layout of python/ doesn't really follow the usual maven conventions.
So some extra code is needed to copy just the right things.
Author: Marcelo Vanzin <va...@cloudera.com>
Closes #5461 from vanzin/SPARK-5808 and squashes the following commits:
7153dac [Marcelo Vanzin] Only try to create resource dir if it doesn't already exist.
ee90e84 [Marcelo Vanzin] [SPARK-5808] [build] Package pyspark files in sbt assembly.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/65774370
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/65774370
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/65774370
Branch: refs/heads/master
Commit: 65774370a1275e25cd8a3357e397d116767793a9
Parents: 6adb8bc
Author: Marcelo Vanzin <va...@cloudera.com>
Authored: Tue Apr 14 13:41:38 2015 -0700
Committer: Andrew Or <an...@databricks.com>
Committed: Tue Apr 14 13:41:38 2015 -0700
----------------------------------------------------------------------
project/SparkBuild.scala | 60 ++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 59 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/65774370/project/SparkBuild.scala
----------------------------------------------------------------------
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 5f51f4b..09b4976 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-import java.io.File
+import java.io._
import scala.util.Properties
import scala.collection.JavaConversions._
@@ -166,6 +166,9 @@ object SparkBuild extends PomBuild {
/* Enable Assembly for all assembly projects */
assemblyProjects.foreach(enable(Assembly.settings))
+ /* Package pyspark artifacts in the main assembly. */
+ enable(PySparkAssembly.settings)(assembly)
+
/* Enable unidoc only for the root spark project */
enable(Unidoc.settings)(spark)
@@ -316,6 +319,7 @@ object Hive {
}
object Assembly {
+ import sbtassembly.AssemblyUtils._
import sbtassembly.Plugin._
import AssemblyKeys._
@@ -347,6 +351,60 @@ object Assembly {
)
}
+// Bundles the pyspark Python sources (and the py4j zip) into the main
+// assembly jar so Spark's assembly can serve them without a separate install.
+object PySparkAssembly {
+ import sbtassembly.Plugin._
+ import AssemblyKeys._
+
+ // Settings: put the bundled py4j source zip on the compile classpath and
+ // register a resource generator that mirrors python/pyspark .py files into
+ // the managed resource directory (which the assembly then packages).
+ lazy val settings = Seq(
+ unmanagedJars in Compile += { BuildCommons.sparkHome / "python/lib/py4j-0.8.2.1-src.zip" },
+ // Use a resource generator to copy all .py files from python/pyspark into a managed directory
+ // to be included in the assembly. We can't just add "python/" to the assembly's resource dir
+ // list since that will copy unneeded / unwanted files.
+ resourceGenerators in Compile <+= resourceManaged in Compile map { outDir: File =>
+ val dst = new File(outDir, "pyspark")
+ if (!dst.isDirectory()) {
+ require(dst.mkdirs())
+ }
+
+ val src = new File(BuildCommons.sparkHome, "python/pyspark")
+ copy(src, dst)
+ }
+ )
+
+ // Recursively copies every *.py file under src into dst, preserving the
+ // directory layout. Returns the list of destination files created so the
+ // resource generator can report them to sbt. Non-.py files are skipped.
+ private def copy(src: File, dst: File): Seq[File] = {
+ src.listFiles().flatMap { f =>
+ val child = new File(dst, f.getName())
+ if (f.isDirectory()) {
+ // NOTE(review): mkdir()'s boolean result is ignored — a failure here
+ // only surfaces later as a FileOutputStream error inside the recursion.
+ child.mkdir()
+ copy(f, child)
+ } else if (f.getName().endsWith(".py")) {
+ var in: Option[FileInputStream] = None
+ var out: Option[FileOutputStream] = None
+ try {
+ in = Some(new FileInputStream(f))
+ out = Some(new FileOutputStream(child))
+
+ // Manual 1 KiB buffered copy; InputStream.read returns -1 at EOF,
+ // which makes the loop condition fail and ends the copy.
+ val bytes = new Array[Byte](1024)
+ var read = 0
+ while (read >= 0) {
+ read = in.get.read(bytes)
+ if (read > 0) {
+ out.get.write(bytes, 0, read)
+ }
+ }
+
+ Some(child)
+ } finally {
+ // NOTE(review): if in.close() throws, out is never closed (leak) —
+ // consider closing each stream in its own try, or a loan helper.
+ in.foreach(_.close())
+ out.foreach(_.close())
+ }
+ } else {
+ None
+ }
+ }
+ }
+}
+
object Unidoc {
import BuildCommons._
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org