Posted to commits@spark.apache.org by jo...@apache.org on 2016/04/05 01:53:12 UTC

spark git commit: [SPARK-13579][BUILD] Stop building the main Spark assembly.

Repository: spark
Updated Branches:
  refs/heads/master 400b2f863 -> 24d7d2e45


[SPARK-13579][BUILD] Stop building the main Spark assembly.

This change modifies the "assembly/" module to just copy the needed
dependencies to its build directory, and modifies the packaging
script to pick those up (and to remove duplicate jars packaged in the
examples module). A rough sketch of the resulting layout follows.
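
As an illustration only (assuming a Scala 2.11 build; the exact jar list
depends on the enabled profiles), the assembly build directory and the
distribution now hold a directory of plain jars instead of one uber jar:

    # after `build/mvn -DskipTests package` or `build/sbt assembly/package`
    ls assembly/target/scala-2.11/jars/
    # spark-core_2.11-*.jar, guava-14.0.1.jar, hadoop-*.jar, ...

    # dev/make-distribution.sh then copies that directory into <dist>/jars/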

I also made some minor adjustments to dependencies: removing some test
jars from the final packaging, and removing jars that conflict with each
other when packaged separately (e.g. the servlet API). See the quick check
sketched below.
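
For instance, the per-profile dependency manifests under dev/deps/ (updated
in this diff) can serve as a quick check that those jars are gone; a hedged
shell one-liner, assuming the hadoop-2.6 profile:

    # should print nothing once the avro-ipc test jar and servlet-api are dropped
    grep -E 'avro-ipc-.*-tests|^servlet-api-' dev/deps/spark-deps-hadoop-2.6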

Also note that this change restores Guava to applications' classpaths, even
though it's still shaded inside Spark. This is now needed by the Hadoop
libraries packaged with Spark, which are no longer processed by the shade
plugin.
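
A minimal way to see this in a built distribution (assuming the default
Scala 2.11 / Hadoop profiles; not part of this patch):

    # Guava 14.0.1 now ships alongside the other runtime jars, so it is on
    # applications' classpaths even though Spark's internal copy stays shaded.
    ls "$SPARK_HOME"/jars/guava-*
    # -> guava-14.0.1.jar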

Author: Marcelo Vanzin <va...@cloudera.com>

Closes #11796 from vanzin/SPARK-13579.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/24d7d2e4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/24d7d2e4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/24d7d2e4

Branch: refs/heads/master
Commit: 24d7d2e453ab5eef6099a32fb9e8ed60f6ada93a
Parents: 400b2f8
Author: Marcelo Vanzin <va...@cloudera.com>
Authored: Mon Apr 4 16:52:21 2016 -0700
Committer: Josh Rosen <jo...@databricks.com>
Committed: Mon Apr 4 16:52:22 2016 -0700

----------------------------------------------------------------------
 assembly/pom.xml                                | 101 ++++++-------------
 bin/spark-class                                 |  11 +-
 bin/spark-class2.cmd                            |   5 +-
 .../scala/org/apache/spark/util/Utils.scala     |   4 +-
 .../apache/spark/util/FileAppenderSuite.scala   |  72 +++++++------
 dev/deps/spark-deps-hadoop-2.2                  |   4 +-
 dev/deps/spark-deps-hadoop-2.3                  |   4 +-
 dev/deps/spark-deps-hadoop-2.4                  |   4 +-
 dev/deps/spark-deps-hadoop-2.6                  |   4 +-
 dev/deps/spark-deps-hadoop-2.7                  |   4 +-
 dev/make-distribution.sh                        |  25 +++--
 dev/mima                                        |   6 +-
 dev/run-tests.py                                |  11 +-
 docs/sql-programming-guide.md                   |   7 +-
 examples/pom.xml                                |  80 ++-------------
 .../spark/launcher/AbstractCommandBuilder.java  |  47 ++++-----
 .../spark/launcher/CommandBuilderUtils.java     |   4 +-
 .../launcher/SparkSubmitCommandBuilder.java     |  11 +-
 pom.xml                                         |  44 +++++---
 project/SparkBuild.scala                        |  45 ++++-----
 python/pyspark/streaming/tests.py               |   6 +-
 python/run-tests.py                             |  18 +++-
 .../thriftserver/HiveThriftServer2Suites.scala  |   4 +
 sql/hive/pom.xml                                |  24 -----
 .../org/apache/spark/deploy/yarn/Client.scala   |   3 -
 .../apache/spark/deploy/yarn/ClientSuite.scala  |   2 +-
 26 files changed, 231 insertions(+), 319 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/24d7d2e4/assembly/pom.xml
----------------------------------------------------------------------
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 477d493..22cbac0 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -33,9 +33,8 @@
 
   <properties>
     <sbt.project.name>assembly</sbt.project.name>
-    <spark.jar.dir>scala-${scala.binary.version}</spark.jar.dir>
-    <spark.jar.basename>spark-assembly-${project.version}-hadoop${hadoop.version}.jar</spark.jar.basename>
-    <spark.jar>${project.build.directory}/${spark.jar.dir}/${spark.jar.basename}</spark.jar>
+    <build.testJarPhase>none</build.testJarPhase>
+    <build.copyDependenciesPhase>package</build.copyDependenciesPhase>
   </properties>
 
   <dependencies>
@@ -69,6 +68,17 @@
       <artifactId>spark-repl_${scala.binary.version}</artifactId>
       <version>${project.version}</version>
     </dependency>
+
+    <!--
+      Because we don't shade dependencies anymore, we need to restore Guava to compile scope so
+      that the libraries Spark depend on have it available. We'll package the version that Spark
+      uses (14.0.1) which is not the same as Hadoop dependencies, but works.
+    -->
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+      <scope>${hadoop.deps.scope}</scope>
+    </dependency>
   </dependencies>
 
   <build>
@@ -87,75 +97,26 @@
           <skip>true</skip>
         </configuration>
       </plugin>
-        <!-- zip pyspark archives to run python application on yarn mode -->
-        <plugin>
-          <groupId>org.apache.maven.plugins</groupId>
-            <artifactId>maven-antrun-plugin</artifactId>
-            <executions>
-              <execution>
-                <phase>package</phase>
-                  <goals>
-                    <goal>run</goal>
-                  </goals>
-              </execution>
-            </executions>
-            <configuration>
-              <target>
-                <delete dir="${basedir}/../python/lib/pyspark.zip"/>
-                <zip destfile="${basedir}/../python/lib/pyspark.zip">
-                  <fileset dir="${basedir}/../python/" includes="pyspark/**/*"/>
-                </zip>
-              </target>
-            </configuration>
-        </plugin>
-      <!-- Use the shade plugin to create a big JAR with all the dependencies -->
+      <!-- zip pyspark archives to run python application on yarn mode -->
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-shade-plugin</artifactId>
-        <configuration>
-          <shadedArtifactAttached>false</shadedArtifactAttached>
-          <outputFile>${spark.jar}</outputFile>
-          <artifactSet>
-            <includes>
-              <include>*:*</include>
-            </includes>
-          </artifactSet>
-          <filters>
-            <filter>
-              <artifact>*:*</artifact>
-              <excludes>
-                <exclude>org/datanucleus/**</exclude>
-                <exclude>META-INF/*.SF</exclude>
-                <exclude>META-INF/*.DSA</exclude>
-                <exclude>META-INF/*.RSA</exclude>
-              </excludes>
-            </filter>
-          </filters>
-        </configuration>
-        <executions>
-          <execution>
-            <phase>package</phase>
-            <goals>
-              <goal>shade</goal>
-            </goals>
-            <configuration>
-              <transformers>
-                <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
-                <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
-                  <resource>META-INF/services/org.apache.hadoop.fs.FileSystem</resource>
-                </transformer>
-                <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
-                  <resource>reference.conf</resource>
-                </transformer>
-                <transformer implementation="org.apache.maven.plugins.shade.resource.DontIncludeResourceTransformer">
-                  <resource>log4j.properties</resource>
-                </transformer>
-                <transformer implementation="org.apache.maven.plugins.shade.resource.ApacheLicenseResourceTransformer"/>
-                <transformer implementation="org.apache.maven.plugins.shade.resource.ApacheNoticeResourceTransformer"/>
-              </transformers>
-            </configuration>
-          </execution>
-        </executions>
+          <artifactId>maven-antrun-plugin</artifactId>
+          <executions>
+            <execution>
+              <phase>package</phase>
+                <goals>
+                  <goal>run</goal>
+                </goals>
+            </execution>
+          </executions>
+          <configuration>
+            <target>
+              <delete dir="${basedir}/../python/lib/pyspark.zip"/>
+              <zip destfile="${basedir}/../python/lib/pyspark.zip">
+                <fileset dir="${basedir}/../python/" includes="pyspark/**/*"/>
+              </zip>
+            </target>
+          </configuration>
       </plugin>
     </plugins>
   </build>

http://git-wip-us.apache.org/repos/asf/spark/blob/24d7d2e4/bin/spark-class
----------------------------------------------------------------------
diff --git a/bin/spark-class b/bin/spark-class
index e710e38..b489591 100755
--- a/bin/spark-class
+++ b/bin/spark-class
@@ -36,21 +36,20 @@ else
 fi
 
 # Find Spark jars.
-# TODO: change the directory name when Spark jars move from "lib".
 if [ -f "${SPARK_HOME}/RELEASE" ]; then
-  SPARK_JARS_DIR="${SPARK_HOME}/lib"
+  SPARK_JARS_DIR="${SPARK_HOME}/jars"
 else
-  SPARK_JARS_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION"
+  SPARK_JARS_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION/jars"
 fi
 
-if [ ! -d "$SPARK_JARS_DIR" ]; then
+if [ ! -d "$SPARK_JARS_DIR" ] && [ -z "$SPARK_TESTING$SPARK_SQL_TESTING" ]; then
   echo "Failed to find Spark jars directory ($SPARK_JARS_DIR)." 1>&2
   echo "You need to build Spark before running this program." 1>&2
   exit 1
+else
+  LAUNCH_CLASSPATH="$SPARK_JARS_DIR/*"
 fi
 
-LAUNCH_CLASSPATH="$SPARK_JARS_DIR/*"
-
 # Add the launcher build dir to the classpath if requested.
 if [ -n "$SPARK_PREPEND_CLASSES" ]; then
   LAUNCH_CLASSPATH="${SPARK_HOME}/launcher/target/scala-$SPARK_SCALA_VERSION/classes:$LAUNCH_CLASSPATH"

http://git-wip-us.apache.org/repos/asf/spark/blob/24d7d2e4/bin/spark-class2.cmd
----------------------------------------------------------------------
diff --git a/bin/spark-class2.cmd b/bin/spark-class2.cmd
index 565b87c..579efff 100644
--- a/bin/spark-class2.cmd
+++ b/bin/spark-class2.cmd
@@ -29,11 +29,10 @@ if "x%1"=="x" (
 )
 
 rem Find Spark jars.
-rem TODO: change the directory name when Spark jars move from "lib".
 if exist "%SPARK_HOME%\RELEASE" (
-  set SPARK_JARS_DIR="%SPARK_HOME%\lib"
+  set SPARK_JARS_DIR="%SPARK_HOME%\jars"
 ) else (
-  set SPARK_JARS_DIR="%SPARK_HOME%\assembly\target\scala-%SPARK_SCALA_VERSION%"
+  set SPARK_JARS_DIR="%SPARK_HOME%\assembly\target\scala-%SPARK_SCALA_VERSION%\jars"
 )
 
 if not exist "%SPARK_JARS_DIR%"\ (

http://git-wip-us.apache.org/repos/asf/spark/blob/24d7d2e4/core/src/main/scala/org/apache/spark/util/Utils.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 50bcf85..c304629 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -1121,9 +1121,9 @@ private[spark] object Utils extends Logging {
       extraEnvironment: Map[String, String] = Map.empty,
       redirectStderr: Boolean = true): String = {
     val process = executeCommand(command, workingDir, extraEnvironment, redirectStderr)
-    val output = new StringBuffer
+    val output = new StringBuilder
     val threadName = "read stdout for " + command(0)
-    def appendToOutput(s: String): Unit = output.append(s)
+    def appendToOutput(s: String): Unit = output.append(s).append("\n")
     val stdoutThread = processStreamByLine(threadName, process.getInputStream, appendToOutput)
     val exitCode = process.waitFor()
     stdoutThread.join()   // Wait for it to finish reading output

http://git-wip-us.apache.org/repos/asf/spark/blob/24d7d2e4/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala b/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala
index 280e496..4fa9f9a 100644
--- a/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala
@@ -201,24 +201,29 @@ class FileAppenderSuite extends SparkFunSuite with BeforeAndAfter with Logging {
 
     // Make sure only logging errors
     val logger = Logger.getRootLogger
+    val oldLogLevel = logger.getLevel
     logger.setLevel(Level.ERROR)
-    logger.addAppender(mockAppender)
+    try {
+      logger.addAppender(mockAppender)
 
-    val testOutputStream = new PipedOutputStream()
-    val testInputStream = new PipedInputStream(testOutputStream)
+      val testOutputStream = new PipedOutputStream()
+      val testInputStream = new PipedInputStream(testOutputStream)
 
-    // Close the stream before appender tries to read will cause an IOException
-    testInputStream.close()
-    testOutputStream.close()
-    val appender = FileAppender(testInputStream, testFile, new SparkConf)
+      // Close the stream before appender tries to read will cause an IOException
+      testInputStream.close()
+      testOutputStream.close()
+      val appender = FileAppender(testInputStream, testFile, new SparkConf)
 
-    appender.awaitTermination()
+      appender.awaitTermination()
 
-    // If InputStream was closed without first stopping the appender, an exception will be logged
-    verify(mockAppender, atLeast(1)).doAppend(loggingEventCaptor.capture)
-    val loggingEvent = loggingEventCaptor.getValue
-    assert(loggingEvent.getThrowableInformation !== null)
-    assert(loggingEvent.getThrowableInformation.getThrowable.isInstanceOf[IOException])
+      // If InputStream was closed without first stopping the appender, an exception will be logged
+      verify(mockAppender, atLeast(1)).doAppend(loggingEventCaptor.capture)
+      val loggingEvent = loggingEventCaptor.getValue
+      assert(loggingEvent.getThrowableInformation !== null)
+      assert(loggingEvent.getThrowableInformation.getThrowable.isInstanceOf[IOException])
+    } finally {
+      logger.setLevel(oldLogLevel)
+    }
   }
 
   test("file appender async close stream gracefully") {
@@ -228,30 +233,35 @@ class FileAppenderSuite extends SparkFunSuite with BeforeAndAfter with Logging {
 
     // Make sure only logging errors
     val logger = Logger.getRootLogger
+    val oldLogLevel = logger.getLevel
     logger.setLevel(Level.ERROR)
-    logger.addAppender(mockAppender)
+    try {
+      logger.addAppender(mockAppender)
 
-    val testOutputStream = new PipedOutputStream()
-    val testInputStream = new PipedInputStream(testOutputStream) with LatchedInputStream
+      val testOutputStream = new PipedOutputStream()
+      val testInputStream = new PipedInputStream(testOutputStream) with LatchedInputStream
 
-    // Close the stream before appender tries to read will cause an IOException
-    testInputStream.close()
-    testOutputStream.close()
-    val appender = FileAppender(testInputStream, testFile, new SparkConf)
+      // Close the stream before appender tries to read will cause an IOException
+      testInputStream.close()
+      testOutputStream.close()
+      val appender = FileAppender(testInputStream, testFile, new SparkConf)
 
-    // Stop the appender before an IOException is called during read
-    testInputStream.latchReadStarted.await()
-    appender.stop()
-    testInputStream.latchReadProceed.countDown()
+      // Stop the appender before an IOException is called during read
+      testInputStream.latchReadStarted.await()
+      appender.stop()
+      testInputStream.latchReadProceed.countDown()
 
-    appender.awaitTermination()
+      appender.awaitTermination()
 
-    // Make sure no IOException errors have been logged as a result of appender closing gracefully
-    verify(mockAppender, atLeast(0)).doAppend(loggingEventCaptor.capture)
-    import scala.collection.JavaConverters._
-    loggingEventCaptor.getAllValues.asScala.foreach { loggingEvent =>
-      assert(loggingEvent.getThrowableInformation === null
-        || !loggingEvent.getThrowableInformation.getThrowable.isInstanceOf[IOException])
+      // Make sure no IOException errors have been logged as a result of appender closing gracefully
+      verify(mockAppender, atLeast(0)).doAppend(loggingEventCaptor.capture)
+      import scala.collection.JavaConverters._
+      loggingEventCaptor.getAllValues.asScala.foreach { loggingEvent =>
+        assert(loggingEvent.getThrowableInformation === null
+          || !loggingEvent.getThrowableInformation.getThrowable.isInstanceOf[IOException])
+      }
+    } finally {
+      logger.setLevel(oldLogLevel)
     }
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/24d7d2e4/dev/deps/spark-deps-hadoop-2.2
----------------------------------------------------------------------
diff --git a/dev/deps/spark-deps-hadoop-2.2 b/dev/deps/spark-deps-hadoop-2.2
index 3865a9f..2c24366 100644
--- a/dev/deps/spark-deps-hadoop-2.2
+++ b/dev/deps/spark-deps-hadoop-2.2
@@ -12,7 +12,6 @@ asm-3.1.jar
 asm-commons-3.1.jar
 asm-tree-3.1.jar
 avro-1.7.7.jar
-avro-ipc-1.7.7-tests.jar
 avro-ipc-1.7.7.jar
 avro-mapred-1.7.7-hadoop2.jar
 bonecp-0.8.0.RELEASE.jar
@@ -61,6 +60,7 @@ grizzly-http-2.1.2.jar
 grizzly-http-server-2.1.2.jar
 grizzly-http-servlet-2.1.2.jar
 grizzly-rcm-2.1.2.jar
+guava-14.0.1.jar
 guice-3.0.jar
 guice-servlet-3.0.jar
 hadoop-annotations-2.2.0.jar
@@ -164,7 +164,6 @@ scala-parser-combinators_2.11-1.0.4.jar
 scala-reflect-2.11.8.jar
 scala-xml_2.11-1.0.2.jar
 scalap-2.11.8.jar
-servlet-api-2.5.jar
 slf4j-api-1.7.16.jar
 slf4j-log4j12-1.7.16.jar
 snappy-0.2.jar
@@ -177,7 +176,6 @@ stream-2.7.0.jar
 stringtemplate-3.2.1.jar
 super-csv-2.2.0.jar
 univocity-parsers-1.5.6.jar
-unused-1.0.0.jar
 xbean-asm5-shaded-4.4.jar
 xmlenc-0.52.jar
 xz-1.0.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/24d7d2e4/dev/deps/spark-deps-hadoop-2.3
----------------------------------------------------------------------
diff --git a/dev/deps/spark-deps-hadoop-2.3 b/dev/deps/spark-deps-hadoop-2.3
index 4313799..e9cb0d8 100644
--- a/dev/deps/spark-deps-hadoop-2.3
+++ b/dev/deps/spark-deps-hadoop-2.3
@@ -12,7 +12,6 @@ asm-3.1.jar
 asm-commons-3.1.jar
 asm-tree-3.1.jar
 avro-1.7.7.jar
-avro-ipc-1.7.7-tests.jar
 avro-ipc-1.7.7.jar
 avro-mapred-1.7.7-hadoop2.jar
 base64-2.3.8.jar
@@ -56,6 +55,7 @@ eigenbase-properties-1.1.5.jar
 geronimo-annotation_1.0_spec-1.1.1.jar
 geronimo-jaspic_1.0_spec-1.0.jar
 geronimo-jta_1.1_spec-1.1.1.jar
+guava-14.0.1.jar
 guice-3.0.jar
 guice-servlet-3.0.jar
 hadoop-annotations-2.3.0.jar
@@ -155,7 +155,6 @@ scala-parser-combinators_2.11-1.0.4.jar
 scala-reflect-2.11.8.jar
 scala-xml_2.11-1.0.2.jar
 scalap-2.11.8.jar
-servlet-api-2.5.jar
 slf4j-api-1.7.16.jar
 slf4j-log4j12-1.7.16.jar
 snappy-0.2.jar
@@ -168,7 +167,6 @@ stream-2.7.0.jar
 stringtemplate-3.2.1.jar
 super-csv-2.2.0.jar
 univocity-parsers-1.5.6.jar
-unused-1.0.0.jar
 xbean-asm5-shaded-4.4.jar
 xmlenc-0.52.jar
 xz-1.0.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/24d7d2e4/dev/deps/spark-deps-hadoop-2.4
----------------------------------------------------------------------
diff --git a/dev/deps/spark-deps-hadoop-2.4 b/dev/deps/spark-deps-hadoop-2.4
index 910ea68..d8d1840 100644
--- a/dev/deps/spark-deps-hadoop-2.4
+++ b/dev/deps/spark-deps-hadoop-2.4
@@ -12,7 +12,6 @@ asm-3.1.jar
 asm-commons-3.1.jar
 asm-tree-3.1.jar
 avro-1.7.7.jar
-avro-ipc-1.7.7-tests.jar
 avro-ipc-1.7.7.jar
 avro-mapred-1.7.7-hadoop2.jar
 base64-2.3.8.jar
@@ -56,6 +55,7 @@ eigenbase-properties-1.1.5.jar
 geronimo-annotation_1.0_spec-1.1.1.jar
 geronimo-jaspic_1.0_spec-1.0.jar
 geronimo-jta_1.1_spec-1.1.1.jar
+guava-14.0.1.jar
 guice-3.0.jar
 guice-servlet-3.0.jar
 hadoop-annotations-2.4.0.jar
@@ -156,7 +156,6 @@ scala-parser-combinators_2.11-1.0.4.jar
 scala-reflect-2.11.8.jar
 scala-xml_2.11-1.0.2.jar
 scalap-2.11.8.jar
-servlet-api-2.5.jar
 slf4j-api-1.7.16.jar
 slf4j-log4j12-1.7.16.jar
 snappy-0.2.jar
@@ -169,7 +168,6 @@ stream-2.7.0.jar
 stringtemplate-3.2.1.jar
 super-csv-2.2.0.jar
 univocity-parsers-1.5.6.jar
-unused-1.0.0.jar
 xbean-asm5-shaded-4.4.jar
 xmlenc-0.52.jar
 xz-1.0.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/24d7d2e4/dev/deps/spark-deps-hadoop-2.6
----------------------------------------------------------------------
diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6
index 0692f24..8beede1 100644
--- a/dev/deps/spark-deps-hadoop-2.6
+++ b/dev/deps/spark-deps-hadoop-2.6
@@ -16,7 +16,6 @@ asm-3.1.jar
 asm-commons-3.1.jar
 asm-tree-3.1.jar
 avro-1.7.7.jar
-avro-ipc-1.7.7-tests.jar
 avro-ipc-1.7.7.jar
 avro-mapred-1.7.7-hadoop2.jar
 base64-2.3.8.jar
@@ -61,6 +60,7 @@ geronimo-annotation_1.0_spec-1.1.1.jar
 geronimo-jaspic_1.0_spec-1.0.jar
 geronimo-jta_1.1_spec-1.1.1.jar
 gson-2.2.4.jar
+guava-14.0.1.jar
 guice-3.0.jar
 guice-servlet-3.0.jar
 hadoop-annotations-2.6.0.jar
@@ -162,7 +162,6 @@ scala-parser-combinators_2.11-1.0.4.jar
 scala-reflect-2.11.8.jar
 scala-xml_2.11-1.0.2.jar
 scalap-2.11.8.jar
-servlet-api-2.5.jar
 slf4j-api-1.7.16.jar
 slf4j-log4j12-1.7.16.jar
 snappy-0.2.jar
@@ -175,7 +174,6 @@ stream-2.7.0.jar
 stringtemplate-3.2.1.jar
 super-csv-2.2.0.jar
 univocity-parsers-1.5.6.jar
-unused-1.0.0.jar
 xbean-asm5-shaded-4.4.jar
 xercesImpl-2.9.1.jar
 xmlenc-0.52.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/24d7d2e4/dev/deps/spark-deps-hadoop-2.7
----------------------------------------------------------------------
diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7
index e397558..a9d814f 100644
--- a/dev/deps/spark-deps-hadoop-2.7
+++ b/dev/deps/spark-deps-hadoop-2.7
@@ -16,7 +16,6 @@ asm-3.1.jar
 asm-commons-3.1.jar
 asm-tree-3.1.jar
 avro-1.7.7.jar
-avro-ipc-1.7.7-tests.jar
 avro-ipc-1.7.7.jar
 avro-mapred-1.7.7-hadoop2.jar
 base64-2.3.8.jar
@@ -61,6 +60,7 @@ geronimo-annotation_1.0_spec-1.1.1.jar
 geronimo-jaspic_1.0_spec-1.0.jar
 geronimo-jta_1.1_spec-1.1.1.jar
 gson-2.2.4.jar
+guava-14.0.1.jar
 guice-3.0.jar
 guice-servlet-3.0.jar
 hadoop-annotations-2.7.0.jar
@@ -163,7 +163,6 @@ scala-parser-combinators_2.11-1.0.4.jar
 scala-reflect-2.11.8.jar
 scala-xml_2.11-1.0.2.jar
 scalap-2.11.8.jar
-servlet-api-2.5.jar
 slf4j-api-1.7.16.jar
 slf4j-log4j12-1.7.16.jar
 snappy-0.2.jar
@@ -176,7 +175,6 @@ stream-2.7.0.jar
 stringtemplate-3.2.1.jar
 super-csv-2.2.0.jar
 univocity-parsers-1.5.6.jar
-unused-1.0.0.jar
 xbean-asm5-shaded-4.4.jar
 xercesImpl-2.9.1.jar
 xmlenc-0.52.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/24d7d2e4/dev/make-distribution.sh
----------------------------------------------------------------------
diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh
index dbdd42f..4f7544f 100755
--- a/dev/make-distribution.sh
+++ b/dev/make-distribution.sh
@@ -160,28 +160,35 @@ echo -e "\$ ${BUILD_COMMAND[@]}\n"
 
 # Make directories
 rm -rf "$DISTDIR"
-mkdir -p "$DISTDIR/lib"
+mkdir -p "$DISTDIR/jars"
 echo "Spark $VERSION$GITREVSTRING built for Hadoop $SPARK_HADOOP_VERSION" > "$DISTDIR/RELEASE"
 echo "Build flags: $@" >> "$DISTDIR/RELEASE"
 
 # Copy jars
-cp "$SPARK_HOME"/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/lib/"
-# This will fail if the -Pyarn profile is not provided
-# In this case, silence the error and ignore the return code of this command
-cp "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar "$DISTDIR/lib/" &> /dev/null || :
+cp "$SPARK_HOME"/assembly/target/scala*/jars/* "$DISTDIR/jars/"
+
+# Only create the yarn directory if the yarn artifacts were build.
+if [ -f "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar ]; then
+  mkdir "$DISTDIR"/yarn
+  cp "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar "$DISTDIR/yarn"
+fi
 
 # Copy examples and dependencies
 mkdir -p "$DISTDIR/examples/jars"
 cp "$SPARK_HOME"/examples/target/scala*/jars/* "$DISTDIR/examples/jars"
 
+# Deduplicate jars that have already been packaged as part of the main Spark dependencies.
+for f in "$DISTDIR/examples/jars/"*; do
+  name=$(basename "$f")
+  if [ -f "$DISTDIR/jars/$name" ]; then
+    rm "$DISTDIR/examples/jars/$name"
+  fi
+done
+
 # Copy example sources (needed for python and SQL)
 mkdir -p "$DISTDIR/examples/src/main"
 cp -r "$SPARK_HOME"/examples/src/main "$DISTDIR/examples/src/"
 
-if [ "$SPARK_HIVE" == "1" ]; then
-  cp "$SPARK_HOME"/lib_managed/jars/datanucleus*.jar "$DISTDIR/lib/"
-fi
-
 # Copy license and ASF files
 cp "$SPARK_HOME/LICENSE" "$DISTDIR"
 cp -r "$SPARK_HOME/licenses" "$DISTDIR"

http://git-wip-us.apache.org/repos/asf/spark/blob/24d7d2e4/dev/mima
----------------------------------------------------------------------
diff --git a/dev/mima b/dev/mima
index ea746e6..c355349 100755
--- a/dev/mima
+++ b/dev/mima
@@ -25,8 +25,8 @@ FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
 cd "$FWDIR"
 
 SPARK_PROFILES="-Pyarn -Pspark-ganglia-lgpl -Pkinesis-asl -Phive-thriftserver -Phive"
-TOOLS_CLASSPATH="$(build/sbt "export tools/fullClasspath" | tail -n1)"
-OLD_DEPS_CLASSPATH="$(build/sbt $SPARK_PROFILES "export oldDeps/fullClasspath" | tail -n1)"
+TOOLS_CLASSPATH="$(build/sbt -DcopyDependencies=false "export tools/fullClasspath" | tail -n1)"
+OLD_DEPS_CLASSPATH="$(build/sbt -DcopyDependencies=false $SPARK_PROFILES "export oldDeps/fullClasspath" | tail -n1)"
 
 rm -f .generated-mima*
 
@@ -36,7 +36,7 @@ java \
   -cp "$TOOLS_CLASSPATH:$OLD_DEPS_CLASSPATH" \
   org.apache.spark.tools.GenerateMIMAIgnore
 
-echo -e "q\n" | build/sbt mimaReportBinaryIssues | grep -v -e "info.*Resolving"
+echo -e "q\n" | build/sbt -DcopyDependencies=false "$@" mimaReportBinaryIssues | grep -v -e "info.*Resolving"
 ret_val=$?
 
 if [ $ret_val != 0 ]; then

http://git-wip-us.apache.org/repos/asf/spark/blob/24d7d2e4/dev/run-tests.py
----------------------------------------------------------------------
diff --git a/dev/run-tests.py b/dev/run-tests.py
index c294474..cbe3472 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -350,7 +350,7 @@ def build_spark_sbt(hadoop_version):
 def build_spark_assembly_sbt(hadoop_version):
     # Enable all of the profiles for the build:
     build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
-    sbt_goals = ["assembly/assembly"]
+    sbt_goals = ["assembly/package"]
     profiles_and_goals = build_profiles + sbt_goals
     print("[info] Building Spark assembly (w/Hive 1.2.1) using SBT with these arguments: ",
           " ".join(profiles_and_goals))
@@ -371,9 +371,10 @@ def build_apache_spark(build_tool, hadoop_version):
         build_spark_sbt(hadoop_version)
 
 
-def detect_binary_inop_with_mima():
+def detect_binary_inop_with_mima(hadoop_version):
+    build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
     set_title_and_block("Detecting binary incompatibilities with MiMa", "BLOCK_MIMA")
-    run_cmd([os.path.join(SPARK_HOME, "dev", "mima")])
+    run_cmd([os.path.join(SPARK_HOME, "dev", "mima")] + build_profiles)
 
 
 def run_scala_tests_maven(test_profiles):
@@ -571,8 +572,8 @@ def main():
     # backwards compatibility checks
     if build_tool == "sbt":
         # Note: compatibility tests only supported in sbt for now
-        detect_binary_inop_with_mima()
-        # Since we did not build assembly/assembly before running dev/mima, we need to
+        detect_binary_inop_with_mima(hadoop_version)
+        # Since we did not build assembly/package before running dev/mima, we need to
         # do it here because the tests still rely on it; see SPARK-13294 for details.
         build_spark_assembly_sbt(hadoop_version)
 

http://git-wip-us.apache.org/repos/asf/spark/blob/24d7d2e4/docs/sql-programming-guide.md
----------------------------------------------------------------------
diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
index 2fdc97f..274a8ed 100644
--- a/docs/sql-programming-guide.md
+++ b/docs/sql-programming-guide.md
@@ -1687,12 +1687,7 @@ on all of the worker nodes, as they will need access to the Hive serialization a
 (SerDes) in order to access data stored in Hive.
 
 Configuration of Hive is done by placing your `hive-site.xml`, `core-site.xml` (for security configuration),
- `hdfs-site.xml` (for HDFS configuration) file in `conf/`. Please note when running
-the query on a YARN cluster (`cluster` mode), the `datanucleus` jars under the `lib` directory
-and `hive-site.xml` under `conf/` directory need to be available on the driver and all executors launched by the
-YARN cluster. The convenient way to do this is adding them through the `--jars` option and `--file` option of the
-`spark-submit` command.
-
+`hdfs-site.xml` (for HDFS configuration) file in `conf/`.
 
 <div class="codetabs">
 

http://git-wip-us.apache.org/repos/asf/spark/blob/24d7d2e4/examples/pom.xml
----------------------------------------------------------------------
diff --git a/examples/pom.xml b/examples/pom.xml
index b7f3797..4a20370 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -27,13 +27,16 @@
 
   <groupId>org.apache.spark</groupId>
   <artifactId>spark-examples_2.11</artifactId>
-  <properties>
-    <sbt.project.name>examples</sbt.project.name>
-  </properties>
   <packaging>jar</packaging>
   <name>Spark Project Examples</name>
   <url>http://spark.apache.org/</url>
 
+  <properties>
+    <sbt.project.name>examples</sbt.project.name>
+    <build.testJarPhase>none</build.testJarPhase>
+    <build.copyDependenciesPhase>package</build.copyDependenciesPhase>
+  </properties>
+
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
@@ -77,23 +80,6 @@
     </dependency>
     <dependency>
       <groupId>org.apache.hbase</groupId>
-      <artifactId>hbase-testing-util</artifactId>
-      <version>${hbase.version}</version>
-      <scope>${hbase.deps.scope}</scope>
-      <exclusions>
-        <exclusion>
-          <!-- SPARK-4455 -->
-          <groupId>org.apache.hbase</groupId>
-          <artifactId>hbase-annotations</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>org.jruby</groupId>
-          <artifactId>jruby-complete</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.hbase</groupId>
       <artifactId>hbase-protocol</artifactId>
       <version>${hbase.version}</version>
       <scope>${hbase.deps.scope}</scope>
@@ -140,6 +126,10 @@
           <artifactId>hbase-annotations</artifactId>
         </exclusion>
         <exclusion>
+          <groupId>org.apache.hbase</groupId>
+          <artifactId>hbase-common</artifactId>
+        </exclusion>
+        <exclusion>
           <groupId>org.apache.hadoop</groupId>
           <artifactId>hadoop-core</artifactId>
         </exclusion>
@@ -209,13 +199,6 @@
       <scope>${hbase.deps.scope}</scope>
     </dependency>
     <dependency>
-      <groupId>org.apache.hbase</groupId>
-      <artifactId>hbase-hadoop-compat</artifactId>
-      <version>${hbase.version}</version>
-      <type>test-jar</type>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
       <groupId>org.apache.commons</groupId>
       <artifactId>commons-math3</artifactId>
       <scope>provided</scope>
@@ -294,17 +277,6 @@
       <artifactId>scopt_${scala.binary.version}</artifactId>
       <version>3.3.0</version>
     </dependency>
-
-    <!--
-      The following dependencies are already present in the Spark assembly, so we want to force
-      them to be provided.
-    -->
-    <dependency>
-      <groupId>org.scala-lang</groupId>
-      <artifactId>scala-library</artifactId>
-      <scope>provided</scope>
-    </dependency>
-
   </dependencies>
 
   <build>
@@ -325,38 +297,6 @@
           <skip>true</skip>
         </configuration>
       </plugin>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-jar-plugin</artifactId>
-        <executions>
-          <execution>
-            <id>prepare-test-jar</id>
-            <phase>none</phase>
-            <goals>
-              <goal>test-jar</goal>
-            </goals>
-          </execution>
-        </executions>
-        <configuration>
-          <outputDirectory>${jars.target.dir}</outputDirectory>
-        </configuration>
-      </plugin>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-dependency-plugin</artifactId>
-        <executions>
-          <execution>
-            <phase>package</phase>
-            <goals>
-              <goal>copy-dependencies</goal>
-            </goals>
-            <configuration>
-              <includeScope>runtime</includeScope>
-              <outputDirectory>${jars.target.dir}</outputDirectory>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
     </plugins>
   </build>
   <profiles>

http://git-wip-us.apache.org/repos/asf/spark/blob/24d7d2e4/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
----------------------------------------------------------------------
diff --git a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
index d02b2a4..7a5e37c 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
@@ -144,10 +144,26 @@ abstract class AbstractCommandBuilder {
     boolean isTesting = "1".equals(getenv("SPARK_TESTING"));
     if (prependClasses || isTesting) {
       String scala = getScalaVersion();
-      List<String> projects = Arrays.asList("core", "repl", "mllib", "graphx",
-        "streaming", "tools", "sql/catalyst", "sql/core", "sql/hive", "sql/hive-thriftserver",
-        "yarn", "launcher",
-        "common/network-common", "common/network-shuffle", "common/network-yarn");
+      List<String> projects = Arrays.asList(
+        "common/network-common",
+        "common/network-shuffle",
+        "common/network-yarn",
+        "common/sketch",
+        "common/tags",
+        "common/unsafe",
+        "core",
+        "examples",
+        "graphx",
+        "launcher",
+        "mllib",
+        "repl",
+        "sql/catalyst",
+        "sql/core",
+        "sql/hive",
+        "sql/hive-thriftserver",
+        "streaming",
+        "yarn"
+      );
       if (prependClasses) {
         if (!isTesting) {
           System.err.println(
@@ -174,31 +190,12 @@ abstract class AbstractCommandBuilder {
     // Add Spark jars to the classpath. For the testing case, we rely on the test code to set and
     // propagate the test classpath appropriately. For normal invocation, look for the jars
     // directory under SPARK_HOME.
-    String jarsDir = findJarsDir(getSparkHome(), getScalaVersion(), !isTesting);
+    boolean isTestingSql = "1".equals(getenv("SPARK_SQL_TESTING"));
+    String jarsDir = findJarsDir(getSparkHome(), getScalaVersion(), !isTesting && !isTestingSql);
     if (jarsDir != null) {
       addToClassPath(cp, join(File.separator, jarsDir, "*"));
     }
 
-    // Datanucleus jars must be included on the classpath. Datanucleus jars do not work if only
-    // included in the uber jar as plugin.xml metadata is lost. Both sbt and maven will populate
-    // "lib_managed/jars/" with the datanucleus jars when Spark is built with Hive
-    File libdir;
-    if (new File(sparkHome, "RELEASE").isFile()) {
-      libdir = new File(sparkHome, "lib");
-    } else {
-      libdir = new File(sparkHome, "lib_managed/jars");
-    }
-
-    if (libdir.isDirectory()) {
-      for (File jar : libdir.listFiles()) {
-        if (jar.getName().startsWith("datanucleus-")) {
-          addToClassPath(cp, jar.getAbsolutePath());
-        }
-      }
-    } else {
-      checkState(isTesting, "Library directory '%s' does not exist.", libdir.getAbsolutePath());
-    }
-
     addToClassPath(cp, getenv("HADOOP_CONF_DIR"));
     addToClassPath(cp, getenv("YARN_CONF_DIR"));
     addToClassPath(cp, getenv("SPARK_DIST_CLASSPATH"));

http://git-wip-us.apache.org/repos/asf/spark/blob/24d7d2e4/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java
----------------------------------------------------------------------
diff --git a/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java b/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java
index a08c8dc..91586aa 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java
@@ -358,12 +358,12 @@ class CommandBuilderUtils {
     // TODO: change to the correct directory once the assembly build is changed.
     File libdir;
     if (new File(sparkHome, "RELEASE").isFile()) {
-      libdir = new File(sparkHome, "lib");
+      libdir = new File(sparkHome, "jars");
       checkState(!failIfNotFound || libdir.isDirectory(),
         "Library directory '%s' does not exist.",
         libdir.getAbsolutePath());
     } else {
-      libdir = new File(sparkHome, String.format("assembly/target/scala-%s", scalaVersion));
+      libdir = new File(sparkHome, String.format("assembly/target/scala-%s/jars", scalaVersion));
       if (!libdir.isDirectory()) {
         checkState(!failIfNotFound,
           "Library directory '%s' does not exist; make sure Spark is built.",

http://git-wip-us.apache.org/repos/asf/spark/blob/24d7d2e4/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
----------------------------------------------------------------------
diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
index 56e4107..c31c42c 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
@@ -336,6 +336,7 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
   }
 
   private List<String> findExamplesJars() {
+    boolean isTesting = "1".equals(getenv("SPARK_TESTING"));
     List<String> examplesJars = new ArrayList<>();
     String sparkHome = getSparkHome();
 
@@ -346,11 +347,15 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
       jarsDir = new File(sparkHome,
         String.format("examples/target/scala-%s/jars", getScalaVersion()));
     }
-    checkState(jarsDir.isDirectory(), "Examples jars directory '%s' does not exist.",
+
+    boolean foundDir = jarsDir.isDirectory();
+    checkState(isTesting || foundDir, "Examples jars directory '%s' does not exist.",
         jarsDir.getAbsolutePath());
 
-    for (File f: jarsDir.listFiles()) {
-      examplesJars.add(f.getAbsolutePath());
+    if (foundDir) {
+      for (File f: jarsDir.listFiles()) {
+        examplesJars.add(f.getAbsolutePath());
+      }
     }
     return examplesJars;
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/24d7d2e4/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index e135c92..984b285 100644
--- a/pom.xml
+++ b/pom.xml
@@ -185,6 +185,10 @@
     <!-- Modules that copy jars to the build directory should do so under this location. -->
     <jars.target.dir>${project.build.directory}/scala-${scala.binary.version}/jars</jars.target.dir>
 
+    <!-- Allow modules to enable / disable certain build plugins easily. -->
+    <build.testJarPhase>prepare-package</build.testJarPhase>
+    <build.copyDependenciesPhase>none</build.copyDependenciesPhase>
+
     <!--
       Dependency scopes that can be overridden by enabling certain profiles. These profiles are
       declared in the projects that build assemblies.
@@ -238,15 +242,6 @@
   </pluginRepositories>
   <dependencies>
     <!--
-      This is a dummy dependency that is used along with the shading plug-in
-      to create effective poms on publishing (see SPARK-3812).
-    -->
-    <dependency>
-      <groupId>org.spark-project.spark</groupId>
-      <artifactId>unused</artifactId>
-      <version>1.0.0</version>
-    </dependency>
-    <!--
          This is needed by the scalatest plugin, and so is declared here to be available in
          all child modules, just as scalatest is run in all children
     -->
@@ -833,6 +828,14 @@
           </exclusion>
         </exclusions>
       </dependency>
+      <!-- avro-mapred for some reason depends on avro-ipc's test jar, so undo that. -->
+      <dependency>
+        <groupId>org.apache.avro</groupId>
+        <artifactId>avro-ipc</artifactId>
+        <classifier>tests</classifier>
+        <version>${avro.version}</version>
+        <scope>test</scope>
+      </dependency>
       <dependency>
         <groupId>org.apache.avro</groupId>
         <artifactId>avro-mapred</artifactId>
@@ -1521,6 +1524,10 @@
             <groupId>org.codehaus.groovy</groupId>
             <artifactId>groovy-all</artifactId>
           </exclusion>
+          <exclusion>
+            <groupId>javax.servlet</groupId>
+            <artifactId>servlet-api</artifactId>
+          </exclusion>
         </exclusions>
       </dependency>
 
@@ -1916,6 +1923,7 @@
               -->
               <SPARK_DIST_CLASSPATH>${test_classpath}</SPARK_DIST_CLASSPATH>
               <SPARK_PREPEND_CLASSES>1</SPARK_PREPEND_CLASSES>
+              <SPARK_SCALA_VERSION>${scala.binary.version}</SPARK_SCALA_VERSION>
               <SPARK_TESTING>1</SPARK_TESTING>
               <JAVA_HOME>${test.java.home}</JAVA_HOME>
             </environmentVariables>
@@ -1964,6 +1972,7 @@
               -->
               <SPARK_DIST_CLASSPATH>${test_classpath}</SPARK_DIST_CLASSPATH>
               <SPARK_PREPEND_CLASSES>1</SPARK_PREPEND_CLASSES>
+              <SPARK_SCALA_VERSION>${scala.binary.version}</SPARK_SCALA_VERSION>
               <SPARK_TESTING>1</SPARK_TESTING>
               <JAVA_HOME>${test.java.home}</JAVA_HOME>
             </environmentVariables>
@@ -2146,6 +2155,7 @@
         <version>2.10</version>
         <executions>
           <execution>
+            <id>generate-test-classpath</id>
             <phase>test-compile</phase>
             <goals>
               <goal>build-classpath</goal>
@@ -2155,6 +2165,17 @@
               <outputProperty>test_classpath</outputProperty>
             </configuration>
           </execution>
+          <execution>
+            <id>copy-module-dependencies</id>
+            <phase>${build.copyDependenciesPhase}</phase>
+            <goals>
+              <goal>copy-dependencies</goal>
+            </goals>
+            <configuration>
+              <includeScope>runtime</includeScope>
+              <outputDirectory>${jars.target.dir}</outputDirectory>
+            </configuration>
+          </execution>
         </executions>
       </plugin>
 
@@ -2169,9 +2190,6 @@
           <shadedArtifactAttached>false</shadedArtifactAttached>
           <artifactSet>
             <includes>
-              <!-- At a minimum we must include this to force effective pom generation -->
-              <include>org.spark-project.spark:unused</include>
-
               <include>org.eclipse.jetty:jetty-io</include>
               <include>org.eclipse.jetty:jetty-http</include>
               <include>org.eclipse.jetty:jetty-continuation</include>
@@ -2302,7 +2320,7 @@
         <executions>
           <execution>
             <id>prepare-test-jar</id>
-            <phase>prepare-package</phase>
+            <phase>${build.testJarPhase}</phase>
             <goals>
               <goal>test-jar</goal>
             </goals>

http://git-wip-us.apache.org/repos/asf/spark/blob/24d7d2e4/project/SparkBuild.scala
----------------------------------------------------------------------
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 5d62b68..b32480b 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -57,11 +57,12 @@ object BuildCommons {
     Seq("yarn", "java8-tests", "ganglia-lgpl", "streaming-kinesis-asl",
       "docker-integration-tests").map(ProjectRef(buildLocation, _))
 
-  val assemblyProjects@Seq(assembly, networkYarn, streamingFlumeAssembly, streamingKafkaAssembly, streamingKinesisAslAssembly) =
-    Seq("assembly", "network-yarn", "streaming-flume-assembly", "streaming-kafka-assembly", "streaming-kinesis-asl-assembly")
+  val assemblyProjects@Seq(networkYarn, streamingFlumeAssembly, streamingKafkaAssembly, streamingKinesisAslAssembly) =
+    Seq("network-yarn", "streaming-flume-assembly", "streaming-kafka-assembly", "streaming-kinesis-asl-assembly")
       .map(ProjectRef(buildLocation, _))
 
-  val copyJarsProjects@Seq(examples) = Seq("examples").map(ProjectRef(buildLocation, _))
+  val copyJarsProjects@Seq(assembly, examples) = Seq("assembly", "examples")
+    .map(ProjectRef(buildLocation, _))
 
   val tools = ProjectRef(buildLocation, "tools")
   // Root project.
@@ -263,8 +264,14 @@ object SparkBuild extends PomBuild {
   /* Unsafe settings */
   enable(Unsafe.settings)(unsafe)
 
-  /* Set up tasks to copy dependencies during packaging. */
-  copyJarsProjects.foreach(enable(CopyDependencies.settings))
+  /*
+   * Set up tasks to copy dependencies during packaging. This step can be disabled in the command
+   * line, so that dev/mima can run without trying to copy these files again and potentially
+   * causing issues.
+   */
+  if (!"false".equals(System.getProperty("copyDependencies"))) {
+    copyJarsProjects.foreach(enable(CopyDependencies.settings))
+  }
 
   /* Enable Assembly for all assembly projects */
   assemblyProjects.foreach(enable(Assembly.settings))
@@ -477,8 +484,6 @@ object Assembly {
 
   val hadoopVersion = taskKey[String]("The version of hadoop that spark is compiled against.")
 
-  val deployDatanucleusJars = taskKey[Unit]("Deploy datanucleus jars to the spark/lib_managed/jars directory")
-
   lazy val settings = assemblySettings ++ Seq(
     test in assembly := {},
     hadoopVersion := {
@@ -497,27 +502,13 @@ object Assembly {
       s"${mName}-test-${v}.jar"
     },
     mergeStrategy in assembly := {
-      case PathList("org", "datanucleus", xs @ _*)             => MergeStrategy.discard
       case m if m.toLowerCase.endsWith("manifest.mf")          => MergeStrategy.discard
       case m if m.toLowerCase.matches("meta-inf.*\\.sf$")      => MergeStrategy.discard
       case "log4j.properties"                                  => MergeStrategy.discard
       case m if m.toLowerCase.startsWith("meta-inf/services/") => MergeStrategy.filterDistinctLines
       case "reference.conf"                                    => MergeStrategy.concat
       case _                                                   => MergeStrategy.first
-    },
-    deployDatanucleusJars := {
-      val jars: Seq[File] = (fullClasspath in assembly).value.map(_.data)
-        .filter(_.getPath.contains("org.datanucleus"))
-      var libManagedJars = new File(BuildCommons.sparkHome, "lib_managed/jars")
-      libManagedJars.mkdirs()
-      jars.foreach { jar =>
-        val dest = new File(libManagedJars, jar.getName)
-        if (!dest.exists()) {
-          Files.copy(jar.toPath, dest.toPath)
-        }
-      }
-    },
-    assembly <<= assembly.dependsOn(deployDatanucleusJars)
+    }
   )
 }
 
@@ -698,6 +689,13 @@ object Java8TestSettings {
 object TestSettings {
   import BuildCommons._
 
+  private val scalaBinaryVersion =
+    if (System.getProperty("scala-2.10") == "true") {
+      "2.10"
+    } else {
+      "2.11"
+    }
+
   lazy val settings = Seq (
     // Fork new JVMs for tests and set Java options for those
     fork := true,
@@ -707,6 +705,7 @@ object TestSettings {
       "SPARK_DIST_CLASSPATH" ->
         (fullClasspath in Test).value.files.map(_.getAbsolutePath).mkString(":").stripSuffix(":"),
       "SPARK_PREPEND_CLASSES" -> "1",
+      "SPARK_SCALA_VERSION" -> scalaBinaryVersion,
       "SPARK_TESTING" -> "1",
       "JAVA_HOME" -> sys.env.get("JAVA_HOME").getOrElse(sys.props("java.home"))),
     javaOptions in Test += s"-Djava.io.tmpdir=$testTempDir",
@@ -744,7 +743,7 @@ object TestSettings {
     // Make sure the test temp directory exists.
     resourceGenerators in Test <+= resourceManaged in Test map { outDir: File =>
       if (!new File(testTempDir).isDirectory()) {
-        require(new File(testTempDir).mkdirs())
+        require(new File(testTempDir).mkdirs(), s"Error creating temp directory $testTempDir.")
       }
       Seq[File]()
     },

http://git-wip-us.apache.org/repos/asf/spark/blob/24d7d2e4/python/pyspark/streaming/tests.py
----------------------------------------------------------------------
diff --git a/python/pyspark/streaming/tests.py b/python/pyspark/streaming/tests.py
index d010c0e..148bf7e 100644
--- a/python/pyspark/streaming/tests.py
+++ b/python/pyspark/streaming/tests.py
@@ -1482,7 +1482,7 @@ def search_kafka_assembly_jar():
         raise Exception(
             ("Failed to find Spark Streaming kafka assembly jar in %s. " % kafka_assembly_dir) +
             "You need to build Spark with "
-            "'build/sbt assembly/assembly streaming-kafka-assembly/assembly' or "
+            "'build/sbt assembly/package streaming-kafka-assembly/assembly' or "
             "'build/mvn package' before running this test.")
     elif len(jars) > 1:
         raise Exception(("Found multiple Spark Streaming Kafka assembly JARs: %s; please "
@@ -1548,7 +1548,7 @@ if __name__ == "__main__":
     elif are_kinesis_tests_enabled is False:
         sys.stderr.write("Skipping all Kinesis Python tests as the optional Kinesis project was "
                          "not compiled into a JAR. To run these tests, "
-                         "you need to build Spark with 'build/sbt -Pkinesis-asl assembly/assembly "
+                         "you need to build Spark with 'build/sbt -Pkinesis-asl assembly/package "
                          "streaming-kinesis-asl-assembly/assembly' or "
                          "'build/mvn -Pkinesis-asl package' before running this test.")
     else:
@@ -1556,7 +1556,7 @@ if __name__ == "__main__":
             ("Failed to find Spark Streaming Kinesis assembly jar in %s. "
              % kinesis_asl_assembly_dir) +
             "You need to build Spark with 'build/sbt -Pkinesis-asl "
-            "assembly/assembly streaming-kinesis-asl-assembly/assembly'"
+            "assembly/package streaming-kinesis-asl-assembly/assembly'"
             "or 'build/mvn -Pkinesis-asl package' before running this test.")
 
     sys.stderr.write("Running tests: %s \n" % (str(testcases)))

http://git-wip-us.apache.org/repos/asf/spark/blob/24d7d2e4/python/run-tests.py
----------------------------------------------------------------------
diff --git a/python/run-tests.py b/python/run-tests.py
index a9f8854..38b3bb8 100755
--- a/python/run-tests.py
+++ b/python/run-tests.py
@@ -53,11 +53,25 @@ LOG_FILE = os.path.join(SPARK_HOME, "python/unit-tests.log")
 FAILURE_REPORTING_LOCK = Lock()
 LOGGER = logging.getLogger()
 
+# Find out where the assembly jars are located.
+for scala in ["2.11", "2.10"]:
+    build_dir = os.path.join(SPARK_HOME, "assembly", "target", "scala-" + scala)
+    if os.path.isdir(build_dir):
+        SPARK_DIST_CLASSPATH = os.path.join(build_dir, "jars", "*")
+        break
+else:
+    raise Exception("Cannot find assembly build directory, please build Spark first.")
+
 
 def run_individual_python_test(test_name, pyspark_python):
     env = dict(os.environ)
-    env.update({'SPARK_TESTING': '1', 'PYSPARK_PYTHON': which(pyspark_python),
-                'PYSPARK_DRIVER_PYTHON': which(pyspark_python)})
+    env.update({
+        'SPARK_DIST_CLASSPATH': SPARK_DIST_CLASSPATH,
+        'SPARK_TESTING': '1',
+        'SPARK_PREPEND_CLASSES': '1',
+        'PYSPARK_PYTHON': which(pyspark_python),
+        'PYSPARK_DRIVER_PYTHON': which(pyspark_python)
+    })
     LOGGER.debug("Starting test(%s): %s", pyspark_python, test_name)
     start_time = time.time()
     try:

http://git-wip-us.apache.org/repos/asf/spark/blob/24d7d2e4/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala
----------------------------------------------------------------------
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala
index 33af624..2c7358e 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala
@@ -763,11 +763,15 @@ abstract class HiveThriftServer2Test extends SparkFunSuite with BeforeAndAfterAl
         extraEnvironment = Map(
           // Disables SPARK_TESTING to exclude log4j.properties in test directories.
           "SPARK_TESTING" -> "0",
+          // But set SPARK_SQL_TESTING to make spark-class happy.
+          "SPARK_SQL_TESTING" -> "1",
           // Points SPARK_PID_DIR to SPARK_HOME, otherwise only 1 Thrift server instance can be
           // started at a time, which is not Jenkins friendly.
           "SPARK_PID_DIR" -> pidDir.getCanonicalPath),
         redirectStderr = true)
 
+      logInfo(s"COMMAND: $command")
+      logInfo(s"OUTPUT: $lines")
       lines.split("\n").collectFirst {
         case line if line.contains(LOG_FILE_MARK) => new File(line.drop(LOG_FILE_MARK.length))
       }.getOrElse {

http://git-wip-us.apache.org/repos/asf/spark/blob/24d7d2e4/sql/hive/pom.xml
----------------------------------------------------------------------
diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml
index 58efd80..61504be 100644
--- a/sql/hive/pom.xml
+++ b/sql/hive/pom.xml
@@ -225,30 +225,6 @@
           <argLine>-da -Xmx3g -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=512m</argLine>
         </configuration>
       </plugin>
-
-      <!-- Deploy datanucleus jars to the spark/lib_managed/jars directory -->
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-dependency-plugin</artifactId>
-        <executions>
-          <execution>
-            <id>copy-dependencies</id>
-            <phase>package</phase>
-            <goals>
-              <goal>copy-dependencies</goal>
-            </goals>
-            <configuration>
-              <!-- basedir is spark/sql/hive/ -->
-              <outputDirectory>${basedir}/../../lib_managed/jars</outputDirectory>
-              <overWriteReleases>false</overWriteReleases>
-              <overWriteSnapshots>false</overWriteSnapshots>
-              <overWriteIfNewer>true</overWriteIfNewer>
-              <includeGroupIds>org.datanucleus</includeGroupIds>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
-
     </plugins>
   </build>
 </project>

http://git-wip-us.apache.org/repos/asf/spark/blob/24d7d2e4/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
----------------------------------------------------------------------
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index 4dd3ccd..336e29f 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -447,9 +447,6 @@ private[spark] class Client(
      *
      * Note that the archive cannot be a "local" URI. If none of the above settings are found,
      * then upload all files found in $SPARK_HOME/jars.
-     *
-     * TODO: currently the code looks in $SPARK_HOME/lib while the work to replace assemblies
-     * with a directory full of jars is ongoing.
      */
     val sparkArchive = sparkConf.get(SPARK_ARCHIVE)
     if (sparkArchive.isDefined) {

http://git-wip-us.apache.org/repos/asf/spark/blob/24d7d2e4/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala
----------------------------------------------------------------------
diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala
index 2eaafa0..74e268d 100644
--- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala
+++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala
@@ -273,7 +273,7 @@ class ClientSuite extends SparkFunSuite with Matchers with BeforeAndAfterAll
 
   test("distribute local spark jars") {
     val temp = Utils.createTempDir()
-    val jarsDir = new File(temp, "lib")
+    val jarsDir = new File(temp, "jars")
     assert(jarsDir.mkdir())
     val jar = TestUtils.createJarWithFiles(Map(), jarsDir)
     new FileOutputStream(new File(temp, "RELEASE")).close()


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org