You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by jo...@apache.org on 2015/12/30 21:48:01 UTC

spark git commit: [SPARK-10359] Enumerate dependencies in a file and diff against it for new pull requests

Repository: spark
Updated Branches:
  refs/heads/master d1ca634db -> 27a42c710


[SPARK-10359] Enumerate dependencies in a file and diff against it for new pull requests

This patch adds a new build check which enumerates Spark's resolved runtime classpath and saves it to a file, then diffs against that file to detect whether pull requests have introduced dependency changes. The aim of this check is to make it simpler to reason about whether pull requests that modify the build have introduced new dependencies or changed transitive dependencies in a way that affects the final classpath.

This supplants the checks added in SPARK-4123 / #5093, which are currently disabled due to bugs.

This patch is based on pwendell's work in #8531.

Closes #8531.

Author: Josh Rosen <jo...@databricks.com>
Author: Patrick Wendell <pa...@databricks.com>

Closes #10461 from JoshRosen/SPARK-10359.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/27a42c71
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/27a42c71
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/27a42c71

Branch: refs/heads/master
Commit: 27a42c7108ced48a7f558990de2e4fc7ed340119
Parents: d1ca634
Author: Josh Rosen <jo...@databricks.com>
Authored: Wed Dec 30 12:47:42 2015 -0800
Committer: Josh Rosen <jo...@databricks.com>
Committed: Wed Dec 30 12:47:42 2015 -0800

----------------------------------------------------------------------
 .rat-excludes                    |   1 +
 dev/deps/spark-deps-hadoop-2.3   | 184 +++++++++++++++++++++++++++++++++
 dev/deps/spark-deps-hadoop-2.4   | 185 ++++++++++++++++++++++++++++++++++
 dev/run-tests-jenkins.py         |   2 +-
 dev/run-tests.py                 |   8 ++
 dev/sparktestsupport/__init__.py |   1 +
 dev/sparktestsupport/modules.py  |  15 ++-
 dev/test-dependencies.sh         | 102 +++++++++++++++++++
 dev/tests/pr_new_dependencies.sh | 117 ---------------------
 pom.xml                          |  17 ++++
 10 files changed, 512 insertions(+), 120 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/27a42c71/.rat-excludes
----------------------------------------------------------------------
diff --git a/.rat-excludes b/.rat-excludes
index 3544c0f..bf071eb 100644
--- a/.rat-excludes
+++ b/.rat-excludes
@@ -85,3 +85,4 @@ org.apache.spark.sql.sources.DataSourceRegister
 org.apache.spark.scheduler.SparkHistoryListenerFactory
 .*parquet
 LZ4BlockInputStream.java
+spark-deps-.*

http://git-wip-us.apache.org/repos/asf/spark/blob/27a42c71/dev/deps/spark-deps-hadoop-2.3
----------------------------------------------------------------------
diff --git a/dev/deps/spark-deps-hadoop-2.3 b/dev/deps/spark-deps-hadoop-2.3
new file mode 100644
index 0000000..6014d50
--- /dev/null
+++ b/dev/deps/spark-deps-hadoop-2.3
@@ -0,0 +1,184 @@
+JavaEWAH-0.3.2.jar
+RoaringBitmap-0.5.11.jar
+ST4-4.0.4.jar
+activation-1.1.1.jar
+akka-actor_2.10-2.3.11.jar
+akka-remote_2.10-2.3.11.jar
+akka-slf4j_2.10-2.3.11.jar
+antlr-2.7.7.jar
+antlr-runtime-3.4.jar
+aopalliance-1.0.jar
+apache-log4j-extras-1.2.17.jar
+arpack_combined_all-0.1.jar
+asm-3.1.jar
+asm-commons-3.1.jar
+asm-tree-3.1.jar
+avro-1.7.7.jar
+avro-ipc-1.7.7-tests.jar
+avro-ipc-1.7.7.jar
+avro-mapred-1.7.7-hadoop2.jar
+base64-2.3.8.jar
+bcprov-jdk15on-1.51.jar
+bonecp-0.8.0.RELEASE.jar
+breeze-macros_2.10-0.11.2.jar
+breeze_2.10-0.11.2.jar
+calcite-avatica-1.2.0-incubating.jar
+calcite-core-1.2.0-incubating.jar
+calcite-linq4j-1.2.0-incubating.jar
+chill-java-0.5.0.jar
+chill_2.10-0.5.0.jar
+commons-beanutils-1.7.0.jar
+commons-beanutils-core-1.8.0.jar
+commons-cli-1.2.jar
+commons-codec-1.10.jar
+commons-collections-3.2.2.jar
+commons-compiler-2.7.6.jar
+commons-compress-1.4.1.jar
+commons-configuration-1.6.jar
+commons-dbcp-1.4.jar
+commons-digester-1.8.jar
+commons-httpclient-3.1.jar
+commons-io-2.4.jar
+commons-lang-2.6.jar
+commons-lang3-3.3.2.jar
+commons-logging-1.1.3.jar
+commons-math3-3.4.1.jar
+commons-net-2.2.jar
+commons-pool-1.5.4.jar
+compress-lzf-1.0.3.jar
+config-1.2.1.jar
+core-1.1.2.jar
+curator-client-2.4.0.jar
+curator-framework-2.4.0.jar
+curator-recipes-2.4.0.jar
+datanucleus-api-jdo-3.2.6.jar
+datanucleus-core-3.2.10.jar
+datanucleus-rdbms-3.2.9.jar
+derby-10.10.1.1.jar
+eigenbase-properties-1.1.5.jar
+geronimo-annotation_1.0_spec-1.1.1.jar
+geronimo-jaspic_1.0_spec-1.0.jar
+geronimo-jta_1.1_spec-1.1.1.jar
+groovy-all-2.1.6.jar
+guice-3.0.jar
+guice-servlet-3.0.jar
+hadoop-annotations-2.3.0.jar
+hadoop-auth-2.3.0.jar
+hadoop-client-2.3.0.jar
+hadoop-common-2.3.0.jar
+hadoop-hdfs-2.3.0.jar
+hadoop-mapreduce-client-app-2.3.0.jar
+hadoop-mapreduce-client-common-2.3.0.jar
+hadoop-mapreduce-client-core-2.3.0.jar
+hadoop-mapreduce-client-jobclient-2.3.0.jar
+hadoop-mapreduce-client-shuffle-2.3.0.jar
+hadoop-yarn-api-2.3.0.jar
+hadoop-yarn-client-2.3.0.jar
+hadoop-yarn-common-2.3.0.jar
+hadoop-yarn-server-common-2.3.0.jar
+hadoop-yarn-server-web-proxy-2.3.0.jar
+httpclient-4.3.2.jar
+httpcore-4.3.2.jar
+ivy-2.4.0.jar
+jackson-annotations-2.4.4.jar
+jackson-core-2.4.4.jar
+jackson-core-asl-1.9.13.jar
+jackson-databind-2.4.4.jar
+jackson-jaxrs-1.9.13.jar
+jackson-mapper-asl-1.9.13.jar
+jackson-module-scala_2.10-2.4.4.jar
+jackson-xc-1.9.13.jar
+janino-2.7.8.jar
+jansi-1.4.jar
+java-xmlbuilder-1.0.jar
+javax.inject-1.jar
+javax.servlet-3.0.0.v201112011016.jar
+javolution-5.5.1.jar
+jaxb-api-2.2.2.jar
+jaxb-impl-2.2.3-1.jar
+jcl-over-slf4j-1.7.10.jar
+jdo-api-3.0.1.jar
+jersey-core-1.9.jar
+jersey-guice-1.9.jar
+jersey-json-1.9.jar
+jersey-server-1.9.jar
+jets3t-0.9.3.jar
+jettison-1.1.jar
+jetty-6.1.26.jar
+jetty-all-7.6.0.v20120127.jar
+jetty-util-6.1.26.jar
+jline-2.10.5.jar
+jline-2.12.jar
+joda-time-2.9.jar
+jodd-core-3.5.2.jar
+jpam-1.1.jar
+json-20090211.jar
+json4s-ast_2.10-3.2.10.jar
+json4s-core_2.10-3.2.10.jar
+json4s-jackson_2.10-3.2.10.jar
+jsr305-1.3.9.jar
+jta-1.1.jar
+jtransforms-2.4.0.jar
+jul-to-slf4j-1.7.10.jar
+kryo-2.21.jar
+leveldbjni-all-1.8.jar
+libfb303-0.9.2.jar
+libthrift-0.9.2.jar
+log4j-1.2.17.jar
+lz4-1.3.0.jar
+mail-1.4.7.jar
+mesos-0.21.1-shaded-protobuf.jar
+metrics-core-3.1.2.jar
+metrics-graphite-3.1.2.jar
+metrics-json-3.1.2.jar
+metrics-jvm-3.1.2.jar
+minlog-1.2.jar
+mx4j-3.0.2.jar
+netty-3.8.0.Final.jar
+netty-all-4.0.29.Final.jar
+objenesis-1.2.jar
+opencsv-2.3.jar
+oro-2.0.8.jar
+paranamer-2.6.jar
+parquet-column-1.7.0.jar
+parquet-common-1.7.0.jar
+parquet-encoding-1.7.0.jar
+parquet-format-2.3.0-incubating.jar
+parquet-generator-1.7.0.jar
+parquet-hadoop-1.7.0.jar
+parquet-hadoop-bundle-1.6.0.jar
+parquet-jackson-1.7.0.jar
+pmml-agent-1.2.7.jar
+pmml-model-1.2.7.jar
+pmml-schema-1.2.7.jar
+protobuf-java-2.5.0.jar
+py4j-0.9.jar
+pyrolite-4.9.jar
+quasiquotes_2.10-2.0.0-M8.jar
+reflectasm-1.07-shaded.jar
+scala-compiler-2.10.5.jar
+scala-library-2.10.5.jar
+scala-reflect-2.10.5.jar
+scalap-2.10.5.jar
+servlet-api-2.5.jar
+slf4j-api-1.7.10.jar
+slf4j-log4j12-1.7.10.jar
+snappy-0.2.jar
+snappy-java-1.1.2.jar
+spire-macros_2.10-0.7.4.jar
+spire_2.10-0.7.4.jar
+stax-api-1.0-2.jar
+stax-api-1.0.1.jar
+stream-2.7.0.jar
+stringtemplate-3.2.1.jar
+super-csv-2.2.0.jar
+tachyon-client-0.8.2.jar
+tachyon-underfs-hdfs-0.8.2.jar
+tachyon-underfs-local-0.8.2.jar
+tachyon-underfs-s3-0.8.2.jar
+uncommons-maths-1.2.2a.jar
+unused-1.0.0.jar
+xbean-asm5-shaded-4.4.jar
+xmlenc-0.52.jar
+xz-1.0.jar
+zookeeper-3.4.5.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/27a42c71/dev/deps/spark-deps-hadoop-2.4
----------------------------------------------------------------------
diff --git a/dev/deps/spark-deps-hadoop-2.4 b/dev/deps/spark-deps-hadoop-2.4
new file mode 100644
index 0000000..f56e6f4
--- /dev/null
+++ b/dev/deps/spark-deps-hadoop-2.4
@@ -0,0 +1,185 @@
+JavaEWAH-0.3.2.jar
+RoaringBitmap-0.5.11.jar
+ST4-4.0.4.jar
+activation-1.1.1.jar
+akka-actor_2.10-2.3.11.jar
+akka-remote_2.10-2.3.11.jar
+akka-slf4j_2.10-2.3.11.jar
+antlr-2.7.7.jar
+antlr-runtime-3.4.jar
+aopalliance-1.0.jar
+apache-log4j-extras-1.2.17.jar
+arpack_combined_all-0.1.jar
+asm-3.1.jar
+asm-commons-3.1.jar
+asm-tree-3.1.jar
+avro-1.7.7.jar
+avro-ipc-1.7.7-tests.jar
+avro-ipc-1.7.7.jar
+avro-mapred-1.7.7-hadoop2.jar
+base64-2.3.8.jar
+bcprov-jdk15on-1.51.jar
+bonecp-0.8.0.RELEASE.jar
+breeze-macros_2.10-0.11.2.jar
+breeze_2.10-0.11.2.jar
+calcite-avatica-1.2.0-incubating.jar
+calcite-core-1.2.0-incubating.jar
+calcite-linq4j-1.2.0-incubating.jar
+chill-java-0.5.0.jar
+chill_2.10-0.5.0.jar
+commons-beanutils-1.7.0.jar
+commons-beanutils-core-1.8.0.jar
+commons-cli-1.2.jar
+commons-codec-1.10.jar
+commons-collections-3.2.2.jar
+commons-compiler-2.7.6.jar
+commons-compress-1.4.1.jar
+commons-configuration-1.6.jar
+commons-dbcp-1.4.jar
+commons-digester-1.8.jar
+commons-httpclient-3.1.jar
+commons-io-2.4.jar
+commons-lang-2.6.jar
+commons-lang3-3.3.2.jar
+commons-logging-1.1.3.jar
+commons-math3-3.4.1.jar
+commons-net-2.2.jar
+commons-pool-1.5.4.jar
+compress-lzf-1.0.3.jar
+config-1.2.1.jar
+core-1.1.2.jar
+curator-client-2.4.0.jar
+curator-framework-2.4.0.jar
+curator-recipes-2.4.0.jar
+datanucleus-api-jdo-3.2.6.jar
+datanucleus-core-3.2.10.jar
+datanucleus-rdbms-3.2.9.jar
+derby-10.10.1.1.jar
+eigenbase-properties-1.1.5.jar
+geronimo-annotation_1.0_spec-1.1.1.jar
+geronimo-jaspic_1.0_spec-1.0.jar
+geronimo-jta_1.1_spec-1.1.1.jar
+groovy-all-2.1.6.jar
+guice-3.0.jar
+guice-servlet-3.0.jar
+hadoop-annotations-2.4.0.jar
+hadoop-auth-2.4.0.jar
+hadoop-client-2.4.0.jar
+hadoop-common-2.4.0.jar
+hadoop-hdfs-2.4.0.jar
+hadoop-mapreduce-client-app-2.4.0.jar
+hadoop-mapreduce-client-common-2.4.0.jar
+hadoop-mapreduce-client-core-2.4.0.jar
+hadoop-mapreduce-client-jobclient-2.4.0.jar
+hadoop-mapreduce-client-shuffle-2.4.0.jar
+hadoop-yarn-api-2.4.0.jar
+hadoop-yarn-client-2.4.0.jar
+hadoop-yarn-common-2.4.0.jar
+hadoop-yarn-server-common-2.4.0.jar
+hadoop-yarn-server-web-proxy-2.4.0.jar
+httpclient-4.3.2.jar
+httpcore-4.3.2.jar
+ivy-2.4.0.jar
+jackson-annotations-2.4.4.jar
+jackson-core-2.4.4.jar
+jackson-core-asl-1.9.13.jar
+jackson-databind-2.4.4.jar
+jackson-jaxrs-1.9.13.jar
+jackson-mapper-asl-1.9.13.jar
+jackson-module-scala_2.10-2.4.4.jar
+jackson-xc-1.9.13.jar
+janino-2.7.8.jar
+jansi-1.4.jar
+java-xmlbuilder-1.0.jar
+javax.inject-1.jar
+javax.servlet-3.0.0.v201112011016.jar
+javolution-5.5.1.jar
+jaxb-api-2.2.2.jar
+jaxb-impl-2.2.3-1.jar
+jcl-over-slf4j-1.7.10.jar
+jdo-api-3.0.1.jar
+jersey-client-1.9.jar
+jersey-core-1.9.jar
+jersey-guice-1.9.jar
+jersey-json-1.9.jar
+jersey-server-1.9.jar
+jets3t-0.9.3.jar
+jettison-1.1.jar
+jetty-6.1.26.jar
+jetty-all-7.6.0.v20120127.jar
+jetty-util-6.1.26.jar
+jline-2.10.5.jar
+jline-2.12.jar
+joda-time-2.9.jar
+jodd-core-3.5.2.jar
+jpam-1.1.jar
+json-20090211.jar
+json4s-ast_2.10-3.2.10.jar
+json4s-core_2.10-3.2.10.jar
+json4s-jackson_2.10-3.2.10.jar
+jsr305-1.3.9.jar
+jta-1.1.jar
+jtransforms-2.4.0.jar
+jul-to-slf4j-1.7.10.jar
+kryo-2.21.jar
+leveldbjni-all-1.8.jar
+libfb303-0.9.2.jar
+libthrift-0.9.2.jar
+log4j-1.2.17.jar
+lz4-1.3.0.jar
+mail-1.4.7.jar
+mesos-0.21.1-shaded-protobuf.jar
+metrics-core-3.1.2.jar
+metrics-graphite-3.1.2.jar
+metrics-json-3.1.2.jar
+metrics-jvm-3.1.2.jar
+minlog-1.2.jar
+mx4j-3.0.2.jar
+netty-3.8.0.Final.jar
+netty-all-4.0.29.Final.jar
+objenesis-1.2.jar
+opencsv-2.3.jar
+oro-2.0.8.jar
+paranamer-2.6.jar
+parquet-column-1.7.0.jar
+parquet-common-1.7.0.jar
+parquet-encoding-1.7.0.jar
+parquet-format-2.3.0-incubating.jar
+parquet-generator-1.7.0.jar
+parquet-hadoop-1.7.0.jar
+parquet-hadoop-bundle-1.6.0.jar
+parquet-jackson-1.7.0.jar
+pmml-agent-1.2.7.jar
+pmml-model-1.2.7.jar
+pmml-schema-1.2.7.jar
+protobuf-java-2.5.0.jar
+py4j-0.9.jar
+pyrolite-4.9.jar
+quasiquotes_2.10-2.0.0-M8.jar
+reflectasm-1.07-shaded.jar
+scala-compiler-2.10.5.jar
+scala-library-2.10.5.jar
+scala-reflect-2.10.5.jar
+scalap-2.10.5.jar
+servlet-api-2.5.jar
+slf4j-api-1.7.10.jar
+slf4j-log4j12-1.7.10.jar
+snappy-0.2.jar
+snappy-java-1.1.2.jar
+spire-macros_2.10-0.7.4.jar
+spire_2.10-0.7.4.jar
+stax-api-1.0-2.jar
+stax-api-1.0.1.jar
+stream-2.7.0.jar
+stringtemplate-3.2.1.jar
+super-csv-2.2.0.jar
+tachyon-client-0.8.2.jar
+tachyon-underfs-hdfs-0.8.2.jar
+tachyon-underfs-local-0.8.2.jar
+tachyon-underfs-s3-0.8.2.jar
+uncommons-maths-1.2.2a.jar
+unused-1.0.0.jar
+xbean-asm5-shaded-4.4.jar
+xmlenc-0.52.jar
+xz-1.0.jar
+zookeeper-3.4.5.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/27a42c71/dev/run-tests-jenkins.py
----------------------------------------------------------------------
diff --git a/dev/run-tests-jenkins.py b/dev/run-tests-jenkins.py
index 42afca0..6501721 100755
--- a/dev/run-tests-jenkins.py
+++ b/dev/run-tests-jenkins.py
@@ -124,6 +124,7 @@ def run_tests(tests_timeout):
         ERROR_CODES["BLOCK_R_STYLE"]: 'R style tests',
         ERROR_CODES["BLOCK_DOCUMENTATION"]: 'to generate documentation',
         ERROR_CODES["BLOCK_BUILD"]: 'to build',
+        ERROR_CODES["BLOCK_BUILD_TESTS"]: 'build dependency tests',
         ERROR_CODES["BLOCK_MIMA"]: 'MiMa tests',
         ERROR_CODES["BLOCK_SPARK_UNIT_TESTS"]: 'Spark unit tests',
         ERROR_CODES["BLOCK_PYSPARK_UNIT_TESTS"]: 'PySpark unit tests',
@@ -193,7 +194,6 @@ def main():
     pr_tests = [
         "pr_merge_ability",
         "pr_public_classes"
-        # DISABLED (pwendell) "pr_new_dependencies"
     ]
 
     # `bind_message_base` returns a function to generate messages for Github posting

http://git-wip-us.apache.org/repos/asf/spark/blob/27a42c71/dev/run-tests.py
----------------------------------------------------------------------
diff --git a/dev/run-tests.py b/dev/run-tests.py
index 6129f87..706e2d1 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -417,6 +417,11 @@ def run_python_tests(test_modules, parallelism):
     run_cmd(command)
 
 
+def run_build_tests():
+    set_title_and_block("Running build tests", "BLOCK_BUILD_TESTS")
+    run_cmd([os.path.join(SPARK_HOME, "dev", "test-dependencies.sh")])
+
+
 def run_sparkr_tests():
     set_title_and_block("Running SparkR tests", "BLOCK_SPARKR_UNIT_TESTS")
 
@@ -537,6 +542,9 @@ def main():
     # if "DOCS" in changed_modules and test_env == "amplab_jenkins":
     #    build_spark_documentation()
 
+    if any(m.should_run_build_tests for m in test_modules):
+        run_build_tests()
+
     # spark build
     build_apache_spark(build_tool, hadoop_version)
 

http://git-wip-us.apache.org/repos/asf/spark/blob/27a42c71/dev/sparktestsupport/__init__.py
----------------------------------------------------------------------
diff --git a/dev/sparktestsupport/__init__.py b/dev/sparktestsupport/__init__.py
index 0e8032d..89015f8 100644
--- a/dev/sparktestsupport/__init__.py
+++ b/dev/sparktestsupport/__init__.py
@@ -32,5 +32,6 @@ ERROR_CODES = {
     "BLOCK_PYSPARK_UNIT_TESTS": 19,
     "BLOCK_SPARKR_UNIT_TESTS": 20,
     "BLOCK_JAVA_STYLE": 21,
+    "BLOCK_BUILD_TESTS": 22,
     "BLOCK_TIMEOUT": 124
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/27a42c71/dev/sparktestsupport/modules.py
----------------------------------------------------------------------
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index d65547e..4667b28 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -31,7 +31,7 @@ class Module(object):
 
     def __init__(self, name, dependencies, source_file_regexes, build_profile_flags=(), environ={},
                  sbt_test_goals=(), python_test_goals=(), blacklisted_python_implementations=(),
-                 test_tags=(), should_run_r_tests=False):
+                 test_tags=(), should_run_r_tests=False, should_run_build_tests=False):
         """
         Define a new module.
 
@@ -53,6 +53,7 @@ class Module(object):
         :param test_tags A set of tags that will be excluded when running unit tests if the module
             is not explicitly changed.
         :param should_run_r_tests: If true, changes in this module will trigger all R tests.
+        :param should_run_build_tests: If true, changes in this module will trigger build tests.
         """
         self.name = name
         self.dependencies = dependencies
@@ -64,6 +65,7 @@ class Module(object):
         self.blacklisted_python_implementations = blacklisted_python_implementations
         self.test_tags = test_tags
         self.should_run_r_tests = should_run_r_tests
+        self.should_run_build_tests = should_run_build_tests
 
         self.dependent_modules = set()
         for dep in dependencies:
@@ -394,6 +396,14 @@ docs = Module(
     ]
 )
 
+build = Module(
+    name="build",
+    dependencies=[],
+    source_file_regexes=[
+        ".*pom.xml",
+        "dev/test-dependencies.sh",
+    ]
+)
 
 ec2 = Module(
     name="ec2",
@@ -433,5 +443,6 @@ root = Module(
         "test",
     ],
     python_test_goals=list(itertools.chain.from_iterable(m.python_test_goals for m in all_modules)),
-    should_run_r_tests=True
+    should_run_r_tests=True,
+    should_run_build_tests=True
 )

http://git-wip-us.apache.org/repos/asf/spark/blob/27a42c71/dev/test-dependencies.sh
----------------------------------------------------------------------
diff --git a/dev/test-dependencies.sh b/dev/test-dependencies.sh
new file mode 100755
index 0000000..984e29d
--- /dev/null
+++ b/dev/test-dependencies.sh
@@ -0,0 +1,102 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+set -e
+
+FWDIR="$(cd "`dirname $0`"/..; pwd)"
+cd "$FWDIR"
+
+# TODO: This would be much nicer to do in SBT, once SBT supports Maven-style resolution.
+
+# NOTE: These should match those in the release publishing script
+HADOOP2_MODULE_PROFILES="-Phive-thriftserver -Pyarn -Phive"
+MVN="build/mvn --force"
+HADOOP_PROFILES=(
+    hadoop-2.3
+    hadoop-2.4
+)
+
+# We'll switch the version to a temp. one, publish POMs using that new version, then switch back to
+# the old version. We need to do this because the `dependency:build-classpath` task needs to
+# resolve Spark's internal submodule dependencies.
+
+# See http://stackoverflow.com/a/3545363 for an explanation of this one-liner:
+OLD_VERSION=$(mvn help:evaluate -Dexpression=project.version|grep -Ev '(^\[|Download\w+:)')
+TEMP_VERSION="spark-$(date +%s | tail -c6)"
+
+function reset_version {
+  # Delete the temporary POMs that we wrote to the local Maven repo:
+  find "$HOME/.m2/" | grep "$TEMP_VERSION" | xargs rm -rf
+
+  # Restore the original version number:
+  $MVN -q versions:set -DnewVersion=$OLD_VERSION -DgenerateBackupPoms=false > /dev/null
+}
+trap reset_version EXIT
+
+$MVN -q versions:set -DnewVersion=$TEMP_VERSION -DgenerateBackupPoms=false > /dev/null
+
+# Generate manifests for each Hadoop profile:
+for HADOOP_PROFILE in "${HADOOP_PROFILES[@]}"; do
+  echo "Performing Maven install for $HADOOP_PROFILE"
+  $MVN $HADOOP2_MODULE_PROFILES -P$HADOOP_PROFILE jar:jar install:install -q \
+    -pl '!assembly' \
+    -pl '!examples' \
+    -pl '!external/flume-assembly' \
+    -pl '!external/kafka-assembly' \
+    -pl '!external/twitter' \
+    -pl '!external/flume' \
+    -pl '!external/mqtt' \
+    -pl '!external/mqtt-assembly' \
+    -pl '!external/zeromq' \
+    -pl '!external/kafka' \
+    -pl '!tags' \
+    -DskipTests
+
+  echo "Generating dependency manifest for $HADOOP_PROFILE"
+  mkdir -p dev/pr-deps
+  $MVN $HADOOP2_MODULE_PROFILES -P$HADOOP_PROFILE dependency:build-classpath -pl assembly \
+    | grep "Building Spark Project Assembly" -A 5 \
+    | tail -n 1 | tr ":" "\n" | rev | cut -d "/" -f 1 | rev | sort \
+    | grep -v spark > dev/pr-deps/spark-deps-$HADOOP_PROFILE
+done
+
+if [[ $@ == **replace-manifest** ]]; then
+  echo "Replacing manifests and creating new files at dev/deps"
+  rm -rf dev/deps
+  mv dev/pr-deps dev/deps
+  exit 0
+fi
+
+for HADOOP_PROFILE in "${HADOOP_PROFILES[@]}"; do
+  set +e
+  dep_diff="$(
+    git diff \
+    --no-index \
+    dev/deps/spark-deps-$HADOOP_PROFILE \
+    dev/pr-deps/spark-deps-$HADOOP_PROFILE \
+  )"
+  set -e
+  if [ "$dep_diff" != "" ]; then
+    echo "Spark's published dependencies DO NOT MATCH the manifest file (dev/spark-deps)."
+    echo "To update the manifest file, run './dev/test-dependencies.sh --replace-manifest'."
+    echo "$dep_diff"
+    rm -rf dev/pr-deps
+    exit 1
+  fi
+done

http://git-wip-us.apache.org/repos/asf/spark/blob/27a42c71/dev/tests/pr_new_dependencies.sh
----------------------------------------------------------------------
diff --git a/dev/tests/pr_new_dependencies.sh b/dev/tests/pr_new_dependencies.sh
deleted file mode 100755
index fdfb3c6..0000000
--- a/dev/tests/pr_new_dependencies.sh
+++ /dev/null
@@ -1,117 +0,0 @@
-#!/usr/bin/env bash
-
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-#
-# This script follows the base format for testing pull requests against
-# another branch and returning results to be published. More details can be
-# found at dev/run-tests-jenkins.
-#
-# Arg1: The Github Pull Request Actual Commit
-#+ known as `ghprbActualCommit` in `run-tests-jenkins`
-# Arg2: The SHA1 hash
-#+ known as `sha1` in `run-tests-jenkins`
-# Arg3: Current PR Commit Hash
-#+ the PR hash for the current commit
-#
-
-ghprbActualCommit="$1"
-sha1="$2"
-current_pr_head="$3"
-
-MVN_BIN="build/mvn"
-CURR_CP_FILE="my-classpath.txt"
-MASTER_CP_FILE="master-classpath.txt"
-
-# First switch over to the master branch
-git checkout -f master
-# Find and copy all pom.xml files into a *.gate file that we can check
-# against through various `git` changes
-find -name "pom.xml" -exec cp {} {}.gate \;
-# Switch back to the current PR
-git checkout -f "${current_pr_head}"
-
-# Check if any *.pom files from the current branch are different from the master
-difference_q=""
-for p in $(find -name "pom.xml"); do
-  [[ -f "${p}" && -f "${p}.gate" ]] && \
-    difference_q="${difference_q}$(diff $p.gate $p)"
-done
-
-# If no pom files were changed we can easily say no new dependencies were added
-if [ -z "${difference_q}" ]; then
-  echo " * This patch does not change any dependencies."
-else
-  # Else we need to manually build spark to determine what, if any, dependencies
-  # were added into the Spark assembly jar
-  ${MVN_BIN} clean package dependency:build-classpath -DskipTests 2>/dev/null | \
-    sed -n -e '/Building Spark Project Assembly/,$p' | \
-    grep --context=1 -m 2 "Dependencies classpath:" | \
-    head -n 3 | \
-    tail -n 1 | \
-    tr ":" "\n" | \
-    rev | \
-    cut -d "/" -f 1 | \
-    rev | \
-    sort > ${CURR_CP_FILE}
-
-  # Checkout the master branch to compare against
-  git checkout -f master
-
-  ${MVN_BIN} clean package dependency:build-classpath -DskipTests 2>/dev/null | \
-    sed -n -e '/Building Spark Project Assembly/,$p' | \
-    grep --context=1 -m 2 "Dependencies classpath:" | \
-    head -n 3 | \
-    tail -n 1 | \
-    tr ":" "\n" | \
-    rev | \
-    cut -d "/" -f 1 | \
-    rev | \
-    sort > ${MASTER_CP_FILE}
-
-  DIFF_RESULTS="`diff ${CURR_CP_FILE} ${MASTER_CP_FILE}`"
-
-  if [ -z "${DIFF_RESULTS}" ]; then
-    echo " * This patch does not change any dependencies."
-  else
-    # Pretty print the new dependencies
-    added_deps=$(echo "${DIFF_RESULTS}" | grep "<" | cut -d' ' -f2 | awk '{printf "   * \`"$1"\`\\n"}')
-    removed_deps=$(echo "${DIFF_RESULTS}" | grep ">" | cut -d' ' -f2 | awk '{printf "   * \`"$1"\`\\n"}')
-    added_deps_text=" * This patch **adds the following new dependencies:**\n${added_deps}"
-    removed_deps_text=" * This patch **removes the following dependencies:**\n${removed_deps}"
-
-    # Construct the final returned message with proper 
-    return_mssg=""
-    [ -n "${added_deps}" ] && return_mssg="${added_deps_text}"
-    if [ -n "${removed_deps}" ]; then
-      if [ -n "${return_mssg}" ]; then
-        return_mssg="${return_mssg}\n${removed_deps_text}"
-      else
-        return_mssg="${removed_deps_text}"
-      fi
-    fi
-    echo "${return_mssg}"
-  fi
-  
-  # Remove the files we've left over
-  [ -f "${CURR_CP_FILE}" ] && rm -f "${CURR_CP_FILE}"
-  [ -f "${MASTER_CP_FILE}" ] && rm -f "${MASTER_CP_FILE}"
-
-  # Clean up our mess from the Maven builds just in case
-  ${MVN_BIN} clean &>/dev/null
-fi

http://git-wip-us.apache.org/repos/asf/spark/blob/27a42c71/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 284c219..62ea829 100644
--- a/pom.xml
+++ b/pom.xml
@@ -2113,6 +2113,23 @@
           <artifactId>maven-deploy-plugin</artifactId>
           <version>2.8.2</version>
         </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-dependency-plugin</artifactId>
+          <executions>
+            <execution>
+              <id>default-cli</id>
+              <goals>
+                 <goal>build-classpath</goal>
+              </goals>
+              <configuration>
+                <!-- This includes dependencies with 'runtime' and 'compile' scopes;
+                     see the docs for includeScope for more details -->
+                <includeScope>runtime</includeScope>
+              </configuration>
+            </execution>
+          </executions>
+        </plugin>
         <!-- This plugin's configuration is used to store Eclipse m2e settings only. -->
         <!-- It has no influence on the Maven build itself. -->
         <plugin>


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org