You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by pw...@apache.org on 2014/03/03 00:18:27 UTC

git commit: SPARK-1121: Include avro for yarn-alpha builds

Repository: spark
Updated Branches:
  refs/heads/master fd31adbf2 -> c3f5e0753


SPARK-1121: Include avro for yarn-alpha builds

This lets us explicitly include Avro based on a profile for 0.23.X
builds. It makes me sad how convoluted it is to express this logic
in Maven. @tgraves and @sryza curious if this works for you.

I'm also considering just reverting to how it was before. The only
real problem was that Spark advertised a dependency on Avro
even though it only really depends transitively on Avro through
other deps.

Author: Patrick Wendell <pw...@gmail.com>

Closes #49 from pwendell/avro-build-fix and squashes the following commits:

8d6ee92 [Patrick Wendell] SPARK-1121: Add avro to yarn-alpha profile


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c3f5e075
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c3f5e075
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c3f5e075

Branch: refs/heads/master
Commit: c3f5e075335a65ea522b2f76716921ec056c52ed
Parents: fd31adb
Author: Patrick Wendell <pw...@gmail.com>
Authored: Sun Mar 2 15:18:19 2014 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Sun Mar 2 15:18:19 2014 -0800

----------------------------------------------------------------------
 bagel/pom.xml               | 14 ++++++++++++++
 core/pom.xml                | 14 ++++++++++++++
 docs/building-with-maven.md |  4 ----
 examples/pom.xml            | 14 ++++++++++++++
 external/flume/pom.xml      | 14 ++++++++++++++
 external/kafka/pom.xml      | 14 ++++++++++++++
 external/mqtt/pom.xml       | 14 ++++++++++++++
 external/twitter/pom.xml    | 14 ++++++++++++++
 external/zeromq/pom.xml     | 14 ++++++++++++++
 graphx/pom.xml              | 14 ++++++++++++++
 mllib/pom.xml               | 14 ++++++++++++++
 pom.xml                     | 15 +++++++++++++++
 repl/pom.xml                | 14 ++++++++++++++
 streaming/pom.xml           | 14 ++++++++++++++
 tools/pom.xml               | 14 ++++++++++++++
 yarn/alpha/pom.xml          | 14 ++++++++++++++
 yarn/pom.xml                | 17 +++++++++--------
 yarn/stable/pom.xml         | 14 ++++++++++++++
 18 files changed, 234 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/c3f5e075/bagel/pom.xml
----------------------------------------------------------------------
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 355f437..41aacbd 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -31,6 +31,20 @@
   <name>Spark Project Bagel</name>
   <url>http://spark.apache.org/</url>
 
+  <profiles>
+    <profile>
+      <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+           a Hadoop 0.23.X issue -->
+      <id>yarn-alpha</id>
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>

http://git-wip-us.apache.org/repos/asf/spark/blob/c3f5e075/core/pom.xml
----------------------------------------------------------------------
diff --git a/core/pom.xml b/core/pom.xml
index 2afd250..99c8414 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -31,6 +31,20 @@
     <name>Spark Project Core</name>
     <url>http://spark.apache.org/</url>
 
+    <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+         a Hadoop 0.23.X issue -->
+    <profiles>
+      <profile>
+        <id>yarn-alpha</id>
+        <dependencies>
+           <dependency>
+             <groupId>org.apache.avro</groupId>
+             <artifactId>avro</artifactId>
+           </dependency>
+        </dependencies>
+      </profile>
+    </profiles>
+
     <dependencies>
         <dependency>
             <groupId>org.apache.hadoop</groupId>

http://git-wip-us.apache.org/repos/asf/spark/blob/c3f5e075/docs/building-with-maven.md
----------------------------------------------------------------------
diff --git a/docs/building-with-maven.md b/docs/building-with-maven.md
index 40cac8e..ded1292 100644
--- a/docs/building-with-maven.md
+++ b/docs/building-with-maven.md
@@ -76,7 +76,3 @@ The maven build includes support for building a Debian package containing the as
     $ mvn -Pdeb -DskipTests clean package
 
 The debian package can then be found under assembly/target. We added the short commit hash to the file name so that we can distinguish individual packages built for SNAPSHOT versions.
-
-## A note about Hadoop version 0.23.x
-
-For building spark with hadoop 0.23.x and also yarn, you will have to manually add a dependency on avro (org.apache.avro, avro, 1.7.4).

http://git-wip-us.apache.org/repos/asf/spark/blob/c3f5e075/examples/pom.xml
----------------------------------------------------------------------
diff --git a/examples/pom.xml b/examples/pom.xml
index d952e2c..3aba343 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -31,6 +31,20 @@
   <name>Spark Project Examples</name>
   <url>http://spark.apache.org/</url>
 
+  <profiles>
+    <profile>
+      <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+           a Hadoop 0.23.X issue -->
+      <id>yarn-alpha</id>
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
   <repositories>
     <repository>
       <id>apache-repo</id>

http://git-wip-us.apache.org/repos/asf/spark/blob/c3f5e075/external/flume/pom.xml
----------------------------------------------------------------------
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index bc00ab4..8783aea 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -31,6 +31,20 @@
   <name>Spark Project External Flume</name>
   <url>http://spark.apache.org/</url>
 
+  <profiles>
+    <profile>
+      <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+           a Hadoop 0.23.X issue -->
+      <id>yarn-alpha</id>
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>

http://git-wip-us.apache.org/repos/asf/spark/blob/c3f5e075/external/kafka/pom.xml
----------------------------------------------------------------------
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index 979eb0c..79dc38f 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -31,6 +31,20 @@
   <name>Spark Project External Kafka</name>
   <url>http://spark.apache.org/</url>
 
+  <profiles>
+    <profile>
+      <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+           a Hadoop 0.23.X issue -->
+      <id>yarn-alpha</id>
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>

http://git-wip-us.apache.org/repos/asf/spark/blob/c3f5e075/external/mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 2c476b4..06c751d 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -31,6 +31,20 @@
   <name>Spark Project External MQTT</name>
   <url>http://spark.apache.org/</url>
 
+  <profiles>
+    <profile>
+      <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+           a Hadoop 0.23.X issue -->
+      <id>yarn-alpha</id>
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
   <repositories>
     <repository>
       <id>mqtt-repo</id>

http://git-wip-us.apache.org/repos/asf/spark/blob/c3f5e075/external/twitter/pom.xml
----------------------------------------------------------------------
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index a443459..37bb4fa 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -31,6 +31,20 @@
   <name>Spark Project External Twitter</name>
   <url>http://spark.apache.org/</url>
 
+  <profiles>
+    <profile>
+      <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+           a Hadoop 0.23.X issue -->
+      <id>yarn-alpha</id>
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>

http://git-wip-us.apache.org/repos/asf/spark/blob/c3f5e075/external/zeromq/pom.xml
----------------------------------------------------------------------
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index a40e558..65ec0e2 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -31,6 +31,20 @@
   <name>Spark Project External ZeroMQ</name>
   <url>http://spark.apache.org/</url>
 
+  <profiles>
+    <profile>
+      <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+           a Hadoop 0.23.X issue -->
+      <id>yarn-alpha</id>
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>

http://git-wip-us.apache.org/repos/asf/spark/blob/c3f5e075/graphx/pom.xml
----------------------------------------------------------------------
diff --git a/graphx/pom.xml b/graphx/pom.xml
index 2b4d674..5b54dd2 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -31,6 +31,20 @@
   <name>Spark Project GraphX</name>
   <url>http://spark-project.org/</url>
 
+  <profiles>
+    <profile>
+      <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+           a Hadoop 0.23.X issue -->
+      <id>yarn-alpha</id>
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>

http://git-wip-us.apache.org/repos/asf/spark/blob/c3f5e075/mllib/pom.xml
----------------------------------------------------------------------
diff --git a/mllib/pom.xml b/mllib/pom.xml
index c0e745d..760a2a8 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -31,6 +31,20 @@
   <name>Spark Project ML Library</name>
   <url>http://spark.apache.org/</url>
 
+  <profiles>
+    <profile>
+      <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+           a Hadoop 0.23.X issue -->
+      <id>yarn-alpha</id>
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>

http://git-wip-us.apache.org/repos/asf/spark/blob/c3f5e075/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 7e04efa..7e28d7c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -426,6 +426,21 @@
         </exclusions>
       </dependency>
       <dependency>
+        <groupId>org.apache.avro</groupId>
+        <artifactId>avro</artifactId>
+        <version>1.7.4</version>
+        <exclusions>
+          <exclusion>
+            <groupId>org.jboss.netty</groupId>
+            <artifactId>netty</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>io.netty</groupId>
+            <artifactId>netty</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <dependency>
         <groupId>net.java.dev.jets3t</groupId>
         <artifactId>jets3t</artifactId>
         <version>0.7.1</version>

http://git-wip-us.apache.org/repos/asf/spark/blob/c3f5e075/repl/pom.xml
----------------------------------------------------------------------
diff --git a/repl/pom.xml b/repl/pom.xml
index 3a6baf5..aa01a17 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -31,6 +31,20 @@
   <name>Spark Project REPL</name>
   <url>http://spark.apache.org/</url>
 
+  <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+       a Hadoop 0.23.X issue -->
+  <profiles>
+    <profile>
+      <id>yarn-alpha</id>
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
   <properties>
     <deb.install.path>/usr/share/spark</deb.install.path>
     <deb.user>root</deb.user>

http://git-wip-us.apache.org/repos/asf/spark/blob/c3f5e075/streaming/pom.xml
----------------------------------------------------------------------
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 1f3366e..91d6a13 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -31,6 +31,20 @@
   <name>Spark Project Streaming</name>
   <url>http://spark.apache.org/</url>
 
+  <profiles>
+    <profile>
+      <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+           a Hadoop 0.23.X issue -->
+      <id>yarn-alpha</id>
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
   <repositories>
     <repository>
       <id>apache-repo</id>

http://git-wip-us.apache.org/repos/asf/spark/blob/c3f5e075/tools/pom.xml
----------------------------------------------------------------------
diff --git a/tools/pom.xml b/tools/pom.xml
index 67f2d78..b8dd255 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -30,6 +30,20 @@
   <name>Spark Project Tools</name>
   <url>http://spark.apache.org/</url>
 
+  <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+       a Hadoop 0.23.X issue -->
+  <profiles>
+    <profile>
+      <id>yarn-alpha</id>
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>

http://git-wip-us.apache.org/repos/asf/spark/blob/c3f5e075/yarn/alpha/pom.xml
----------------------------------------------------------------------
diff --git a/yarn/alpha/pom.xml b/yarn/alpha/pom.xml
index e076ca1..bfe12ec 100644
--- a/yarn/alpha/pom.xml
+++ b/yarn/alpha/pom.xml
@@ -24,6 +24,20 @@
     <relativePath>../pom.xml</relativePath>
   </parent>
 
+  <profiles>
+    <profile>
+      <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+           a Hadoop 0.23.X issue -->
+      <id>yarn-alpha</id>
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
   <groupId>org.apache.spark</groupId>
   <artifactId>spark-yarn-alpha_2.10</artifactId>
   <packaging>jar</packaging>

http://git-wip-us.apache.org/repos/asf/spark/blob/c3f5e075/yarn/pom.xml
----------------------------------------------------------------------
diff --git a/yarn/pom.xml b/yarn/pom.xml
index be51679..35e3176 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -53,14 +53,6 @@
       <version>${yarn.version}</version>
     </dependency>
     <dependency>
-      <groupId>org.apache.avro</groupId>
-      <artifactId>avro</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.avro</groupId>
-      <artifactId>avro-ipc</artifactId>
-    </dependency>
-    <dependency>
       <groupId>org.scalatest</groupId>
       <artifactId>scalatest_${scala.binary.version}</artifactId>
       <scope>test</scope>
@@ -78,6 +70,15 @@
       <modules>
         <module>alpha</module>
       </modules>
+
+      <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+           a Hadoop 0.23.X issue -->
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
     </profile>
 
     <profile>

http://git-wip-us.apache.org/repos/asf/spark/blob/c3f5e075/yarn/stable/pom.xml
----------------------------------------------------------------------
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index 0780f25..9d68603 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -24,6 +24,20 @@
     <relativePath>../pom.xml</relativePath>
   </parent>
 
+  <profiles>
+    <profile>
+      <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+           a Hadoop 0.23.X issue -->
+      <id>yarn-alpha</id>
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
   <groupId>org.apache.spark</groupId>
   <artifactId>spark-yarn_2.10</artifactId>
   <packaging>jar</packaging>