Posted to commits@spark.apache.org by jo...@apache.org on 2016/04/08 22:59:13 UTC

spark git commit: [SPARK-14435][BUILD] Shade Kryo in our custom Hive 1.2.1 fork

Repository: spark
Updated Branches:
  refs/heads/master f8c9beca3 -> 464a3c1e0


[SPARK-14435][BUILD] Shade Kryo in our custom Hive 1.2.1 fork

This patch updates our custom Hive 1.2.1 fork so that Kryo is shaded inside Hive. Shading Kryo there is a blocker for upgrading Spark to Kryo 3 (see #12076).

The source for this new fork of Hive can be found at https://github.com/JoshRosen/hive/tree/release-1.2.1-spark2

Here's the complete diff from the official Hive 1.2.1 release: https://github.com/apache/hive/compare/release-1.2.1...JoshRosen:release-1.2.1-spark2

Here's the diff against the sources that pwendell used to publish the current `1.2.1.spark` release of Hive: https://github.com/pwendell/hive/compare/release-1.2.1-spark...JoshRosen:release-1.2.1-spark2. That diff looks large because his branch rewrote the groupId with a shell script at publish time, whereas I had to commit the groupId changes directly so that the find-and-replace would not also rewrite the package names of our relocated Kryo classes: https://github.com/pwendell/hive/compare/release-1.2.1-spark...JoshRosen:release-1.2.1-spark2#diff-6ada9aaec70e069df8f2c34c5519dd1e

Using these changes, I published a local version of Hive and verified that it fixes the test failures that are blocking #12076. Note that this PR will not compile until the review of the Hive POM changes is complete and a release is staged and published.
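
To make the effect of the relocation concrete, here is a minimal Scala sketch (not part of this patch; it assumes both Spark's own Kryo and the hive-exec jar from the new fork are on the classpath). Because the fork moves its copy of Kryo under `org.apache.hive.`, the two copies resolve to distinct classes and can no longer conflict:

    // Hypothetical check, not from this patch: the two Kryo copies live under
    // different package names, so they load independently of each other.
    val sparkKryo = Class.forName("com.esotericsoftware.kryo.Kryo")
    val hiveKryo  = Class.forName("org.apache.hive.com.esotericsoftware.kryo.Kryo")
    assert(sparkKryo ne hiveKryo)  // distinct Class objects; no shading conflict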

/cc vanzin, steveloughran, and pwendell for review.

Author: Josh Rosen <jo...@databricks.com>

Closes #12215 from JoshRosen/shade-kryo-in-hive.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/464a3c1e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/464a3c1e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/464a3c1e

Branch: refs/heads/master
Commit: 464a3c1e02c665c7ad2709f8c47898b682526eb3
Parents: f8c9bec
Author: Josh Rosen <jo...@databricks.com>
Authored: Fri Apr 8 13:58:58 2016 -0700
Committer: Josh Rosen <jo...@databricks.com>
Committed: Fri Apr 8 13:58:58 2016 -0700

----------------------------------------------------------------------
 pom.xml                                         |  2 +-
 .../org/apache/spark/sql/hive/HiveShim.scala    |  4 +-
 .../sql/hive/ClasspathDependenciesSuite.scala   | 41 +++++---------------
 3 files changed, 12 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/464a3c1e/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 1b40983..f37a898 100644
--- a/pom.xml
+++ b/pom.xml
@@ -131,7 +131,7 @@
     <curator.version>2.4.0</curator.version>
     <hive.group>org.spark-project.hive</hive.group>
     <!-- Version used in Maven Hive dependency -->
-    <hive.version>1.2.1.spark</hive.version>
+    <hive.version>1.2.1.spark2</hive.version>
     <!-- Version used for internal directory structure -->
     <hive.version.short>1.2.1</hive.version.short>
     <derby.version>10.10.1.1</derby.version>
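
A hedged sanity check (hypothetical, not part of this change) for anyone verifying the version bump locally: ask a core Hive class which jar it was loaded from; with `1.2.1.spark2` resolved, the location should point at the republished fork's artifacts rather than the old `1.2.1.spark` ones.

    // Hypothetical snippet: print the jar that provided HiveConf so you can see
    // which hive-common/hive-exec artifacts were actually resolved.
    val hiveConf = Class.forName("org.apache.hadoop.hive.conf.HiveConf")
    println(hiveConf.getProtectionDomain.getCodeSource.getLocation)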

http://git-wip-us.apache.org/repos/asf/spark/blob/464a3c1e/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala
index da91053..0d2a765 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala
@@ -24,8 +24,6 @@ import scala.collection.JavaConverters._
 import scala.language.implicitConversions
 import scala.reflect.ClassTag
 
-import com.esotericsoftware.kryo.Kryo
-import com.esotericsoftware.kryo.io.{Input, Output}
 import com.google.common.base.Objects
 import org.apache.avro.Schema
 import org.apache.hadoop.conf.Configuration
@@ -37,6 +35,8 @@ import org.apache.hadoop.hive.serde2.ColumnProjectionUtils
 import org.apache.hadoop.hive.serde2.avro.{AvroGenericRecordWritable, AvroSerdeUtils}
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector
 import org.apache.hadoop.io.Writable
+import org.apache.hive.com.esotericsoftware.kryo.Kryo
+import org.apache.hive.com.esotericsoftware.kryo.io.{Input, Output}
 
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.types.Decimal
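
For context on why the import swap above matters (a sketch under stated assumptions, not code from this patch): the Kryo instances HiveShim deals with are created by Hive itself, so they are instances of the relocated classes, and any helper that touches them must name the shaded types. Assuming the relocated classes keep Kryo's usual 2.x API, a round-trip helper would look roughly like this; `ShadedKryoRoundTrip` is a hypothetical name:

    // Hedged sketch, assuming only that the relocated classes keep Kryo's usual
    // API (writeObject/readObject, Input/Output over streams).
    import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

    import org.apache.hive.com.esotericsoftware.kryo.Kryo
    import org.apache.hive.com.esotericsoftware.kryo.io.{Input, Output}

    object ShadedKryoRoundTrip {
      // Serialize and immediately deserialize a value with a Kryo instance handed
      // to us by Hive; the parameter type must be the shaded Kryo, or this will
      // not link against the instances Hive actually passes in.
      def roundTrip[T](kryo: Kryo, value: T, clazz: Class[T]): T = {
        val buffer = new ByteArrayOutputStream()
        val output = new Output(buffer)
        kryo.writeObject(output, value)
        output.close()
        val input = new Input(new ByteArrayInputStream(buffer.toByteArray))
        val result = kryo.readObject(input, clazz)
        input.close()
        result
      }
    }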

http://git-wip-us.apache.org/repos/asf/spark/blob/464a3c1e/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala
index 34b2edb..f262ef6 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala
@@ -24,9 +24,7 @@ import org.apache.spark.SparkFunSuite
 /**
  * Verify that some classes load and that others are not found on the classpath.
  *
- *
- * This is used to detect classpath and shading conflict, especially between
- * Spark's required Kryo version and that which can be found in some Hive versions.
+ * This is used to detect classpath and shading conflicts.
  */
 class ClasspathDependenciesSuite extends SparkFunSuite {
   private val classloader = this.getClass.getClassLoader
@@ -40,10 +38,6 @@ class ClasspathDependenciesSuite extends SparkFunSuite {
     classloader.loadClass(classname)
   }
 
-  private def assertLoads(classes: String*): Unit = {
-    classes.foreach(assertLoads)
-  }
-
   private def findResource(classname: String): URL = {
     val resource = resourceName(classname)
     classloader.getResource(resource)
@@ -63,17 +57,12 @@ class ClasspathDependenciesSuite extends SparkFunSuite {
     }
   }
 
-  private def assertClassNotFound(classes: String*): Unit = {
-    classes.foreach(assertClassNotFound)
+  test("shaded Protobuf") {
+    assertLoads("org.apache.hive.com.google.protobuf.ServiceException")
   }
 
-  private val KRYO = "com.esotericsoftware.kryo.Kryo"
-
-  private val SPARK_HIVE = "org.apache.hive."
-  private val SPARK_SHADED = "org.spark-project.hive.shaded."
-
-  test("shaded Protobuf") {
-    assertLoads(SPARK_SHADED + "com.google.protobuf.ServiceException")
+  test("shaded Kryo") {
+    assertLoads("org.apache.hive.com.esotericsoftware.kryo.Kryo")
   }
 
   test("hive-common") {
@@ -86,25 +75,13 @@ class ClasspathDependenciesSuite extends SparkFunSuite {
 
   private val STD_INSTANTIATOR = "org.objenesis.strategy.StdInstantiatorStrategy"
 
-  test("unshaded kryo") {
-    assertLoads(KRYO, STD_INSTANTIATOR)
-  }
-
   test("Forbidden Dependencies") {
-    assertClassNotFound(
-      SPARK_HIVE + KRYO,
-      SPARK_SHADED + KRYO,
-      "org.apache.hive." + KRYO,
-      "com.esotericsoftware.shaded." + STD_INSTANTIATOR,
-      SPARK_HIVE + "com.esotericsoftware.shaded." + STD_INSTANTIATOR,
-      "org.apache.hive.com.esotericsoftware.shaded." + STD_INSTANTIATOR
-    )
+    assertClassNotFound("com.esotericsoftware.shaded." + STD_INSTANTIATOR)
+    assertClassNotFound("org.apache.hive.com.esotericsoftware.shaded." + STD_INSTANTIATOR)
   }
 
   test("parquet-hadoop-bundle") {
-    assertLoads(
-      "parquet.hadoop.ParquetOutputFormat",
-      "parquet.hadoop.ParquetInputFormat"
-    )
+    assertLoads("parquet.hadoop.ParquetOutputFormat")
+    assertLoads("parquet.hadoop.ParquetInputFormat")
   }
 }
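
For readers skimming the hunks above, here is a hedged, standalone sketch of the pattern the remaining single-argument helpers rely on (a hypothetical `ClasspathChecks` object, not the suite's exact code): a shaded name must resolve via `loadClass`, and a forbidden name must fail with `ClassNotFoundException`.

    // Hedged sketch of the check pattern; not the suite's exact implementation.
    object ClasspathChecks {
      private val classloader = getClass.getClassLoader

      def assertLoads(classname: String): Unit = {
        // loadClass throws ClassNotFoundException if the class is missing.
        classloader.loadClass(classname)
      }

      def assertClassNotFound(classname: String): Unit = {
        try {
          classloader.loadClass(classname)
          sys.error(s"$classname should not be on the classpath")
        } catch {
          case _: ClassNotFoundException => ()  // expected: the class is absent
        }
      }
    }

With the `1.2.1.spark2` fork on the classpath, loading "org.apache.hive.com.esotericsoftware.kryo.Kryo" should succeed while the doubly-shaded `StdInstantiatorStrategy` names stay absent.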

