You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by jo...@apache.org on 2016/04/08 22:59:13 UTC
spark git commit: [SPARK-14435][BUILD] Shade Kryo in our custom Hive
1.2.1 fork
Repository: spark
Updated Branches:
refs/heads/master f8c9beca3 -> 464a3c1e0
[SPARK-14435][BUILD] Shade Kryo in our custom Hive 1.2.1 fork
This patch updates our custom Hive 1.2.1 fork in order to shade Kryo in Hive. This is a blocker for upgrading Spark to use Kryo 3 (see #12076).
The source for this new fork of Hive can be found at https://github.com/JoshRosen/hive/tree/release-1.2.1-spark2
Here's the complete diff from the official Hive 1.2.1 release: https://github.com/apache/hive/compare/release-1.2.1...JoshRosen:release-1.2.1-spark2
Here's the diff from the sources that pwendell used to publish the current `1.2.1.spark` release of Hive: https://github.com/pwendell/hive/compare/release-1.2.1-spark...JoshRosen:release-1.2.1-spark2. This diff looks large because pwendell's branch used a shell script to rewrite the groupId at publish time, whereas I had to commit the groupId changes directly in order to prevent the find-and-replace from affecting the package names in our relocated Kryo classes: https://github.com/pwendell/hive/compare/release-1.2.1-spark...JoshRosen:release-1.2.1-spark2#diff-6ada9aaec70e069df8f2c34c5519dd1e
Using these changes, I was able to publish a local version of Hive and verify that this change fixes the test failures which are blocking #12076. Note that this PR will not compile until we complete the review of the Hive POM changes and stage and publish a release.
/cc vanzin, steveloughran, and pwendell for review.
Author: Josh Rosen <jo...@databricks.com>
Closes #12215 from JoshRosen/shade-kryo-in-hive.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/464a3c1e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/464a3c1e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/464a3c1e
Branch: refs/heads/master
Commit: 464a3c1e02c665c7ad2709f8c47898b682526eb3
Parents: f8c9bec
Author: Josh Rosen <jo...@databricks.com>
Authored: Fri Apr 8 13:58:58 2016 -0700
Committer: Josh Rosen <jo...@databricks.com>
Committed: Fri Apr 8 13:58:58 2016 -0700
----------------------------------------------------------------------
pom.xml | 2 +-
.../org/apache/spark/sql/hive/HiveShim.scala | 4 +-
.../sql/hive/ClasspathDependenciesSuite.scala | 41 +++++---------------
3 files changed, 12 insertions(+), 35 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/464a3c1e/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 1b40983..f37a898 100644
--- a/pom.xml
+++ b/pom.xml
@@ -131,7 +131,7 @@
<curator.version>2.4.0</curator.version>
<hive.group>org.spark-project.hive</hive.group>
<!-- Version used in Maven Hive dependency -->
- <hive.version>1.2.1.spark</hive.version>
+ <hive.version>1.2.1.spark2</hive.version>
<!-- Version used for internal directory structure -->
<hive.version.short>1.2.1</hive.version.short>
<derby.version>10.10.1.1</derby.version>
http://git-wip-us.apache.org/repos/asf/spark/blob/464a3c1e/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala
index da91053..0d2a765 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala
@@ -24,8 +24,6 @@ import scala.collection.JavaConverters._
import scala.language.implicitConversions
import scala.reflect.ClassTag
-import com.esotericsoftware.kryo.Kryo
-import com.esotericsoftware.kryo.io.{Input, Output}
import com.google.common.base.Objects
import org.apache.avro.Schema
import org.apache.hadoop.conf.Configuration
@@ -37,6 +35,8 @@ import org.apache.hadoop.hive.serde2.ColumnProjectionUtils
import org.apache.hadoop.hive.serde2.avro.{AvroGenericRecordWritable, AvroSerdeUtils}
import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector
import org.apache.hadoop.io.Writable
+import org.apache.hive.com.esotericsoftware.kryo.Kryo
+import org.apache.hive.com.esotericsoftware.kryo.io.{Input, Output}
import org.apache.spark.internal.Logging
import org.apache.spark.sql.types.Decimal
http://git-wip-us.apache.org/repos/asf/spark/blob/464a3c1e/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala
index 34b2edb..f262ef6 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala
@@ -24,9 +24,7 @@ import org.apache.spark.SparkFunSuite
/**
* Verify that some classes load and that others are not found on the classpath.
*
- *
- * This is used to detect classpath and shading conflict, especially between
- * Spark's required Kryo version and that which can be found in some Hive versions.
+ * This is used to detect classpath and shading conflicts.
*/
class ClasspathDependenciesSuite extends SparkFunSuite {
private val classloader = this.getClass.getClassLoader
@@ -40,10 +38,6 @@ class ClasspathDependenciesSuite extends SparkFunSuite {
classloader.loadClass(classname)
}
- private def assertLoads(classes: String*): Unit = {
- classes.foreach(assertLoads)
- }
-
private def findResource(classname: String): URL = {
val resource = resourceName(classname)
classloader.getResource(resource)
@@ -63,17 +57,12 @@ class ClasspathDependenciesSuite extends SparkFunSuite {
}
}
- private def assertClassNotFound(classes: String*): Unit = {
- classes.foreach(assertClassNotFound)
+ test("shaded Protobuf") {
+ assertLoads("org.apache.hive.com.google.protobuf.ServiceException")
}
- private val KRYO = "com.esotericsoftware.kryo.Kryo"
-
- private val SPARK_HIVE = "org.apache.hive."
- private val SPARK_SHADED = "org.spark-project.hive.shaded."
-
- test("shaded Protobuf") {
- assertLoads(SPARK_SHADED + "com.google.protobuf.ServiceException")
+ test("shaded Kryo") {
+ assertLoads("org.apache.hive.com.esotericsoftware.kryo.Kryo")
}
test("hive-common") {
@@ -86,25 +75,13 @@ class ClasspathDependenciesSuite extends SparkFunSuite {
private val STD_INSTANTIATOR = "org.objenesis.strategy.StdInstantiatorStrategy"
- test("unshaded kryo") {
- assertLoads(KRYO, STD_INSTANTIATOR)
- }
-
test("Forbidden Dependencies") {
- assertClassNotFound(
- SPARK_HIVE + KRYO,
- SPARK_SHADED + KRYO,
- "org.apache.hive." + KRYO,
- "com.esotericsoftware.shaded." + STD_INSTANTIATOR,
- SPARK_HIVE + "com.esotericsoftware.shaded." + STD_INSTANTIATOR,
- "org.apache.hive.com.esotericsoftware.shaded." + STD_INSTANTIATOR
- )
+ assertClassNotFound("com.esotericsoftware.shaded." + STD_INSTANTIATOR)
+ assertClassNotFound("org.apache.hive.com.esotericsoftware.shaded." + STD_INSTANTIATOR)
}
test("parquet-hadoop-bundle") {
- assertLoads(
- "parquet.hadoop.ParquetOutputFormat",
- "parquet.hadoop.ParquetInputFormat"
- )
+ assertLoads("parquet.hadoop.ParquetOutputFormat")
+ assertLoads("parquet.hadoop.ParquetInputFormat")
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org