You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by yh...@apache.org on 2015/06/20 02:34:15 UTC

spark git commit: [SPARK-8498] [SQL] Add regression test for SPARK-8470

Repository: spark
Updated Branches:
  refs/heads/master b305e377f -> 093c34838


[SPARK-8498] [SQL] Add regression test for SPARK-8470

**Summary of the problem in SPARK-8470.** When using `HiveContext` to create a data frame from instances of a user-defined case class, Spark throws `scala.reflect.internal.MissingRequirementError` when it tries to infer the schema using reflection. This is caused by `HiveContext` silently overwriting the context class loader that contains the user classes.

**What this issue is about.** This issue adds regression tests for SPARK-8470, which is already fixed in #6891. We closed SPARK-8470 as a duplicate because it is a different manifestation of the same problem in SPARK-8368. Due to the complexity of the reproduction, this requires us to pre-package a special test jar and include it in the Spark project itself.

I tested this with and without the fix in #6891 and verified that it passes only if the fix is present.

Author: Andrew Or <an...@databricks.com>

Closes #6909 from andrewor14/SPARK-8498 and squashes the following commits:

5e9d688 [Andrew Or] Add regression test for SPARK-8470


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/093c3483
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/093c3483
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/093c3483

Branch: refs/heads/master
Commit: 093c34838d1db7a9375f36a9a2ab5d96a23ae683
Parents: b305e37
Author: Andrew Or <an...@databricks.com>
Authored: Fri Jun 19 17:34:09 2015 -0700
Committer: Yin Huai <yh...@databricks.com>
Committed: Fri Jun 19 17:34:09 2015 -0700

----------------------------------------------------------------------
 .../regression-test-SPARK-8498/Main.scala       |  43 +++++++++++++++++++
 .../MyCoolClass.scala                           |  20 +++++++++
 .../regression-test-SPARK-8498/test.jar         | Bin 0 -> 6811 bytes
 .../spark/sql/hive/HiveSparkSubmitSuite.scala   |  13 ++++++
 4 files changed, 76 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/093c3483/sql/hive/src/test/resources/regression-test-SPARK-8498/Main.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/regression-test-SPARK-8498/Main.scala b/sql/hive/src/test/resources/regression-test-SPARK-8498/Main.scala
new file mode 100644
index 0000000..858dd6b
--- /dev/null
+++ b/sql/hive/src/test/resources/regression-test-SPARK-8498/Main.scala
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.sql.hive.HiveContext
+
+/**
+ * Entry point of the test application for SPARK-8498.
+ *
+ * This file is not meant to be compiled during tests. A compiled copy is already
+ * included in the pre-built "test.jar" located in the same directory as this file.
+ * The source is kept here for reference only and should NOT be modified without
+ * rebuilding the test jar itself.
+ *
+ * This is used in org.apache.spark.sql.hive.HiveSparkSubmitSuite.
+ */
+object Main {
+  def main(args: Array[String]) {
+    println("Running regression test for SPARK-8498.")
+    val sc = new SparkContext("local", "testing")
+    val hc = new HiveContext(sc)
+    // Creating the data frame infers the schema of MyCoolClass; this line should not throw
+    // scala.reflect.internal.MissingRequirementError. See SPARK-8470 for more detail.
+    val df = hc.createDataFrame(Seq(MyCoolClass("1", "2", "3")))
+    df.collect()
+    println("Regression test for SPARK-8498 success!")
+  }
+}
+

http://git-wip-us.apache.org/repos/asf/spark/blob/093c3483/sql/hive/src/test/resources/regression-test-SPARK-8498/MyCoolClass.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/regression-test-SPARK-8498/MyCoolClass.scala b/sql/hive/src/test/resources/regression-test-SPARK-8498/MyCoolClass.scala
new file mode 100644
index 0000000..a72c063
--- /dev/null
+++ b/sql/hive/src/test/resources/regression-test-SPARK-8498/MyCoolClass.scala
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Dummy user case class that Main turns into a data frame in regression test SPARK-8498. */
+case class MyCoolClass(past: String, present: String, future: String)
+

http://git-wip-us.apache.org/repos/asf/spark/blob/093c3483/sql/hive/src/test/resources/regression-test-SPARK-8498/test.jar
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/regression-test-SPARK-8498/test.jar b/sql/hive/src/test/resources/regression-test-SPARK-8498/test.jar
new file mode 100644
index 0000000..4f59fba
Binary files /dev/null and b/sql/hive/src/test/resources/regression-test-SPARK-8498/test.jar differ

http://git-wip-us.apache.org/repos/asf/spark/blob/093c3483/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
index 7963abf..820af80 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
@@ -35,6 +35,8 @@ class HiveSparkSubmitSuite
   with ResetSystemProperties
   with Timeouts {
 
+  // TODO: rewrite these or mark them as slow tests to be run sparingly
+
   def beforeAll() {
     System.setProperty("spark.testing", "true")
   }
@@ -65,6 +67,17 @@ class HiveSparkSubmitSuite
     runSparkSubmit(args)
   }
 
+  test("SPARK-8498: MissingRequirementError during reflection") {
+    // This test uses a pre-built jar to test SPARK-8498. In a nutshell, the jar's Main creates
+    // a HiveContext and uses it to create a data frame from a local Seq using reflection.
+    // Before the fix for SPARK-8470, this resulted in a MissingRequirementError because
+    // the HiveContext code mistakenly overrode the class loader that contains user classes.
+    // For more detail, see sql/hive/src/test/resources/regression-test-SPARK-8498/*.scala.
+    val testJar = "sql/hive/src/test/resources/regression-test-SPARK-8498/test.jar"
+    val args = Seq("--class", "Main", testJar)
+    runSparkSubmit(args)
+  }
+
   // NOTE: This is an expensive operation in terms of time (10 seconds+). Use sparingly.
   // This is copied from org.apache.spark.deploy.SparkSubmitSuite
   private def runSparkSubmit(args: Seq[String]): Unit = {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org