Posted to commits@spark.apache.org by yh...@apache.org on 2015/05/29 02:12:34 UTC

spark git commit: [SPARK-7853] [SQL] Fix HiveContext in Spark Shell

Repository: spark
Updated Branches:
  refs/heads/master 0077af22c -> 572b62caf


[SPARK-7853] [SQL] Fix HiveContext in Spark Shell

https://issues.apache.org/jira/browse/SPARK-7853

This fixes a problem introduced by my change in https://github.com/apache/spark/pull/6435, which caused HiveContext creation to fail in the Spark shell because of a class loader issue.
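
For reference, the jar-discovery logic this patch introduces (the allJars function in the diff below) can be exercised on its own. A minimal standalone sketch; the main method and the use of the thread's context class loader are illustrative stand-ins for how Spark wires this up, not Spark code:

    import java.net.{URL, URLClassLoader}

    // Walk the class loader chain and collect jar URLs from every
    // URLClassLoader in it, skipping loaders that expose no URLs.
    object AllJarsSketch {
      def allJars(classLoader: ClassLoader): Array[URL] = classLoader match {
        case null => Array.empty[URL]
        case urlClassLoader: URLClassLoader =>
          urlClassLoader.getURLs ++ allJars(urlClassLoader.getParent)
        case other => allJars(other.getParent)
      }

      def main(args: Array[String]): Unit = {
        // Stand-in for Utils.getContextOrSparkClassLoader. On Java 8 the
        // system class loader is a URLClassLoader, so this finds the
        // classpath jars; recursion stops at the bootstrap loader (null).
        val jars = allJars(Thread.currentThread().getContextClassLoader)
        jars.foreach(println)
      }
    }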

Author: Yin Huai <yh...@databricks.com>

Closes #6459 from yhuai/SPARK-7853 and squashes the following commits:

37ad33e [Yin Huai] Do not use hiveQlTable at all.
47cdb6d [Yin Huai] Move hiveconf.set to the end of setConf.
005649b [Yin Huai] Update comment.
35d86f3 [Yin Huai] Access TTable directly to make sure Hive will not internally use any metastore utility functions.
3737766 [Yin Huai] Recursively find all jars.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/572b62ca
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/572b62ca
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/572b62ca

Branch: refs/heads/master
Commit: 572b62cafe4bc7b1d464c9dcfb449c9d53456826
Parents: 0077af2
Author: Yin Huai <yh...@databricks.com>
Authored: Thu May 28 17:12:30 2015 -0700
Committer: Yin Huai <yh...@databricks.com>
Committed: Thu May 28 17:12:30 2015 -0700

----------------------------------------------------------------------
 .../org/apache/spark/sql/hive/HiveContext.scala | 35 +++++++++++---------
 .../spark/sql/hive/HiveMetastoreCatalog.scala   | 12 +++----
 2 files changed, 25 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/572b62ca/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index 9ab98fd..2ed71d3 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -189,24 +189,22 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
           "Specify a vaild path to the correct hive jars using $HIVE_METASTORE_JARS " +
           s"or change $HIVE_METASTORE_VERSION to $hiveExecutionVersion.")
       }
-      // We recursively add all jars in the class loader chain,
-      // starting from the given urlClassLoader.
-      def addJars(urlClassLoader: URLClassLoader): Array[URL] = {
-        val jarsInParent = urlClassLoader.getParent match {
-          case parent: URLClassLoader => addJars(parent)
-          case other => Array.empty[URL]
-        }
 
-        urlClassLoader.getURLs ++ jarsInParent
+      // We recursively find all jars in the class loader chain,
+      // starting from the given classLoader.
+      def allJars(classLoader: ClassLoader): Array[URL] = classLoader match {
+        case null => Array.empty[URL]
+        case urlClassLoader: URLClassLoader =>
+          urlClassLoader.getURLs ++ allJars(urlClassLoader.getParent)
+        case other => allJars(other.getParent)
       }
 
-      val jars = Utils.getContextOrSparkClassLoader match {
-        case urlClassLoader: URLClassLoader => addJars(urlClassLoader)
-        case other =>
-          throw new IllegalArgumentException(
-            "Unable to locate hive jars to connect to metastore " +
-            s"using classloader ${other.getClass.getName}. " +
-            "Please set spark.sql.hive.metastore.jars")
+      val classLoader = Utils.getContextOrSparkClassLoader
+      val jars = allJars(classLoader)
+      if (jars.length == 0) {
+        throw new IllegalArgumentException(
+          "Unable to locate hive jars to connect to metastore. " +
+            "Please set spark.sql.hive.metastore.jars.")
       }
 
       logInfo(
@@ -356,9 +354,14 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
 
   override def setConf(key: String, value: String): Unit = {
     super.setConf(key, value)
-    hiveconf.set(key, value)
     executionHive.runSqlHive(s"SET $key=$value")
     metadataHive.runSqlHive(s"SET $key=$value")
+    // If users put any Spark SQL setting in the Spark conf (e.g. spark-defaults.conf),
+    // this setConf will be called in the constructor of the SQLContext.
+    // Also, accessing hiveconf creates a default session containing a HiveConf, which
+    // would interfere with the creation of executionHive (which is a lazy val). So,
+    // we put hiveconf.set at the end of this method.
+    hiveconf.set(key, value)
   }
 
   /* A catalyst metadata catalog that points to the Hive Metastore. */

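The ordering constraint in setConf above can be hard to see at a glance. A hypothetical, self-contained illustration of why the session-creating call has to come after the lazy val is forced; the class and member names here are stand-ins, not Spark's:

    // Sketch only: a lazy val that must be initialized before any
    // default session state exists, mirroring executionHive/hiveconf.
    object InitOrderSketch {
      class Context {
        private var sessionCreated = false

        // Like executionHive: fails if a session already exists.
        lazy val executionHive: String = {
          require(!sessionCreated, "a session was created before executionHive")
          "execution-hive"
        }

        // Like accessing hiveconf: creates session state as a side effect.
        def hiveconfSet(key: String, value: String): Unit =
          sessionCreated = true

        def setConf(key: String, value: String): Unit = {
          executionHive           // force the lazy val first, ...
          hiveconfSet(key, value) // ... then touch the session, as the patch does
        }
      }

      def main(args: Array[String]): Unit = {
        new Context().setConf("k", "v")
        println("executionHive initialized before any session state")
      }
    }

Reversing the two calls in setConf would trip the require, which is the analogue of the shell failure this commit fixes.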
http://git-wip-us.apache.org/repos/asf/spark/blob/572b62ca/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index 425a400..95117f7 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -707,20 +707,20 @@ private[hive] case class MetastoreRelation
     hiveQlTable.getMetadata
   )
 
-  implicit class SchemaAttribute(f: FieldSchema) {
+  implicit class SchemaAttribute(f: HiveColumn) {
     def toAttribute: AttributeReference = AttributeReference(
-      f.getName,
-      HiveMetastoreTypes.toDataType(f.getType),
+      f.name,
+      HiveMetastoreTypes.toDataType(f.hiveType),
       // Since data can be dumped in randomly with no validation, everything is nullable.
       nullable = true
     )(qualifiers = Seq(alias.getOrElse(tableName)))
   }
 
-  // Must be a stable value since new attributes are born here.
-  val partitionKeys = hiveQlTable.getPartitionKeys.map(_.toAttribute)
+  /** PartitionKey attributes */
+  val partitionKeys = table.partitionColumns.map(_.toAttribute)
 
   /** Non-partitionKey attributes */
-  val attributes = hiveQlTable.getCols.map(_.toAttribute)
+  val attributes = table.schema.map(_.toAttribute)
 
   val output = attributes ++ partitionKeys
 
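The point of the HiveMetastoreCatalog change is that attributes are now built from Spark's own column description (HiveColumn) rather than Hive's FieldSchema, so no Hive metastore utility functions run while the schema is materialized. A sketch of the same implicit-class pattern; Column and Attribute below are illustrative stand-ins for HiveColumn and AttributeReference, not the real types:

    // Convert plain column descriptions to attributes via an implicit class.
    object SchemaAttributeSketch {
      case class Column(name: String, hiveType: String)
      case class Attribute(name: String, dataType: String, nullable: Boolean)

      implicit class SchemaAttribute(f: Column) {
        // Reads fields directly off the plain column description, so no
        // external (metastore-style) helpers are invoked along the way.
        def toAttribute: Attribute =
          Attribute(f.name, f.hiveType, nullable = true)
      }

      def main(args: Array[String]): Unit = {
        val schema = Seq(Column("id", "bigint"), Column("name", "string"))
        println(schema.map(_.toAttribute))
      }
    }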

