You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by yh...@apache.org on 2015/05/29 02:12:34 UTC
spark git commit: [SPARK-7853] [SQL] Fix HiveContext in Spark Shell
Repository: spark
Updated Branches:
refs/heads/master 0077af22c -> 572b62caf
[SPARK-7853] [SQL] Fix HiveContext in Spark Shell
https://issues.apache.org/jira/browse/SPARK-7853
This fixes the problem introduced by my change in https://github.com/apache/spark/pull/6435, which caused HiveContext creation to fail in the Spark shell because of a class loader issue.
Author: Yin Huai <yh...@databricks.com>
Closes #6459 from yhuai/SPARK-7853 and squashes the following commits:
37ad33e [Yin Huai] Do not use hiveQlTable at all.
47cdb6d [Yin Huai] Move hiveconf.set to the end of setConf.
005649b [Yin Huai] Update comment.
35d86f3 [Yin Huai] Access TTable directly to make sure Hive will not internally use any metastore utility functions.
3737766 [Yin Huai] Recursively find all jars.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/572b62ca
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/572b62ca
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/572b62ca
Branch: refs/heads/master
Commit: 572b62cafe4bc7b1d464c9dcfb449c9d53456826
Parents: 0077af2
Author: Yin Huai <yh...@databricks.com>
Authored: Thu May 28 17:12:30 2015 -0700
Committer: Yin Huai <yh...@databricks.com>
Committed: Thu May 28 17:12:30 2015 -0700
----------------------------------------------------------------------
.../org/apache/spark/sql/hive/HiveContext.scala | 35 +++++++++++---------
.../spark/sql/hive/HiveMetastoreCatalog.scala | 12 +++----
2 files changed, 25 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/572b62ca/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index 9ab98fd..2ed71d3 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -189,24 +189,22 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
"Specify a valid path to the correct hive jars using $HIVE_METASTORE_JARS " +
s"or change $HIVE_METASTORE_VERSION to $hiveExecutionVersion.")
}
- // We recursively add all jars in the class loader chain,
- // starting from the given urlClassLoader.
- def addJars(urlClassLoader: URLClassLoader): Array[URL] = {
- val jarsInParent = urlClassLoader.getParent match {
- case parent: URLClassLoader => addJars(parent)
- case other => Array.empty[URL]
- }
- urlClassLoader.getURLs ++ jarsInParent
+ // We recursively find all jars in the class loader chain,
+ // starting from the given classLoader.
+ def allJars(classLoader: ClassLoader): Array[URL] = classLoader match {
+ case null => Array.empty[URL]
+ case urlClassLoader: URLClassLoader =>
+ urlClassLoader.getURLs ++ allJars(urlClassLoader.getParent)
+ case other => allJars(other.getParent)
}
- val jars = Utils.getContextOrSparkClassLoader match {
- case urlClassLoader: URLClassLoader => addJars(urlClassLoader)
- case other =>
- throw new IllegalArgumentException(
- "Unable to locate hive jars to connect to metastore " +
- s"using classloader ${other.getClass.getName}. " +
- "Please set spark.sql.hive.metastore.jars")
+ val classLoader = Utils.getContextOrSparkClassLoader
+ val jars = allJars(classLoader)
+ if (jars.length == 0) {
+ throw new IllegalArgumentException(
+ "Unable to locate hive jars to connect to metastore. " +
+ "Please set spark.sql.hive.metastore.jars.")
}
logInfo(
@@ -356,9 +354,14 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
override def setConf(key: String, value: String): Unit = {
super.setConf(key, value)
- hiveconf.set(key, value)
executionHive.runSqlHive(s"SET $key=$value")
metadataHive.runSqlHive(s"SET $key=$value")
+ // If users put any Spark SQL setting in the spark conf (e.g. spark-defaults.conf),
+ // this setConf will be called in the constructor of the SQLContext.
+ // Also, calling hiveconf will create a default session containing a HiveConf, which
will interfere with the creation of executionHive (which is a lazy val). So,
+ // we put hiveconf.set at the end of this method.
+ hiveconf.set(key, value)
}
/* A catalyst metadata catalog that points to the Hive Metastore. */
http://git-wip-us.apache.org/repos/asf/spark/blob/572b62ca/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index 425a400..95117f7 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -707,20 +707,20 @@ private[hive] case class MetastoreRelation
hiveQlTable.getMetadata
)
- implicit class SchemaAttribute(f: FieldSchema) {
+ implicit class SchemaAttribute(f: HiveColumn) {
def toAttribute: AttributeReference = AttributeReference(
- f.getName,
- HiveMetastoreTypes.toDataType(f.getType),
+ f.name,
+ HiveMetastoreTypes.toDataType(f.hiveType),
// Since data can be dumped in randomly with no validation, everything is nullable.
nullable = true
)(qualifiers = Seq(alias.getOrElse(tableName)))
}
- // Must be a stable value since new attributes are born here.
- val partitionKeys = hiveQlTable.getPartitionKeys.map(_.toAttribute)
+ /** PartitionKey attributes */
+ val partitionKeys = table.partitionColumns.map(_.toAttribute)
/** Non-partitionKey attributes */
- val attributes = hiveQlTable.getCols.map(_.toAttribute)
+ val attributes = table.schema.map(_.toAttribute)
val output = attributes ++ partitionKeys
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org