Posted to commits@spark.apache.org by rx...@apache.org on 2016/08/02 06:08:10 UTC

spark git commit: [SPARK-16793][SQL] Set the temporary warehouse path to sc's conf in TestHive.

Repository: spark
Updated Branches:
  refs/heads/master 2eedc00b0 -> 5184df06b


[SPARK-16793][SQL] Set the temporary warehouse path to sc's conf in TestHive.

## What changes were proposed in this pull request?

Since SPARK-15034, the value of spark.sql.warehouse.dir determines the warehouse location. In TestHive, we can now simply set the temporary warehouse path in sc's conf, so the "warehousePath" parameter can be removed.
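
For example, a test-only SparkContext can now point the warehouse at a throwaway directory purely through its conf. A minimal sketch (the temp-dir handling here stands in for `TestHiveContext.makeWarehouseDir()`; names are illustrative):

```scala
import java.nio.file.Files
import org.apache.spark.SparkConf

// Create a temp dir, then delete it so Hive can create the warehouse fresh.
val warehouseDir = Files.createTempDirectory("warehouse").toFile
warehouseDir.delete()

// The warehouse location now travels in the conf itself; no constructor
// parameter needs to carry it through TestHiveSparkSession and friends.
val conf = new SparkConf()
  .set("spark.sql.warehouse.dir", warehouseDir.toURI.getPath)
```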

## How was this patch tested?

Existing test suites.
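
The existing suites resolve the warehouse location through the new `getWarehousePath` helper (see the TestHive.scala hunk below). A sketch of that lookup, assuming a running `SparkContext` named `sc`; note that `SQLConf` is Spark-internal, so this only compiles inside Spark's own sql packages, as TestHive does:

```scala
import org.apache.spark.SparkContext
import org.apache.spark.sql.internal.SQLConf

// Copy the SparkContext's conf entries into a SQLConf, then ask it for the
// resolved warehouse path (SQLConf applies the default when the key is unset).
def warehousePathOf(sc: SparkContext): String = {
  val sqlConf = new SQLConf
  sc.getConf.getAll.foreach { case (k, v) => sqlConf.setConfString(k, v) }
  sqlConf.warehousePath
}
```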

Author: jiangxingbo <ji...@meituan.com>

Closes #14401 from jiangxb1987/warehousePath.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5184df06
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5184df06
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5184df06

Branch: refs/heads/master
Commit: 5184df06b347f86776c8ac87415b8002a5942a35
Parents: 2eedc00
Author: jiangxingbo <ji...@meituan.com>
Authored: Mon Aug 1 23:08:06 2016 -0700
Committer: Reynold Xin <rx...@databricks.com>
Committed: Mon Aug 1 23:08:06 2016 -0700

----------------------------------------------------------------------
 .../apache/spark/sql/hive/test/TestHive.scala   | 42 +++++++++-----------
 .../sql/hive/execution/HiveQuerySuite.scala     |  2 +-
 .../spark/sql/sources/BucketedReadSuite.scala   |  2 +-
 3 files changed, 21 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/5184df06/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index 7f89204..fbacd59 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -54,6 +54,7 @@ object TestHive
         .set("spark.sql.test", "")
         .set("spark.sql.hive.metastore.barrierPrefixes",
           "org.apache.spark.sql.hive.execution.PairSerDe")
+        .set("spark.sql.warehouse.dir", TestHiveContext.makeWarehouseDir().toURI.getPath)
         // SPARK-8910
         .set("spark.ui.enabled", "false")))
 
@@ -111,7 +112,6 @@ class TestHiveContext(
  * A [[SparkSession]] used in [[TestHiveContext]].
  *
  * @param sc SparkContext
- * @param warehousePath path to the Hive warehouse directory
  * @param scratchDirPath scratch directory used by Hive's metastore client
  * @param metastoreTemporaryConf configuration options for Hive's metastore
  * @param existingSharedState optional [[TestHiveSharedState]]
@@ -120,23 +120,15 @@ class TestHiveContext(
  */
 private[hive] class TestHiveSparkSession(
     @transient private val sc: SparkContext,
-    val warehousePath: File,
     scratchDirPath: File,
     metastoreTemporaryConf: Map[String, String],
     @transient private val existingSharedState: Option[TestHiveSharedState],
     private val loadTestTables: Boolean)
   extends SparkSession(sc) with Logging { self =>
 
-  // TODO: We need to set the temp warehouse path to sc's conf.
-  // Right now, In SparkSession, we will set the warehouse path to the default one
-  // instead of the temp one. Then, we override the setting in TestHiveSharedState
-  // when we creating metadataHive. This flow is not easy to follow and can introduce
-  // confusion when a developer is debugging an issue. We need to refactor this part
-  // to just set the temp warehouse path in sc's conf.
   def this(sc: SparkContext, loadTestTables: Boolean) {
     this(
       sc,
-      Utils.createTempDir(namePrefix = "warehouse"),
       TestHiveContext.makeScratchDir(),
       HiveUtils.newTemporaryConfiguration(useInMemoryDerby = false),
       None,
@@ -151,16 +143,16 @@ private[hive] class TestHiveSparkSession(
   @transient
   override lazy val sharedState: TestHiveSharedState = {
     existingSharedState.getOrElse(
-      new TestHiveSharedState(sc, warehousePath, scratchDirPath, metastoreTemporaryConf))
+      new TestHiveSharedState(sc, scratchDirPath, metastoreTemporaryConf))
   }
 
   @transient
   override lazy val sessionState: TestHiveSessionState =
-    new TestHiveSessionState(self, warehousePath)
+    new TestHiveSessionState(self)
 
   override def newSession(): TestHiveSparkSession = {
     new TestHiveSparkSession(
-      sc, warehousePath, scratchDirPath, metastoreTemporaryConf, Some(sharedState), loadTestTables)
+      sc, scratchDirPath, metastoreTemporaryConf, Some(sharedState), loadTestTables)
   }
 
   private var cacheTables: Boolean = false
@@ -199,6 +191,12 @@ private[hive] class TestHiveSparkSession(
     new File(Thread.currentThread().getContextClassLoader.getResource(path).getFile)
   }
 
+  def getWarehousePath(): String = {
+    val tempConf = new SQLConf
+    sc.conf.getAll.foreach { case (k, v) => tempConf.setConfString(k, v) }
+    tempConf.warehousePath
+  }
+
   val describedTable = "DESCRIBE (\\w+)".r
 
   case class TestTable(name: String, commands: (() => Unit)*)
@@ -509,21 +507,19 @@ private[hive] class TestHiveFunctionRegistry extends SimpleFunctionRegistry {
 
 private[hive] class TestHiveSharedState(
     sc: SparkContext,
-    warehousePath: File,
     scratchDirPath: File,
     metastoreTemporaryConf: Map[String, String])
   extends HiveSharedState(sc) {
 
   override lazy val metadataHive: HiveClient = {
     TestHiveContext.newClientForMetadata(
-      sc.conf, sc.hadoopConfiguration, warehousePath, scratchDirPath, metastoreTemporaryConf)
+      sc.conf, sc.hadoopConfiguration, scratchDirPath, metastoreTemporaryConf)
   }
 }
 
 
 private[hive] class TestHiveSessionState(
-    sparkSession: TestHiveSparkSession,
-    warehousePath: File)
+    sparkSession: TestHiveSparkSession)
   extends HiveSessionState(sparkSession) { self =>
 
   override lazy val conf: SQLConf = {
@@ -533,7 +529,6 @@ private[hive] class TestHiveSessionState(
       override def clear(): Unit = {
         super.clear()
         TestHiveContext.overrideConfs.foreach { case (k, v) => setConfString(k, v) }
-        setConfString("hive.metastore.warehouse.dir", self.warehousePath.toURI.toString)
       }
     }
   }
@@ -571,13 +566,12 @@ private[hive] object TestHiveContext {
   def newClientForMetadata(
       conf: SparkConf,
       hadoopConf: Configuration,
-      warehousePath: File,
       scratchDirPath: File,
       metastoreTemporaryConf: Map[String, String]): HiveClient = {
     HiveUtils.newClientForMetadata(
       conf,
       hadoopConf,
-      hiveClientConfigurations(hadoopConf, warehousePath, scratchDirPath, metastoreTemporaryConf))
+      hiveClientConfigurations(hadoopConf, scratchDirPath, metastoreTemporaryConf))
   }
 
   /**
@@ -585,18 +579,20 @@ private[hive] object TestHiveContext {
    */
   def hiveClientConfigurations(
       hadoopConf: Configuration,
-      warehousePath: File,
       scratchDirPath: File,
       metastoreTemporaryConf: Map[String, String]): Map[String, String] = {
     HiveUtils.hiveClientConfigurations(hadoopConf) ++ metastoreTemporaryConf ++ Map(
-      // Override WAREHOUSE_PATH and METASTOREWAREHOUSE to use the given path.
-      SQLConf.WAREHOUSE_PATH.key -> warehousePath.toURI.toString,
-      ConfVars.METASTOREWAREHOUSE.varname -> warehousePath.toURI.toString,
       ConfVars.METASTORE_INTEGER_JDO_PUSHDOWN.varname -> "true",
       ConfVars.SCRATCHDIR.varname -> scratchDirPath.toURI.toString,
       ConfVars.METASTORE_CLIENT_CONNECT_RETRY_DELAY.varname -> "1")
   }
 
+  def makeWarehouseDir(): File = {
+    val warehouseDir = Utils.createTempDir(namePrefix = "warehouse")
+    warehouseDir.delete()
+    warehouseDir
+  }
+
   def makeScratchDir(): File = {
     val scratchDir = Utils.createTempDir(namePrefix = "scratch")
     scratchDir.delete()

http://git-wip-us.apache.org/repos/asf/spark/blob/5184df06/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 31283b9..6785167 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -964,7 +964,7 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
         .mkString("/")
 
       // Loads partition data to a temporary table to verify contents
-      val path = s"${sparkSession.warehousePath}/dynamic_part_table/$partFolder/part-00000"
+      val path = s"${sparkSession.getWarehousePath}/dynamic_part_table/$partFolder/part-00000"
 
       sql("DROP TABLE IF EXISTS dp_verify")
       sql("CREATE TABLE dp_verify(intcol INT)")

http://git-wip-us.apache.org/repos/asf/spark/blob/5184df06/sql/hive/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala
index e461490..8d161a3 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala
@@ -353,7 +353,7 @@ class BucketedReadSuite extends QueryTest with SQLTestUtils with TestHiveSinglet
     withTable("bucketed_table") {
       df1.write.format("parquet").bucketBy(8, "i").saveAsTable("bucketed_table")
       val tableDir = new File(hiveContext
-        .sparkSession.warehousePath, "bucketed_table")
+        .sparkSession.getWarehousePath, "bucketed_table")
       Utils.deleteRecursively(tableDir)
       df1.write.parquet(tableDir.getAbsolutePath)
 

