Posted to commits@spark.apache.org by we...@apache.org on 2020/05/16 09:12:53 UTC

[spark] branch master updated: [SPARK-31289][TEST][TEST-HIVE1.2] Eliminate org.apache.spark.sql.hive.thriftserver.CliSuite flakiness

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 1d66085  [SPARK-31289][TEST][TEST-HIVE1.2] Eliminate org.apache.spark.sql.hive.thriftserver.CliSuite flakiness
1d66085 is described below

commit 1d66085a93a875247f19d710a5b5458ce1842c73
Author: Kent Yao <ya...@hotmail.com>
AuthorDate: Sat May 16 09:11:21 2020 +0000

    [SPARK-31289][TEST][TEST-HIVE1.2] Eliminate org.apache.spark.sql.hive.thriftserver.CliSuite flakiness
    
    ### What changes were proposed in this pull request?
    
    CliSuite seems to be flaky when using a separate metastoreDir per test.
    https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/120470/testReport/org.apache.spark.sql.hive.thriftserver/CliSuite/
    
    https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/120470/testReport/junit/org.apache.spark.sql.hive.thriftserver/CliSuite/history/
    
    According to the error stack trace in the failed test, the test failed to instantiate a Hive metastore client because of a Derby requirement:
    ```
    Caused by: ERROR XBM0A:
     The database directory '/home/jenkins/workspace/SparkPullRequestBuilder/target/tmp/spark-9249ce52-0a06-42b6-a3df-e6295e880df0' exists.
    However, it does not contain the expected 'service.properties' file.
    Perhaps Derby was brought down in the middle of creating this database.
    You may want to delete this directory and try creating the database again.
    ```
    
    Derby requires that the metastore directory not exist beforehand, but it does exist, most likely because the preceding test case failed to clear its metastore dir.
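    
    For illustration, a minimal sketch of the isolation pattern used for the tests that still need their own metastore (it mirrors the diff below; `Utils.createTempDir` and `Utils.deleteRecursively` are Spark's existing test utilities):
    
    ```scala
    // Reserve a unique path, then remove the directory itself: Derby must
    // create the database directory on its own, or it fails with ERROR XBM0A.
    val metastore = Utils.createTempDir()
    metastore.delete()
    try {
      // `create=true` lets Derby create the database at the now-absent path.
      val jdbcUrl = s"jdbc:derby:;databaseName=$metastore;create=true"
      // ... launch the CLI against this metastore and run the test queries ...
    } finally {
      // Always clean up, so a later test never sees a half-created database.
      Utils.deleteRecursively(metastore)
    }
    ```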
    
    In this PR, the metastore is shared across the tests of CliSuite, except for those that explicitly ask for a separate metastore environment of their own.
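    
    A condensed sketch of the resulting `runCliWithin` signature (the full version is in the diff below; the name and type of the leading timeout parameter are assumed from the call sites, and `???` is a placeholder body):
    
    ```scala
    def runCliWithin(
        timeout: FiniteDuration,
        extraArgs: Seq[String] = Seq.empty,
        errorResponses: Seq[String] = Seq("Error:"),
        maybeWarehouse: Option[File] = Some(warehousePath),
        useExternalHiveFile: Boolean = false,
        metastore: File = metastorePath)(  // new: shared suite-wide by default
        queriesAndExpectedAnswers: (String, String)*): Unit = ???
    ```
    
    Tests that need isolation pass their own temp dir as `metastore`; everything else reuses the suite-wide Derby database, so no per-test directory is left behind to break the next run.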
    
    ### Why are the changes needed?
    
    CliSuite seems to be flaky when using a separate metastoreDir per test. This change eliminates that flakiness.

    ### Does this PR introduce any user-facing change?
    
    No.

    ### How was this patch tested?
    
    Modified existing tests.
    
    Closes #28055 from yaooqinn/clisuite.
    
    Authored-by: Kent Yao <ya...@hotmail.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../spark/sql/hive/thriftserver/CliSuite.scala     | 96 +++++++++++++---------
 1 file changed, 59 insertions(+), 37 deletions(-)

diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
index 17df955..ea1a371 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -39,7 +39,7 @@ import org.apache.spark.util.{ThreadUtils, Utils}
 /**
  * A test suite for the `spark-sql` CLI tool.
  */
-class CliSuite extends SparkFunSuite with BeforeAndAfterAll with BeforeAndAfterEach with Logging {
+class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
   val warehousePath = Utils.createTempDir()
   val metastorePath = Utils.createTempDir()
   val scratchDirPath = Utils.createTempDir()
@@ -62,12 +62,6 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with BeforeAndAfterE
     }
   }
 
-  override def afterEach(): Unit = {
-    // Only running `runCliWithin` in a single test case will share the same temporary
-    // Hive metastore
-    Utils.deleteRecursively(metastorePath)
-  }
-
   /**
    * Run a CLI operation and expect all the queries and expected answers to be returned.
    *
@@ -77,6 +71,12 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with BeforeAndAfterE
    *                       is taken as an immediate error condition. That is: if a line containing
    *                       one of these strings is found, fail the test immediately.
    *                       The default value is `Seq("Error:")`
+   * @param maybeWarehouse an option for warehouse path, which will be set via
+   *                       `hive.metastore.warehouse.dir`.
+   * @param useExternalHiveFile whether to load the hive-site.xml from `src/test/noclasspath` or
+   *                            not, disabled by default
+   * @param metastore the path where the embedded Derby database for the metastore is located;
+   *                  defaults to the global `metastorePath`
    * @param queriesAndExpectedAnswers one or more tuples of query + answer
    */
   def runCliWithin(
@@ -84,7 +84,8 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with BeforeAndAfterE
       extraArgs: Seq[String] = Seq.empty,
       errorResponses: Seq[String] = Seq("Error:"),
       maybeWarehouse: Option[File] = Some(warehousePath),
-      useExternalHiveFile: Boolean = false)(
+      useExternalHiveFile: Boolean = false,
+      metastore: File = metastorePath)(
       queriesAndExpectedAnswers: (String, String)*): Unit = {
 
     // Explicitly adds ENTER for each statement to make sure they are actually entered into the CLI.
@@ -116,7 +117,7 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with BeforeAndAfterE
       maybeWarehouse.map(dir => s"--hiveconf ${ConfVars.METASTOREWAREHOUSE}=$dir").getOrElse("")
     val command = {
       val cliScript = "../../bin/spark-sql".split("/").mkString(File.separator)
-      val jdbcUrl = s"jdbc:derby:;databaseName=$metastorePath;create=true"
+      val jdbcUrl = s"jdbc:derby:;databaseName=$metastore;create=true"
       s"""$cliScript
          |  --master local
          |  --driver-java-options -Dderby.system.durability=test
@@ -202,9 +203,18 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with BeforeAndAfterE
   }
 
   test("load warehouse dir from hive-site.xml") {
-    runCliWithin(1.minute, maybeWarehouse = None, useExternalHiveFile = true)(
-      "desc database default;" -> "hive_one",
-      "set spark.sql.warehouse.dir;" -> "hive_one")
+    val metastore = Utils.createTempDir()
+    metastore.delete()
+    try {
+      runCliWithin(1.minute,
+        maybeWarehouse = None,
+        useExternalHiveFile = true,
+        metastore = metastore)(
+        "desc database default;" -> "hive_one",
+        "set spark.sql.warehouse.dir;" -> "hive_one")
+    } finally {
+      Utils.deleteRecursively(metastore)
+    }
   }
 
   test("load warehouse dir from --hiveconf") {
@@ -218,35 +228,47 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with BeforeAndAfterE
 
   test("load warehouse dir from --conf spark(.hadoop).hive.*") {
     // override conf from hive-site.xml
-    runCliWithin(
-      2.minute,
-      extraArgs = Seq("--conf", s"spark.hadoop.${ConfVars.METASTOREWAREHOUSE}=$sparkWareHouseDir"),
-      maybeWarehouse = None,
-      useExternalHiveFile = true)(
-      "desc database default;" -> sparkWareHouseDir.getAbsolutePath,
-      "create database cliTestDb;" -> "",
-      "desc database cliTestDb;" -> sparkWareHouseDir.getAbsolutePath,
-      "set spark.sql.warehouse.dir;" -> sparkWareHouseDir.getAbsolutePath)
-
-    // override conf from --hiveconf too
-    runCliWithin(
-      2.minute,
-      extraArgs = Seq("--conf", s"spark.${ConfVars.METASTOREWAREHOUSE}=$sparkWareHouseDir"))(
-      "desc database default;" -> sparkWareHouseDir.getAbsolutePath,
-      "create database cliTestDb;" -> "",
-      "desc database cliTestDb;" -> sparkWareHouseDir.getAbsolutePath,
-      "set spark.sql.warehouse.dir;" -> sparkWareHouseDir.getAbsolutePath)
+    val metastore = Utils.createTempDir()
+    metastore.delete()
+    try {
+      runCliWithin(2.minute,
+        extraArgs =
+          Seq("--conf", s"spark.hadoop.${ConfVars.METASTOREWAREHOUSE}=$sparkWareHouseDir"),
+        maybeWarehouse = None,
+        useExternalHiveFile = true,
+        metastore = metastore)(
+        "desc database default;" -> sparkWareHouseDir.getAbsolutePath,
+        "create database cliTestDb;" -> "",
+        "desc database cliTestDb;" -> sparkWareHouseDir.getAbsolutePath,
+        "set spark.sql.warehouse.dir;" -> sparkWareHouseDir.getAbsolutePath)
+
+      // override conf from --hiveconf too
+      runCliWithin(2.minute,
+        extraArgs = Seq("--conf", s"spark.${ConfVars.METASTOREWAREHOUSE}=$sparkWareHouseDir"),
+        metastore = metastore)(
+        "desc database default;" -> sparkWareHouseDir.getAbsolutePath,
+        "create database cliTestDb;" -> "",
+        "desc database cliTestDb;" -> sparkWareHouseDir.getAbsolutePath,
+        "set spark.sql.warehouse.dir;" -> sparkWareHouseDir.getAbsolutePath)
+    } finally {
+      Utils.deleteRecursively(metastore)
+    }
   }
 
   test("load warehouse dir from spark.sql.warehouse.dir") {
     // spark.sql.warehouse.dir overrides all hive ones
-    runCliWithin(
-      2.minute,
-      extraArgs =
-        Seq("--conf",
-          s"${StaticSQLConf.WAREHOUSE_PATH.key}=${sparkWareHouseDir}1",
-          "--conf", s"spark.hadoop.${ConfVars.METASTOREWAREHOUSE}=${sparkWareHouseDir}2"))(
-      "desc database default;" -> sparkWareHouseDir.getAbsolutePath.concat("1"))
+    val metastore = Utils.createTempDir()
+    metastore.delete()
+    try {
+      runCliWithin(2.minute,
+        extraArgs = Seq(
+            "--conf", s"${StaticSQLConf.WAREHOUSE_PATH.key}=${sparkWareHouseDir}1",
+            "--conf", s"spark.hadoop.${ConfVars.METASTOREWAREHOUSE}=${sparkWareHouseDir}2"),
+        metastore = metastore)(
+        "desc database default;" -> sparkWareHouseDir.getAbsolutePath.concat("1"))
+    } finally {
+      Utils.deleteRecursively(metastore)
+    }
   }
 
   test("Simple commands") {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org