You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2016/04/21 02:56:34 UTC
spark git commit: [SPARK-14775][SQL] Remove TestHiveSparkSession.rewritePaths
Repository: spark
Updated Branches:
refs/heads/master f47dbf27f -> 24f338ba7
[SPARK-14775][SQL] Remove TestHiveSparkSession.rewritePaths
## What changes were proposed in this pull request?
The path rewrite in TestHiveSparkSession is pretty hacky. I think we can remove that complexity and just do a string replacement when we read the query files in. This would remove the override of runNativeSql in TestHive, which will simplify the removal of Hive-specific variable substitution.
## How was this patch tested?
This is a small test refactoring to simplify test infrastructure.
Author: Reynold Xin <rx...@databricks.com>
Closes #12543 from rxin/SPARK-14775.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/24f338ba
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/24f338ba
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/24f338ba
Branch: refs/heads/master
Commit: 24f338ba7b34df493dd49bbc354d08f5e3afbb85
Parents: f47dbf2
Author: Reynold Xin <rx...@databricks.com>
Authored: Wed Apr 20 17:56:31 2016 -0700
Committer: Reynold Xin <rx...@databricks.com>
Committed: Wed Apr 20 17:56:31 2016 -0700
----------------------------------------------------------------------
.../org/apache/spark/sql/hive/test/TestHive.scala | 18 ------------------
.../src/test/resources/hive-test-path-helper.txt | 1 +
.../sql/hive/execution/HiveComparisonTest.scala | 14 +++++++++++++-
.../sql/hive/execution/HiveQueryFileTest.scala | 6 +++---
4 files changed, 17 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/24f338ba/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index e629099..2bb1399 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -182,19 +182,6 @@ private[hive] class TestHiveSparkSession(
Option(System.getenv(envVar)).map(new File(_))
}
- /**
- * Replaces relative paths to the parent directory "../" with hiveDevHome since this is how the
- * hive test cases assume the system is set up.
- */
- private[hive] def rewritePaths(cmd: String): String =
- if (cmd.toUpperCase contains "LOAD DATA") {
- val testDataLocation =
- hiveDevHome.map(_.getCanonicalPath).getOrElse(inRepoTests.getCanonicalPath)
- cmd.replaceAll("\\.\\./\\.\\./", testDataLocation + "/")
- } else {
- cmd
- }
-
val hiveFilesTemp = File.createTempFile("catalystHiveFiles", "")
hiveFilesTemp.delete()
hiveFilesTemp.mkdir()
@@ -566,11 +553,6 @@ private[hive] class TestHiveSessionState(sparkSession: TestHiveSparkSession)
override def executePlan(plan: LogicalPlan): TestHiveQueryExecution = {
new TestHiveQueryExecution(sparkSession, plan)
}
-
- // Override so we can intercept relative paths and rewrite them to point at hive.
- override def runNativeSql(sql: String): Seq[String] = {
- super.runNativeSql(sparkSession.rewritePaths(substitutor.substitute(hiveconf, sql)))
- }
}
http://git-wip-us.apache.org/repos/asf/spark/blob/24f338ba/sql/hive/src/test/resources/hive-test-path-helper.txt
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/hive-test-path-helper.txt b/sql/hive/src/test/resources/hive-test-path-helper.txt
new file mode 100644
index 0000000..356b131
--- /dev/null
+++ b/sql/hive/src/test/resources/hive-test-path-helper.txt
@@ -0,0 +1 @@
+This file is here so we can match on it and find the path to the current folder.
http://git-wip-us.apache.org/repos/asf/spark/blob/24f338ba/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
index bd46cb9..994dc4a 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
@@ -48,6 +48,17 @@ abstract class HiveComparisonTest
extends SparkFunSuite with BeforeAndAfterAll with GivenWhenThen {
/**
+ * Path to the test datasets. We find this by looking up "hive-test-path-helper.txt" file.
+ *
+ * Before we run the query in Spark, we replace "../../data" with this path.
+ */
+ private val testDataPath: String = {
+ Thread.currentThread.getContextClassLoader
+ .getResource("hive-test-path-helper.txt")
+ .getPath.replace("/hive-test-path-helper.txt", "/data")
+ }
+
+ /**
* When set, any cache files that result in test failures will be deleted. Used when the test
* harness or hive have been updated thus requiring new golden answers to be computed for some
* tests. Also prevents the classpath being used when looking for golden answers as these are
@@ -386,7 +397,8 @@ abstract class HiveComparisonTest
var query: TestHiveQueryExecution = null
try {
query = {
- val originalQuery = new TestHiveQueryExecution(queryString)
+ val originalQuery = new TestHiveQueryExecution(
+ queryString.replace("../../data", testDataPath))
val containsCommands = originalQuery.analyzed.collectFirst {
case _: Command => ()
case _: LogicalInsertIntoHiveTable => ()
http://git-wip-us.apache.org/repos/asf/spark/blob/24f338ba/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQueryFileTest.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQueryFileTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQueryFileTest.scala
index f96c989..e772324 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQueryFileTest.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQueryFileTest.scala
@@ -40,14 +40,14 @@ abstract class HiveQueryFileTest extends HiveComparisonTest {
def testCases: Seq[(String, File)]
- val runAll =
+ val runAll: Boolean =
!(System.getProperty("spark.hive.alltests") == null) ||
runOnlyDirectories.nonEmpty ||
skipDirectories.nonEmpty
- val whiteListProperty = "spark.hive.whitelist"
+ val whiteListProperty: String = "spark.hive.whitelist"
// Allow the whiteList to be overridden by a system property
- val realWhiteList =
+ val realWhiteList: Seq[String] =
Option(System.getProperty(whiteListProperty)).map(_.split(",").toSeq).getOrElse(whiteList)
// Go through all the test cases and add them to scala test.
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org