You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2015/04/14 01:02:22 UTC
spark git commit: [SPARK-5941] [SQL] Unit Test loads the table `src` twice for leftsemijoin.q

Repository: spark
Updated Branches:
  refs/heads/master e63a86abe -> c5602bdc3


[SPARK-5941] [SQL] Unit Test loads the table `src` twice for leftsemijoin.q

`leftsemijoin.q` already contains a data loading command for the table `sales`, but `TestHive` also creates and loads the table `sales`, which causes duplicate records to be inserted into `sales`.

Author: Cheng Hao <ha...@intel.com>

Closes #4506 from chenghao-intel/df_table and squashes the following commits:

0be05f7 [Cheng Hao] Remove the creation of table `sales` from TestHive


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c5602bdc
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c5602bdc
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c5602bdc

Branch: refs/heads/master
Commit: c5602bdc310cc8f82dc304500bebe40217cba785
Parents: e63a86a
Author: Cheng Hao <ha...@intel.com>
Authored: Mon Apr 13 16:02:18 2015 -0700
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Mon Apr 13 16:02:18 2015 -0700

----------------------------------------------------------------------
 .../sql/columnar/InMemoryColumnarQuerySuite.scala    |  2 +-
 .../org/apache/spark/sql/hive/test/TestHive.scala    |  6 ------
 .../leftsemijoin-10-89737a8857b5b61cc909e0c797f86aea |  2 --
 .../leftsemijoin-8-73cad58a10a1483ccb15e94a857013    |  2 --
 .../spark/sql/hive/execution/HiveSerDeSuite.scala    | 15 +++++++++++----
 5 files changed, 12 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/c5602bdc/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala
index 27dfabc..479210d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala
@@ -42,7 +42,7 @@ class InMemoryColumnarQuerySuite extends QueryTest {
       .toDF().registerTempTable("sizeTst")
     cacheTable("sizeTst")
     assert(
-      table("sizeTst").queryExecution.logical.statistics.sizeInBytes >
+      table("sizeTst").queryExecution.analyzed.statistics.sizeInBytes >
         conf.autoBroadcastJoinThreshold)
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/c5602bdc/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index a3497ea..6570fa1 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -262,12 +262,6 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) {
          |WITH SERDEPROPERTIES ('field.delim'='\\t')
        """.stripMargin.cmd,
       "INSERT OVERWRITE TABLE serdeins SELECT * FROM src".cmd),
-    TestTable("sales",
-      s"""CREATE TABLE IF NOT EXISTS sales (key STRING, value INT)
-         |ROW FORMAT SERDE '${classOf[RegexSerDe].getCanonicalName}'
-         |WITH SERDEPROPERTIES ("input.regex" = "([^ ]*)\t([^ ]*)")
-       """.stripMargin.cmd,
-      s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/sales.txt")}' INTO TABLE sales".cmd),
     TestTable("episodes",
       s"""CREATE TABLE episodes (title STRING, air_date STRING, doctor INT)
          |ROW FORMAT SERDE '${classOf[AvroSerDe].getCanonicalName}'

http://git-wip-us.apache.org/repos/asf/spark/blob/c5602bdc/sql/hive/src/test/resources/golden/leftsemijoin-10-89737a8857b5b61cc909e0c797f86aea
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin-10-89737a8857b5b61cc909e0c797f86aea b/sql/hive/src/test/resources/golden/leftsemijoin-10-89737a8857b5b61cc909e0c797f86aea
index 25ce912..a1963ba 100644
--- a/sql/hive/src/test/resources/golden/leftsemijoin-10-89737a8857b5b61cc909e0c797f86aea
+++ b/sql/hive/src/test/resources/golden/leftsemijoin-10-89737a8857b5b61cc909e0c797f86aea
@@ -1,4 +1,2 @@
 Hank	2
-Hank	2
-Joe	2
 Joe	2

http://git-wip-us.apache.org/repos/asf/spark/blob/c5602bdc/sql/hive/src/test/resources/golden/leftsemijoin-8-73cad58a10a1483ccb15e94a857013
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin-8-73cad58a10a1483ccb15e94a857013 b/sql/hive/src/test/resources/golden/leftsemijoin-8-73cad58a10a1483ccb15e94a857013
index 25ce912..a1963ba 100644
--- a/sql/hive/src/test/resources/golden/leftsemijoin-8-73cad58a10a1483ccb15e94a857013
+++ b/sql/hive/src/test/resources/golden/leftsemijoin-8-73cad58a10a1483ccb15e94a857013
@@ -1,4 +1,2 @@
 Hank	2
-Hank	2
-Joe	2
 Joe	2

http://git-wip-us.apache.org/repos/asf/spark/blob/c5602bdc/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala
index d05e11f..5586a79 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala
@@ -25,18 +25,25 @@ import org.apache.spark.sql.hive.test.TestHive
  * A set of tests that validates support for Hive SerDe.
  */
 class HiveSerDeSuite extends HiveComparisonTest with BeforeAndAfterAll {
-
   override def beforeAll(): Unit = {
+    import TestHive._
+    import org.apache.hadoop.hive.serde2.RegexSerDe
+      super.beforeAll()
     TestHive.cacheTables = false
-    super.beforeAll()
+    sql(s"""CREATE TABLE IF NOT EXISTS sales (key STRING, value INT)
+       |ROW FORMAT SERDE '${classOf[RegexSerDe].getCanonicalName}'
+       |WITH SERDEPROPERTIES ("input.regex" = "([^ ]*)\t([^ ]*)")
+       """.stripMargin)
+    sql(s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/sales.txt")}' INTO TABLE sales")
   }
 
+  // table sales is not a cache table, and will be clear after reset
+  createQueryTest("Read with RegexSerDe", "SELECT * FROM sales", false)
+
   createQueryTest(
     "Read and write with LazySimpleSerDe (tab separated)",
     "SELECT * from serdeins")
 
-  createQueryTest("Read with RegexSerDe", "SELECT * FROM sales")
-
   createQueryTest("Read with AvroSerDe", "SELECT * FROM episodes")
 
   createQueryTest("Read Partitioned with AvroSerDe", "SELECT * FROM episodes_part")


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org