You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2021/01/03 02:24:40 UTC

[spark] branch branch-3.1 updated: [SPARK-33963][SQL] Canonicalize `HiveTableRelation` w/o table stats

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 1a4b6f4  [SPARK-33963][SQL] Canonicalize `HiveTableRelation` w/o table stats
1a4b6f4 is described below

commit 1a4b6f46dd1a064b05190db9f59cff50c598f368
Author: Max Gekk <ma...@gmail.com>
AuthorDate: Sun Jan 3 11:23:46 2021 +0900

    [SPARK-33963][SQL] Canonicalize `HiveTableRelation` w/o table stats
    
    ### What changes were proposed in this pull request?
    Skip table stats when canonicalizing `HiveTableRelation`.
    
    ### Why are the changes needed?
    The changes fix a regression compared to Spark 3.0; see SPARK-33963.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes. After the changes, Spark behaves as it did in version 3.0.1.
    
    ### How was this patch tested?
    By running new UT:
    ```
    $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *CachedTableSuite"
    ```
    
    Closes #30995 from MaxGekk/fix-caching-hive-table.
    
    Authored-by: Max Gekk <ma...@gmail.com>
    Signed-off-by: HyukjinKwon <gu...@apache.org>
    (cherry picked from commit fc7d0165d29e04a8e78577c853a701bdd8a2af4a)
    Signed-off-by: HyukjinKwon <gu...@apache.org>
---
 .../org/apache/spark/sql/catalyst/catalog/interface.scala      |  3 ++-
 .../scala/org/apache/spark/sql/hive/CachedTableSuite.scala     | 10 ++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index 9876ee3..0c40afa 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -747,7 +747,8 @@ case class HiveTableRelation(
     },
     partitionCols = partitionCols.zipWithIndex.map {
       case (attr, index) => attr.withExprId(ExprId(index + dataCols.length))
-    }
+    },
+    tableStats = None
   )
 
   override def computeStats(): Statistics = {
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala
index 81c3f27..ea79c53 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala
@@ -429,4 +429,14 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleto
       }
     }
   }
+
+  test("SPARK-33963: do not use table stats while looking in table cache") {
+    val t = "table_on_test"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (col int)")
+      assert(!spark.catalog.isCached(t))
+      sql(s"CACHE TABLE $t")
+      assert(spark.catalog.isCached(t))
+    }
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org