You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2021/01/03 02:25:16 UTC
[spark] branch branch-3.0 updated: [SPARK-33963][SQL] Canonicalize
`HiveTableRelation` w/o table stats
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new dda431a [SPARK-33963][SQL] Canonicalize `HiveTableRelation` w/o table stats
dda431a is described below
commit dda431ad974802a8f6413c9aac267373634da0fe
Author: Max Gekk <ma...@gmail.com>
AuthorDate: Sun Jan 3 11:23:46 2021 +0900
[SPARK-33963][SQL] Canonicalize `HiveTableRelation` w/o table stats
### What changes were proposed in this pull request?
Skip table stats when canonicalizing `HiveTableRelation`.
### Why are the changes needed?
The changes fix a regression compared to Spark 3.0; see SPARK-33963.
### Does this PR introduce _any_ user-facing change?
Yes. After the changes, Spark behaves as in version 3.0.1.
### How was this patch tested?
By running new UT:
```
$ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *CachedTableSuite"
```
Closes #30995 from MaxGekk/fix-caching-hive-table.
Authored-by: Max Gekk <ma...@gmail.com>
Signed-off-by: HyukjinKwon <gu...@apache.org>
(cherry picked from commit fc7d0165d29e04a8e78577c853a701bdd8a2af4a)
Signed-off-by: HyukjinKwon <gu...@apache.org>
---
.../org/apache/spark/sql/catalyst/catalog/interface.scala | 3 ++-
.../scala/org/apache/spark/sql/hive/CachedTableSuite.scala | 10 ++++++++++
2 files changed, 12 insertions(+), 1 deletion(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index 4e63ee7..7d9b57a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -676,7 +676,8 @@ case class HiveTableRelation(
},
partitionCols = partitionCols.zipWithIndex.map {
case (attr, index) => attr.withExprId(ExprId(index + dataCols.length))
- }
+ },
+ tableStats = None
)
override def computeStats(): Statistics = {
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala
index fc79353..6c0ab2f 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala
@@ -429,4 +429,14 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleto
}
}
}
+
+ test("SPARK-33963: do not use table stats while looking in table cache") {
+ val t = "table_on_test"
+ withTable(t) {
+ sql(s"CREATE TABLE $t (col int)")
+ assert(!spark.catalog.isCached(t))
+ sql(s"CACHE TABLE $t")
+ assert(spark.catalog.isCached(t))
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org