You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by yu...@apache.org on 2020/11/10 12:33:48 UTC
[spark] branch branch-2.4 updated: [SPARK-33372][SQL][2.4] Fix InSet bucket pruning

This is an automated email from the ASF dual-hosted git repository.

yumwang pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-2.4 by this push:
     new efceeee  [SPARK-33372][SQL][2.4] Fix InSet bucket pruning
efceeee is described below

commit efceeeed7ebfd498b3010ef35d2c1388c2319c53
Author: Yuming Wang <yu...@ebay.com>
AuthorDate: Tue Nov 10 20:30:53 2020 +0800

    [SPARK-33372][SQL][2.4] Fix InSet bucket pruning
    
    ### What changes were proposed in this pull request?
    
    This is a backport of #30279.
    
    This PR fixes `InSet` bucket pruning, because the values of an `InSet` are not `Literal`s:
    https://github.com/apache/spark/blob/cbd3fdea62dab73fc4a96702de8fd1f07722da66/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala#L253-L255
    
    ### Why are the changes needed?
    
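    Fix a bug: the removed guard `hset.forall(_.isInstanceOf[Literal])` does not hold for the raw values stored in an `InSet`, so bucket pruning was skipped for `InSet` predicates.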
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Unit test
    
    Closes #30308 from wangyum/SPARK-33372-2.4.
    
    Authored-by: Yuming Wang <yu...@ebay.com>
    Signed-off-by: Yuming Wang <yu...@ebay.com>
---
 .../apache/spark/sql/execution/datasources/FileSourceStrategy.scala  | 5 ++---
 .../test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala  | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala
index fe27b78..9467293 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala
@@ -89,9 +89,8 @@ object FileSourceStrategy extends Strategy with Logging {
       case expressions.In(a: Attribute, list)
         if list.forall(_.isInstanceOf[Literal]) && a.name == bucketColumnName =>
         getBucketSetFromIterable(a, list.map(e => e.eval(EmptyRow)))
-      case expressions.InSet(a: Attribute, hset)
-        if hset.forall(_.isInstanceOf[Literal]) && a.name == bucketColumnName =>
-        getBucketSetFromIterable(a, hset.map(e => expressions.Literal(e).eval(EmptyRow)))
+      case expressions.InSet(a: Attribute, hset) if a.name == bucketColumnName =>
+        getBucketSetFromIterable(a, hset)
       case expressions.IsNull(a: Attribute) if a.name == bucketColumnName =>
         getBucketSetFromValue(a, null)
       case expressions.And(left, right) =>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala
index 42443b0..c01b7db 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala
@@ -173,7 +173,7 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils {
           df)
 
         // Case 4: InSet
-        val inSetExpr = expressions.InSet($"j".expr, Set(j, j + 1, j + 2, j + 3).map(lit(_).expr))
+        val inSetExpr = expressions.InSet($"j".expr, Set(j, j + 1, j + 2, j + 3))
         checkPrunedAnswers(
           bucketSpec,
           bucketValues = Seq(j, j + 1, j + 2, j + 3),


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org