You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2020/09/22 05:18:26 UTC
[GitHub] [spark] CodingCat commented on a change in pull request #29831: [SPARK-32351][SQL] pushdown partition filter in FileSourceScanExec and HiveTableScanExec

CodingCat commented on a change in pull request #29831:
URL: https://github.com/apache/spark/pull/29831#discussion_r492478729



##########
File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala
##########
@@ -46,30 +46,42 @@ abstract class PrunePartitionSuiteBase extends QueryTest with SQLTestUtils with
         }
 
         assertPrunedPartitions(
-          "SELECT * FROM t WHERE p = '1' OR (p = '2' AND i = 1)", 2)
+          "SELECT * FROM t WHERE p = '1' OR (p = '2' AND i = 1)", 2, 1)
         assertPrunedPartitions(
-          "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (i = 1 OR p = '2')", 4)
+           "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (i = 1 OR p = '2')", 4, 0)
         assertPrunedPartitions(
-          "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (p = '3' AND i = 3 )", 2)
+          "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (p = '3' AND i = 3 )", 2, 1)
         assertPrunedPartitions(
-          "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (p = '2' OR p = '3')", 3)
+          "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (p = '2' OR p = '3')", 3, 1)
         assertPrunedPartitions(
-          "SELECT * FROM t", 4)
+          "SELECT * FROM t", 4, expectedPushedDownFilterCount = 0)
         assertPrunedPartitions(
-          "SELECT * FROM t WHERE p = '1' AND i = 2", 1)
+          "SELECT * FROM t WHERE p = '1' AND i = 2", 1, 2)
         assertPrunedPartitions(
           """
             |SELECT i, COUNT(1) FROM (
             |SELECT * FROM t WHERE  p = '1' OR (p = '2' AND i = 1)
             |) tmp GROUP BY i
-          """.stripMargin, 2)
+          """.stripMargin, 2, 1)
       }
     }
   }
 
-  protected def assertPrunedPartitions(query: String, expected: Long): Unit = {
-    val plan = sql(query).queryExecution.sparkPlan
-    assert(getScanExecPartitionSize(plan) == expected)
+  protected def assertPrunedPartitions(
+      query: String,
+      expectedPartitionCount: Long,
+      expectedPushedDownFilterCount: Int): Unit = {
+    val qe = sql(query).queryExecution
+    val plan = qe.sparkPlan
+    assert(getScanExecPartitionSize(plan) == expectedPartitionCount)
+    val pushedDownPartitionFilters = qe.executedPlan.collectFirst {
+      case FileSourceScanExec(_, _, _, partitionFilters, _, _, _, _) =>
+        partitionFilters
+      case HiveTableScanExec(_, _, partitionFilters) =>
+        partitionFilters
+    }
+    assert(pushedDownPartitionFilters.isDefined &&
+      pushedDownPartitionFilters.get.length == expectedPushedDownFilterCount)

Review comment:
       Thanks for the suggestion! I agree the suggested way is cleaner.
   
   But with the latest ScalaTest, the failure message can actually tell you something like `pushedDownPartitionFilters is defined, but 'whatever' has length of 0`.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org