You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2019/02/22 06:08:47 UTC
[spark] branch master updated: [SPARK-26930][SQL] Tests in ParquetFilterSuite don't verify filter class

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 0663797  [SPARK-26930][SQL] Tests in ParquetFilterSuite don't verify filter class
0663797 is described below

commit 066379783af154f1c9e2fae6daaf444b6e383ab0
Author: nandorKollar <na...@users.noreply.github.com>
AuthorDate: Fri Feb 22 14:07:55 2019 +0800

    [SPARK-26930][SQL] Tests in ParquetFilterSuite don't verify filter class
    
    ## What changes were proposed in this pull request?
    
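    Add an assertion in ParquetFilterSuite that verifies the class of the generated Parquet filter predicate. Previously the class comparison was evaluated but its result was discarded, so a mismatched filter class never failed the test.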
    
    ## How was this patch tested?
    
    Ran ParquetFilterSuite; the tests passed.
    
    Closes #23855 from nandorKollar/SPARK-26930.
    
    Lead-authored-by: nandorKollar <na...@users.noreply.github.com>
    Co-authored-by: Hyukjin Kwon <gu...@gmail.com>
    Co-authored-by: Nandor Kollar <nk...@cloudera.com>
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
 .../datasources/parquet/ParquetFilterSuite.scala     | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
index 9cfc943..255f7db 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
@@ -29,6 +29,7 @@ import org.apache.spark.SparkException
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.optimizer.InferFiltersFromConstraints
 import org.apache.spark.sql.catalyst.planning.PhysicalOperation
 import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, HadoopFsRelation, LogicalRelation}
 import org.apache.spark.sql.functions._
@@ -91,6 +92,10 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex
       SQLConf.PARQUET_FILTER_PUSHDOWN_TIMESTAMP_ENABLED.key -> "true",
       SQLConf.PARQUET_FILTER_PUSHDOWN_DECIMAL_ENABLED.key -> "true",
       SQLConf.PARQUET_FILTER_PUSHDOWN_STRING_STARTSWITH_ENABLED.key -> "true",
+      // Disable adding filters from constraints because it adds, for instance,
+      // is-not-null to pushed filters, which makes it hard to test if the pushed
+      // filter is expected or not (this had to be fixed with SPARK-13495).
+      SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> InferFiltersFromConstraints.ruleName,
       SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") {
         val query = df
           .select(output.map(e => Column(e)): _*)
@@ -109,13 +114,16 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex
           DataSourceStrategy.selectFilters(maybeRelation.get, maybeAnalyzedPredicate.toSeq)
         assert(selectedFilters.nonEmpty, "No filter is pushed down")
 
-        selectedFilters.foreach { pred =>
+        val pushedParquetFilters = selectedFilters.map { pred =>
           val maybeFilter = parquetFilters.createFilter(
             new SparkToParquetSchemaConverter(conf).convert(df.schema), pred)
           assert(maybeFilter.isDefined, s"Couldn't generate filter predicate for $pred")
-          // Doesn't bother checking type parameters here (e.g. `Eq[Integer]`)
-          maybeFilter.exists(_.getClass === filterClass)
+          maybeFilter.get
         }
+        // Doesn't bother checking type parameters here (e.g. `Eq[Integer]`)
+        assert(pushedParquetFilters.exists(_.getClass === filterClass),
+          s"${pushedParquetFilters.map(_.getClass).toList} did not contain ${filterClass}.")
+
         checker(stripSparkFilter(query), expected)
     }
   }
@@ -1073,20 +1081,20 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex
 
       checkFilterPredicate(
         !'_1.startsWith("").asInstanceOf[Predicate],
-        classOf[UserDefinedByInstance[_, _]],
+        classOf[Operators.Not],
         Seq().map(Row(_)))
 
       Seq("2", "2s", "2st", "2str", "2str2").foreach { prefix =>
         checkFilterPredicate(
           !'_1.startsWith(prefix).asInstanceOf[Predicate],
-          classOf[UserDefinedByInstance[_, _]],
+          classOf[Operators.Not],
           Seq("1str1", "3str3", "4str4").map(Row(_)))
       }
 
       Seq("2S", "null", "2str22").foreach { prefix =>
         checkFilterPredicate(
           !'_1.startsWith(prefix).asInstanceOf[Predicate],
-          classOf[UserDefinedByInstance[_, _]],
+          classOf[Operators.Not],
           Seq("1str1", "2str2", "3str3", "4str4").map(Row(_)))
       }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org