You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2019/02/22 06:08:47 UTC
[spark] branch master updated: [SPARK-26930][SQL] Tests in ParquetFilterSuite don't verify filter class
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 0663797 [SPARK-26930][SQL] Tests in ParquetFilterSuite don't verify filter class
0663797 is described below
commit 066379783af154f1c9e2fae6daaf444b6e383ab0
Author: nandorKollar <na...@users.noreply.github.com>
AuthorDate: Fri Feb 22 14:07:55 2019 +0800
[SPARK-26930][SQL] Tests in ParquetFilterSuite don't verify filter class
## What changes were proposed in this pull request?
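Add an assertion to ParquetFilterSuite that verifies the class of the pushed-down Parquet filter predicate. Previously the class comparison was evaluated inside a `foreach` and its result discarded, so the tests never actually checked the filter class. To make the check meaningful, the InferFiltersFromConstraints optimizer rule is excluded in these tests, and the expected class for negated `startsWith` predicates is corrected to `Operators.Not`.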
## How was this patch tested?
Ran ParquetFilterSuite; all tests passed.
Closes #23855 from nandorKollar/SPARK-26930.
Lead-authored-by: nandorKollar <na...@users.noreply.github.com>
Co-authored-by: Hyukjin Kwon <gu...@gmail.com>
Co-authored-by: Nandor Kollar <nk...@cloudera.com>
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
.../datasources/parquet/ParquetFilterSuite.scala | 20 ++++++++++++++------
1 file changed, 14 insertions(+), 6 deletions(-)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
index 9cfc943..255f7db 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
@@ -29,6 +29,7 @@ import org.apache.spark.SparkException
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.optimizer.InferFiltersFromConstraints
import org.apache.spark.sql.catalyst.planning.PhysicalOperation
import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, HadoopFsRelation, LogicalRelation}
import org.apache.spark.sql.functions._
@@ -91,6 +92,10 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex
SQLConf.PARQUET_FILTER_PUSHDOWN_TIMESTAMP_ENABLED.key -> "true",
SQLConf.PARQUET_FILTER_PUSHDOWN_DECIMAL_ENABLED.key -> "true",
SQLConf.PARQUET_FILTER_PUSHDOWN_STRING_STARTSWITH_ENABLED.key -> "true",
+ // Disable adding filters from constraints because it adds, for instance,
+ // is-not-null to pushed filters, which makes it hard to test if the pushed
+ // filter is expected or not (this had to be fixed with SPARK-13495).
+ SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> InferFiltersFromConstraints.ruleName,
SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") {
val query = df
.select(output.map(e => Column(e)): _*)
@@ -109,13 +114,16 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex
DataSourceStrategy.selectFilters(maybeRelation.get, maybeAnalyzedPredicate.toSeq)
assert(selectedFilters.nonEmpty, "No filter is pushed down")
- selectedFilters.foreach { pred =>
+ val pushedParquetFilters = selectedFilters.map { pred =>
val maybeFilter = parquetFilters.createFilter(
new SparkToParquetSchemaConverter(conf).convert(df.schema), pred)
assert(maybeFilter.isDefined, s"Couldn't generate filter predicate for $pred")
- // Doesn't bother checking type parameters here (e.g. `Eq[Integer]`)
- maybeFilter.exists(_.getClass === filterClass)
+ maybeFilter.get
}
+ // Doesn't bother checking type parameters here (e.g. `Eq[Integer]`)
+ assert(pushedParquetFilters.exists(_.getClass === filterClass),
+ s"${pushedParquetFilters.map(_.getClass).toList} did not contain ${filterClass}.")
+
checker(stripSparkFilter(query), expected)
}
}
@@ -1073,20 +1081,20 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex
checkFilterPredicate(
!'_1.startsWith("").asInstanceOf[Predicate],
- classOf[UserDefinedByInstance[_, _]],
+ classOf[Operators.Not],
Seq().map(Row(_)))
Seq("2", "2s", "2st", "2str", "2str2").foreach { prefix =>
checkFilterPredicate(
!'_1.startsWith(prefix).asInstanceOf[Predicate],
- classOf[UserDefinedByInstance[_, _]],
+ classOf[Operators.Not],
Seq("1str1", "3str3", "4str4").map(Row(_)))
}
Seq("2S", "null", "2str22").foreach { prefix =>
checkFilterPredicate(
!'_1.startsWith(prefix).asInstanceOf[Predicate],
- classOf[UserDefinedByInstance[_, _]],
+ classOf[Operators.Not],
Seq("1str1", "2str2", "3str3", "4str4").map(Row(_)))
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org