You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2014/12/30 21:16:48 UTC
spark git commit: [SPARK-4493][SQL] Tests for IsNull / IsNotNull in
the ParquetFilterSuite
Repository: spark
Updated Branches:
refs/heads/master 53f0a00b6 -> 19a8802e7
[SPARK-4493][SQL] Tests for IsNull / IsNotNull in the ParquetFilterSuite
This is a follow-up of #3367 and #3644.
At the time #3644 was written, #3367 hadn't been merged yet, thus `IsNull` and `IsNotNull` filters are not covered in the first version of `ParquetFilterSuite`. This PR adds corresponding test cases.
<!-- Reviewable:start -->
[<img src="https://reviewable.io/review_button.png" height=40 alt="Review on Reviewable"/>](https://reviewable.io/reviews/apache/spark/3748)
<!-- Reviewable:end -->
Author: Cheng Lian <li...@databricks.com>
Closes #3748 from liancheng/test-null-filters and squashes the following commits:
1ab943f [Cheng Lian] IsNull and IsNotNull Parquet filter test case for boolean type
bcd616b [Cheng Lian] Adds Parquet filter pushedown tests for IsNull and IsNotNull
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/19a8802e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/19a8802e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/19a8802e
Branch: refs/heads/master
Commit: 19a8802e703e6b075a148ba73dc9dd80748d6322
Parents: 53f0a00
Author: Cheng Lian <li...@databricks.com>
Authored: Tue Dec 30 12:16:45 2014 -0800
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Tue Dec 30 12:16:45 2014 -0800
----------------------------------------------------------------------
.../spark/sql/parquet/ParquetFilterSuite.scala | 60 ++++++++++++++++----
1 file changed, 50 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/19a8802e/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetFilterSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetFilterSuite.scala
index b173004..4c3a045 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetFilterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetFilterSuite.scala
@@ -28,11 +28,14 @@ import org.apache.spark.sql.{QueryTest, SQLConf, SchemaRDD}
/**
* A test suite that tests Parquet filter2 API based filter pushdown optimization.
*
- * Notice that `!(a cmp b)` are always transformed to its negated form `a cmp' b` by the
- * `BooleanSimplification` optimization rule whenever possible. As a result, predicate `!(a < 1)`
- * results a `GtEq` filter predicate rather than a `Not`.
+ * NOTE:
*
- * @todo Add test cases for `IsNull` and `IsNotNull` after merging PR #3367
+ * 1. `!(a cmp b)` is always transformed to its negated form `a cmp' b` by the
+ * `BooleanSimplification` optimization rule whenever possible. As a result, predicate `!(a < 1)`
+ * results in a `GtEq` filter predicate rather than a `Not`.
+ *
+ * 2. `Tuple1(Option(x))` is used together with `AnyVal` types like `Int` to ensure the inferred
+ * data type is nullable.
*/
class ParquetFilterSuite extends QueryTest with ParquetTest {
val sqlContext = TestSQLContext
@@ -85,14 +88,26 @@ class ParquetFilterSuite extends QueryTest with ParquetTest {
}
test("filter pushdown - boolean") {
- withParquetRDD((true :: false :: Nil).map(Tuple1.apply)) { rdd =>
+ withParquetRDD((true :: false :: Nil).map(b => Tuple1.apply(Option(b)))) { rdd =>
+ checkFilterPushdown(rdd, '_1)('_1.isNull, classOf[Eq[java.lang.Boolean]])(Seq.empty[Row])
+ checkFilterPushdown(rdd, '_1)('_1.isNotNull, classOf[NotEq[java.lang.Boolean]]) {
+ Seq(Row(true), Row(false))
+ }
+
checkFilterPushdown(rdd, '_1)('_1 === true, classOf[Eq[java.lang.Boolean]])(true)
- checkFilterPushdown(rdd, '_1)('_1 !== true, classOf[Operators.NotEq[java.lang.Boolean]])(false)
+ checkFilterPushdown(rdd, '_1)('_1 !== true, classOf[Operators.NotEq[java.lang.Boolean]]) {
+ false
+ }
}
}
test("filter pushdown - integer") {
- withParquetRDD((1 to 4).map(Tuple1.apply)) { rdd =>
+ withParquetRDD((1 to 4).map(i => Tuple1(Option(i)))) { rdd =>
+ checkFilterPushdown(rdd, '_1)('_1.isNull, classOf[Eq[Integer]])(Seq.empty[Row])
+ checkFilterPushdown(rdd, '_1)('_1.isNotNull, classOf[NotEq[Integer]]) {
+ (1 to 4).map(Row.apply(_))
+ }
+
checkFilterPushdown(rdd, '_1)('_1 === 1, classOf[Eq[Integer]])(1)
checkFilterPushdown(rdd, '_1)('_1 !== 1, classOf[Operators.NotEq[Integer]]) {
(2 to 4).map(Row.apply(_))
@@ -118,7 +133,12 @@ class ParquetFilterSuite extends QueryTest with ParquetTest {
}
test("filter pushdown - long") {
- withParquetRDD((1 to 4).map(i => Tuple1(i.toLong))) { rdd =>
+ withParquetRDD((1 to 4).map(i => Tuple1(Option(i.toLong)))) { rdd =>
+ checkFilterPushdown(rdd, '_1)('_1.isNull, classOf[Eq[java.lang.Long]])(Seq.empty[Row])
+ checkFilterPushdown(rdd, '_1)('_1.isNotNull, classOf[NotEq[java.lang.Long]]) {
+ (1 to 4).map(Row.apply(_))
+ }
+
checkFilterPushdown(rdd, '_1)('_1 === 1, classOf[Eq[java.lang.Long]])(1)
checkFilterPushdown(rdd, '_1)('_1 !== 1, classOf[Operators.NotEq[java.lang.Long]]) {
(2 to 4).map(Row.apply(_))
@@ -144,7 +164,12 @@ class ParquetFilterSuite extends QueryTest with ParquetTest {
}
test("filter pushdown - float") {
- withParquetRDD((1 to 4).map(i => Tuple1(i.toFloat))) { rdd =>
+ withParquetRDD((1 to 4).map(i => Tuple1(Option(i.toFloat)))) { rdd =>
+ checkFilterPushdown(rdd, '_1)('_1.isNull, classOf[Eq[java.lang.Float]])(Seq.empty[Row])
+ checkFilterPushdown(rdd, '_1)('_1.isNotNull, classOf[NotEq[java.lang.Float]]) {
+ (1 to 4).map(Row.apply(_))
+ }
+
checkFilterPushdown(rdd, '_1)('_1 === 1, classOf[Eq[java.lang.Float]])(1)
checkFilterPushdown(rdd, '_1)('_1 !== 1, classOf[Operators.NotEq[java.lang.Float]]) {
(2 to 4).map(Row.apply(_))
@@ -170,7 +195,12 @@ class ParquetFilterSuite extends QueryTest with ParquetTest {
}
test("filter pushdown - double") {
- withParquetRDD((1 to 4).map(i => Tuple1(i.toDouble))) { rdd =>
+ withParquetRDD((1 to 4).map(i => Tuple1(Option(i.toDouble)))) { rdd =>
+ checkFilterPushdown(rdd, '_1)('_1.isNull, classOf[Eq[java.lang.Double]])(Seq.empty[Row])
+ checkFilterPushdown(rdd, '_1)('_1.isNotNull, classOf[NotEq[java.lang.Double]]) {
+ (1 to 4).map(Row.apply(_))
+ }
+
checkFilterPushdown(rdd, '_1)('_1 === 1, classOf[Eq[java.lang.Double]])(1)
checkFilterPushdown(rdd, '_1)('_1 !== 1, classOf[Operators.NotEq[java.lang.Double]]) {
(2 to 4).map(Row.apply(_))
@@ -197,6 +227,11 @@ class ParquetFilterSuite extends QueryTest with ParquetTest {
test("filter pushdown - string") {
withParquetRDD((1 to 4).map(i => Tuple1(i.toString))) { rdd =>
+ checkFilterPushdown(rdd, '_1)('_1.isNull, classOf[Eq[java.lang.String]])(Seq.empty[Row])
+ checkFilterPushdown(rdd, '_1)('_1.isNotNull, classOf[NotEq[java.lang.String]]) {
+ (1 to 4).map(i => Row.apply(i.toString))
+ }
+
checkFilterPushdown(rdd, '_1)('_1 === "1", classOf[Eq[String]])("1")
checkFilterPushdown(rdd, '_1)('_1 !== "1", classOf[Operators.NotEq[String]]) {
(2 to 4).map(i => Row.apply(i.toString))
@@ -227,6 +262,11 @@ class ParquetFilterSuite extends QueryTest with ParquetTest {
}
withParquetRDD((1 to 4).map(i => Tuple1(i.b))) { rdd =>
+ checkBinaryFilterPushdown(rdd, '_1)('_1.isNull, classOf[Eq[java.lang.String]])(Seq.empty[Row])
+ checkBinaryFilterPushdown(rdd, '_1)('_1.isNotNull, classOf[NotEq[java.lang.String]]) {
+ (1 to 4).map(i => Row.apply(i.b)).toSeq
+ }
+
checkBinaryFilterPushdown(rdd, '_1)('_1 === 1.b, classOf[Eq[Array[Byte]]])(1.b)
checkBinaryFilterPushdown(rdd, '_1)('_1 !== 1.b, classOf[Operators.NotEq[Array[Byte]]]) {
(2 to 4).map(i => Row.apply(i.b)).toSeq
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org