You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by ju...@apache.org on 2016/06/30 16:47:53 UTC
parquet-mr git commit: PARQUET-645: Fix null handling in
DictionaryFilter.
Repository: parquet-mr
Updated Branches:
refs/heads/master 1f470253c -> 9c40a7bb3
PARQUET-645: Fix null handling in DictionaryFilter.
This fixes how null is handled by `DictionaryFilter` for equals predicates. Null is never in the dictionary and is encoded by the definition level, so the `DictionaryFilter` would never find the value in the dictionary and would incorrectly filter row groups whenever the filter was `col == null`.
Author: Ryan Blue <bl...@apache.org>
Closes #348 from rdblue/PARQUET-645-fix-null-dictionary-filter and squashes the following commits:
ae8dd41 [Ryan Blue] PARQUET-645: Fix null handling in DictionaryFilter.
Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/9c40a7bb
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/9c40a7bb
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/9c40a7bb
Branch: refs/heads/master
Commit: 9c40a7bb3c9aca51d17490960c988dfb7b5acebb
Parents: 1f47025
Author: Ryan Blue <bl...@apache.org>
Authored: Thu Jun 30 09:47:48 2016 -0700
Committer: Julien Le Dem <ju...@dremio.com>
Committed: Thu Jun 30 09:47:48 2016 -0700
----------------------------------------------------------------------
.../filter2/dictionarylevel/DictionaryFilter.java | 12 ++++++++++++
.../filter2/dictionarylevel/DictionaryFilterTest.java | 6 ++++++
2 files changed, 18 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/9c40a7bb/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java b/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java
index 9b03f82..dc1d649 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java
@@ -123,6 +123,12 @@ public class DictionaryFilter implements FilterPredicate.Visitor<Boolean> {
filterColumn.getColumnPath();
+ if (value == null) {
+ // the dictionary contains only non-null values so isn't helpful. this
+ // could check the column stats, but the StatisticsFilter is responsible
+ return BLOCK_MIGHT_MATCH;
+ }
+
try {
Set<T> dictSet = expandDictionary(meta);
if (dictSet != null && !dictSet.contains(value)) {
@@ -150,6 +156,12 @@ public class DictionaryFilter implements FilterPredicate.Visitor<Boolean> {
filterColumn.getColumnPath();
+ if (value == null) {
+ // the dictionary contains only non-null values so isn't helpful. this
+ // could check the column stats, but the StatisticsFilter is responsible
+ return BLOCK_MIGHT_MATCH;
+ }
+
try {
Set<T> dictSet = expandDictionary(meta);
if (dictSet != null && dictSet.size() == 1 && dictSet.contains(value)) {
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/9c40a7bb/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java b/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java
index 754da68..35b944d 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java
@@ -193,6 +193,9 @@ public class DictionaryFilterTest {
assertTrue("Should drop block for upper case letters",
canDrop(eq(b, Binary.fromString("A")), ccmd, dictionaries));
+
+ assertFalse("Should not drop block for null",
+ canDrop(eq(b, null), ccmd, dictionaries));
}
@Test
@@ -211,6 +214,9 @@ public class DictionaryFilterTest {
assertFalse("Should not drop block with a known value",
canDrop(notEq(b, Binary.fromString("B")), ccmd, dictionaries));
+
+ assertFalse("Should not drop block for null",
+ canDrop(notEq(b, null), ccmd, dictionaries));
}
@Test