You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by jo...@apache.org on 2020/04/10 01:00:23 UTC
[druid] branch 0.18.0 updated: More optimize CNF conversion of
filters (#9634) (#9656)
This is an automated email from the ASF dual-hosted git repository.
jonwei pushed a commit to branch 0.18.0
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/0.18.0 by this push:
new e7ef80e More optimize CNF conversion of filters (#9634) (#9656)
e7ef80e is described below
commit e7ef80eade26fe2120015f5857fc6d3e7a4422c3
Author: Jihoon Son <ji...@apache.org>
AuthorDate: Thu Apr 9 18:00:12 2020 -0700
More optimize CNF conversion of filters (#9634) (#9656)
* More optimize CNF conversion of filters
* update license
* fix build
* checkstyle
* remove unnecessary code
* split helper
* license
* checkstyle
* add comments on cnf conversion
---
LICENSE | 3 +-
.../druid/benchmark/FilterPartitionBenchmark.java | 4 +-
.../apache/druid/segment/filter/FalseFilter.java | 92 ++++
.../org/apache/druid/segment/filter/Filters.java | 186 +-------
.../druid/segment/filter/cnf/CalciteCnfHelper.java | 301 +++++++++++++
.../druid/segment/filter/cnf/HiveCnfHelper.java | 190 ++++++++
.../segment/join/filter/JoinFilterAnalyzer.java | 2 +-
.../druid/segment/filter/BaseFilterTest.java | 2 +-
.../segment/filter/FilterCnfConversionTest.java | 490 +++++++++++++++++++++
.../druid/segment/filter/FilterPartitionTest.java | 4 +-
.../apache/druid/segment/filter/FiltersTest.java | 210 ---------
11 files changed, 1100 insertions(+), 384 deletions(-)
diff --git a/LICENSE b/LICENSE
index 820819d..6171bc0 100644
--- a/LICENSE
+++ b/LICENSE
@@ -215,7 +215,7 @@ Apache License version 2.0
SOURCE/JAVA-CORE
This product contains conjunctive normal form conversion code, a variance aggregator algorithm, and Bloom filter
adapted from Apache Hive.
- * processing/src/main/java/org/apache/druid/segment/filter/Filters.java
+ * processing/src/main/java/org/apache/druid/segment/filter/cnf/HiveCnfHelper.java
* extensions-core/stats/src/main/java/io/druid/query/aggregation/variance/VarianceAggregatorCollector.java
* extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomKFilter.java
@@ -224,6 +224,7 @@ SOURCE/JAVA-CORE
This product contains SQL query planning code adapted from Apache Calcite.
* sql/src/main/java/org/apache/druid/sql/calcite/
+ * processing/src/main/java/org/apache/druid/segment/filter/cnf/CalciteCnfHelper.java
This product contains Kerberos authentication code adapted from Apache Hadoop.
* extensions-core/druid-kerberos/src/main/java/org/apache/druid/security/kerberos/
diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/FilterPartitionBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/FilterPartitionBenchmark.java
index 547784e..512f1eb 100644
--- a/benchmarks/src/test/java/org/apache/druid/benchmark/FilterPartitionBenchmark.java
+++ b/benchmarks/src/test/java/org/apache/druid/benchmark/FilterPartitionBenchmark.java
@@ -377,7 +377,7 @@ public class FilterPartitionBenchmark
Filter orFilter = new OrFilter(Arrays.asList(filter, filter2));
StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
- Sequence<Cursor> cursors = makeCursors(sa, Filters.toCNF(orFilter));
+ Sequence<Cursor> cursors = makeCursors(sa, Filters.toCnf(orFilter));
readCursors(cursors, blackhole);
}
@@ -451,7 +451,7 @@ public class FilterPartitionBenchmark
);
StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
- Sequence<Cursor> cursors = makeCursors(sa, Filters.toCNF(dimFilter3.toFilter()));
+ Sequence<Cursor> cursors = makeCursors(sa, Filters.toCnf(dimFilter3.toFilter()));
readCursors(cursors, blackhole);
}
diff --git a/processing/src/main/java/org/apache/druid/segment/filter/FalseFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/FalseFilter.java
new file mode 100644
index 0000000..e143b70
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/segment/filter/FalseFilter.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.filter;
+
+import org.apache.druid.query.BitmapResultFactory;
+import org.apache.druid.query.filter.BitmapIndexSelector;
+import org.apache.druid.query.filter.Filter;
+import org.apache.druid.query.filter.ValueMatcher;
+import org.apache.druid.segment.ColumnSelector;
+import org.apache.druid.segment.ColumnSelectorFactory;
+
+import java.util.Collections;
+import java.util.Set;
+
+public class FalseFilter implements Filter
+{
+ private static final FalseFilter INSTANCE = new FalseFilter();
+
+ public static FalseFilter instance()
+ {
+ return INSTANCE;
+ }
+
+ private FalseFilter()
+ {
+ }
+
+ @Override
+ public <T> T getBitmapResult(BitmapIndexSelector selector, BitmapResultFactory<T> bitmapResultFactory)
+ {
+ return bitmapResultFactory.wrapAllFalse(Filters.allFalse(selector));
+ }
+
+ @Override
+ public double estimateSelectivity(BitmapIndexSelector indexSelector)
+ {
+ return 0;
+ }
+
+ @Override
+ public ValueMatcher makeMatcher(ColumnSelectorFactory factory)
+ {
+ return FalseValueMatcher.instance();
+ }
+
+ @Override
+ public boolean supportsBitmapIndex(BitmapIndexSelector selector)
+ {
+ return true;
+ }
+
+ @Override
+ public boolean shouldUseBitmapIndex(BitmapIndexSelector selector)
+ {
+ return true;
+ }
+
+ @Override
+ public boolean supportsSelectivityEstimation(ColumnSelector columnSelector, BitmapIndexSelector indexSelector)
+ {
+ return true;
+ }
+
+ @Override
+ public Set<String> getRequiredColumns()
+ {
+ return Collections.emptySet();
+ }
+
+ @Override
+ public String toString()
+ {
+ return "false";
+ }
+}
diff --git a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java
index 2328dfb..830f372 100644
--- a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java
+++ b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java
@@ -19,7 +19,6 @@
package org.apache.druid.segment.filter;
-import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableList;
@@ -30,7 +29,6 @@ import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.query.BitmapResultFactory;
import org.apache.druid.query.Query;
import org.apache.druid.query.filter.BitmapIndexSelector;
-import org.apache.druid.query.filter.BooleanFilter;
import org.apache.druid.query.filter.DimFilter;
import org.apache.druid.query.filter.DruidPredicateFactory;
import org.apache.druid.query.filter.Filter;
@@ -44,12 +42,12 @@ import org.apache.druid.segment.column.BitmapIndex;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.data.CloseableIndexed;
import org.apache.druid.segment.data.Indexed;
+import org.apache.druid.segment.filter.cnf.CalciteCnfHelper;
+import org.apache.druid.segment.filter.cnf.HiveCnfHelper;
import javax.annotation.Nullable;
import java.io.IOException;
import java.io.UncheckedIOException;
-import java.util.ArrayList;
-import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
@@ -426,175 +424,29 @@ public class Filters
return null;
}
boolean useCNF = query.getContextBoolean(CTX_KEY_USE_FILTER_CNF, false);
- return useCNF ? toCNF(filter) : filter;
+ return useCNF ? Filters.toCnf(filter) : filter;
}
- public static Filter toCNF(Filter current)
+ public static Filter toCnf(Filter current)
{
- current = pushDownNot(current);
- current = flatten(current);
- current = convertToCNFInternal(current);
- current = flatten(current);
+ // Push down NOT filters to leaves if possible to remove NOT on NOT filters and reduce hierarchy.
+ // ex) ~(a OR ~b) => ~a AND b
+ current = HiveCnfHelper.pushDownNot(current);
+ // Flatten nested AND and OR filters if possible.
+ // ex) a AND (b AND c) => a AND b AND c
+ current = HiveCnfHelper.flatten(current);
+ // Pull up AND filters first to convert the filter into a conjunctive form.
+ // It is important to pull before CNF conversion to not create a huge CNF.
+ // ex) (a AND b) OR (a AND c AND d) => a AND (b OR (c AND d))
+ current = CalciteCnfHelper.pull(current);
+ // Convert filter to CNF.
+ // a AND (b OR (c AND d)) => a AND (b OR c) AND (b OR d)
+ current = HiveCnfHelper.convertToCnf(current);
+ // Flatten again to remove any flattenable nested AND or OR filters created during CNF conversion.
+ current = HiveCnfHelper.flatten(current);
return current;
}
- // CNF conversion functions were adapted from Apache Hive, see:
- // https://github.com/apache/hive/blob/branch-2.0/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
- @VisibleForTesting
- static Filter pushDownNot(Filter current)
- {
- if (current instanceof NotFilter) {
- Filter child = ((NotFilter) current).getBaseFilter();
- if (child instanceof NotFilter) {
- return pushDownNot(((NotFilter) child).getBaseFilter());
- }
- if (child instanceof AndFilter) {
- Set<Filter> children = new HashSet<>();
- for (Filter grandChild : ((AndFilter) child).getFilters()) {
- children.add(pushDownNot(new NotFilter(grandChild)));
- }
- return new OrFilter(children);
- }
- if (child instanceof OrFilter) {
- Set<Filter> children = new HashSet<>();
- for (Filter grandChild : ((OrFilter) child).getFilters()) {
- children.add(pushDownNot(new NotFilter(grandChild)));
- }
- return new AndFilter(children);
- }
- }
-
-
- if (current instanceof AndFilter) {
- Set<Filter> children = new HashSet<>();
- for (Filter child : ((AndFilter) current).getFilters()) {
- children.add(pushDownNot(child));
- }
- return new AndFilter(children);
- }
-
-
- if (current instanceof OrFilter) {
- Set<Filter> children = new HashSet<>();
- for (Filter child : ((OrFilter) current).getFilters()) {
- children.add(pushDownNot(child));
- }
- return new OrFilter(children);
- }
- return current;
- }
-
- // CNF conversion functions were adapted from Apache Hive, see:
- // https://github.com/apache/hive/blob/branch-2.0/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
- private static Filter convertToCNFInternal(Filter current)
- {
- if (current instanceof NotFilter) {
- return new NotFilter(convertToCNFInternal(((NotFilter) current).getBaseFilter()));
- }
- if (current instanceof AndFilter) {
- Set<Filter> children = new HashSet<>();
- for (Filter child : ((AndFilter) current).getFilters()) {
- children.add(convertToCNFInternal(child));
- }
- return new AndFilter(children);
- }
- if (current instanceof OrFilter) {
- // a list of leaves that weren't under AND expressions
- List<Filter> nonAndList = new ArrayList<Filter>();
- // a list of AND expressions that we need to distribute
- List<Filter> andList = new ArrayList<Filter>();
- for (Filter child : ((OrFilter) current).getFilters()) {
- if (child instanceof AndFilter) {
- andList.add(child);
- } else if (child instanceof OrFilter) {
- // pull apart the kids of the OR expression
- nonAndList.addAll(((OrFilter) child).getFilters());
- } else {
- nonAndList.add(child);
- }
- }
- if (!andList.isEmpty()) {
- Set<Filter> result = new HashSet<>();
- generateAllCombinations(result, andList, nonAndList);
- return new AndFilter(result);
- }
- }
- return current;
- }
-
- // CNF conversion functions were adapted from Apache Hive, see:
- // https://github.com/apache/hive/blob/branch-2.0/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
- @VisibleForTesting
- static Filter flatten(Filter root)
- {
- if (root instanceof BooleanFilter) {
- List<Filter> children = new ArrayList<>(((BooleanFilter) root).getFilters());
- // iterate through the index, so that if we add more children,
- // they don't get re-visited
- for (int i = 0; i < children.size(); ++i) {
- Filter child = flatten(children.get(i));
- // do we need to flatten?
- if (child.getClass() == root.getClass() && !(child instanceof NotFilter)) {
- boolean first = true;
- Set<Filter> grandKids = ((BooleanFilter) child).getFilters();
- for (Filter grandkid : grandKids) {
- // for the first grandkid replace the original parent
- if (first) {
- first = false;
- children.set(i, grandkid);
- } else {
- children.add(++i, grandkid);
- }
- }
- } else {
- children.set(i, child);
- }
- }
- // if we have a singleton AND or OR, just return the child
- if (children.size() == 1 && (root instanceof AndFilter || root instanceof OrFilter)) {
- return children.get(0);
- }
-
- if (root instanceof AndFilter) {
- return new AndFilter(children);
- } else if (root instanceof OrFilter) {
- return new OrFilter(children);
- }
- }
- return root;
- }
-
- // CNF conversion functions were adapted from Apache Hive, see:
- // https://github.com/apache/hive/blob/branch-2.0/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
- private static void generateAllCombinations(
- Set<Filter> result,
- List<Filter> andList,
- List<Filter> nonAndList
- )
- {
- Set<Filter> children = ((AndFilter) andList.get(0)).getFilters();
- if (result.isEmpty()) {
- for (Filter child : children) {
- Set<Filter> a = new HashSet<>(nonAndList);
- a.add(child);
- result.add(new OrFilter(a));
- }
- } else {
- List<Filter> work = new ArrayList<>(result);
- result.clear();
- for (Filter child : children) {
- for (Filter or : work) {
- Set<Filter> a = new HashSet<>((((OrFilter) or).getFilters()));
- a.add(child);
- result.add(new OrFilter(a));
- }
- }
- }
- if (andList.size() > 1) {
- generateAllCombinations(result, andList.subList(1, andList.size()), nonAndList);
- }
- }
-
/**
* This method provides a "standard" implementation of {@link Filter#shouldUseBitmapIndex(BitmapIndexSelector)} which takes
* a {@link Filter}, a {@link BitmapIndexSelector}, and {@link FilterTuning} to determine if:
diff --git a/processing/src/main/java/org/apache/druid/segment/filter/cnf/CalciteCnfHelper.java b/processing/src/main/java/org/apache/druid/segment/filter/cnf/CalciteCnfHelper.java
new file mode 100644
index 0000000..f857ae2
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/segment/filter/cnf/CalciteCnfHelper.java
@@ -0,0 +1,301 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.filter.cnf;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterables;
+import org.apache.druid.query.filter.Filter;
+import org.apache.druid.segment.filter.AndFilter;
+import org.apache.druid.segment.filter.FalseFilter;
+import org.apache.druid.segment.filter.OrFilter;
+import org.apache.druid.segment.filter.TrueFilter;
+
+import javax.annotation.Nonnull;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+
+/**
+ * All functions in this class were basically adopted from Apache Calcite and modified to use them in Druid.
+ * See https://github.com/apache/calcite/blob/branch-1.21/core/src/main/java/org/apache/calcite/rex/RexUtil.java#L1615
+ * for original implementation.
+ */
+public class CalciteCnfHelper
+{
+ public static Filter pull(Filter rex)
+ {
+ final Set<Filter> operands;
+ if (rex instanceof AndFilter) {
+ operands = ((AndFilter) rex).getFilters();
+ return and(pullList(operands));
+ } else if (rex instanceof OrFilter) {
+ operands = ((OrFilter) rex).getFilters();
+ final Map<Filter, Filter> factors = commonFactors(operands);
+ if (factors.isEmpty()) {
+ return or(operands);
+ }
+ final List<Filter> list = new ArrayList<>();
+ for (Filter operand : operands) {
+ Filter removed = removeFactor(factors, operand);
+ if (removed != null) {
+ list.add(removed);
+ }
+ }
+ if (list.isEmpty()) {
+ return and(factors.values());
+ } else if (list.size() == 1) {
+ return and(Iterables.concat(factors.values(), ImmutableList.of(list.get(0))));
+ } else {
+ return and(Iterables.concat(factors.values(), ImmutableList.of(or(list))));
+ }
+ } else {
+ return rex;
+ }
+ }
+
+ private static List<Filter> pullList(Set<Filter> nodes)
+ {
+ final List<Filter> list = new ArrayList<>();
+ for (Filter node : nodes) {
+ Filter pulled = pull(node);
+ if (pulled instanceof AndFilter) {
+ list.addAll(((AndFilter) pulled).getFilters());
+ } else {
+ list.add(pulled);
+ }
+ }
+ return list;
+ }
+
+ private static Map<Filter, Filter> commonFactors(Set<Filter> nodes)
+ {
+ final Map<Filter, Filter> map = new HashMap<>();
+ int i = 0;
+ for (Filter node : nodes) {
+ if (i++ == 0) {
+ for (Filter conjunction : conjunctions(node)) {
+ map.put(conjunction, conjunction);
+ }
+ } else {
+ map.keySet().retainAll(conjunctions(node));
+ }
+ }
+ return map;
+ }
+
+ private static Filter removeFactor(Map<Filter, Filter> factors, Filter node)
+ {
+ List<Filter> list = new ArrayList<>();
+ for (Filter operand : conjunctions(node)) {
+ if (!factors.containsKey(operand)) {
+ list.add(operand);
+ }
+ }
+ return and(list);
+ }
+
+ private static Filter and(Iterable<? extends Filter> nodes)
+ {
+ return composeConjunction(nodes);
+ }
+
+ private static Filter or(Iterable<? extends Filter> nodes)
+ {
+ return composeDisjunction(nodes);
+ }
+
+ /** As {@link #composeConjunction(Iterable, boolean)} but never
+ * returns null. */
+ public static @Nonnull Filter composeConjunction(Iterable<? extends Filter> nodes)
+ {
+ final Filter e = composeConjunction(nodes, false);
+ return Objects.requireNonNull(e);
+ }
+
+ /**
+ * Converts a collection of expressions into an AND.
+ * If there are zero expressions, returns TRUE.
+ * If there is one expression, returns just that expression.
+ * If any of the expressions are FALSE, returns FALSE.
+ * Removes expressions that always evaluate to TRUE.
+ * Returns null only if {@code nullOnEmpty} and expression is TRUE.
+ */
+ public static Filter composeConjunction(Iterable<? extends Filter> nodes, boolean nullOnEmpty)
+ {
+ ImmutableList<Filter> list = flattenAnd(nodes);
+ switch (list.size()) {
+ case 0:
+ return nullOnEmpty
+ ? null
+ : TrueFilter.instance();
+ case 1:
+ return list.get(0);
+ default:
+ return new AndFilter(list);
+ }
+ }
+
+ /** Flattens a list of AND nodes.
+ *
+ * <p>Treats null nodes as literal TRUE (i.e. ignores them). */
+ public static ImmutableList<Filter> flattenAnd(Iterable<? extends Filter> nodes)
+ {
+ if (nodes instanceof Collection && ((Collection) nodes).isEmpty()) {
+ // Optimize common case
+ return ImmutableList.of();
+ }
+ final ImmutableList.Builder<Filter> builder = ImmutableList.builder();
+ final Set<Filter> set = new HashSet<>(); // to eliminate duplicates
+ for (Filter node : nodes) {
+ if (node != null) {
+ addAnd(builder, set, node);
+ }
+ }
+ return builder.build();
+ }
+
+ private static void addAnd(ImmutableList.Builder<Filter> builder, Set<Filter> digests, Filter node)
+ {
+ if (node instanceof AndFilter) {
+ for (Filter operand : ((AndFilter) node).getFilters()) {
+ addAnd(builder, digests, operand);
+ }
+ } else {
+ if (!(node instanceof TrueFilter) && digests.add(node)) {
+ builder.add(node);
+ }
+ }
+ }
+
+ /**
+ * Converts a collection of expressions into an OR.
+ * If there are zero expressions, returns FALSE.
+ * If there is one expression, returns just that expression.
+ * If any of the expressions are TRUE, returns TRUE.
+ * Removes expressions that always evaluate to FALSE.
+ * Flattens expressions that are ORs.
+ */
+ @Nonnull public static Filter composeDisjunction(Iterable<? extends Filter> nodes)
+ {
+ final Filter e = composeDisjunction(nodes, false);
+ return Objects.requireNonNull(e);
+ }
+
+ /**
+ * Converts a collection of expressions into an OR,
+ * optionally returning null if the list is empty.
+ */
+ public static Filter composeDisjunction(Iterable<? extends Filter> nodes, boolean nullOnEmpty)
+ {
+ ImmutableList<Filter> list = flattenOr(nodes);
+ switch (list.size()) {
+ case 0:
+ return nullOnEmpty ? null : FalseFilter.instance();
+ case 1:
+ return list.get(0);
+ default:
+ if (containsTrue(list)) {
+ return TrueFilter.instance();
+ }
+ return new OrFilter(list);
+ }
+ }
+
+ /** Flattens a list of OR nodes. */
+ public static ImmutableList<Filter> flattenOr(Iterable<? extends Filter> nodes)
+ {
+ if (nodes instanceof Collection && ((Collection) nodes).isEmpty()) {
+ // Optimize common case
+ return ImmutableList.of();
+ }
+ final ImmutableList.Builder<Filter> builder = ImmutableList.builder();
+ final Set<Filter> set = new HashSet<>(); // to eliminate duplicates
+ for (Filter node : nodes) {
+ addOr(builder, set, node);
+ }
+ return builder.build();
+ }
+
+ private static void addOr(ImmutableList.Builder<Filter> builder, Set<Filter> set, Filter node)
+ {
+ if (node instanceof OrFilter) {
+ for (Filter operand : ((OrFilter) node).getFilters()) {
+ addOr(builder, set, operand);
+ }
+ } else {
+ if (set.add(node)) {
+ builder.add(node);
+ }
+ }
+ }
+
+ /**
+ * Returns a condition decomposed by AND.
+ *
+ * <p>For example, {@code conjunctions(TRUE)} returns the empty list;
+ * {@code conjunctions(FALSE)} returns list {@code {FALSE}}.</p>
+ */
+ public static List<Filter> conjunctions(Filter rexPredicate)
+ {
+ final List<Filter> list = new ArrayList<>();
+ decomposeConjunction(rexPredicate, list);
+ return list;
+ }
+
+ /**
+ * Decomposes a predicate into a list of expressions that are AND'ed
+ * together.
+ *
+ * @param rexPredicate predicate to be analyzed
+ * @param rexList list of decomposed RexNodes
+ */
+ public static void decomposeConjunction(Filter rexPredicate, List<Filter> rexList)
+ {
+ if (rexPredicate == null || rexPredicate instanceof TrueFilter) {
+ return;
+ }
+ if (rexPredicate instanceof AndFilter) {
+ for (Filter operand : ((AndFilter) rexPredicate).getFilters()) {
+ decomposeConjunction(operand, rexList);
+ }
+ } else {
+ rexList.add(rexPredicate);
+ }
+ }
+
+ private static boolean containsTrue(Iterable<Filter> nodes)
+ {
+ for (Filter node : nodes) {
+ if (node instanceof TrueFilter) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ private CalciteCnfHelper()
+ {
+ }
+}
diff --git a/processing/src/main/java/org/apache/druid/segment/filter/cnf/HiveCnfHelper.java b/processing/src/main/java/org/apache/druid/segment/filter/cnf/HiveCnfHelper.java
new file mode 100644
index 0000000..e5669b8
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/segment/filter/cnf/HiveCnfHelper.java
@@ -0,0 +1,190 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.filter.cnf;
+
+import org.apache.druid.query.filter.BooleanFilter;
+import org.apache.druid.query.filter.Filter;
+import org.apache.druid.segment.filter.AndFilter;
+import org.apache.druid.segment.filter.NotFilter;
+import org.apache.druid.segment.filter.OrFilter;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * All functions in this class were basically adopted from Apache Hive and modified to use them in Druid.
+ * See https://github.com/apache/hive/blob/branch-2.0/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
+ * for original implementation.
+ */
+public class HiveCnfHelper
+{
+ public static Filter pushDownNot(Filter current)
+ {
+ if (current instanceof NotFilter) {
+ Filter child = ((NotFilter) current).getBaseFilter();
+ if (child instanceof NotFilter) {
+ return pushDownNot(((NotFilter) child).getBaseFilter());
+ }
+ if (child instanceof AndFilter) {
+ Set<Filter> children = new HashSet<>();
+ for (Filter grandChild : ((AndFilter) child).getFilters()) {
+ children.add(pushDownNot(new NotFilter(grandChild)));
+ }
+ return new OrFilter(children);
+ }
+ if (child instanceof OrFilter) {
+ Set<Filter> children = new HashSet<>();
+ for (Filter grandChild : ((OrFilter) child).getFilters()) {
+ children.add(pushDownNot(new NotFilter(grandChild)));
+ }
+ return new AndFilter(children);
+ }
+ }
+
+ if (current instanceof AndFilter) {
+ Set<Filter> children = new HashSet<>();
+ for (Filter child : ((AndFilter) current).getFilters()) {
+ children.add(pushDownNot(child));
+ }
+ return new AndFilter(children);
+ }
+
+ if (current instanceof OrFilter) {
+ Set<Filter> children = new HashSet<>();
+ for (Filter child : ((OrFilter) current).getFilters()) {
+ children.add(pushDownNot(child));
+ }
+ return new OrFilter(children);
+ }
+ return current;
+ }
+
+ public static Filter convertToCnf(Filter current)
+ {
+ if (current instanceof NotFilter) {
+ return new NotFilter(convertToCnf(((NotFilter) current).getBaseFilter()));
+ }
+ if (current instanceof AndFilter) {
+ Set<Filter> children = new HashSet<>();
+ for (Filter child : ((AndFilter) current).getFilters()) {
+ children.add(convertToCnf(child));
+ }
+ return new AndFilter(children);
+ }
+ if (current instanceof OrFilter) {
+ // a list of leaves that weren't under AND expressions
+ List<Filter> nonAndList = new ArrayList<Filter>();
+ // a list of AND expressions that we need to distribute
+ List<Filter> andList = new ArrayList<Filter>();
+ for (Filter child : ((OrFilter) current).getFilters()) {
+ if (child instanceof AndFilter) {
+ andList.add(child);
+ } else if (child instanceof OrFilter) {
+ // pull apart the kids of the OR expression
+ nonAndList.addAll(((OrFilter) child).getFilters());
+ } else {
+ nonAndList.add(child);
+ }
+ }
+ if (!andList.isEmpty()) {
+ Set<Filter> result = new HashSet<>();
+ generateAllCombinations(result, andList, nonAndList);
+ return new AndFilter(result);
+ }
+ }
+ return current;
+ }
+
+ public static Filter flatten(Filter root)
+ {
+ if (root instanceof BooleanFilter) {
+ List<Filter> children = new ArrayList<>(((BooleanFilter) root).getFilters());
+ // iterate through the index, so that if we add more children,
+ // they don't get re-visited
+ for (int i = 0; i < children.size(); ++i) {
+ Filter child = flatten(children.get(i));
+ // do we need to flatten?
+ if (child.getClass() == root.getClass() && !(child instanceof NotFilter)) {
+ boolean first = true;
+ Set<Filter> grandKids = ((BooleanFilter) child).getFilters();
+ for (Filter grandkid : grandKids) {
+ // for the first grandkid replace the original parent
+ if (first) {
+ first = false;
+ children.set(i, grandkid);
+ } else {
+ children.add(++i, grandkid);
+ }
+ }
+ } else {
+ children.set(i, child);
+ }
+ }
+ // if we have a singleton AND or OR, just return the child
+ if (children.size() == 1 && (root instanceof AndFilter || root instanceof OrFilter)) {
+ return children.get(0);
+ }
+
+ if (root instanceof AndFilter) {
+ return new AndFilter(children);
+ } else if (root instanceof OrFilter) {
+ return new OrFilter(children);
+ }
+ }
+ return root;
+ }
+
+ // A helper function adapted from Apache Hive, see:
+ // https://github.com/apache/hive/blob/branch-2.0/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
+ private static void generateAllCombinations(
+ Set<Filter> result,
+ List<Filter> andList,
+ List<Filter> nonAndList
+ )
+ {
+ Set<Filter> children = ((AndFilter) andList.get(0)).getFilters();
+ if (result.isEmpty()) {
+ for (Filter child : children) {
+ Set<Filter> a = new HashSet<>(nonAndList);
+ a.add(child);
+ result.add(new OrFilter(a));
+ }
+ } else {
+ List<Filter> work = new ArrayList<>(result);
+ result.clear();
+ for (Filter child : children) {
+ for (Filter or : work) {
+ Set<Filter> a = new HashSet<>((((OrFilter) or).getFilters()));
+ a.add(child);
+ result.add(new OrFilter(a));
+ }
+ }
+ }
+ if (andList.size() > 1) {
+ generateAllCombinations(result, andList.subList(1, andList.size()), nonAndList);
+ }
+ }
+
+ private HiveCnfHelper()
+ {
+ }
+}
diff --git a/processing/src/main/java/org/apache/druid/segment/join/filter/JoinFilterAnalyzer.java b/processing/src/main/java/org/apache/druid/segment/join/filter/JoinFilterAnalyzer.java
index 892fcb3..69e3b23 100644
--- a/processing/src/main/java/org/apache/druid/segment/join/filter/JoinFilterAnalyzer.java
+++ b/processing/src/main/java/org/apache/druid/segment/join/filter/JoinFilterAnalyzer.java
@@ -128,7 +128,7 @@ public class JoinFilterAnalyzer
);
}
- Filter normalizedFilter = Filters.toCNF(originalFilter);
+ Filter normalizedFilter = Filters.toCnf(originalFilter);
// List of candidates for pushdown
// CNF normalization will generate either
diff --git a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java
index 5700458..b5472fd 100644
--- a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java
@@ -336,7 +336,7 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest
final DimFilter maybeOptimized = optimize ? dimFilter.optimize() : dimFilter;
final Filter filter = maybeOptimized.toFilter();
- return cnf ? Filters.toCNF(filter) : filter;
+ return cnf ? Filters.toCnf(filter) : filter;
}
private DimFilter maybeOptimize(final DimFilter dimFilter)
diff --git a/processing/src/test/java/org/apache/druid/segment/filter/FilterCnfConversionTest.java b/processing/src/test/java/org/apache/druid/segment/filter/FilterCnfConversionTest.java
new file mode 100644
index 0000000..6857e93
--- /dev/null
+++ b/processing/src/test/java/org/apache/druid/segment/filter/FilterCnfConversionTest.java
@@ -0,0 +1,490 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.filter;
+
+import org.apache.druid.java.util.common.ISE;
+import org.apache.druid.java.util.common.StringUtils;
+import org.apache.druid.query.dimension.DimensionSpec;
+import org.apache.druid.query.filter.Filter;
+import org.apache.druid.segment.ColumnSelectorFactory;
+import org.apache.druid.segment.ColumnValueSelector;
+import org.apache.druid.segment.DimensionSelector;
+import org.apache.druid.segment.column.ColumnCapabilities;
+import org.apache.druid.segment.filter.cnf.CalciteCnfHelper;
+import org.apache.druid.segment.filter.cnf.HiveCnfHelper;
+import org.junit.Assert;
+import org.junit.Test;
+
+import javax.annotation.Nullable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.function.Function;
+
+public class FilterCnfConversionTest
+{
+ @Test
+ public void testPushDownNot()
+ {
+ final Filter filter = FilterTestUtils.not(
+ FilterTestUtils.and(
+ FilterTestUtils.selector("col1", "1"),
+ FilterTestUtils.selector("col2", "2"),
+ FilterTestUtils.not(FilterTestUtils.selector("col3", "3"))
+ )
+ );
+ final Filter expected = FilterTestUtils.or(
+ FilterTestUtils.not(FilterTestUtils.selector("col1", "1")),
+ FilterTestUtils.not(FilterTestUtils.selector("col2", "2")),
+ FilterTestUtils.selector("col3", "3")
+ );
+ final Filter pushedDown = HiveCnfHelper.pushDownNot(filter);
+ assertFilter(filter, expected, pushedDown);
+ }
+
+ @Test
+ public void testPushDownNotLeafNot()
+ {
+ final Filter filter = FilterTestUtils.and(
+ FilterTestUtils.selector("col1", "1"),
+ FilterTestUtils.selector("col2", "2"),
+ FilterTestUtils.not(FilterTestUtils.selector("col3", "3"))
+ );
+ final Filter pushedDown = HiveCnfHelper.pushDownNot(filter);
+ assertFilter(filter, filter, pushedDown);
+ }
+
+ @Test
+ public void testFlatten()
+ {
+ final Filter filter = FilterTestUtils.and(
+ FilterTestUtils.and(
+ FilterTestUtils.and(
+ FilterTestUtils.selector("col1", "1"),
+ FilterTestUtils.selector("col2", "2")
+ )
+ ),
+ FilterTestUtils.selector("col3", "3")
+ );
+ final Filter expected = FilterTestUtils.and(
+ FilterTestUtils.selector("col1", "1"),
+ FilterTestUtils.selector("col2", "2"),
+ FilterTestUtils.selector("col3", "3")
+ );
+ final Filter flattened = HiveCnfHelper.flatten(filter);
+ assertFilter(filter, expected, flattened);
+ }
+
+ @Test
+ public void testFlattenUnflattenable()
+ {
+ final Filter filter = FilterTestUtils.and(
+ FilterTestUtils.or(
+ FilterTestUtils.selector("col1", "1"),
+ FilterTestUtils.selector("col2", "2")
+ ),
+ FilterTestUtils.selector("col3", "3")
+ );
+ final Filter flattened = HiveCnfHelper.flatten(filter);
+ assertFilter(filter, filter, flattened);
+ }
+
+ @Test
+ public void testToCnfWithMuchReducibleFilter()
+ {
+ final Filter muchReducible = FilterTestUtils.and(
+ // should be flattened
+ FilterTestUtils.and(
+ FilterTestUtils.and(
+ FilterTestUtils.and(FilterTestUtils.selector("col1", "val1"))
+ )
+ ),
+ // should be flattened
+ FilterTestUtils.and(
+ FilterTestUtils.or(
+ FilterTestUtils.and(FilterTestUtils.selector("col1", "val1"))
+ )
+ ),
+ // should be flattened
+ FilterTestUtils.or(
+ FilterTestUtils.and(
+ FilterTestUtils.or(FilterTestUtils.selector("col1", "val1"))
+ )
+ ),
+ // should eliminate duplicate filters
+ FilterTestUtils.selector("col1", "val1"),
+ FilterTestUtils.selector("col2", "val2"),
+ FilterTestUtils.and(
+ FilterTestUtils.selector("col1", "val1"),
+ FilterTestUtils.selector("col2", "val2")
+ ),
+ FilterTestUtils.and(
+ FilterTestUtils.selector("col1", "val1"),
+ FilterTestUtils.and(
+ FilterTestUtils.selector("col2", "val2"),
+ FilterTestUtils.selector("col1", "val1")
+ )
+ )
+ );
+ final Filter expected = FilterTestUtils.and(
+ FilterTestUtils.selector("col1", "val1"),
+ FilterTestUtils.selector("col2", "val2")
+ );
+ final Filter cnf = Filters.toCnf(muchReducible);
+ assertFilter(muchReducible, expected, cnf);
+ }
+
+ @Test
+ public void testToCnfWithComplexFilterIncludingNotAndOr()
+ {
+ final Filter filter = FilterTestUtils.and(
+ FilterTestUtils.or(
+ FilterTestUtils.and(
+ FilterTestUtils.selector("col1", "val1"),
+ FilterTestUtils.selector("col2", "val2")
+ ),
+ FilterTestUtils.not(
+ FilterTestUtils.and(
+ FilterTestUtils.selector("col4", "val4"),
+ FilterTestUtils.selector("col5", "val5")
+ )
+ )
+ ),
+ FilterTestUtils.or(
+ FilterTestUtils.not(
+ FilterTestUtils.or(
+ FilterTestUtils.selector("col2", "val2"),
+ FilterTestUtils.selector("col4", "val4"),
+ FilterTestUtils.selector("col5", "val5")
+ )
+ ),
+ FilterTestUtils.and(
+ FilterTestUtils.selector("col1", "val1"),
+ FilterTestUtils.selector("col3", "val3")
+ )
+ ),
+ FilterTestUtils.and(
+ FilterTestUtils.or(
+ FilterTestUtils.selector("col1", "val1"),
+ FilterTestUtils.selector("col2", "val22"), // selecting different value
+ FilterTestUtils.selector("col3", "val3")
+ ),
+ FilterTestUtils.not(
+ FilterTestUtils.selector("col1", "val11")
+ )
+ ),
+ FilterTestUtils.and(
+ FilterTestUtils.or(
+ FilterTestUtils.selector("col1", "val1"),
+ FilterTestUtils.selector("col2", "val22"),
+ FilterTestUtils.selector("col3", "val3")
+ ),
+ FilterTestUtils.not(
+ FilterTestUtils.selector("col1", "val11") // selecting different value
+ )
+ )
+ );
+ final Filter expected = FilterTestUtils.and(
+ FilterTestUtils.or(
+ FilterTestUtils.selector("col1", "val1"),
+ FilterTestUtils.selector("col2", "val22"),
+ FilterTestUtils.selector("col3", "val3")
+ ),
+ FilterTestUtils.or(
+ FilterTestUtils.selector("col1", "val1"),
+ FilterTestUtils.not(FilterTestUtils.selector("col2", "val2"))
+ ),
+ FilterTestUtils.or(
+ FilterTestUtils.not(FilterTestUtils.selector("col2", "val2")),
+ FilterTestUtils.selector("col3", "val3")
+ ),
+ FilterTestUtils.or(
+ FilterTestUtils.selector("col1", "val1"),
+ FilterTestUtils.not(FilterTestUtils.selector("col4", "val4"))
+ ),
+ FilterTestUtils.or(
+ FilterTestUtils.selector("col3", "val3"),
+ FilterTestUtils.not(FilterTestUtils.selector("col4", "val4"))
+ ),
+ FilterTestUtils.or(
+ FilterTestUtils.selector("col1", "val1"),
+ FilterTestUtils.not(FilterTestUtils.selector("col5", "val5"))
+ ),
+ FilterTestUtils.or(
+ FilterTestUtils.selector("col3", "val3"),
+ FilterTestUtils.not(FilterTestUtils.selector("col5", "val5"))
+ ),
+ FilterTestUtils.not(FilterTestUtils.selector("col1", "val11")),
+ // The below OR filter could be eliminated because this filter also has
+ // (col1 = val1 || ~(col4 = val4)) && (col1 = val1 || ~(col5 = val5)).
+ // The reduction process would be
+ // (col1 = val1 || ~(col4 = val4)) && (col1 = val1 || ~(col5 = val5)) && (col1 = val1 || ~(col4 = val4) || ~(col5 = val5))
+ // => (col1 = val1 && ~(col4 = val4) || ~(col5 = val5)) && (col1 = val1 || ~(col4 = val4) || ~(col5 = val5))
+ // => (col1 = val1 && ~(col4 = val4) || ~(col5 = val5))
+ // => (col1 = val1 || ~(col4 = val4)) && (col1 = val1 || ~(col5 = val5)).
+ // However, we don't have this reduction now, so we have a filter in a suboptimized CNF.
+ FilterTestUtils.or(
+ FilterTestUtils.selector("col1", "val1"),
+ FilterTestUtils.not(FilterTestUtils.selector("col4", "val4")),
+ FilterTestUtils.not(FilterTestUtils.selector("col5", "val5"))
+ ),
+ FilterTestUtils.or(
+ FilterTestUtils.selector("col2", "val2"),
+ FilterTestUtils.not(FilterTestUtils.selector("col4", "val4")),
+ FilterTestUtils.not(FilterTestUtils.selector("col5", "val5"))
+ )
+ );
+ final Filter cnf = Filters.toCnf(filter);
+ assertFilter(filter, expected, cnf);
+ }
+
+ @Test
+ public void testToCnfCollapsibleBigFilter()
+ {
+ Set<Filter> ands = new HashSet<>();
+ Set<Filter> ors = new HashSet<>();
+ for (int i = 0; i < 12; i++) {
+ ands.add(
+ FilterTestUtils.and(
+ FilterTestUtils.selector("col3", "val3"),
+ FilterTestUtils.selector("col4", "val4"),
+ FilterTestUtils.selector("col5", StringUtils.format("val%d", i))
+ )
+ );
+ ors.add(FilterTestUtils.selector("col5", StringUtils.format("val%d", i)));
+ }
+
+ final Filter bigFilter = FilterTestUtils.and(
+ new OrFilter(ands),
+ FilterTestUtils.selector("col1", "val1"),
+ FilterTestUtils.selector("col2", "val2")
+ );
+ final Filter expectedCnf = FilterTestUtils.and(
+ FilterTestUtils.selector("col1", "val1"),
+ FilterTestUtils.selector("col2", "val2"),
+ FilterTestUtils.selector("col3", "val3"),
+ FilterTestUtils.selector("col4", "val4"),
+ new OrFilter(ors)
+ );
+ final Filter cnf = Filters.toCnf(bigFilter);
+ assertFilter(bigFilter, expectedCnf, cnf);
+ }
+
+ @Test
+ public void testPullOrOnlyFilter()
+ {
+ final Filter filter = FilterTestUtils.or(
+ FilterTestUtils.selector("col1", "val1"),
+ FilterTestUtils.selector("col2", "val2"),
+ FilterTestUtils.selector("col3", "val3")
+ );
+
+ assertFilter(filter, filter, CalciteCnfHelper.pull(filter));
+ }
+
+ @Test
+ public void testPullNotPullableFilter()
+ {
+ final Filter filter = FilterTestUtils.or(
+ FilterTestUtils.and(
+ FilterTestUtils.selector("col1", "val1"),
+ FilterTestUtils.selector("col2", "val2")
+ ),
+ FilterTestUtils.and(
+ FilterTestUtils.selector("col3", "val3"),
+ FilterTestUtils.selector("col4", "val4")
+ ),
+ FilterTestUtils.and(
+ FilterTestUtils.selector("col5", "val5"),
+ FilterTestUtils.selector("col6", "val6")
+ ),
+ FilterTestUtils.selector("col7", "val7")
+ );
+
+ assertFilter(filter, filter, CalciteCnfHelper.pull(filter));
+ }
+
+ @Test
+ public void testToCnfFilterThatPullCannotConvertToCnfProperly()
+ {
+ final Filter filter = FilterTestUtils.or(
+ FilterTestUtils.and(
+ FilterTestUtils.selector("col1", "val1"),
+ FilterTestUtils.selector("col2", "val2")
+ ),
+ FilterTestUtils.and(
+ FilterTestUtils.selector("col1", "val1"),
+ FilterTestUtils.selector("col3", "val3"),
+ FilterTestUtils.selector("col4", "val4")
+ )
+ );
+
+ final Filter expectedCnf = FilterTestUtils.and(
+ FilterTestUtils.selector("col1", "val1"),
+ FilterTestUtils.or(
+ FilterTestUtils.selector("col2", "val2"),
+ FilterTestUtils.selector("col3", "val3")
+ ),
+ FilterTestUtils.or(
+ FilterTestUtils.selector("col2", "val2"),
+ FilterTestUtils.selector("col4", "val4")
+ )
+ );
+
+ assertFilter(filter, expectedCnf, Filters.toCnf(filter));
+ }
+
+ private void assertFilter(Filter original, Filter expectedConverted, Filter actualConverted)
+ {
+ assertEquivalent(original, expectedConverted);
+ Assert.assertEquals(expectedConverted, actualConverted);
+ }
+
+ /**
+ * Assert the given two filters are equivalent by comparing their truth table.
+ */
+ private void assertEquivalent(Filter f1, Filter f2)
+ {
+ final Set<SelectorFilter> s1 = searchForSelectors(f1);
+ final Set<SelectorFilter> s2 = searchForSelectors(f2);
+ Assert.assertEquals(s1, s2);
+
+ // Compare truth table
+ final List<SelectorFilter> selectorFilters = new ArrayList<>(s1);
+ List<Map<SelectorFilter, Boolean>> truthValues = populate(selectorFilters, selectorFilters.size() - 1);
+ for (Map<SelectorFilter, Boolean> truthValue : truthValues) {
+ Assert.assertEquals(evaluateFilterWith(f1, truthValue), evaluateFilterWith(f2, truthValue));
+ }
+ }
+
+ /**
+ * Recursively populate all permutations of truth values.
+ *
+ * @param selectorFilters selector filters
 + * @param cursor the offset to the current selectorFilter to populate truth values
+ *
+ * @return a list of truth values populated up to the cursor
+ */
+ private List<Map<SelectorFilter, Boolean>> populate(List<SelectorFilter> selectorFilters, int cursor)
+ {
 + // populateFalse and populateTrue will include the false and the true value
 + // for the current selectorFilter, respectively.
+ final List<Map<SelectorFilter, Boolean>> populateFalse;
+ final List<Map<SelectorFilter, Boolean>> populateTrue;
+ if (cursor == 0) {
+ Map<SelectorFilter, Boolean> mapForFalse = new HashMap<>();
+ Map<SelectorFilter, Boolean> mapForTrue = new HashMap<>();
+ for (SelectorFilter eachFilter : selectorFilters) {
+ mapForFalse.put(eachFilter, false);
+ mapForTrue.put(eachFilter, false);
+ }
+ populateFalse = new ArrayList<>();
+ populateFalse.add(mapForFalse);
+ populateTrue = new ArrayList<>();
+ populateTrue.add(mapForTrue);
+ } else {
+ final List<Map<SelectorFilter, Boolean>> populated = populate(selectorFilters, cursor - 1);
+ populateFalse = new ArrayList<>(populated.size());
+ populateTrue = new ArrayList<>(populated.size());
+ for (Map<SelectorFilter, Boolean> eachMap : populated) {
+ populateFalse.add(new HashMap<>(eachMap));
+ populateTrue.add(new HashMap<>(eachMap));
+ }
+ }
+
+ for (Map<SelectorFilter, Boolean> eachMap : populateTrue) {
+ eachMap.put(selectorFilters.get(cursor), true);
+ }
+
+ final List<Map<SelectorFilter, Boolean>> allPopulated = new ArrayList<>(populateFalse);
+ allPopulated.addAll(populateTrue);
+ return allPopulated;
+ }
+
+ private Set<SelectorFilter> searchForSelectors(Filter filter)
+ {
+ Set<SelectorFilter> found = new HashSet<>();
+ visitSelectorFilters(filter, selectorFilter -> {
+ found.add(selectorFilter);
+ return selectorFilter;
+ });
+ return found;
+ }
+
+ private boolean evaluateFilterWith(Filter filter, Map<SelectorFilter, Boolean> values)
+ {
+ Filter rewrittenFilter = visitSelectorFilters(filter, selectorFilter -> {
+ Boolean truth = values.get(selectorFilter);
+ if (truth == null) {
+ throw new ISE("Can't find truth value for selectorFilter[%s]", selectorFilter);
+ }
+ return truth ? TrueFilter.instance() : FalseFilter.instance();
+ });
+ return rewrittenFilter.makeMatcher(
+ new ColumnSelectorFactory()
+ {
+ @Override
+ public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec)
+ {
+ return null;
+ }
+
+ @Override
+ public ColumnValueSelector makeColumnValueSelector(String columnName)
+ {
+ return null;
+ }
+
+ @Nullable
+ @Override
+ public ColumnCapabilities getColumnCapabilities(String column)
+ {
+ return null;
+ }
+ }
+ ).matches();
+ }
+
+ private Filter visitSelectorFilters(Filter filter, Function<SelectorFilter, Filter> visitAction)
+ {
+ if (filter instanceof AndFilter) {
+ Set<Filter> newChildren = new HashSet<>();
+ for (Filter child : ((AndFilter) filter).getFilters()) {
+ newChildren.add(visitSelectorFilters(child, visitAction));
+ }
+ return new AndFilter(newChildren);
+ } else if (filter instanceof OrFilter) {
+ Set<Filter> newChildren = new HashSet<>();
+ for (Filter child : ((OrFilter) filter).getFilters()) {
+ newChildren.add(visitSelectorFilters(child, visitAction));
+ }
+ return new OrFilter(newChildren);
+ } else if (filter instanceof NotFilter) {
+ Filter child = ((NotFilter) filter).getBaseFilter();
+ return new NotFilter(visitSelectorFilters(child, visitAction));
+ } else if (filter instanceof SelectorFilter) {
+ return visitAction.apply((SelectorFilter) filter);
+ }
+ return filter;
+ }
+}
diff --git a/processing/src/test/java/org/apache/druid/segment/filter/FilterPartitionTest.java b/processing/src/test/java/org/apache/druid/segment/filter/FilterPartitionTest.java
index 5ffd167..323b6f2 100644
--- a/processing/src/test/java/org/apache/druid/segment/filter/FilterPartitionTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/filter/FilterPartitionTest.java
@@ -621,7 +621,7 @@ public class FilterPartitionTest extends BaseFilterTest
);
Filter filter1 = dimFilter1.toFilter();
- Filter filter1CNF = Filters.toCNF(filter1);
+ Filter filter1CNF = Filters.toCnf(filter1);
Assert.assertEquals(AndFilter.class, filter1CNF.getClass());
Assert.assertEquals(2, ((AndFilter) filter1CNF).getFilters().size());
@@ -675,7 +675,7 @@ public class FilterPartitionTest extends BaseFilterTest
);
Filter filter1 = dimFilter1.toFilter();
- Filter filter1CNF = Filters.toCNF(filter1);
+ Filter filter1CNF = Filters.toCnf(filter1);
Assert.assertEquals(AndFilter.class, filter1CNF.getClass());
Assert.assertEquals(2, ((AndFilter) filter1CNF).getFilters().size());
diff --git a/processing/src/test/java/org/apache/druid/segment/filter/FiltersTest.java b/processing/src/test/java/org/apache/druid/segment/filter/FiltersTest.java
index 70b527d..f9d493a 100644
--- a/processing/src/test/java/org/apache/druid/segment/filter/FiltersTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/filter/FiltersTest.java
@@ -25,7 +25,6 @@ import org.apache.druid.collections.bitmap.BitmapFactory;
import org.apache.druid.collections.bitmap.ConciseBitmapFactory;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.collections.bitmap.MutableBitmap;
-import org.apache.druid.query.filter.Filter;
import org.apache.druid.segment.IntIteratorUtils;
import org.apache.druid.segment.column.BitmapIndex;
import org.apache.druid.testing.InitializedNullHandlingTest;
@@ -52,215 +51,6 @@ public class FiltersTest extends InitializedNullHandlingTest
Assert.assertEquals(expected, estimated, 0.00001);
}
- @Test
- public void testPushDownNot()
- {
- final Filter filter = FilterTestUtils.not(
- FilterTestUtils.and(
- FilterTestUtils.selector("col1", "1"),
- FilterTestUtils.selector("col2", "2"),
- FilterTestUtils.not(FilterTestUtils.selector("col3", "3"))
- )
- );
- final Filter expected = FilterTestUtils.or(
- FilterTestUtils.not(FilterTestUtils.selector("col1", "1")),
- FilterTestUtils.not(FilterTestUtils.selector("col2", "2")),
- FilterTestUtils.selector("col3", "3")
- );
- Assert.assertEquals(expected, Filters.pushDownNot(filter));
- }
-
- @Test
- public void testPushDownNotLeafNot()
- {
- final Filter filter = FilterTestUtils.and(
- FilterTestUtils.selector("col1", "1"),
- FilterTestUtils.selector("col2", "2"),
- FilterTestUtils.not(FilterTestUtils.selector("col3", "3"))
- );
- Assert.assertEquals(filter, Filters.pushDownNot(filter));
- }
-
- @Test
- public void testFlatten()
- {
- final Filter filter = FilterTestUtils.and(
- FilterTestUtils.and(
- FilterTestUtils.and(
- FilterTestUtils.selector("col1", "1"),
- FilterTestUtils.selector("col2", "2")
- )
- ),
- FilterTestUtils.selector("col3", "3")
- );
- final Filter expected = FilterTestUtils.and(
- FilterTestUtils.selector("col1", "1"),
- FilterTestUtils.selector("col2", "2"),
- FilterTestUtils.selector("col3", "3")
- );
- Assert.assertEquals(expected, Filters.flatten(filter));
- }
-
- @Test
- public void testFlattenUnflattenable()
- {
- final Filter filter = FilterTestUtils.and(
- FilterTestUtils.or(
- FilterTestUtils.selector("col1", "1"),
- FilterTestUtils.selector("col2", "2")
- ),
- FilterTestUtils.selector("col3", "3")
- );
- Assert.assertEquals(filter, Filters.flatten(filter));
- }
-
- @Test
- public void testToCNFWithMuchReducibleFilter()
- {
- final Filter muchReducible = FilterTestUtils.and(
- // should be flattened
- FilterTestUtils.and(
- FilterTestUtils.and(
- FilterTestUtils.and(FilterTestUtils.selector("col1", "val1"))
- )
- ),
- // should be flattened
- FilterTestUtils.and(
- FilterTestUtils.or(
- FilterTestUtils.and(FilterTestUtils.selector("col1", "val1"))
- )
- ),
- // should be flattened
- FilterTestUtils.or(
- FilterTestUtils.and(
- FilterTestUtils.or(FilterTestUtils.selector("col1", "val1"))
- )
- ),
- // should eliminate duplicate filters
- FilterTestUtils.selector("col1", "val1"),
- FilterTestUtils.selector("col2", "val2"),
- FilterTestUtils.and(
- FilterTestUtils.selector("col1", "val1"),
- FilterTestUtils.selector("col2", "val2")
- ),
- FilterTestUtils.and(
- FilterTestUtils.selector("col1", "val1"),
- FilterTestUtils.and(
- FilterTestUtils.selector("col2", "val2"),
- FilterTestUtils.selector("col1", "val1")
- )
- )
- );
- final Filter expected = FilterTestUtils.and(
- FilterTestUtils.selector("col1", "val1"),
- FilterTestUtils.selector("col2", "val2")
- );
- Assert.assertEquals(expected, Filters.toCNF(muchReducible));
- }
-
- @Test
- public void testToCNFWithComplexFilterIncludingNotAndOr()
- {
- final Filter filter = FilterTestUtils.and(
- FilterTestUtils.or(
- FilterTestUtils.and(
- FilterTestUtils.selector("col1", "val1"),
- FilterTestUtils.selector("col2", "val2")
- ),
- FilterTestUtils.not(
- FilterTestUtils.and(
- FilterTestUtils.selector("col4", "val4"),
- FilterTestUtils.selector("col5", "val5")
- )
- )
- ),
- FilterTestUtils.or(
- FilterTestUtils.not(
- FilterTestUtils.or(
- FilterTestUtils.selector("col2", "val2"),
- FilterTestUtils.selector("col4", "val4"),
- FilterTestUtils.selector("col5", "val5")
- )
- ),
- FilterTestUtils.and(
- FilterTestUtils.selector("col1", "val1"),
- FilterTestUtils.selector("col3", "val3")
- )
- ),
- FilterTestUtils.and(
- FilterTestUtils.or(
- FilterTestUtils.selector("col1", "val1"),
- FilterTestUtils.selector("col2", "val22"), // selecting different value
- FilterTestUtils.selector("col3", "val3")
- ),
- FilterTestUtils.not(
- FilterTestUtils.selector("col1", "val11")
- )
- ),
- FilterTestUtils.and(
- FilterTestUtils.or(
- FilterTestUtils.selector("col1", "val1"),
- FilterTestUtils.selector("col2", "val22"),
- FilterTestUtils.selector("col3", "val3")
- ),
- FilterTestUtils.not(
- FilterTestUtils.selector("col1", "val11") // selecting different value
- )
- )
- );
- final Filter expected = FilterTestUtils.and(
- FilterTestUtils.or(
- FilterTestUtils.selector("col1", "val1"),
- FilterTestUtils.selector("col2", "val22"),
- FilterTestUtils.selector("col3", "val3")
- ),
- FilterTestUtils.or(
- FilterTestUtils.selector("col1", "val1"),
- FilterTestUtils.not(FilterTestUtils.selector("col2", "val2"))
- ),
- FilterTestUtils.or(
- FilterTestUtils.not(FilterTestUtils.selector("col2", "val2")),
- FilterTestUtils.selector("col3", "val3")
- ),
- FilterTestUtils.or(
- FilterTestUtils.selector("col1", "val1"),
- FilterTestUtils.not(FilterTestUtils.selector("col4", "val4"))
- ),
- FilterTestUtils.or(
- FilterTestUtils.selector("col3", "val3"),
- FilterTestUtils.not(FilterTestUtils.selector("col4", "val4"))
- ),
- FilterTestUtils.or(
- FilterTestUtils.selector("col1", "val1"),
- FilterTestUtils.not(FilterTestUtils.selector("col5", "val5"))
- ),
- FilterTestUtils.or(
- FilterTestUtils.selector("col3", "val3"),
- FilterTestUtils.not(FilterTestUtils.selector("col5", "val5"))
- ),
- FilterTestUtils.not(FilterTestUtils.selector("col1", "val11")),
- // The below OR filter could be eliminated because this filter also has
- // (col1 = val1 || ~(col4 = val4)) && (col1 = val1 || ~(col5 = val5)).
- // The reduction process would be
- // (col1 = val1 || ~(col4 = val4)) && (col1 = val1 || ~(col5 = val5)) && (col1 = val1 || ~(col4 = val4) || ~(col5 = val5))
- // => (col1 = val1 && ~(col4 = val4) || ~(col5 = val5)) && (col1 = val1 || ~(col4 = val4) || ~(col5 = val5))
- // => (col1 = val1 && ~(col4 = val4) || ~(col5 = val5))
- // => (col1 = val1 || ~(col4 = val4)) && (col1 = val1 || ~(col5 = val5)).
- // However, we don't have this reduction now, so we have a filter in a suboptimized CNF.
- FilterTestUtils.or(
- FilterTestUtils.selector("col1", "val1"),
- FilterTestUtils.not(FilterTestUtils.selector("col4", "val4")),
- FilterTestUtils.not(FilterTestUtils.selector("col5", "val5"))
- ),
- FilterTestUtils.or(
- FilterTestUtils.selector("col2", "val2"),
- FilterTestUtils.not(FilterTestUtils.selector("col4", "val4")),
- FilterTestUtils.not(FilterTestUtils.selector("col5", "val5"))
- )
- );
- Assert.assertEquals(expected, Filters.toCNF(filter));
- }
-
private static BitmapIndex getBitmapIndex(final List<ImmutableBitmap> bitmapList)
{
return new BitmapIndex()
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org