You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by xb...@apache.org on 2023/06/28 14:08:03 UTC

[pinot] branch master updated: Add a PrioritizedFilterOperator (#10953)

This is an automated email from the ASF dual-hosted git repository.

xbli pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 45a32eaaad Add a PrioritizedFilterOperator (#10953)
45a32eaaad is described below

commit 45a32eaaad594955754732ec18ee78abeb8e96b8
Author: Gonzalo Ortiz Jaureguizar <go...@users.noreply.github.com>
AuthorDate: Wed Jun 28 16:07:55 2023 +0200

    Add a PrioritizedFilterOperator (#10953)
    
    * Add a PrioritizedFilterOperator which is used when FilterOperatorUtils tries to sort a bunch of operators.
    
    This is needed by new operators that are not included in the compilation unit. This improves FilterOperatorUtils extensibility, although a redesign may be convenient.
    
    * Multiply most priorities by 100 in order to have more space between them
    
    * Change default FilterOperatorUtils to do not fail when an operator is not recognized but prioritized it last.
---
 .../core/operator/filter/FilterOperatorUtils.java  | 29 +++++---
 .../operator/filter/PrioritizedFilterOperator.java | 52 +++++++++++++
 .../operator/filter/FilterOperatorUtilsTest.java   | 86 ++++++++++++++++++++++
 3 files changed, 156 insertions(+), 11 deletions(-)

diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java
index 556f6e66ad..f81cc9bc80 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java
@@ -21,6 +21,7 @@ package org.apache.pinot.core.operator.filter;
 import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.List;
+import java.util.OptionalInt;
 import org.apache.pinot.common.request.context.predicate.Predicate;
 import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluator;
 import org.apache.pinot.core.query.request.context.QueryContext;
@@ -175,7 +176,7 @@ public class FilterOperatorUtils {
 
 
     /**
-     * For AND filter operator, reorders its child filter operators based on the their cost and puts the ones with
+     * For AND filter operator, reorders its child filter operators based on their cost and puts the ones with
      * inverted index first in order to reduce the number of documents to be processed.
      * <p>Special filter operators such as {@link MatchAllFilterOperator} and {@link EmptyFilterOperator} should be
      * removed from the list before calling this method.
@@ -188,36 +189,42 @@ public class FilterOperatorUtils {
         }
 
         int getPriority(BaseFilterOperator filterOperator) {
+          if (filterOperator instanceof PrioritizedFilterOperator) {
+            OptionalInt priority = ((PrioritizedFilterOperator<?>) filterOperator).getPriority();
+            if (priority.isPresent()) {
+              return priority.getAsInt();
+            }
+          }
           if (filterOperator instanceof SortedIndexBasedFilterOperator) {
-            return 0;
+            return PrioritizedFilterOperator.HIGH_PRIORITY;
           }
           if (filterOperator instanceof BitmapBasedFilterOperator) {
-            return 1;
+            return PrioritizedFilterOperator.MEDIUM_PRIORITY;
           }
           if (filterOperator instanceof RangeIndexBasedFilterOperator
               || filterOperator instanceof TextContainsFilterOperator
               || filterOperator instanceof TextMatchFilterOperator || filterOperator instanceof JsonMatchFilterOperator
               || filterOperator instanceof H3IndexFilterOperator
               || filterOperator instanceof H3InclusionIndexFilterOperator) {
-            return 2;
+            return PrioritizedFilterOperator.LOW_PRIORITY;
           }
           if (filterOperator instanceof AndFilterOperator) {
-            return 3;
+            return PrioritizedFilterOperator.AND_PRIORITY;
           }
           if (filterOperator instanceof OrFilterOperator) {
-            return 4;
+            return PrioritizedFilterOperator.OR_PRIORITY;
           }
           if (filterOperator instanceof NotFilterOperator) {
             return getPriority(((NotFilterOperator) filterOperator).getChildFilterOperator());
           }
           if (filterOperator instanceof ScanBasedFilterOperator) {
-            return getScanBasedFilterPriority(queryContext, (ScanBasedFilterOperator) filterOperator, 5);
+            int basePriority = PrioritizedFilterOperator.SCAN_PRIORITY;
+            return getScanBasedFilterPriority(queryContext, (ScanBasedFilterOperator) filterOperator, basePriority);
           }
           if (filterOperator instanceof ExpressionFilterOperator) {
-            return 10;
+            return PrioritizedFilterOperator.EXPRESSION_PRIORITY;
           }
-          throw new IllegalStateException(filterOperator.getClass().getSimpleName()
-              + " should not be reordered, remove it from the list before calling this method");
+          return PrioritizedFilterOperator.UNKNOWN_FILTER_PRIORITY;
         }
       });
     }
@@ -232,7 +239,7 @@ public class FilterOperatorUtils {
         return basePriority;
       } else {
         // Lower priority for multi-value column
-        return basePriority + 1;
+        return basePriority + 50;
       }
     }
   }
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/PrioritizedFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/PrioritizedFilterOperator.java
new file mode 100644
index 0000000000..d847a34a6d
--- /dev/null
+++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/PrioritizedFilterOperator.java
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.operator.filter;
+
+import java.util.OptionalInt;
+import org.apache.pinot.core.common.Block;
+import org.apache.pinot.core.common.Operator;
+
+
+/**
+ * Operators that implements this interface can define how to be sorted in an AND, as defined in
+ * {@link FilterOperatorUtils.Implementation#}.
+ */
+public interface PrioritizedFilterOperator<T extends Block> extends Operator<T> {
+
+  int HIGH_PRIORITY = 0;
+  int MEDIUM_PRIORITY = 100;
+  int LOW_PRIORITY = 200;
+  int AND_PRIORITY = 300;
+  int OR_PRIORITY = 400;
+  int SCAN_PRIORITY = 500;
+  int EXPRESSION_PRIORITY = 1000;
+  int UNKNOWN_FILTER_PRIORITY = 10000;
+
+  /**
+   * Priority is a number that is used to compare different filters. Some predicates, like AND, sort their sub
+   * predicates in order to first apply the ones that should be more efficient.
+   *
+   * For example, {@link SortedIndexBasedFilterOperator} is assigned to {@link #HIGH_PRIORITY},
+   * {@link BitmapBasedFilterOperator} is assigned to {@link #MEDIUM_PRIORITY} and {@link H3IndexFilterOperator} to
+   * {@link #LOW_PRIORITY}
+   *
+   * @return the priority of this filter operator or an empty optional if no special priority should be used.
+   */
+  OptionalInt getPriority();
+}
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/FilterOperatorUtilsTest.java b/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/FilterOperatorUtilsTest.java
index ef5f781fce..8eaae20962 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/FilterOperatorUtilsTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/FilterOperatorUtilsTest.java
@@ -18,12 +18,21 @@
  */
 package org.apache.pinot.core.operator.filter;
 
+import com.google.common.collect.Lists;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.List;
+import java.util.OptionalInt;
+import org.apache.pinot.core.common.Operator;
+import org.apache.pinot.core.operator.blocks.FilterBlock;
 import org.apache.pinot.core.query.request.context.QueryContext;
+import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
 
 import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+import static org.testng.Assert.assertEquals;
 import static org.testng.Assert.assertTrue;
 
 
@@ -101,4 +110,81 @@ public class FilterOperatorUtilsTest {
         Arrays.asList(MATCH_ALL_FILTER_OPERATOR, REGULAR_FILTER_OPERATOR), NUM_DOCS);
     assertTrue(filterOperator instanceof MatchAllFilterOperator);
   }
+
+  @DataProvider
+  public static Object[][] priorities() {
+    SortedIndexBasedFilterOperator sorted = mock(SortedIndexBasedFilterOperator.class);
+    BitmapBasedFilterOperator bitmap = mock(BitmapBasedFilterOperator.class);
+    RangeIndexBasedFilterOperator range = mock(RangeIndexBasedFilterOperator.class);
+    TextContainsFilterOperator textContains = mock(TextContainsFilterOperator.class);
+    TextMatchFilterOperator textMatch = mock(TextMatchFilterOperator.class);
+    JsonMatchFilterOperator jsonMatch = mock(JsonMatchFilterOperator.class);
+    H3IndexFilterOperator h3 = mock(H3IndexFilterOperator.class);
+    H3InclusionIndexFilterOperator h3Inclusion = mock(H3InclusionIndexFilterOperator.class);
+    AndFilterOperator andFilterOperator = mock(AndFilterOperator.class);
+    OrFilterOperator orFilterOperator = mock(OrFilterOperator.class);
+    NotFilterOperator notWithHighPriority = new NotFilterOperator(sorted, NUM_DOCS);
+    NotFilterOperator notWithLowPriority = new NotFilterOperator(orFilterOperator, NUM_DOCS);
+
+    ExpressionFilterOperator expression = mock(ExpressionFilterOperator.class);
+    BaseFilterOperator unknown = mock(BaseFilterOperator.class);
+
+    MockedPrioritizedFilterOperator prioritizedBetweenSortedAndBitmap = mock(MockedPrioritizedFilterOperator.class);
+    OptionalInt betweenSortedAndBitmapPriority =
+        OptionalInt.of((PrioritizedFilterOperator.HIGH_PRIORITY + PrioritizedFilterOperator.MEDIUM_PRIORITY) / 2);
+    when(prioritizedBetweenSortedAndBitmap.getPriority())
+        .thenReturn(betweenSortedAndBitmapPriority);
+
+    MockedPrioritizedFilterOperator notPrioritized = mock(MockedPrioritizedFilterOperator.class);
+    when(prioritizedBetweenSortedAndBitmap.getPriority())
+        .thenReturn(OptionalInt.empty());
+
+    List<? extends List<? extends BaseFilterOperator>> expectedOrder = Lists.newArrayList(
+        Lists.newArrayList(sorted, notWithHighPriority),
+        Lists.newArrayList(bitmap),
+        Lists.newArrayList(range, textContains, textMatch, jsonMatch, h3, h3Inclusion),
+        Lists.newArrayList(andFilterOperator),
+        Lists.newArrayList(orFilterOperator, notWithLowPriority),
+        Lists.newArrayList(expression),
+        Lists.newArrayList(unknown, notPrioritized)
+    );
+
+    List<Object[]> cases = new ArrayList<>();
+    for (int i = 0; i < expectedOrder.size(); i++) {
+      List<? extends BaseFilterOperator> currentOps = expectedOrder.get(i);
+      for (BaseFilterOperator highPriorityOp : currentOps) {
+        for (int j = i + 1; j < expectedOrder.size(); j++) {
+          List<? extends BaseFilterOperator> lowerPriorityOps = expectedOrder.get(j);
+          for (BaseFilterOperator lowerPriorityOp : lowerPriorityOps) {
+            cases.add(new Object[] {highPriorityOp, lowerPriorityOp});
+          }
+        }
+      }
+    }
+    return cases.toArray(new Object[][]{});
+  }
+
+  @Test(dataProvider = "priorities")
+  public void testPriority(BaseFilterOperator highPriorty, BaseFilterOperator lowerPriorty) {
+    ArrayList<BaseFilterOperator> unsorted = Lists.newArrayList(lowerPriorty, highPriorty);
+    BaseFilterOperator filterOperator =
+        FilterOperatorUtils.getAndFilterOperator(QUERY_CONTEXT, unsorted, NUM_DOCS);
+    assertTrue(filterOperator instanceof AndFilterOperator);
+    List<Operator> actualChildOperators = ((AndFilterOperator) filterOperator).getChildOperators();
+    assertEquals(actualChildOperators, Lists.newArrayList(highPriorty, lowerPriorty), "Filter " + highPriorty
+        + " should have more priority than filter " + lowerPriorty);
+  }
+
+  private void assertOrder(BaseFilterOperator first, BaseFilterOperator second) {
+    BaseFilterOperator filterOperator =
+        FilterOperatorUtils.getAndFilterOperator(QUERY_CONTEXT, Lists.newArrayList(second, first), NUM_DOCS);
+    assertTrue(filterOperator instanceof AndFilterOperator);
+    List<Operator> actualChildOperators = ((AndFilterOperator) filterOperator).getChildOperators();
+    assertEquals(actualChildOperators, Lists.newArrayList(first, second), "Filter " + first + " should have "
+        + "more priority than filter " + second);
+  }
+
+  private static abstract class MockedPrioritizedFilterOperator extends BaseFilterOperator
+      implements PrioritizedFilterOperator<FilterBlock> {
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org