You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by GitBox <gi...@apache.org> on 2022/04/18 17:32:43 UTC

[GitHub] [pinot] Jackie-Jiang commented on a diff in pull request #8513: push counts down to range index when v2 is in use

Jackie-Jiang commented on code in PR #8513:
URL: https://github.com/apache/pinot/pull/8513#discussion_r852266617


##########
pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/RangeIndexReader.java:
##########
@@ -26,6 +26,51 @@
  * @param <T>
  */
 public interface RangeIndexReader<T> extends Closeable {
+
+  /**
+   * @return true if the results are exact and don't need refinement by scanning.
+   * This means {@see getPartiallyMatchingDocIds} will return null.
+   */
+  default boolean isExact() {
+    return true;
+  }

Review Comment:
   (minor) Add an empty line after



##########
pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/RangeIndexReader.java:
##########
@@ -26,6 +26,51 @@
  * @param <T>
  */
 public interface RangeIndexReader<T> extends Closeable {
+
+  /**
+   * @return true if the results are exact and don't need refinement by scanning.
+   * This means {@see getPartiallyMatchingDocIds} will return null.
+   */
+  default boolean isExact() {
+    return true;
+  }
+  /**
+   * Returns the number of docs with a value between min and max, both inclusive.
+   * Doc ids returned by this method must correspond to values which
+   * satisfy the query.
+   * @param min the inclusive lower bound.
+   * @param max the inclusive upper bound.
+   * @return the matching doc ids.
+   */
+  int getMatchingDocCount(int min, int max);

Review Comment:
   (minor) Suggest renaming to `getNumMatchingDocs` to be consistent with `BaseFilterOperator`



##########
pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/BitSlicedIndexCreatorTest.java:
##########
@@ -192,7 +194,9 @@ private void testLong(Dataset<long[]> dataset)
       for (long quantile : dataset.quantiles()) {
         ImmutableRoaringBitmap reference = dataset.scan(prev, quantile);
         ImmutableRoaringBitmap result = reader.getMatchingDocIds(prev, quantile);
+        int resultCount = reader.getMatchingDocCount(prev, quantile);
         assertEquals(result, reference);
+        assertEquals(resultCount, result == null ? 0 : result.getCardinality());

Review Comment:
   Per the implementation seems `result` can never be `null`, and we rely on it not being `null` in `RangeIndexBasedFilterOperator.getNextBlock()` or it will throw NPE



##########
pinot-core/src/main/java/org/apache/pinot/core/operator/filter/RangeIndexBasedFilterOperator.java:
##########
@@ -145,10 +127,178 @@ public List<Operator> getChildOperators() {
 
   @Override
   public String toExplainString() {
-    StringBuilder stringBuilder = new StringBuilder(EXPLAIN_NAME).append("(indexLookUp:range_index");
-    stringBuilder.append(",operator:").append(_rangePredicateEvaluator.getPredicateType());
-    stringBuilder.append(",predicate:").append(_rangePredicateEvaluator.getPredicate().toString());
-    return stringBuilder.append(')').toString();
+    return EXPLAIN_NAME + "(indexLookUp:range_index"
+        + ",operator:" + _rangePredicateEvaluator.getPredicateType()
+        + ",predicate:" + _rangePredicateEvaluator.getPredicate().toString()
+        + ')';
+  }
+
+  interface RangeEvaluator {
+    static RangeEvaluator of(RangeIndexReader<ImmutableRoaringBitmap> rangeIndexReader,
+        PredicateEvaluator predicateEvaluator) {
+      if (predicateEvaluator.isDictionaryBased()) {
+        return new IntRangeEvaluator(rangeIndexReader, predicateEvaluator);
+      } else {
+        switch (predicateEvaluator.getDataType()) {
+          case INT:
+            return new IntRangeEvaluator(rangeIndexReader, predicateEvaluator);
+          case LONG:
+            return new LongRangeEvaluator(rangeIndexReader, predicateEvaluator);
+          case FLOAT:
+            return new FloatRangeEvaluator(rangeIndexReader, predicateEvaluator);
+          case DOUBLE:
+            return new DoubleRangeEvaluator(rangeIndexReader, predicateEvaluator);
+          default:
+            throw new IllegalStateException("String and Bytes data type not supported for Range Indexing");
+        }
+      }
+    }
+
+    ImmutableRoaringBitmap getMatchingDocIds();
+
+    ImmutableRoaringBitmap getPartiallyMatchingDocIds();
+
+    int getMatchingDocCount();

Review Comment:
   (minor) Suggest renaming to `getNumMatchingDocs` to be consistent with `BaseFilterOperator`



##########
pinot-core/src/main/java/org/apache/pinot/core/operator/filter/RangeIndexBasedFilterOperator.java:
##########
@@ -145,10 +127,178 @@ public List<Operator> getChildOperators() {
 
   @Override
   public String toExplainString() {
-    StringBuilder stringBuilder = new StringBuilder(EXPLAIN_NAME).append("(indexLookUp:range_index");
-    stringBuilder.append(",operator:").append(_rangePredicateEvaluator.getPredicateType());
-    stringBuilder.append(",predicate:").append(_rangePredicateEvaluator.getPredicate().toString());
-    return stringBuilder.append(')').toString();
+    return EXPLAIN_NAME + "(indexLookUp:range_index"
+        + ",operator:" + _rangePredicateEvaluator.getPredicateType()
+        + ",predicate:" + _rangePredicateEvaluator.getPredicate().toString()
+        + ')';
+  }
+
+  interface RangeEvaluator {
+    static RangeEvaluator of(RangeIndexReader<ImmutableRoaringBitmap> rangeIndexReader,
+        PredicateEvaluator predicateEvaluator) {
+      if (predicateEvaluator.isDictionaryBased()) {
+        return new IntRangeEvaluator(rangeIndexReader, predicateEvaluator);
+      } else {
+        switch (predicateEvaluator.getDataType()) {
+          case INT:
+            return new IntRangeEvaluator(rangeIndexReader, predicateEvaluator);
+          case LONG:
+            return new LongRangeEvaluator(rangeIndexReader, predicateEvaluator);
+          case FLOAT:
+            return new FloatRangeEvaluator(rangeIndexReader, predicateEvaluator);
+          case DOUBLE:
+            return new DoubleRangeEvaluator(rangeIndexReader, predicateEvaluator);
+          default:
+            throw new IllegalStateException("String and Bytes data type not supported for Range Indexing");
+        }
+      }
+    }
+
+    ImmutableRoaringBitmap getMatchingDocIds();
+
+    ImmutableRoaringBitmap getPartiallyMatchingDocIds();
+
+    int getMatchingDocCount();
+
+    boolean isExact();
+  }
+
+  private static final class IntRangeEvaluator implements RangeEvaluator {
+    final RangeIndexReader<ImmutableRoaringBitmap> _rangeIndexReader;
+    final int _min;
+    final int _max;
+
+    IntRangeEvaluator(
+        RangeIndexReader<ImmutableRoaringBitmap> rangeIndexReader, PredicateEvaluator predicateEvaluator) {
+      _rangeIndexReader = rangeIndexReader;
+      if (predicateEvaluator instanceof SortedDictionaryBasedRangePredicateEvaluator) {
+        // NOTE: End dictionary id is exclusive in OfflineDictionaryBasedRangePredicateEvaluator.
+        _min = ((SortedDictionaryBasedRangePredicateEvaluator) predicateEvaluator).getStartDictId();
+        _max = ((SortedDictionaryBasedRangePredicateEvaluator) predicateEvaluator).getEndDictId() - 1;
+      } else {
+        _min = ((IntRawValueBasedRangePredicateEvaluator) predicateEvaluator).getLowerBound();
+        _max = ((IntRawValueBasedRangePredicateEvaluator) predicateEvaluator).getUpperBound();
+      }
+    }
+
+    @Override
+    public ImmutableRoaringBitmap getMatchingDocIds() {
+      return _rangeIndexReader.getMatchingDocIds(_min, _max);
+    }
+
+    @Override
+    public ImmutableRoaringBitmap getPartiallyMatchingDocIds() {
+      return _rangeIndexReader.getPartiallyMatchingDocIds(_min, _max);
+    }
+
+    @Override
+    public int getMatchingDocCount() {
+      return _rangeIndexReader.getMatchingDocCount(_min, _max);
+    }
+
+    @Override
+    public boolean isExact() {
+      return _rangeIndexReader.isExact();
+    }
+  }
+
+  private static final class LongRangeEvaluator implements RangeEvaluator {
+    final RangeIndexReader<ImmutableRoaringBitmap> _rangeIndexReader;
+    final long _min;
+    final long _max;
+
+    LongRangeEvaluator(RangeIndexReader<ImmutableRoaringBitmap> rangeIndexReader,
+        PredicateEvaluator predicateEvaluator) {
+      _rangeIndexReader = rangeIndexReader;
+      _min = ((LongRawValueBasedRangePredicateEvaluator) predicateEvaluator).getLowerBound();
+      _max = ((LongRawValueBasedRangePredicateEvaluator) predicateEvaluator).getUpperBound();
+    }
+
+    @Override
+    public ImmutableRoaringBitmap getMatchingDocIds() {
+      return _rangeIndexReader.getMatchingDocIds(_min, _max);
+    }
+
+    @Override
+    public ImmutableRoaringBitmap getPartiallyMatchingDocIds() {
+      return _rangeIndexReader.getPartiallyMatchingDocIds(_min, _max);
+    }
+
+    @Override
+    public int getMatchingDocCount() {
+      return _rangeIndexReader.getMatchingDocCount(_min, _max);
+    }
+
+    @Override
+    public boolean isExact() {
+      return _rangeIndexReader.isExact();
+    }
+  }
+
+  private static final class FloatRangeEvaluator implements RangeEvaluator {
+    final RangeIndexReader<ImmutableRoaringBitmap> _rangeIndexReader;
+    final float _min;
+    final float _max;
+
+    FloatRangeEvaluator(RangeIndexReader<ImmutableRoaringBitmap> rangeIndexReader,
+        PredicateEvaluator predicateEvaluator) {
+      _rangeIndexReader = rangeIndexReader;
+      _min = ((FloatRawValueBasedRangePredicateEvaluator) predicateEvaluator).geLowerBound();
+      _max = ((FloatRawValueBasedRangePredicateEvaluator) predicateEvaluator).getUpperBound();
+    }
+
+    @Override
+    public ImmutableRoaringBitmap getMatchingDocIds() {
+      return _rangeIndexReader.getMatchingDocIds(_min, _max);
+    }
+
+    @Override
+    public ImmutableRoaringBitmap getPartiallyMatchingDocIds() {
+      return _rangeIndexReader.getPartiallyMatchingDocIds(_min, _max);
+    }
+
+    @Override
+    public int getMatchingDocCount() {
+      return _rangeIndexReader.getMatchingDocCount(_min, _max);
+    }
+
+    @Override
+    public boolean isExact() {
+      return _rangeIndexReader.isExact();
+    }
+  }
+
+  private static final class DoubleRangeEvaluator implements RangeEvaluator {
+    final RangeIndexReader<ImmutableRoaringBitmap> _rangeIndexReader;
+    final double _min;
+    final double _max;
+
+    DoubleRangeEvaluator(RangeIndexReader<ImmutableRoaringBitmap> rangeIndexReader,
+        PredicateEvaluator predicateEvaluator) {
+      _rangeIndexReader = rangeIndexReader;
+      _min = ((FloatRawValueBasedRangePredicateEvaluator) predicateEvaluator).geLowerBound();

Review Comment:
   (MAJOR) This should be `DoubleRawValueBasedRangePredicateEvaluator`. We should probably add a test which can catch this



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org