You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by si...@apache.org on 2020/11/30 20:37:04 UTC

[incubator-pinot] branch master updated: Use sorted index based filtering only for dictionary encoded column (#6288)

This is an automated email from the ASF dual-hosted git repository.

siddteotia pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 3eb0f9c  Use sorted index based filtering only for dictionary encoded column (#6288)
3eb0f9c is described below

commit 3eb0f9c9daf3e176d41775af515eeb982ff2daeb
Author: Sidd <si...@gmail.com>
AuthorDate: Mon Nov 30 12:36:50 2020 -0800

    Use sorted index based filtering only for dictionary encoded column (#6288)
    
    * Use sorted index based filtering
    only for sorted column with dictionary
    
    * Address review comments
    
    Co-authored-by: Siddharth Teotia <st...@steotia-mn1.linkedin.biz>
---
 .../core/operator/filter/FilterOperatorUtils.java  | 10 +++++--
 .../RangePredicateWithSortedInvertedIndexTest.java | 32 ++++++++++++++++++++--
 2 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java
index b497031..a0b7cae 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java
@@ -47,9 +47,15 @@ public class FilterOperatorUtils {
       return new MatchAllFilterOperator(numDocs);
     }
 
+    // Currently sorted index based filtering is supported only for
+    // dictionary encoded columns. The on-disk segment metadata
+    // will indicate if the column is sorted or not regardless of
+    // whether it is raw or dictionary encoded. Here when creating
+    // the filter operator, we need to make sure that the sorted index based
+    // filter operator is used only if the column is sorted and has a dictionary.
     Predicate.Type predicateType = predicateEvaluator.getPredicateType();
     if (predicateType == Predicate.Type.RANGE) {
-      if (dataSource.getDataSourceMetadata().isSorted()) {
+      if (dataSource.getDataSourceMetadata().isSorted() && dataSource.getDictionary() != null) {
         return new SortedIndexBasedFilterOperator(predicateEvaluator, dataSource, numDocs);
       }
       if (dataSource.getRangeIndex() != null) {
@@ -59,7 +65,7 @@ public class FilterOperatorUtils {
     } else if (predicateType == Predicate.Type.REGEXP_LIKE) {
       return new ScanBasedFilterOperator(predicateEvaluator, dataSource, numDocs);
     } else {
-      if (dataSource.getDataSourceMetadata().isSorted()) {
+      if (dataSource.getDataSourceMetadata().isSorted() && dataSource.getDictionary() != null) {
         return new SortedIndexBasedFilterOperator(predicateEvaluator, dataSource, numDocs);
       }
       if (dataSource.getInvertedIndex() != null) {
diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/RangePredicateWithSortedInvertedIndexTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/RangePredicateWithSortedInvertedIndexTest.java
index cd97723..9539969 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/RangePredicateWithSortedInvertedIndexTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/RangePredicateWithSortedInvertedIndexTest.java
@@ -59,8 +59,9 @@ public class RangePredicateWithSortedInvertedIndexTest extends BaseQueriesTest {
   private static final String SEGMENT_NAME = "testSegment";
 
   private static final String D1 = "STRING_COL";
-  private static final String M1 = "INT_COL"; // sorted column
+  private static final String M1 = "INT_COL"; // sorted column (dictionary encoded)
   private static final String M2 = "LONG_COL";
+  private static final String M3 = "INT_COL_RAW"; // sorted raw column
 
   private static final int NUM_ROWS = 30000;
   private static final int INT_BASE_VALUE = 0;
@@ -117,13 +118,17 @@ public class RangePredicateWithSortedInvertedIndexTest extends BaseQueriesTest {
       row.putValue(M1, INT_BASE_VALUE + rowIndex);
       _longValues[rowIndex] = RANDOM.nextLong();
       row.putValue(M2, _longValues[rowIndex]);
+      row.putValue(M3, INT_BASE_VALUE + rowIndex);
       rows.add(row);
     }
 
-    TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME).build();
+    TableConfig tableConfig =
+        new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME).setNoDictionaryColumns(Arrays.asList(M3))
+            .build();
     Schema schema =
         new Schema.SchemaBuilder().setSchemaName(TABLE_NAME).addSingleValueDimension(D1, FieldSpec.DataType.STRING)
-            .addMetric(M1, FieldSpec.DataType.INT).addMetric(M2, FieldSpec.DataType.LONG).build();
+            .addMetric(M1, FieldSpec.DataType.INT).addMetric(M2, FieldSpec.DataType.LONG)
+            .addMetric(M3, FieldSpec.DataType.INT).build();
     SegmentGeneratorConfig config = new SegmentGeneratorConfig(tableConfig, schema);
     config.setOutDir(INDEX_DIR.getPath());
     config.setTableName(TABLE_NAME);
@@ -142,14 +147,35 @@ public class RangePredicateWithSortedInvertedIndexTest extends BaseQueriesTest {
     Pairs.IntPair pair = new Pairs.IntPair(20000, 29999);
     runQuery(query, 10000, Lists.newArrayList(pair), 2);
 
+    // test with sorted column without dictionary
+    // FilterOperatorUtils code should correctly create scan operator for INT_COL_RAW
+    // else this test will fail
+    query = "SELECT STRING_COL, INT_COL FROM testTable WHERE INT_COL >= 20000 AND INT_COL_RAW >= 20000 LIMIT 100000";
+    pair = new Pairs.IntPair(20000, 29999);
+    runQuery(query, 10000, Lists.newArrayList(pair), 2);
+
     query = "SELECT STRING_COL, INT_COL FROM testTable WHERE INT_COL >= 20000 AND INT_COL <= 23666 LIMIT 100000";
     pair = new Pairs.IntPair(20000, 23666);
     runQuery(query, 3667, Lists.newArrayList(pair), 2);
 
+    // test with sorted column without dictionary
+    // FilterOperatorUtils code should correctly create scan operator for INT_COL_RAW
+    // else this test will fail
+    query = "SELECT STRING_COL, INT_COL FROM testTable WHERE INT_COL >= 20000 AND INT_COL <= 23666 AND INT_COL_RAW <= 23666 LIMIT 100000";
+    pair = new Pairs.IntPair(20000, 23666);
+    runQuery(query, 3667, Lists.newArrayList(pair), 2);
+
     query = "SELECT STRING_COL, INT_COL FROM testTable WHERE INT_COL <= 20000 LIMIT 100000";
     pair = new Pairs.IntPair(0, 20000);
     runQuery(query, 20001, Lists.newArrayList(pair), 2);
 
+    // test with sorted column without dictionary
+    // FilterOperatorUtils code should correctly create scan operator for INT_COL_RAW
+    // else this test will fail
+    query = "SELECT STRING_COL, INT_COL FROM testTable WHERE INT_COL_RAW = 20000 LIMIT 100000";
+    pair = new Pairs.IntPair(20000, 20000);
+    runQuery(query, 1, Lists.newArrayList(pair), 2);
+
     String filter = "WHERE (INT_COL >= 15000 AND INT_COL <= 16665) OR (INT_COL >= 18000 AND INT_COL <= 19887)";
     query = "SELECT STRING_COL, INT_COL FROM testTable " + filter + " LIMIT 100000";
     pair = new Pairs.IntPair(15000, 16665);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org