You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by si...@apache.org on 2020/11/30 20:37:04 UTC
[incubator-pinot] branch master updated: Use sorted index based filtering only for dictionary encoded column (#6288)
This is an automated email from the ASF dual-hosted git repository.
siddteotia pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 3eb0f9c Use sorted index based filtering only for dictionary encoded column (#6288)
3eb0f9c is described below
commit 3eb0f9c9daf3e176d41775af515eeb982ff2daeb
Author: Sidd <si...@gmail.com>
AuthorDate: Mon Nov 30 12:36:50 2020 -0800
Use sorted index based filtering only for dictionary encoded column (#6288)
* Use sorted index based filtering only for sorted column with dictionary
* Address review comments
Co-authored-by: Siddharth Teotia <st...@steotia-mn1.linkedin.biz>
---
.../core/operator/filter/FilterOperatorUtils.java | 10 +++++--
.../RangePredicateWithSortedInvertedIndexTest.java | 32 ++++++++++++++++++++--
2 files changed, 37 insertions(+), 5 deletions(-)
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java
index b497031..a0b7cae 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java
@@ -47,9 +47,15 @@ public class FilterOperatorUtils {
return new MatchAllFilterOperator(numDocs);
}
+ // Currently sorted index based filtering is supported only for
+ // dictionary encoded columns. The on-disk segment metadata
+ // will indicate if the column is sorted or not regardless of
+ // whether it is raw or dictionary encoded. Here when creating
+ // the filter operator, we need to make sure that sort filter
+ // operator is used only if the column is sorted and has dictionary.
Predicate.Type predicateType = predicateEvaluator.getPredicateType();
if (predicateType == Predicate.Type.RANGE) {
- if (dataSource.getDataSourceMetadata().isSorted()) {
+ if (dataSource.getDataSourceMetadata().isSorted() && dataSource.getDictionary() != null) {
return new SortedIndexBasedFilterOperator(predicateEvaluator, dataSource, numDocs);
}
if (dataSource.getRangeIndex() != null) {
@@ -59,7 +65,7 @@ public class FilterOperatorUtils {
} else if (predicateType == Predicate.Type.REGEXP_LIKE) {
return new ScanBasedFilterOperator(predicateEvaluator, dataSource, numDocs);
} else {
- if (dataSource.getDataSourceMetadata().isSorted()) {
+ if (dataSource.getDataSourceMetadata().isSorted() && dataSource.getDictionary() != null) {
return new SortedIndexBasedFilterOperator(predicateEvaluator, dataSource, numDocs);
}
if (dataSource.getInvertedIndex() != null) {
diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/RangePredicateWithSortedInvertedIndexTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/RangePredicateWithSortedInvertedIndexTest.java
index cd97723..9539969 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/RangePredicateWithSortedInvertedIndexTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/RangePredicateWithSortedInvertedIndexTest.java
@@ -59,8 +59,9 @@ public class RangePredicateWithSortedInvertedIndexTest extends BaseQueriesTest {
private static final String SEGMENT_NAME = "testSegment";
private static final String D1 = "STRING_COL";
- private static final String M1 = "INT_COL"; // sorted column
+ private static final String M1 = "INT_COL"; // sorted column (dictionary encoded)
private static final String M2 = "LONG_COL";
+ private static final String M3 = "INT_COL_RAW"; // sorted raw column
private static final int NUM_ROWS = 30000;
private static final int INT_BASE_VALUE = 0;
@@ -117,13 +118,17 @@ public class RangePredicateWithSortedInvertedIndexTest extends BaseQueriesTest {
row.putValue(M1, INT_BASE_VALUE + rowIndex);
_longValues[rowIndex] = RANDOM.nextLong();
row.putValue(M2, _longValues[rowIndex]);
+ row.putValue(M3, INT_BASE_VALUE + rowIndex);
rows.add(row);
}
- TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME).build();
+ TableConfig tableConfig =
+ new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME).setNoDictionaryColumns(Arrays.asList(M3))
+ .build();
Schema schema =
new Schema.SchemaBuilder().setSchemaName(TABLE_NAME).addSingleValueDimension(D1, FieldSpec.DataType.STRING)
- .addMetric(M1, FieldSpec.DataType.INT).addMetric(M2, FieldSpec.DataType.LONG).build();
+ .addMetric(M1, FieldSpec.DataType.INT).addMetric(M2, FieldSpec.DataType.LONG)
+ .addMetric(M3, FieldSpec.DataType.INT).build();
SegmentGeneratorConfig config = new SegmentGeneratorConfig(tableConfig, schema);
config.setOutDir(INDEX_DIR.getPath());
config.setTableName(TABLE_NAME);
@@ -142,14 +147,35 @@ public class RangePredicateWithSortedInvertedIndexTest extends BaseQueriesTest {
Pairs.IntPair pair = new Pairs.IntPair(20000, 29999);
runQuery(query, 10000, Lists.newArrayList(pair), 2);
+ // test with sorted column without dictionary
+ // FilterOperatorUtils code should correctly create scan operator for INT_COL_RAW
+ // else this test will fail
+ query = "SELECT STRING_COL, INT_COL FROM testTable WHERE INT_COL >= 20000 AND INT_COL_RAW >= 20000 LIMIT 100000";
+ pair = new Pairs.IntPair(20000, 29999);
+ runQuery(query, 10000, Lists.newArrayList(pair), 2);
+
query = "SELECT STRING_COL, INT_COL FROM testTable WHERE INT_COL >= 20000 AND INT_COL <= 23666 LIMIT 100000";
pair = new Pairs.IntPair(20000, 23666);
runQuery(query, 3667, Lists.newArrayList(pair), 2);
+ // test with sorted column without dictionary
+ // FilterOperatorUtils code should correctly create scan operator for INT_COL_RAW
+ // else this test will fail
+ query = "SELECT STRING_COL, INT_COL FROM testTable WHERE INT_COL >= 20000 AND INT_COL <= 23666 AND INT_COL_RAW <= 23666 LIMIT 100000";
+ pair = new Pairs.IntPair(20000, 23666);
+ runQuery(query, 3667, Lists.newArrayList(pair), 2);
+
query = "SELECT STRING_COL, INT_COL FROM testTable WHERE INT_COL <= 20000 LIMIT 100000";
pair = new Pairs.IntPair(0, 20000);
runQuery(query, 20001, Lists.newArrayList(pair), 2);
+ // test with sorted column without dictionary
+ // FilterOperatorUtils code should correctly create scan operator for INT_COL_RAW
+ // else this test will fail
+ query = "SELECT STRING_COL, INT_COL FROM testTable WHERE INT_COL_RAW = 20000 LIMIT 100000";
+ pair = new Pairs.IntPair(20000, 20000);
+ runQuery(query, 1, Lists.newArrayList(pair), 2);
+
String filter = "WHERE (INT_COL >= 15000 AND INT_COL <= 16665) OR (INT_COL >= 18000 AND INT_COL <= 19887)";
query = "SELECT STRING_COL, INT_COL FROM testTable " + filter + " LIMIT 100000";
pair = new Pairs.IntPair(15000, 16665);
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org