You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by wy...@apache.org on 2023/09/18 17:42:55 UTC
[asterixdb] branch master updated: [ASTERIXDB-3262][COMP] Enable columnar filters by default
This is an automated email from the ASF dual-hosted git repository.
wyk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git
The following commit(s) were added to refs/heads/master by this push:
new 6d9275b51a [ASTERIXDB-3262][COMP] Enable columnar filters by default
6d9275b51a is described below
commit 6d9275b51a443ec42a5a907dc3f65dd94be3541b
Author: Wail Alkowaileet <wa...@gmail.com>
AuthorDate: Sat Sep 16 11:22:46 2023 -0700
[ASTERIXDB-3262][COMP] Enable columnar filters by default
- user model changes: yes
- storage format changes: no
- interface changes: no
Details:
Enable columnar filters by default.
- Fix ASTERIXDB-3265: The array filter reader was given
its delimiters in the wrong (reversed) order
- Fix ASTERIXDB-3264: The columnar range filter doesn't correctly
evaluate equality when the constant is the left operand
Change-Id: I9306db62ff0e991efed30346a8847b9edae9393e
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17781
Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
Reviewed-by: Wail Alkowaileet <wa...@gmail.com>
Reviewed-by: Hussain Towaileb <hu...@gmail.com>
---
.../queries/column-pushdown/meta.001.sqlpp | 1 +
.../api/cluster_state_1/cluster_state_1.1.regexadm | 2 +-
.../cluster_state_1_full.1.regexadm | 2 +-
.../cluster_state_1_less.1.regexadm | 2 +-
.../array-access-pushdown.010.plan | 2 +-
.../other-pushdowns/other-pushdowns.008.plan | 2 +-
.../other-pushdowns/other-pushdowns.010.plan | 2 +-
.../array-access-pushdown.010.plan | 2 +-
.../other-pushdowns/other-pushdowns.008.plan | 2 +-
.../other-pushdowns/other-pushdowns.010.plan | 2 +-
.../schema/visitor/PathExtractorVisitor.java | 7 +++++-
.../utils/filter/ColumnRangeFilterBuilder.java | 26 +++++++++++++++-------
.../algebricks/core/config/AlgebricksConfig.java | 2 +-
13 files changed, 35 insertions(+), 19 deletions(-)
diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/column-pushdown/meta.001.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/column-pushdown/meta.001.sqlpp
index e5e90c1915..6ef6e72313 100644
--- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/column-pushdown/meta.001.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/column-pushdown/meta.001.sqlpp
@@ -40,6 +40,7 @@
CREATE DATASET `nation`(CH2Type) WITH META(CH2MetaType) PRIMARY KEY META().uid WITH {"storage-format": {"format": "column"}};
CREATE DATASET `region`(CH2Type) WITH META(CH2MetaType) PRIMARY KEY META().uid WITH {"storage-format": {"format": "column"}};
+SET `compiler.column.filter` "false";
SELECT s.s_i_id, SUM(s.s_order_cnt) as ordercount
FROM nation n, supplier su, stock s
WHERE s.s_w_id * s.s_i_id MOD 10000 = su.su_suppkey
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
index a67d32617c..1c3b061499 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
@@ -21,7 +21,7 @@
"compiler\.arrayindex" : true,
"compiler.batch.lookup" : true,
"compiler.cbo" : true,
- "compiler.column.filter" : false,
+ "compiler.column.filter" : true,
"compiler\.external\.field\.pushdown" : true,
"compiler.forcejoinorder" : false,
"compiler\.framesize" : 32768,
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
index 0e90266da4..93fea8f70e 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
@@ -21,7 +21,7 @@
"compiler\.arrayindex" : true,
"compiler.batch.lookup" : true,
"compiler.cbo" : true,
- "compiler.column.filter" : false,
+ "compiler.column.filter" : true,
"compiler\.external\.field\.pushdown" : true,
"compiler.forcejoinorder" : false,
"compiler\.framesize" : 32768,
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
index 931fd068a8..e68b30ad92 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
@@ -21,7 +21,7 @@
"compiler\.arrayindex" : true,
"compiler.batch.lookup" : true,
"compiler.cbo" : true,
- "compiler.column.filter" : false,
+ "compiler.column.filter" : true,
"compiler\.external\.field\.pushdown" : true,
"compiler.forcejoinorder" : false,
"compiler\.framesize" : 32768,
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/column/pushdown/array-access-pushdown/array-access-pushdown.010.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/pushdown/array-access-pushdown/array-access-pushdown.010.plan
index 7783b102b4..3f46c9cacf 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/column/pushdown/array-access-pushdown/array-access-pushdown.010.plan
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/pushdown/array-access-pushdown/array-access-pushdown.010.plan
@@ -33,7 +33,7 @@ distribute result [$$50] [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
-- STREAM_PROJECT |PARTITIONED|
exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- data-scan []<-[$$49, $$p] <- test.ColumnDataset project ({entities:{urls:[{display_url:any}]}}) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+ data-scan []<-[$$49, $$p] <- test.ColumnDataset project ({entities:{urls:[{display_url:any}]}}) filter on: not(if-missing-or-null(eq(scan-collection($$p.getField("entities").getField("urls")).getField("display_url"), "string"), false)) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
-- DATASOURCE_SCAN |PARTITIONED|
exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/column/pushdown/other-pushdowns/other-pushdowns.008.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/pushdown/other-pushdowns/other-pushdowns.008.plan
index 26cb68ce3d..d88c101cfc 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/column/pushdown/other-pushdowns/other-pushdowns.008.plan
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/pushdown/other-pushdowns/other-pushdowns.008.plan
@@ -44,7 +44,7 @@ distribute result [$$69] [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
-- STREAM_SELECT |PARTITIONED|
exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- data-scan []<-[$$72, $$p1] <- test.ColumnDataset project ({entities:{urls:[{display_url:any}]}}) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+ data-scan []<-[$$72, $$p1] <- test.ColumnDataset project ({entities:{urls:[{display_url:any}]}}) filter on: eq(lowercase(scan-collection($$p1.getField("entities").getField("urls")).getField("display_url")), "string") [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
-- DATASOURCE_SCAN |PARTITIONED|
exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/column/pushdown/other-pushdowns/other-pushdowns.010.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/pushdown/other-pushdowns/other-pushdowns.010.plan
index 41b9e05a83..5a7b0b32ba 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/column/pushdown/other-pushdowns/other-pushdowns.010.plan
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/pushdown/other-pushdowns/other-pushdowns.010.plan
@@ -46,7 +46,7 @@ distribute result [$$68] [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
-- STREAM_SELECT |PARTITIONED|
exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- data-scan []<-[$$71, $$p1] <- test.ColumnDataset project ({entities:{urls:[{display_url:any,indices:any}]}}) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+ data-scan []<-[$$71, $$p1] <- test.ColumnDataset project ({entities:{urls:[{display_url:any,indices:any}]}}) filter on: eq(lowercase(scan-collection($$p1.getField("entities").getField("urls")).getField("display_url")), "string") [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
-- DATASOURCE_SCAN |PARTITIONED|
exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/array-access-pushdown/array-access-pushdown.010.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/array-access-pushdown/array-access-pushdown.010.plan
index d0bf0f0ccc..633350a2cc 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/array-access-pushdown/array-access-pushdown.010.plan
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/array-access-pushdown/array-access-pushdown.010.plan
@@ -33,7 +33,7 @@ distribute result [$$50] [cardinality: 2.1, op-cost: 0.0, total-cost: 2.1]
-- STREAM_PROJECT |PARTITIONED|
exchange [cardinality: 2.0, op-cost: 0.0, total-cost: 2.1]
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- data-scan []<-[$$49, $$p] <- test.ColumnDataset project ({entities:{urls:[{display_url:any}]}}) [cardinality: 2.0, op-cost: 2.1, total-cost: 2.1]
+ data-scan []<-[$$49, $$p] <- test.ColumnDataset project ({entities:{urls:[{display_url:any}]}}) filter on: not(if-missing-or-null(eq(scan-collection($$p.getField("entities").getField("urls")).getField("display_url"), "string"), false)) [cardinality: 2.0, op-cost: 2.1, total-cost: 2.1]
-- DATASOURCE_SCAN |PARTITIONED|
exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.008.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.008.plan
index 224298ebd9..6ededaeddd 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.008.plan
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.008.plan
@@ -44,7 +44,7 @@ distribute result [$$69] [cardinality: 2.1, op-cost: 0.0, total-cost: 2.1]
-- STREAM_SELECT |PARTITIONED|
exchange [cardinality: 2.0, op-cost: 0.0, total-cost: 2.1]
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- data-scan []<-[$$72, $$p1] <- test.ColumnDataset project ({entities:{urls:[{display_url:any}]}}) [cardinality: 2.0, op-cost: 2.1, total-cost: 2.1]
+ data-scan []<-[$$72, $$p1] <- test.ColumnDataset project ({entities:{urls:[{display_url:any}]}}) filter on: eq(lowercase(scan-collection($$p1.getField("entities").getField("urls")).getField("display_url")), "string") [cardinality: 2.0, op-cost: 2.1, total-cost: 2.1]
-- DATASOURCE_SCAN |PARTITIONED|
exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.010.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.010.plan
index 0bf5e31925..402e741b56 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.010.plan
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.010.plan
@@ -46,7 +46,7 @@ distribute result [$$68] [cardinality: 2.1, op-cost: 0.0, total-cost: 2.1]
-- STREAM_SELECT |PARTITIONED|
exchange [cardinality: 2.0, op-cost: 0.0, total-cost: 2.1]
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- data-scan []<-[$$71, $$p1] <- test.ColumnDataset project ({entities:{urls:[{display_url:any,indices:any}]}}) [cardinality: 2.0, op-cost: 2.1, total-cost: 2.1]
+ data-scan []<-[$$71, $$p1] <- test.ColumnDataset project ({entities:{urls:[{display_url:any,indices:any}]}}) filter on: eq(lowercase(scan-collection($$p1.getField("entities").getField("urls")).getField("display_url")), "string") [cardinality: 2.0, op-cost: 2.1, total-cost: 2.1]
-- DATASOURCE_SCAN |PARTITIONED|
exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/PathExtractorVisitor.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/PathExtractorVisitor.java
index 707f5b7d48..de4df8a919 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/PathExtractorVisitor.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/PathExtractorVisitor.java
@@ -133,9 +133,14 @@ public class PathExtractorVisitor implements ISchemaNodeVisitor<AbstractSchemaNo
} else {
// array
reader = readerFactory.createValueReader(primitiveNode.getTypeTag(), primitiveNode.getColumnIndex(), level,
- delimiters.toIntArray());
+ getReversedDelimiters());
}
readers.add(reader);
return reader;
}
+
+ private int[] getReversedDelimiters() {
+ Collections.reverse(delimiters);
+ return delimiters.toIntArray();
+ }
}
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/filter/ColumnRangeFilterBuilder.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/filter/ColumnRangeFilterBuilder.java
index be05381306..f83b4845b2 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/filter/ColumnRangeFilterBuilder.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/filter/ColumnRangeFilterBuilder.java
@@ -118,6 +118,11 @@ public class ColumnRangeFilterBuilder {
}
ComparisonKind comparisonKind = getComparisonKind(fid, constant.getType().getTypeTag());
+ if (comparisonKind == ComparisonKind.NEQ) {
+ // Ignore NEQ
+ return NoOpColumnFilterEvaluatorFactory.INSTANCE;
+ }
+
IColumnRangeFilterValueAccessorFactory constValue =
ConstantColumnRangeFilterValueAccessorFactory.createFactory(constant);
IColumnRangeFilterValueAccessorFactory min = new ColumnRangeFilterValueAccessorFactory(path, true);
@@ -153,15 +158,20 @@ public class ColumnRangeFilterBuilder {
}
private static ComparisonKind invert(ComparisonKind comparisonKind) {
- if (comparisonKind == ComparisonKind.LT) {
- return ComparisonKind.GE;
- } else if (comparisonKind == ComparisonKind.LE) {
- return ComparisonKind.GT;
- } else if (comparisonKind == ComparisonKind.GT) {
- return ComparisonKind.LE;
+ switch (comparisonKind) {
+ case EQ:
+ return ComparisonKind.EQ;
+ case LE:
+ return ComparisonKind.GT;
+ case GE:
+ return ComparisonKind.LT;
+ case LT:
+ return ComparisonKind.GE;
+ case GT:
+ return ComparisonKind.LE;
+ default:
+ throw new IllegalStateException("Unsupported comparison type: " + comparisonKind);
}
- //ComparisonKind.GE
- return ComparisonKind.LT;
}
private static IColumnRangeFilterEvaluatorFactory createEvaluator(ComparisonKind comparisonKind,
diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java
index 74f4447050..98c42239c7 100644
--- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java
+++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java
@@ -45,5 +45,5 @@ public class AlgebricksConfig {
public static final int EXTERNAL_SCAN_BUFFER_SIZE =
StorageUtil.getIntSizeInBytes(8, StorageUtil.StorageUnit.KILOBYTE);
public static final boolean BATCH_LOOKUP_DEFAULT = true;
- public static final boolean COLUMN_FILTER_DEFAULT = false;
+ public static final boolean COLUMN_FILTER_DEFAULT = true;
}