You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by wy...@apache.org on 2023/09/18 17:42:55 UTC

[asterixdb] branch master updated: [ASTERIXDB-3262][COMP] Enable columnar filters by default

This is an automated email from the ASF dual-hosted git repository.

wyk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git


The following commit(s) were added to refs/heads/master by this push:
     new 6d9275b51a [ASTERIXDB-3262][COMP] Enable columnar filters by default
6d9275b51a is described below

commit 6d9275b51a443ec42a5a907dc3f65dd94be3541b
Author: Wail Alkowaileet <wa...@gmail.com>
AuthorDate: Sat Sep 16 11:22:46 2023 -0700

    [ASTERIXDB-3262][COMP] Enable columnar filters by default
    
    - user model changes: yes
    - storage format changes: no
    - interface changes: no
    
    Details:
    Make the use of columnar filters enabled by default.
    - Fix ASTERIXDB-3265: The array filter reader has
      its delimiters reversed
    - Fix ASTERIXDB-3264: Columnar range-filter doesn't correctly
      evaluate equality if a constant is the left operand
    
    Change-Id: I9306db62ff0e991efed30346a8847b9edae9393e
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17781
    Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Reviewed-by: Wail Alkowaileet <wa...@gmail.com>
    Reviewed-by: Hussain Towaileb <hu...@gmail.com>
---
 .../queries/column-pushdown/meta.001.sqlpp         |  1 +
 .../api/cluster_state_1/cluster_state_1.1.regexadm |  2 +-
 .../cluster_state_1_full.1.regexadm                |  2 +-
 .../cluster_state_1_less.1.regexadm                |  2 +-
 .../array-access-pushdown.010.plan                 |  2 +-
 .../other-pushdowns/other-pushdowns.008.plan       |  2 +-
 .../other-pushdowns/other-pushdowns.010.plan       |  2 +-
 .../array-access-pushdown.010.plan                 |  2 +-
 .../other-pushdowns/other-pushdowns.008.plan       |  2 +-
 .../other-pushdowns/other-pushdowns.010.plan       |  2 +-
 .../schema/visitor/PathExtractorVisitor.java       |  7 +++++-
 .../utils/filter/ColumnRangeFilterBuilder.java     | 26 +++++++++++++++-------
 .../algebricks/core/config/AlgebricksConfig.java   |  2 +-
 13 files changed, 35 insertions(+), 19 deletions(-)

diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/column-pushdown/meta.001.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/column-pushdown/meta.001.sqlpp
index e5e90c1915..6ef6e72313 100644
--- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/column-pushdown/meta.001.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/column-pushdown/meta.001.sqlpp
@@ -40,6 +40,7 @@
  CREATE DATASET `nation`(CH2Type) WITH META(CH2MetaType) PRIMARY KEY META().uid WITH {"storage-format": {"format": "column"}};
  CREATE DATASET `region`(CH2Type) WITH META(CH2MetaType) PRIMARY KEY META().uid WITH {"storage-format": {"format": "column"}};
 
+SET `compiler.column.filter` "false";
 SELECT s.s_i_id, SUM(s.s_order_cnt) as ordercount
 FROM   nation n, supplier su, stock s
 WHERE  s.s_w_id * s.s_i_id MOD 10000 = su.su_suppkey
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
index a67d32617c..1c3b061499 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
@@ -21,7 +21,7 @@
     "compiler\.arrayindex" : true,
     "compiler.batch.lookup" : true,
     "compiler.cbo" : true,
-    "compiler.column.filter" : false,
+    "compiler.column.filter" : true,
     "compiler\.external\.field\.pushdown" : true,
     "compiler.forcejoinorder" : false,
     "compiler\.framesize" : 32768,
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
index 0e90266da4..93fea8f70e 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
@@ -21,7 +21,7 @@
     "compiler\.arrayindex" : true,
     "compiler.batch.lookup" : true,
     "compiler.cbo" : true,
-    "compiler.column.filter" : false,
+    "compiler.column.filter" : true,
     "compiler\.external\.field\.pushdown" : true,
     "compiler.forcejoinorder" : false,
     "compiler\.framesize" : 32768,
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
index 931fd068a8..e68b30ad92 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
@@ -21,7 +21,7 @@
     "compiler\.arrayindex" : true,
     "compiler.batch.lookup" : true,
     "compiler.cbo" : true,
-    "compiler.column.filter" : false,
+    "compiler.column.filter" : true,
     "compiler\.external\.field\.pushdown" : true,
     "compiler.forcejoinorder" : false,
     "compiler\.framesize" : 32768,
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/column/pushdown/array-access-pushdown/array-access-pushdown.010.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/pushdown/array-access-pushdown/array-access-pushdown.010.plan
index 7783b102b4..3f46c9cacf 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/column/pushdown/array-access-pushdown/array-access-pushdown.010.plan
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/pushdown/array-access-pushdown/array-access-pushdown.010.plan
@@ -33,7 +33,7 @@ distribute result [$$50] [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
                     -- STREAM_PROJECT  |PARTITIONED|
                       exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
                       -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                        data-scan []<-[$$49, $$p] <- test.ColumnDataset project ({entities:{urls:[{display_url:any}]}}) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+                        data-scan []<-[$$49, $$p] <- test.ColumnDataset project ({entities:{urls:[{display_url:any}]}}) filter on: not(if-missing-or-null(eq(scan-collection($$p.getField("entities").getField("urls")).getField("display_url"), "string"), false)) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
                         -- DATASOURCE_SCAN  |PARTITIONED|
                           exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
                           -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/column/pushdown/other-pushdowns/other-pushdowns.008.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/pushdown/other-pushdowns/other-pushdowns.008.plan
index 26cb68ce3d..d88c101cfc 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/column/pushdown/other-pushdowns/other-pushdowns.008.plan
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/pushdown/other-pushdowns/other-pushdowns.008.plan
@@ -44,7 +44,7 @@ distribute result [$$69] [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
                                 -- STREAM_SELECT  |PARTITIONED|
                                   exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
                                   -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                    data-scan []<-[$$72, $$p1] <- test.ColumnDataset project ({entities:{urls:[{display_url:any}]}}) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+                                    data-scan []<-[$$72, $$p1] <- test.ColumnDataset project ({entities:{urls:[{display_url:any}]}}) filter on: eq(lowercase(scan-collection($$p1.getField("entities").getField("urls")).getField("display_url")), "string") [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
                                     -- DATASOURCE_SCAN  |PARTITIONED|
                                       exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
                                       -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/column/pushdown/other-pushdowns/other-pushdowns.010.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/pushdown/other-pushdowns/other-pushdowns.010.plan
index 41b9e05a83..5a7b0b32ba 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/column/pushdown/other-pushdowns/other-pushdowns.010.plan
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/pushdown/other-pushdowns/other-pushdowns.010.plan
@@ -46,7 +46,7 @@ distribute result [$$68] [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
                                   -- STREAM_SELECT  |PARTITIONED|
                                     exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
                                     -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                      data-scan []<-[$$71, $$p1] <- test.ColumnDataset project ({entities:{urls:[{display_url:any,indices:any}]}}) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+                                      data-scan []<-[$$71, $$p1] <- test.ColumnDataset project ({entities:{urls:[{display_url:any,indices:any}]}}) filter on: eq(lowercase(scan-collection($$p1.getField("entities").getField("urls")).getField("display_url")), "string") [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
                                       -- DATASOURCE_SCAN  |PARTITIONED|
                                         exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
                                         -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/array-access-pushdown/array-access-pushdown.010.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/array-access-pushdown/array-access-pushdown.010.plan
index d0bf0f0ccc..633350a2cc 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/array-access-pushdown/array-access-pushdown.010.plan
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/array-access-pushdown/array-access-pushdown.010.plan
@@ -33,7 +33,7 @@ distribute result [$$50] [cardinality: 2.1, op-cost: 0.0, total-cost: 2.1]
                     -- STREAM_PROJECT  |PARTITIONED|
                       exchange [cardinality: 2.0, op-cost: 0.0, total-cost: 2.1]
                       -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                        data-scan []<-[$$49, $$p] <- test.ColumnDataset project ({entities:{urls:[{display_url:any}]}}) [cardinality: 2.0, op-cost: 2.1, total-cost: 2.1]
+                        data-scan []<-[$$49, $$p] <- test.ColumnDataset project ({entities:{urls:[{display_url:any}]}}) filter on: not(if-missing-or-null(eq(scan-collection($$p.getField("entities").getField("urls")).getField("display_url"), "string"), false)) [cardinality: 2.0, op-cost: 2.1, total-cost: 2.1]
                         -- DATASOURCE_SCAN  |PARTITIONED|
                           exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
                           -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.008.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.008.plan
index 224298ebd9..6ededaeddd 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.008.plan
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.008.plan
@@ -44,7 +44,7 @@ distribute result [$$69] [cardinality: 2.1, op-cost: 0.0, total-cost: 2.1]
                                 -- STREAM_SELECT  |PARTITIONED|
                                   exchange [cardinality: 2.0, op-cost: 0.0, total-cost: 2.1]
                                   -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                    data-scan []<-[$$72, $$p1] <- test.ColumnDataset project ({entities:{urls:[{display_url:any}]}}) [cardinality: 2.0, op-cost: 2.1, total-cost: 2.1]
+                                    data-scan []<-[$$72, $$p1] <- test.ColumnDataset project ({entities:{urls:[{display_url:any}]}}) filter on: eq(lowercase(scan-collection($$p1.getField("entities").getField("urls")).getField("display_url")), "string") [cardinality: 2.0, op-cost: 2.1, total-cost: 2.1]
                                     -- DATASOURCE_SCAN  |PARTITIONED|
                                       exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
                                       -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.010.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.010.plan
index 0bf5e31925..402e741b56 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.010.plan
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.010.plan
@@ -46,7 +46,7 @@ distribute result [$$68] [cardinality: 2.1, op-cost: 0.0, total-cost: 2.1]
                                   -- STREAM_SELECT  |PARTITIONED|
                                     exchange [cardinality: 2.0, op-cost: 0.0, total-cost: 2.1]
                                     -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                      data-scan []<-[$$71, $$p1] <- test.ColumnDataset project ({entities:{urls:[{display_url:any,indices:any}]}}) [cardinality: 2.0, op-cost: 2.1, total-cost: 2.1]
+                                      data-scan []<-[$$71, $$p1] <- test.ColumnDataset project ({entities:{urls:[{display_url:any,indices:any}]}}) filter on: eq(lowercase(scan-collection($$p1.getField("entities").getField("urls")).getField("display_url")), "string") [cardinality: 2.0, op-cost: 2.1, total-cost: 2.1]
                                       -- DATASOURCE_SCAN  |PARTITIONED|
                                         exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
                                         -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/PathExtractorVisitor.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/PathExtractorVisitor.java
index 707f5b7d48..de4df8a919 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/PathExtractorVisitor.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/PathExtractorVisitor.java
@@ -133,9 +133,14 @@ public class PathExtractorVisitor implements ISchemaNodeVisitor<AbstractSchemaNo
         } else {
             // array
             reader = readerFactory.createValueReader(primitiveNode.getTypeTag(), primitiveNode.getColumnIndex(), level,
-                    delimiters.toIntArray());
+                    getReversedDelimiters());
         }
         readers.add(reader);
         return reader;
     }
+
+    private int[] getReversedDelimiters() {
+        Collections.reverse(delimiters);
+        return delimiters.toIntArray();
+    }
 }
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/filter/ColumnRangeFilterBuilder.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/filter/ColumnRangeFilterBuilder.java
index be05381306..f83b4845b2 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/filter/ColumnRangeFilterBuilder.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/filter/ColumnRangeFilterBuilder.java
@@ -118,6 +118,11 @@ public class ColumnRangeFilterBuilder {
         }
 
         ComparisonKind comparisonKind = getComparisonKind(fid, constant.getType().getTypeTag());
+        if (comparisonKind == ComparisonKind.NEQ) {
+            // Ignore NEQ
+            return NoOpColumnFilterEvaluatorFactory.INSTANCE;
+        }
+
         IColumnRangeFilterValueAccessorFactory constValue =
                 ConstantColumnRangeFilterValueAccessorFactory.createFactory(constant);
         IColumnRangeFilterValueAccessorFactory min = new ColumnRangeFilterValueAccessorFactory(path, true);
@@ -153,15 +158,20 @@ public class ColumnRangeFilterBuilder {
     }
 
     private static ComparisonKind invert(ComparisonKind comparisonKind) {
-        if (comparisonKind == ComparisonKind.LT) {
-            return ComparisonKind.GE;
-        } else if (comparisonKind == ComparisonKind.LE) {
-            return ComparisonKind.GT;
-        } else if (comparisonKind == ComparisonKind.GT) {
-            return ComparisonKind.LE;
+        switch (comparisonKind) {
+            case EQ:
+                return ComparisonKind.EQ;
+            case LE:
+                return ComparisonKind.GT;
+            case GE:
+                return ComparisonKind.LT;
+            case LT:
+                return ComparisonKind.GE;
+            case GT:
+                return ComparisonKind.LE;
+            default:
+                throw new IllegalStateException("Unsupported comparison type: " + comparisonKind);
         }
-        //ComparisonKind.GE
-        return ComparisonKind.LT;
     }
 
     private static IColumnRangeFilterEvaluatorFactory createEvaluator(ComparisonKind comparisonKind,
diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java
index 74f4447050..98c42239c7 100644
--- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java
+++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java
@@ -45,5 +45,5 @@ public class AlgebricksConfig {
     public static final int EXTERNAL_SCAN_BUFFER_SIZE =
             StorageUtil.getIntSizeInBytes(8, StorageUtil.StorageUnit.KILOBYTE);
     public static final boolean BATCH_LOOKUP_DEFAULT = true;
-    public static final boolean COLUMN_FILTER_DEFAULT = false;
+    public static final boolean COLUMN_FILTER_DEFAULT = true;
 }