You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by ni...@apache.org on 2020/02/07 14:25:59 UTC

[kylin] 04/44: KYLIN-4280 SegmenPruner support 'OR' and 'NOT'

This is an automated email from the ASF dual-hosted git repository.

nic pushed a commit to branch 3.0.x
in repository https://gitbox.apache.org/repos/asf/kylin.git

commit 4300fa51a048a4033c54fa775e4e7dff58413c07
Author: zhangbushi5 <84...@qq.com>
AuthorDate: Fri Dec 6 17:04:22 2019 +0800

    KYLIN-4280 SegmenPruner support 'OR' and 'NOT'
---
 .../apache/kylin/cube/common/SegmentPruner.java    |  48 ++-----
 .../apache/kylin/cube/common/TupleFilterNode.java  |  76 +++++++++++
 .../kylin/cube/common/SegmentPrunerTest.java       | 151 +++++++++++++++++++++
 3 files changed, 241 insertions(+), 34 deletions(-)

diff --git a/core-cube/src/main/java/org/apache/kylin/cube/common/SegmentPruner.java b/core-cube/src/main/java/org/apache/kylin/cube/common/SegmentPruner.java
index f3f2052..ae21a4d 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/common/SegmentPruner.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/common/SegmentPruner.java
@@ -19,9 +19,7 @@
 package org.apache.kylin.cube.common;
 
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.List;
-import java.util.Map;
 import java.util.Set;
 
 import org.apache.commons.lang.StringUtils;
@@ -45,11 +43,11 @@ import org.slf4j.LoggerFactory;
 public class SegmentPruner {
     private static final Logger logger = LoggerFactory.getLogger(SegmentPruner.class);
 
-    final private Set<CompareTupleFilter> mustTrueCompares;
+    private TupleFilterNode node;
 
     public SegmentPruner(TupleFilter filter) {
-        this.mustTrueCompares = filter == null ? Collections.<CompareTupleFilter> emptySet()
-                : filter.findMustTrueCompareFilters();
+        this.node = new TupleFilterNode(filter);
+
     }
 
     public List<CubeSegment> listSegmentsForQuery(CubeInstance cube) {
@@ -62,7 +60,7 @@ public class SegmentPruner {
     }
     
     public boolean check(CubeSegment seg) {
-        
+
         if (seg.getInputRecords() == 0) {
             if (seg.getConfig().isSkippingEmptySegments()) {
                 logger.debug("Prune segment {} due to 0 input record", seg);
@@ -72,53 +70,35 @@ public class SegmentPruner {
             }
         }
 
-        Map<String, DimensionRangeInfo> segDimRangInfoMap = seg.getDimensionRangeInfoMap();
-        for (CompareTupleFilter comp : mustTrueCompares) {
-            TblColRef col = comp.getColumn();
-
-            if (!col.getType().needCompare()) {
-                continue;
-            }
-            
-            DimensionRangeInfo dimRangeInfo = segDimRangInfoMap.get(col.getIdentity());
-            if (dimRangeInfo == null)
-                dimRangeInfo = tryDeduceRangeFromPartitionCol(seg, col);
-            if (dimRangeInfo == null)
-                continue;
-            
-            String minVal = dimRangeInfo.getMin();
-            String maxVal = dimRangeInfo.getMax();
-            
-            if (!satisfy(comp, minVal, maxVal)) {
-                logger.debug("Prune segment {} due to given filter", seg);
-                return false;
-            }
+        if (!node.checkSeg(seg)) {
+            logger.debug("Prune segment {} due to given filter", seg);
+            return false;
         }
 
         logger.debug("Pruner passed on segment {}", seg);
         return true;
     }
 
-    private DimensionRangeInfo tryDeduceRangeFromPartitionCol(CubeSegment seg, TblColRef col) {
+    public static DimensionRangeInfo tryDeduceRangeFromPartitionCol(CubeSegment seg, TblColRef col) {
         DataModelDesc model = seg.getModel();
         PartitionDesc part = model.getPartitionDesc();
-        
+
         if (!part.isPartitioned())
             return null;
         if (!col.equals(part.getPartitionDateColumnRef()))
             return null;
-        
+
         // deduce the dim range from TSRange
         TSRange tsRange = seg.getTSRange();
         if (tsRange.start.isMin || tsRange.end.isMax)
             return null; // DimensionRangeInfo cannot express infinite
-        
+
         String min = tsRangeToStr(tsRange.start.v, part);
         String max = tsRangeToStr(tsRange.end.v - 1, part); // note the -1, end side is exclusive
         return new DimensionRangeInfo(min, max);
     }
 
-    private String tsRangeToStr(long ts, PartitionDesc part) {
+    private static String tsRangeToStr(long ts, PartitionDesc part) {
         String value;
         DataType partitionColType = part.getPartitionDateColumnRef().getType();
         if (partitionColType.isDate()) {
@@ -138,7 +118,7 @@ public class SegmentPruner {
         return value;
     }
 
-    private boolean satisfy(CompareTupleFilter comp, String minVal, String maxVal) {
+    public static boolean satisfy(CompareTupleFilter comp, String minVal, String maxVal) {
 
         // When both min and max are null, it means all cells of the column are null.
         // In such case, return true to let query engine scan the segment, since the
@@ -177,7 +157,7 @@ public class SegmentPruner {
         }
     }
 
-    private String toString(Object v) {
+    private static String toString(Object v) {
         return v == null ? null : v.toString();
     }
 }
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/common/TupleFilterNode.java b/core-cube/src/main/java/org/apache/kylin/cube/common/TupleFilterNode.java
new file mode 100644
index 0000000..8a84f67
--- /dev/null
+++ b/core-cube/src/main/java/org/apache/kylin/cube/common/TupleFilterNode.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.cube.common;
+
+import org.apache.kylin.cube.CubeSegment;
+import org.apache.kylin.cube.DimensionRangeInfo;
+import org.apache.kylin.metadata.filter.CompareTupleFilter;
+import org.apache.kylin.metadata.filter.LogicalTupleFilter;
+import org.apache.kylin.metadata.filter.TupleFilter;
+import org.apache.kylin.metadata.model.TblColRef;
+
+public class TupleFilterNode {
+    private TupleFilter filter;
+
+    public TupleFilterNode(TupleFilter filter) {
+        this.filter = filter;
+    }
+
+    public boolean checkSeg(CubeSegment seg) {
+        if (filter == null)
+            return true;
+        if (filter instanceof CompareTupleFilter) {
+            CompareTupleFilter compareTupleFilter = (CompareTupleFilter) filter;
+            TblColRef col = compareTupleFilter.getColumn();
+            if (col == null) {
+                return true;
+            }
+            DimensionRangeInfo dimRangeInfo = seg.getDimensionRangeInfoMap().get(col.getIdentity());
+            if (dimRangeInfo == null)
+                dimRangeInfo = SegmentPruner.tryDeduceRangeFromPartitionCol(seg, col);
+            if (dimRangeInfo == null)
+                return true;
+            String minVal = dimRangeInfo.getMin();
+            String maxVal = dimRangeInfo.getMax();
+            return SegmentPruner.satisfy(compareTupleFilter, minVal, maxVal);
+        }
+
+        if (filter instanceof LogicalTupleFilter) {
+            if (filter.getOperator() == TupleFilter.FilterOperatorEnum.AND) {
+                for (TupleFilter filter : filter.getChildren()) {
+                    if (!new TupleFilterNode(filter).checkSeg(seg))
+                        return false;
+                }
+                return true;
+
+            } else if (filter.getOperator() == TupleFilter.FilterOperatorEnum.OR) {
+                for (TupleFilter filter : filter.getChildren()) {
+                    if (new TupleFilterNode(filter).checkSeg(seg))
+                        return true;
+                }
+                return false;
+            } else if (filter.getOperator() == TupleFilter.FilterOperatorEnum.NOT) {
+                for (TupleFilter filter : filter.getChildren()) {
+                    return !(new TupleFilterNode(filter).checkSeg(seg));
+                }
+            }
+        }
+        return true;
+    }
+}
diff --git a/core-cube/src/test/java/org/apache/kylin/cube/common/SegmentPrunerTest.java b/core-cube/src/test/java/org/apache/kylin/cube/common/SegmentPrunerTest.java
index 603db97..69f9b10 100644
--- a/core-cube/src/test/java/org/apache/kylin/cube/common/SegmentPrunerTest.java
+++ b/core-cube/src/test/java/org/apache/kylin/cube/common/SegmentPrunerTest.java
@@ -18,15 +18,21 @@
 
 package org.apache.kylin.cube.common;
 
+import static org.apache.kylin.metadata.filter.TupleFilter.and;
 import static org.apache.kylin.metadata.filter.TupleFilter.compare;
+import static org.apache.kylin.metadata.filter.TupleFilter.or;
 
+import com.google.common.collect.Maps;
 import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.util.DateFormat;
 import org.apache.kylin.common.util.LocalFileMetadataTestCase;
 import org.apache.kylin.common.util.SetAndUnsetSystemProp;
 import org.apache.kylin.cube.CubeInstance;
 import org.apache.kylin.cube.CubeManager;
 import org.apache.kylin.cube.CubeSegment;
+import org.apache.kylin.cube.DimensionRangeInfo;
 import org.apache.kylin.metadata.filter.ConstantTupleFilter;
+import org.apache.kylin.metadata.filter.LogicalTupleFilter;
 import org.apache.kylin.metadata.filter.TupleFilter;
 import org.apache.kylin.metadata.filter.TupleFilter.FilterOperatorEnum;
 import org.apache.kylin.metadata.model.SegmentRange.TSRange;
@@ -39,6 +45,8 @@ import org.junit.Test;
 
 import com.google.common.collect.Sets;
 
+import java.util.Map;
+
 public class SegmentPrunerTest extends LocalFileMetadataTestCase {
     private CubeInstance cube;
 
@@ -192,4 +200,147 @@ public class SegmentPrunerTest extends LocalFileMetadataTestCase {
             }
         }
     }
+
+    @Test
+    public void testLegacyCubeSegWithOrFilter() {
+        // legacy cube segments does not have DimensionRangeInfo, but with TSRange can do some pruning
+        CubeInstance cube = CubeManager.getInstance(getTestConfig())
+                .getCube("test_kylin_cube_without_slr_left_join_ready_2_segments");
+
+        TblColRef col = cube.getModel().findColumn("TEST_KYLIN_FACT.CAL_DT");
+        TblColRef col2 = cube.getModel().findColumn("TEST_KYLIN_FACT.LSTG_SITE_ID");
+        CubeSegment seg = cube.getSegments(SegmentStatusEnum.READY).get(0);
+        Map<String, DimensionRangeInfo> dimensionRangeInfoMap = Maps.newHashMap();
+        dimensionRangeInfoMap.put("TEST_KYLIN_FACT.LSTG_SITE_ID", new DimensionRangeInfo("10", "20"));
+        seg.setDimensionRangeInfoMap(dimensionRangeInfoMap);
+        TSRange tsRange = seg.getTSRange();
+        String start = DateFormat.formatToTimeStr(tsRange.start.v, "yyyy-MM-dd");
+
+        CubeSegment seg2 = cube.getSegments(SegmentStatusEnum.READY).get(1);
+        Map<String, DimensionRangeInfo> dimensionRangeInfoMap2 = Maps.newHashMap();
+        dimensionRangeInfoMap2.put("TEST_KYLIN_FACT.LSTG_SITE_ID", new DimensionRangeInfo("20", "30"));
+        seg2.setDimensionRangeInfoMap(dimensionRangeInfoMap2);
+        TSRange tsRange2 = seg2.getTSRange();
+        String start2 = DateFormat.formatToTimeStr(tsRange2.start.v, "yyyy-MM-dd");
+
+        try (SetAndUnsetSystemProp sns = new SetAndUnsetSystemProp("kylin.query.skip-empty-segments", "false")) {
+            {
+                TupleFilter andFilter1 = compare(col, FilterOperatorEnum.EQ, start);
+                TupleFilter andFilter2 = compare(col2, FilterOperatorEnum.EQ, "15");
+                LogicalTupleFilter logicalAndFilter = and(andFilter1, andFilter2);
+
+                TupleFilter andFilter3 = compare(col, FilterOperatorEnum.EQ, start2);
+                TupleFilter andFilter4 = compare(col2, FilterOperatorEnum.EQ, "25");
+                LogicalTupleFilter logicalAndFilter2 = and(andFilter3, andFilter4);
+
+                LogicalTupleFilter finalFilter = or(logicalAndFilter, logicalAndFilter2);
+                SegmentPruner segmentPruner = new SegmentPruner(finalFilter);
+                Assert.assertTrue(segmentPruner.check(seg));
+                Assert.assertTrue(segmentPruner.check(seg2));
+            }
+
+            {
+                TupleFilter andFilter1 = compare(col, FilterOperatorEnum.EQ, start);
+                TupleFilter andFilter2 = compare(col2, FilterOperatorEnum.EQ, "15");
+                LogicalTupleFilter logicalAndFilter = and(andFilter1, andFilter2);
+
+                TupleFilter andFilter3 = compare(col, FilterOperatorEnum.EQ, start2);
+                TupleFilter andFilter4 = compare(col2, FilterOperatorEnum.EQ, "35");
+                LogicalTupleFilter logicalAndFilter2 = and(andFilter3, andFilter4);
+
+                LogicalTupleFilter finalFilter = or(logicalAndFilter, logicalAndFilter2);
+                SegmentPruner segmentPruner = new SegmentPruner(finalFilter);
+                Assert.assertTrue(segmentPruner.check(seg));
+                Assert.assertFalse(segmentPruner.check(seg2));
+            }
+
+            {
+                TupleFilter andFilter1 = compare(col, FilterOperatorEnum.EQ, start);
+                TupleFilter andFilter2 = compare(col2, FilterOperatorEnum.EQ, "15");
+                LogicalTupleFilter logicalAndFilter = and(andFilter1, andFilter2);
+
+                TupleFilter andFilter3 = compare(col, FilterOperatorEnum.EQ, start2);
+                TupleFilter andFilter4 = compare(col2, FilterOperatorEnum.EQ, "35");
+                LogicalTupleFilter logicalAndFilter2 = and(andFilter3, andFilter4);
+
+                LogicalTupleFilter finalFilter = and(logicalAndFilter, logicalAndFilter2);
+                SegmentPruner segmentPruner = new SegmentPruner(finalFilter);
+                Assert.assertFalse(segmentPruner.check(seg));
+                Assert.assertFalse(segmentPruner.check(seg2));
+            }
+
+            {
+                TupleFilter andFilter1 = compare(col, FilterOperatorEnum.EQ, start);
+                TupleFilter andFilter2 = compare(col2, FilterOperatorEnum.EQ, "15");
+                LogicalTupleFilter logicalAndFilter = and(andFilter1, andFilter2);
+
+                TupleFilter andFilter3 = compare(col, FilterOperatorEnum.LT, start2);
+                TupleFilter andFilter4 = compare(col2, FilterOperatorEnum.LT, "35");
+                LogicalTupleFilter logicalAndFilter2 = and(andFilter3, andFilter4);
+
+                LogicalTupleFilter finalFilter = and(logicalAndFilter, logicalAndFilter2);
+                SegmentPruner segmentPruner = new SegmentPruner(finalFilter);
+                Assert.assertTrue(segmentPruner.check(seg));
+                Assert.assertFalse(segmentPruner.check(seg2));
+            }
+
+            {
+                TupleFilter andFilter1 = compare(col, FilterOperatorEnum.EQ, start);
+                TupleFilter andFilter2 = compare(col2, FilterOperatorEnum.EQ, "35");
+                LogicalTupleFilter logicalAndFilter = or(andFilter1, andFilter2);
+
+                TupleFilter andFilter3 = compare(col, FilterOperatorEnum.EQ, start2);
+                TupleFilter andFilter4 = compare(col2, FilterOperatorEnum.LT, "35");
+                LogicalTupleFilter logicalAndFilter2 = or(andFilter3, andFilter4);
+
+                LogicalTupleFilter finalFilter = or(logicalAndFilter, logicalAndFilter2);
+                SegmentPruner segmentPruner = new SegmentPruner(finalFilter);
+                Assert.assertTrue(segmentPruner.check(seg));
+                Assert.assertTrue(segmentPruner.check(seg2));
+            }
+
+            {
+                TupleFilter andFilter1 = compare(col, FilterOperatorEnum.EQ, start);
+                TupleFilter andFilter2 = compare(col2, FilterOperatorEnum.IN, "15");
+                LogicalTupleFilter logicalAndFilter = and(andFilter1, andFilter2);
+
+                TupleFilter andFilter3 = compare(col, FilterOperatorEnum.EQ, start2);
+                TupleFilter andFilter4 = compare(col2, FilterOperatorEnum.LT, "35");
+                LogicalTupleFilter logicalAndFilter2 = or(andFilter3, andFilter4);
+
+                LogicalTupleFilter finalFilter = or(logicalAndFilter, logicalAndFilter2);
+                SegmentPruner segmentPruner = new SegmentPruner(finalFilter);
+                Assert.assertTrue(segmentPruner.check(seg));
+                Assert.assertTrue(segmentPruner.check(seg2));
+            }
+
+            {
+                TupleFilter andFilter1 = compare(col, FilterOperatorEnum.EQ, start);
+                TupleFilter andFilter2 = compare(col2, FilterOperatorEnum.EQ, "35");
+                LogicalTupleFilter logicalAndFilter = or(andFilter1, andFilter2);
+
+                TupleFilter andFilter3 = compare(col, FilterOperatorEnum.EQ, start2);
+                TupleFilter andFilter4 = compare(col2, FilterOperatorEnum.LT, "35");
+                LogicalTupleFilter logicalAndFilter2 = or(andFilter3, andFilter4);
+
+                LogicalTupleFilter finalFilter1 = or(logicalAndFilter, logicalAndFilter2);
+
+                TupleFilter andFilter5 = compare(col, FilterOperatorEnum.EQ, start);
+                TupleFilter andFilter6 = compare(col2, FilterOperatorEnum.IN, "15");
+                LogicalTupleFilter logicalAndFilter3 = and(andFilter5, andFilter6);
+
+                TupleFilter andFilter7 = compare(col, FilterOperatorEnum.EQ, start2);
+                TupleFilter andFilter8 = compare(col2, FilterOperatorEnum.LT, "35");
+                LogicalTupleFilter logicalAndFilter4 = or(andFilter7, andFilter8);
+
+                LogicalTupleFilter finalFilter2 = or(logicalAndFilter3, logicalAndFilter4);
+
+                LogicalTupleFilter finalFinalFilter = or(finalFilter1, finalFilter2);
+
+                SegmentPruner segmentPruner = new SegmentPruner(finalFinalFilter);
+                Assert.assertTrue(segmentPruner.check(seg));
+                Assert.assertTrue(segmentPruner.check(seg2));
+            }
+        }
+    }
 }