You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by ni...@apache.org on 2020/02/07 14:25:59 UTC
[kylin] 04/44: KYLIN-4280 SegmenPruner support 'OR' and 'NOT'
This is an automated email from the ASF dual-hosted git repository.
nic pushed a commit to branch 3.0.x
in repository https://gitbox.apache.org/repos/asf/kylin.git
commit 4300fa51a048a4033c54fa775e4e7dff58413c07
Author: zhangbushi5 <84...@qq.com>
AuthorDate: Fri Dec 6 17:04:22 2019 +0800
KYLIN-4280 SegmenPruner support 'OR' and 'NOT'
---
.../apache/kylin/cube/common/SegmentPruner.java | 48 ++-----
.../apache/kylin/cube/common/TupleFilterNode.java | 76 +++++++++++
.../kylin/cube/common/SegmentPrunerTest.java | 151 +++++++++++++++++++++
3 files changed, 241 insertions(+), 34 deletions(-)
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/common/SegmentPruner.java b/core-cube/src/main/java/org/apache/kylin/cube/common/SegmentPruner.java
index f3f2052..ae21a4d 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/common/SegmentPruner.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/common/SegmentPruner.java
@@ -19,9 +19,7 @@
package org.apache.kylin.cube.common;
import java.util.ArrayList;
-import java.util.Collections;
import java.util.List;
-import java.util.Map;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
@@ -45,11 +43,11 @@ import org.slf4j.LoggerFactory;
public class SegmentPruner {
private static final Logger logger = LoggerFactory.getLogger(SegmentPruner.class);
- final private Set<CompareTupleFilter> mustTrueCompares;
+ private TupleFilterNode node;
public SegmentPruner(TupleFilter filter) {
- this.mustTrueCompares = filter == null ? Collections.<CompareTupleFilter> emptySet()
- : filter.findMustTrueCompareFilters();
+ this.node = new TupleFilterNode(filter);
+
}
public List<CubeSegment> listSegmentsForQuery(CubeInstance cube) {
@@ -62,7 +60,7 @@ public class SegmentPruner {
}
public boolean check(CubeSegment seg) {
-
+
if (seg.getInputRecords() == 0) {
if (seg.getConfig().isSkippingEmptySegments()) {
logger.debug("Prune segment {} due to 0 input record", seg);
@@ -72,53 +70,35 @@ public class SegmentPruner {
}
}
- Map<String, DimensionRangeInfo> segDimRangInfoMap = seg.getDimensionRangeInfoMap();
- for (CompareTupleFilter comp : mustTrueCompares) {
- TblColRef col = comp.getColumn();
-
- if (!col.getType().needCompare()) {
- continue;
- }
-
- DimensionRangeInfo dimRangeInfo = segDimRangInfoMap.get(col.getIdentity());
- if (dimRangeInfo == null)
- dimRangeInfo = tryDeduceRangeFromPartitionCol(seg, col);
- if (dimRangeInfo == null)
- continue;
-
- String minVal = dimRangeInfo.getMin();
- String maxVal = dimRangeInfo.getMax();
-
- if (!satisfy(comp, minVal, maxVal)) {
- logger.debug("Prune segment {} due to given filter", seg);
- return false;
- }
+ if (!node.checkSeg(seg)) {
+ logger.debug("Prune segment {} due to given filter", seg);
+ return false;
}
logger.debug("Pruner passed on segment {}", seg);
return true;
}
- private DimensionRangeInfo tryDeduceRangeFromPartitionCol(CubeSegment seg, TblColRef col) {
+ public static DimensionRangeInfo tryDeduceRangeFromPartitionCol(CubeSegment seg, TblColRef col) {
DataModelDesc model = seg.getModel();
PartitionDesc part = model.getPartitionDesc();
-
+
if (!part.isPartitioned())
return null;
if (!col.equals(part.getPartitionDateColumnRef()))
return null;
-
+
// deduce the dim range from TSRange
TSRange tsRange = seg.getTSRange();
if (tsRange.start.isMin || tsRange.end.isMax)
return null; // DimensionRangeInfo cannot express infinite
-
+
String min = tsRangeToStr(tsRange.start.v, part);
String max = tsRangeToStr(tsRange.end.v - 1, part); // note the -1, end side is exclusive
return new DimensionRangeInfo(min, max);
}
- private String tsRangeToStr(long ts, PartitionDesc part) {
+ private static String tsRangeToStr(long ts, PartitionDesc part) {
String value;
DataType partitionColType = part.getPartitionDateColumnRef().getType();
if (partitionColType.isDate()) {
@@ -138,7 +118,7 @@ public class SegmentPruner {
return value;
}
- private boolean satisfy(CompareTupleFilter comp, String minVal, String maxVal) {
+ public static boolean satisfy(CompareTupleFilter comp, String minVal, String maxVal) {
// When both min and max are null, it means all cells of the column are null.
// In such case, return true to let query engine scan the segment, since the
@@ -177,7 +157,7 @@ public class SegmentPruner {
}
}
- private String toString(Object v) {
+ private static String toString(Object v) {
return v == null ? null : v.toString();
}
}
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/common/TupleFilterNode.java b/core-cube/src/main/java/org/apache/kylin/cube/common/TupleFilterNode.java
new file mode 100644
index 0000000..8a84f67
--- /dev/null
+++ b/core-cube/src/main/java/org/apache/kylin/cube/common/TupleFilterNode.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.cube.common;
+
+import org.apache.kylin.cube.CubeSegment;
+import org.apache.kylin.cube.DimensionRangeInfo;
+import org.apache.kylin.metadata.filter.CompareTupleFilter;
+import org.apache.kylin.metadata.filter.LogicalTupleFilter;
+import org.apache.kylin.metadata.filter.TupleFilter;
+import org.apache.kylin.metadata.model.TblColRef;
+
+public class TupleFilterNode {
+ private TupleFilter filter;
+
+ public TupleFilterNode(TupleFilter filter) {
+ this.filter = filter;
+ }
+
+ public boolean checkSeg(CubeSegment seg) {
+ if (filter == null)
+ return true;
+ if (filter instanceof CompareTupleFilter) {
+ CompareTupleFilter compareTupleFilter = (CompareTupleFilter) filter;
+ TblColRef col = compareTupleFilter.getColumn();
+ if (col == null) {
+ return true;
+ }
+ DimensionRangeInfo dimRangeInfo = seg.getDimensionRangeInfoMap().get(col.getIdentity());
+ if (dimRangeInfo == null)
+ dimRangeInfo = SegmentPruner.tryDeduceRangeFromPartitionCol(seg, col);
+ if (dimRangeInfo == null)
+ return true;
+ String minVal = dimRangeInfo.getMin();
+ String maxVal = dimRangeInfo.getMax();
+ return SegmentPruner.satisfy(compareTupleFilter, minVal, maxVal);
+ }
+
+ if (filter instanceof LogicalTupleFilter) {
+ if (filter.getOperator() == TupleFilter.FilterOperatorEnum.AND) {
+ for (TupleFilter filter : filter.getChildren()) {
+ if (!new TupleFilterNode(filter).checkSeg(seg))
+ return false;
+ }
+ return true;
+
+ } else if (filter.getOperator() == TupleFilter.FilterOperatorEnum.OR) {
+ for (TupleFilter filter : filter.getChildren()) {
+ if (new TupleFilterNode(filter).checkSeg(seg))
+ return true;
+ }
+ return false;
+ } else if (filter.getOperator() == TupleFilter.FilterOperatorEnum.NOT) {
+ for (TupleFilter filter : filter.getChildren()) {
+ return !(new TupleFilterNode(filter).checkSeg(seg));
+ }
+ }
+ }
+ return true;
+ }
+}
diff --git a/core-cube/src/test/java/org/apache/kylin/cube/common/SegmentPrunerTest.java b/core-cube/src/test/java/org/apache/kylin/cube/common/SegmentPrunerTest.java
index 603db97..69f9b10 100644
--- a/core-cube/src/test/java/org/apache/kylin/cube/common/SegmentPrunerTest.java
+++ b/core-cube/src/test/java/org/apache/kylin/cube/common/SegmentPrunerTest.java
@@ -18,15 +18,21 @@
package org.apache.kylin.cube.common;
+import static org.apache.kylin.metadata.filter.TupleFilter.and;
import static org.apache.kylin.metadata.filter.TupleFilter.compare;
+import static org.apache.kylin.metadata.filter.TupleFilter.or;
+import com.google.common.collect.Maps;
import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.util.DateFormat;
import org.apache.kylin.common.util.LocalFileMetadataTestCase;
import org.apache.kylin.common.util.SetAndUnsetSystemProp;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.CubeSegment;
+import org.apache.kylin.cube.DimensionRangeInfo;
import org.apache.kylin.metadata.filter.ConstantTupleFilter;
+import org.apache.kylin.metadata.filter.LogicalTupleFilter;
import org.apache.kylin.metadata.filter.TupleFilter;
import org.apache.kylin.metadata.filter.TupleFilter.FilterOperatorEnum;
import org.apache.kylin.metadata.model.SegmentRange.TSRange;
@@ -39,6 +45,8 @@ import org.junit.Test;
import com.google.common.collect.Sets;
+import java.util.Map;
+
public class SegmentPrunerTest extends LocalFileMetadataTestCase {
private CubeInstance cube;
@@ -192,4 +200,147 @@ public class SegmentPrunerTest extends LocalFileMetadataTestCase {
}
}
}
+
+ @Test
+ public void testLegacyCubeSegWithOrFilter() {
+ // legacy cube segments does not have DimensionRangeInfo, but with TSRange can do some pruning
+ CubeInstance cube = CubeManager.getInstance(getTestConfig())
+ .getCube("test_kylin_cube_without_slr_left_join_ready_2_segments");
+
+ TblColRef col = cube.getModel().findColumn("TEST_KYLIN_FACT.CAL_DT");
+ TblColRef col2 = cube.getModel().findColumn("TEST_KYLIN_FACT.LSTG_SITE_ID");
+ CubeSegment seg = cube.getSegments(SegmentStatusEnum.READY).get(0);
+ Map<String, DimensionRangeInfo> dimensionRangeInfoMap = Maps.newHashMap();
+ dimensionRangeInfoMap.put("TEST_KYLIN_FACT.LSTG_SITE_ID", new DimensionRangeInfo("10", "20"));
+ seg.setDimensionRangeInfoMap(dimensionRangeInfoMap);
+ TSRange tsRange = seg.getTSRange();
+ String start = DateFormat.formatToTimeStr(tsRange.start.v, "yyyy-MM-dd");
+
+ CubeSegment seg2 = cube.getSegments(SegmentStatusEnum.READY).get(1);
+ Map<String, DimensionRangeInfo> dimensionRangeInfoMap2 = Maps.newHashMap();
+ dimensionRangeInfoMap2.put("TEST_KYLIN_FACT.LSTG_SITE_ID", new DimensionRangeInfo("20", "30"));
+ seg2.setDimensionRangeInfoMap(dimensionRangeInfoMap2);
+ TSRange tsRange2 = seg2.getTSRange();
+ String start2 = DateFormat.formatToTimeStr(tsRange2.start.v, "yyyy-MM-dd");
+
+ try (SetAndUnsetSystemProp sns = new SetAndUnsetSystemProp("kylin.query.skip-empty-segments", "false")) {
+ {
+ TupleFilter andFilter1 = compare(col, FilterOperatorEnum.EQ, start);
+ TupleFilter andFilter2 = compare(col2, FilterOperatorEnum.EQ, "15");
+ LogicalTupleFilter logicalAndFilter = and(andFilter1, andFilter2);
+
+ TupleFilter andFilter3 = compare(col, FilterOperatorEnum.EQ, start2);
+ TupleFilter andFilter4 = compare(col2, FilterOperatorEnum.EQ, "25");
+ LogicalTupleFilter logicalAndFilter2 = and(andFilter3, andFilter4);
+
+ LogicalTupleFilter finalFilter = or(logicalAndFilter, logicalAndFilter2);
+ SegmentPruner segmentPruner = new SegmentPruner(finalFilter);
+ Assert.assertTrue(segmentPruner.check(seg));
+ Assert.assertTrue(segmentPruner.check(seg2));
+ }
+
+ {
+ TupleFilter andFilter1 = compare(col, FilterOperatorEnum.EQ, start);
+ TupleFilter andFilter2 = compare(col2, FilterOperatorEnum.EQ, "15");
+ LogicalTupleFilter logicalAndFilter = and(andFilter1, andFilter2);
+
+ TupleFilter andFilter3 = compare(col, FilterOperatorEnum.EQ, start2);
+ TupleFilter andFilter4 = compare(col2, FilterOperatorEnum.EQ, "35");
+ LogicalTupleFilter logicalAndFilter2 = and(andFilter3, andFilter4);
+
+ LogicalTupleFilter finalFilter = or(logicalAndFilter, logicalAndFilter2);
+ SegmentPruner segmentPruner = new SegmentPruner(finalFilter);
+ Assert.assertTrue(segmentPruner.check(seg));
+ Assert.assertFalse(segmentPruner.check(seg2));
+ }
+
+ {
+ TupleFilter andFilter1 = compare(col, FilterOperatorEnum.EQ, start);
+ TupleFilter andFilter2 = compare(col2, FilterOperatorEnum.EQ, "15");
+ LogicalTupleFilter logicalAndFilter = and(andFilter1, andFilter2);
+
+ TupleFilter andFilter3 = compare(col, FilterOperatorEnum.EQ, start2);
+ TupleFilter andFilter4 = compare(col2, FilterOperatorEnum.EQ, "35");
+ LogicalTupleFilter logicalAndFilter2 = and(andFilter3, andFilter4);
+
+ LogicalTupleFilter finalFilter = and(logicalAndFilter, logicalAndFilter2);
+ SegmentPruner segmentPruner = new SegmentPruner(finalFilter);
+ Assert.assertFalse(segmentPruner.check(seg));
+ Assert.assertFalse(segmentPruner.check(seg2));
+ }
+
+ {
+ TupleFilter andFilter1 = compare(col, FilterOperatorEnum.EQ, start);
+ TupleFilter andFilter2 = compare(col2, FilterOperatorEnum.EQ, "15");
+ LogicalTupleFilter logicalAndFilter = and(andFilter1, andFilter2);
+
+ TupleFilter andFilter3 = compare(col, FilterOperatorEnum.LT, start2);
+ TupleFilter andFilter4 = compare(col2, FilterOperatorEnum.LT, "35");
+ LogicalTupleFilter logicalAndFilter2 = and(andFilter3, andFilter4);
+
+ LogicalTupleFilter finalFilter = and(logicalAndFilter, logicalAndFilter2);
+ SegmentPruner segmentPruner = new SegmentPruner(finalFilter);
+ Assert.assertTrue(segmentPruner.check(seg));
+ Assert.assertFalse(segmentPruner.check(seg2));
+ }
+
+ {
+ TupleFilter andFilter1 = compare(col, FilterOperatorEnum.EQ, start);
+ TupleFilter andFilter2 = compare(col2, FilterOperatorEnum.EQ, "35");
+ LogicalTupleFilter logicalAndFilter = or(andFilter1, andFilter2);
+
+ TupleFilter andFilter3 = compare(col, FilterOperatorEnum.EQ, start2);
+ TupleFilter andFilter4 = compare(col2, FilterOperatorEnum.LT, "35");
+ LogicalTupleFilter logicalAndFilter2 = or(andFilter3, andFilter4);
+
+ LogicalTupleFilter finalFilter = or(logicalAndFilter, logicalAndFilter2);
+ SegmentPruner segmentPruner = new SegmentPruner(finalFilter);
+ Assert.assertTrue(segmentPruner.check(seg));
+ Assert.assertTrue(segmentPruner.check(seg2));
+ }
+
+ {
+ TupleFilter andFilter1 = compare(col, FilterOperatorEnum.EQ, start);
+ TupleFilter andFilter2 = compare(col2, FilterOperatorEnum.IN, "15");
+ LogicalTupleFilter logicalAndFilter = and(andFilter1, andFilter2);
+
+ TupleFilter andFilter3 = compare(col, FilterOperatorEnum.EQ, start2);
+ TupleFilter andFilter4 = compare(col2, FilterOperatorEnum.LT, "35");
+ LogicalTupleFilter logicalAndFilter2 = or(andFilter3, andFilter4);
+
+ LogicalTupleFilter finalFilter = or(logicalAndFilter, logicalAndFilter2);
+ SegmentPruner segmentPruner = new SegmentPruner(finalFilter);
+ Assert.assertTrue(segmentPruner.check(seg));
+ Assert.assertTrue(segmentPruner.check(seg2));
+ }
+
+ {
+ TupleFilter andFilter1 = compare(col, FilterOperatorEnum.EQ, start);
+ TupleFilter andFilter2 = compare(col2, FilterOperatorEnum.EQ, "35");
+ LogicalTupleFilter logicalAndFilter = or(andFilter1, andFilter2);
+
+ TupleFilter andFilter3 = compare(col, FilterOperatorEnum.EQ, start2);
+ TupleFilter andFilter4 = compare(col2, FilterOperatorEnum.LT, "35");
+ LogicalTupleFilter logicalAndFilter2 = or(andFilter3, andFilter4);
+
+ LogicalTupleFilter finalFilter1 = or(logicalAndFilter, logicalAndFilter2);
+
+ TupleFilter andFilter5 = compare(col, FilterOperatorEnum.EQ, start);
+ TupleFilter andFilter6 = compare(col2, FilterOperatorEnum.IN, "15");
+ LogicalTupleFilter logicalAndFilter3 = and(andFilter5, andFilter6);
+
+ TupleFilter andFilter7 = compare(col, FilterOperatorEnum.EQ, start2);
+ TupleFilter andFilter8 = compare(col2, FilterOperatorEnum.LT, "35");
+ LogicalTupleFilter logicalAndFilter4 = or(andFilter7, andFilter8);
+
+ LogicalTupleFilter finalFilter2 = or(logicalAndFilter3, logicalAndFilter4);
+
+ LogicalTupleFilter finalFinalFilter = or(finalFilter1, finalFilter2);
+
+ SegmentPruner segmentPruner = new SegmentPruner(finalFinalFilter);
+ Assert.assertTrue(segmentPruner.check(seg));
+ Assert.assertTrue(segmentPruner.check(seg2));
+ }
+ }
+ }
}