You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by sh...@apache.org on 2018/05/27 11:26:54 UTC
[kylin] 04/04: KYLIN-3352 better filter transform for better seg pruning
This is an automated email from the ASF dual-hosted git repository.
shaofengshi pushed a commit to branch 2.3.x
in repository https://gitbox.apache.org/repos/asf/kylin.git
commit 72387b1cf483f2c63542df679816d4127e142f01
Author: Li Yang <li...@apache.org>
AuthorDate: Fri Apr 27 14:39:21 2018 +0800
KYLIN-3352 better filter transform for better seg pruning
---
.../java/org/apache/kylin/gridtable/GTUtil.java | 28 +++--
.../kylin/metadata/filter/CompareTupleFilter.java | 1 -
.../kylin/storage/gtrecord/DictGridTableTest.java | 127 +++++++++++++++++----
3 files changed, 126 insertions(+), 30 deletions(-)
diff --git a/core-cube/src/main/java/org/apache/kylin/gridtable/GTUtil.java b/core-cube/src/main/java/org/apache/kylin/gridtable/GTUtil.java
index 5c9dfe3..65704d5 100644
--- a/core-cube/src/main/java/org/apache/kylin/gridtable/GTUtil.java
+++ b/core-cube/src/main/java/org/apache/kylin/gridtable/GTUtil.java
@@ -33,6 +33,7 @@ import org.apache.kylin.metadata.filter.ConstantTupleFilter;
import org.apache.kylin.metadata.filter.FilterOptimizeTransformer;
import org.apache.kylin.metadata.filter.IFilterCodeSystem;
import org.apache.kylin.metadata.filter.TupleFilter;
+import org.apache.kylin.metadata.filter.TupleFilter.FilterOperatorEnum;
import org.apache.kylin.metadata.filter.TupleFilterSerializer;
import org.apache.kylin.metadata.model.TableDesc;
import org.apache.kylin.metadata.model.TblColRef;
@@ -154,8 +155,7 @@ public class GTUtil {
// In case of NOT(unEvaluatableFilter), we should immediately replace it as TRUE,
// Otherwise, unEvaluatableFilter will later be replace with TRUE and NOT(unEvaluatableFilter)
// will always return FALSE.
- if (filter.getOperator() == TupleFilter.FilterOperatorEnum.NOT
- && !TupleFilter.isEvaluableRecursively(filter)) {
+ if (filter.getOperator() == FilterOperatorEnum.NOT && !TupleFilter.isEvaluableRecursively(filter)) {
TupleFilter.collectColumns(filter, unevaluatableColumnCollector);
return ConstantTupleFilter.TRUE;
}
@@ -181,7 +181,6 @@ public class GTUtil {
return filter;
}
- @SuppressWarnings({ "rawtypes", "unchecked" })
protected TupleFilter encodeConstants(CompareTupleFilter oldCompareFilter) {
// extract ColumnFilter & ConstantFilter
TblColRef externalCol = oldCompareFilter.getColumn();
@@ -249,9 +248,13 @@ public class GTUtil {
}
break;
case LT:
- code = translate(col, firstValue, 1);
+ code = translate(col, firstValue, 0);
if (code == null) {
- result = ConstantTupleFilter.TRUE;
+ code = translate(col, firstValue, -1);
+ if (code == null)
+ result = ConstantTupleFilter.FALSE;
+ else
+ result = newCompareFilter(FilterOperatorEnum.LTE, externalCol, code);
} else {
newCompareFilter.addChild(new ConstantTupleFilter(code));
result = newCompareFilter;
@@ -267,9 +270,13 @@ public class GTUtil {
}
break;
case GT:
- code = translate(col, firstValue, -1);
+ code = translate(col, firstValue, 0);
if (code == null) {
- result = ConstantTupleFilter.TRUE;
+ code = translate(col, firstValue, 1);
+ if (code == null)
+ result = ConstantTupleFilter.FALSE;
+ else
+ result = newCompareFilter(FilterOperatorEnum.GTE, externalCol, code);
} else {
newCompareFilter.addChild(new ConstantTupleFilter(code));
result = newCompareFilter;
@@ -290,6 +297,13 @@ public class GTUtil {
return result;
}
+ private TupleFilter newCompareFilter(FilterOperatorEnum op, TblColRef col, ByteArray code) {
+ CompareTupleFilter r = new CompareTupleFilter(op);
+ r.addChild(new ColumnTupleFilter(col));
+ r.addChild(new ConstantTupleFilter(code));
+ return r;
+ }
+
transient ByteBuffer buf;
protected ByteArray translate(int col, Object value, int roundingFlag) {
diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/filter/CompareTupleFilter.java b/core-metadata/src/main/java/org/apache/kylin/metadata/filter/CompareTupleFilter.java
index 4875217..298477f 100644
--- a/core-metadata/src/main/java/org/apache/kylin/metadata/filter/CompareTupleFilter.java
+++ b/core-metadata/src/main/java/org/apache/kylin/metadata/filter/CompareTupleFilter.java
@@ -154,7 +154,6 @@ public class CompareTupleFilter extends TupleFilter implements IOptimizeableTupl
// TODO requires generalize, currently only evaluates COLUMN {op} CONST
@Override
- @SuppressWarnings({ "unchecked", "rawtypes" })
public boolean evaluate(IEvaluatableTuple tuple, IFilterCodeSystem cs) {
// extract tuple value
Object tupleValue = null;
diff --git a/core-storage/src/test/java/org/apache/kylin/storage/gtrecord/DictGridTableTest.java b/core-storage/src/test/java/org/apache/kylin/storage/gtrecord/DictGridTableTest.java
index e80a67c..073c12c 100644
--- a/core-storage/src/test/java/org/apache/kylin/storage/gtrecord/DictGridTableTest.java
+++ b/core-storage/src/test/java/org/apache/kylin/storage/gtrecord/DictGridTableTest.java
@@ -34,7 +34,7 @@ import org.apache.kylin.common.util.ImmutableBitSet;
import org.apache.kylin.common.util.LocalFileMetadataTestCase;
import org.apache.kylin.common.util.Pair;
import org.apache.kylin.cube.gridtable.CubeCodeSystem;
-import org.apache.kylin.dict.NumberDictionaryBuilder;
+import org.apache.kylin.dict.NumberDictionaryForestBuilder;
import org.apache.kylin.dict.StringBytesConverter;
import org.apache.kylin.dict.TrieDictionaryBuilder;
import org.apache.kylin.dimension.DictionaryDimEnc;
@@ -441,19 +441,61 @@ public class DictGridTableTest extends LocalFileMetadataTestCase {
TblColRef extColA = TblColRef.mockup(extTable, 1, "A", "timestamp");
TblColRef extColB = TblColRef.mockup(extTable, 2, "B", "integer");
- CompareTupleFilter fComp1 = compare(extColA, FilterOperatorEnum.GT, "2015-01-14");
- CompareTupleFilter fComp2 = compare(extColB, FilterOperatorEnum.LT, "9");
- LogicalTupleFilter filter = and(fComp1, fComp2);
-
List<TblColRef> colMapping = Lists.newArrayList();
colMapping.add(extColA);
colMapping.add(extColB);
+
+ CompareTupleFilter fComp1 = compare(extColA, FilterOperatorEnum.GT, "2015-01-14");
+
+ // $1<"9" round down to FALSE
+ {
+ LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.LT, "9"));
+ TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
+ assertEquals(ConstantTupleFilter.FALSE, newFilter);
+ }
- // $1<"9" round up to $1<"10"
- TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
- assertEquals(
- "AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 LT [\\x00]]",
- newFilter.toString());
+ // $1<"10" needs no rounding
+ {
+ LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.LT, "10"));
+ TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
+ assertEquals(
+ "AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 LT [\\x00]]",
+ newFilter.toString());
+ }
+
+ // $1<"11" round down to <="10"
+ {
+ LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.LT, "11"));
+ TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
+ assertEquals(
+ "AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 LTE [\\x00]]",
+ newFilter.toString());
+ }
+
+ // $1<="9" round down to FALSE
+ {
+ LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.LTE, "9"));
+ TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
+ assertEquals(ConstantTupleFilter.FALSE, newFilter);
+ }
+
+ // $1<="10" needs no rounding
+ {
+ LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.LTE, "10"));
+ TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
+ assertEquals(
+ "AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 LTE [\\x00]]",
+ newFilter.toString());
+ }
+
+ // $1<="11" round down to <="10"
+ {
+ LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.LTE, "11"));
+ TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
+ assertEquals(
+ "AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 LTE [\\x00]]",
+ newFilter.toString());
+ }
}
@Test
@@ -464,17 +506,61 @@ public class DictGridTableTest extends LocalFileMetadataTestCase {
TblColRef extColA = TblColRef.mockup(extTable, 1, "A", "timestamp");
TblColRef extColB = TblColRef.mockup(extTable, 2, "B", "integer");
- CompareTupleFilter fComp1 = compare(extColA, FilterOperatorEnum.GT, "2015-01-14");
- CompareTupleFilter fComp2 = compare(extColB, FilterOperatorEnum.LTE, "9");
- LogicalTupleFilter filter = and(fComp1, fComp2);
-
List<TblColRef> colMapping = Lists.newArrayList();
colMapping.add(extColA);
colMapping.add(extColB);
+
+ CompareTupleFilter fComp1 = compare(extColA, FilterOperatorEnum.GT, "2015-01-14");
+
+ // $1>"101" round up to FALSE
+ {
+ LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.GT, "101"));
+ TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
+ assertEquals(ConstantTupleFilter.FALSE, newFilter);
+ }
- // $1<="9" round down to FALSE
- TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
- assertEquals(ConstantTupleFilter.FALSE, newFilter);
+ // $1>"100" needs no rounding
+ {
+ LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.GT, "100"));
+ TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
+ assertEquals(
+ "AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 GT [\\x09]]",
+ newFilter.toString());
+ }
+
+ // $1>"99" round up to >="100"
+ {
+ LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.GT, "99"));
+ TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
+ assertEquals(
+ "AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 GTE [\\x09]]",
+ newFilter.toString());
+ }
+
+ // $1>="101" round up to FALSE
+ {
+ LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.GTE, "101"));
+ TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
+ assertEquals(ConstantTupleFilter.FALSE, newFilter);
+ }
+
+ // $1>="100" needs no rounding
+ {
+ LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.GTE, "100"));
+ TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
+ assertEquals(
+ "AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 GTE [\\x09]]",
+ newFilter.toString());
+ }
+
+ // $1>="99" round up to >="100"
+ {
+ LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.GTE, "99"));
+ TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
+ assertEquals(
+ "AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 GTE [\\x09]]",
+ newFilter.toString());
+ }
}
@Test
@@ -639,7 +725,6 @@ public class DictGridTableTest extends LocalFileMetadataTestCase {
return info;
}
- @SuppressWarnings("unchecked")
private static CubeCodeSystem newDictCodeSystem() {
DimensionEncoding[] dimEncs = new DimensionEncoding[3];
dimEncs[1] = new DictionaryDimEnc(newDictionaryOfInteger());
@@ -647,7 +732,6 @@ public class DictGridTableTest extends LocalFileMetadataTestCase {
return new CubeCodeSystem(dimEncs);
}
- @SuppressWarnings("rawtypes")
private static Dictionary newDictionaryOfString() {
TrieDictionaryBuilder<String> builder = new TrieDictionaryBuilder<>(new StringBytesConverter());
builder.addValue("Dong");
@@ -663,9 +747,8 @@ public class DictGridTableTest extends LocalFileMetadataTestCase {
return builder.build(0);
}
- @SuppressWarnings("rawtypes")
private static Dictionary newDictionaryOfInteger() {
- NumberDictionaryBuilder builder = new NumberDictionaryBuilder();
+ NumberDictionaryForestBuilder builder = new NumberDictionaryForestBuilder();
builder.addValue("10");
builder.addValue("20");
builder.addValue("30");
@@ -676,7 +759,7 @@ public class DictGridTableTest extends LocalFileMetadataTestCase {
builder.addValue("80");
builder.addValue("90");
builder.addValue("100");
- return builder.build(0);
+ return builder.build();
}
public static ImmutableBitSet setOf(int... values) {
--
To stop receiving notification emails like this one, please contact
shaofengshi@apache.org.