You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by sh...@apache.org on 2018/05/27 11:26:54 UTC

[kylin] 04/04: KYLIN-3352 better filter transform for better seg pruning

This is an automated email from the ASF dual-hosted git repository.

shaofengshi pushed a commit to branch 2.3.x
in repository https://gitbox.apache.org/repos/asf/kylin.git

commit 72387b1cf483f2c63542df679816d4127e142f01
Author: Li Yang <li...@apache.org>
AuthorDate: Fri Apr 27 14:39:21 2018 +0800

    KYLIN-3352 better filter transform for better seg pruning
---
 .../java/org/apache/kylin/gridtable/GTUtil.java    |  28 +++--
 .../kylin/metadata/filter/CompareTupleFilter.java  |   1 -
 .../kylin/storage/gtrecord/DictGridTableTest.java  | 127 +++++++++++++++++----
 3 files changed, 126 insertions(+), 30 deletions(-)

diff --git a/core-cube/src/main/java/org/apache/kylin/gridtable/GTUtil.java b/core-cube/src/main/java/org/apache/kylin/gridtable/GTUtil.java
index 5c9dfe3..65704d5 100644
--- a/core-cube/src/main/java/org/apache/kylin/gridtable/GTUtil.java
+++ b/core-cube/src/main/java/org/apache/kylin/gridtable/GTUtil.java
@@ -33,6 +33,7 @@ import org.apache.kylin.metadata.filter.ConstantTupleFilter;
 import org.apache.kylin.metadata.filter.FilterOptimizeTransformer;
 import org.apache.kylin.metadata.filter.IFilterCodeSystem;
 import org.apache.kylin.metadata.filter.TupleFilter;
+import org.apache.kylin.metadata.filter.TupleFilter.FilterOperatorEnum;
 import org.apache.kylin.metadata.filter.TupleFilterSerializer;
 import org.apache.kylin.metadata.model.TableDesc;
 import org.apache.kylin.metadata.model.TblColRef;
@@ -154,8 +155,7 @@ public class GTUtil {
             // In case of NOT(unEvaluatableFilter), we should immediately replace it as TRUE,
             // Otherwise, unEvaluatableFilter will later be replace with TRUE and NOT(unEvaluatableFilter)
             // will always return FALSE.
-            if (filter.getOperator() == TupleFilter.FilterOperatorEnum.NOT
-                    && !TupleFilter.isEvaluableRecursively(filter)) {
+            if (filter.getOperator() == FilterOperatorEnum.NOT && !TupleFilter.isEvaluableRecursively(filter)) {
                 TupleFilter.collectColumns(filter, unevaluatableColumnCollector);
                 return ConstantTupleFilter.TRUE;
             }
@@ -181,7 +181,6 @@ public class GTUtil {
             return filter;
         }
 
-        @SuppressWarnings({ "rawtypes", "unchecked" })
         protected TupleFilter encodeConstants(CompareTupleFilter oldCompareFilter) {
             // extract ColumnFilter & ConstantFilter
             TblColRef externalCol = oldCompareFilter.getColumn();
@@ -249,9 +248,13 @@ public class GTUtil {
                 }
                 break;
             case LT:
-                code = translate(col, firstValue, 1);
+                code = translate(col, firstValue, 0);
                 if (code == null) {
-                    result = ConstantTupleFilter.TRUE;
+                    code = translate(col, firstValue, -1);
+                    if (code == null)
+                        result = ConstantTupleFilter.FALSE;
+                    else
+                        result = newCompareFilter(FilterOperatorEnum.LTE, externalCol, code);
                 } else {
                     newCompareFilter.addChild(new ConstantTupleFilter(code));
                     result = newCompareFilter;
@@ -267,9 +270,13 @@ public class GTUtil {
                 }
                 break;
             case GT:
-                code = translate(col, firstValue, -1);
+                code = translate(col, firstValue, 0);
                 if (code == null) {
-                    result = ConstantTupleFilter.TRUE;
+                    code = translate(col, firstValue, 1);
+                    if (code == null)
+                        result = ConstantTupleFilter.FALSE;
+                    else
+                        result = newCompareFilter(FilterOperatorEnum.GTE, externalCol, code);
                 } else {
                     newCompareFilter.addChild(new ConstantTupleFilter(code));
                     result = newCompareFilter;
@@ -290,6 +297,13 @@ public class GTUtil {
             return result;
         }
 
+        private TupleFilter newCompareFilter(FilterOperatorEnum op, TblColRef col, ByteArray code) {
+            CompareTupleFilter r = new CompareTupleFilter(op);
+            r.addChild(new ColumnTupleFilter(col));
+            r.addChild(new ConstantTupleFilter(code));
+            return r;
+        }
+
         transient ByteBuffer buf;
 
         protected ByteArray translate(int col, Object value, int roundingFlag) {
diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/filter/CompareTupleFilter.java b/core-metadata/src/main/java/org/apache/kylin/metadata/filter/CompareTupleFilter.java
index 4875217..298477f 100644
--- a/core-metadata/src/main/java/org/apache/kylin/metadata/filter/CompareTupleFilter.java
+++ b/core-metadata/src/main/java/org/apache/kylin/metadata/filter/CompareTupleFilter.java
@@ -154,7 +154,6 @@ public class CompareTupleFilter extends TupleFilter implements IOptimizeableTupl
 
     // TODO requires generalize, currently only evaluates COLUMN {op} CONST
     @Override
-    @SuppressWarnings({ "unchecked", "rawtypes" })
     public boolean evaluate(IEvaluatableTuple tuple, IFilterCodeSystem cs) {
         // extract tuple value
         Object tupleValue = null;
diff --git a/core-storage/src/test/java/org/apache/kylin/storage/gtrecord/DictGridTableTest.java b/core-storage/src/test/java/org/apache/kylin/storage/gtrecord/DictGridTableTest.java
index e80a67c..073c12c 100644
--- a/core-storage/src/test/java/org/apache/kylin/storage/gtrecord/DictGridTableTest.java
+++ b/core-storage/src/test/java/org/apache/kylin/storage/gtrecord/DictGridTableTest.java
@@ -34,7 +34,7 @@ import org.apache.kylin.common.util.ImmutableBitSet;
 import org.apache.kylin.common.util.LocalFileMetadataTestCase;
 import org.apache.kylin.common.util.Pair;
 import org.apache.kylin.cube.gridtable.CubeCodeSystem;
-import org.apache.kylin.dict.NumberDictionaryBuilder;
+import org.apache.kylin.dict.NumberDictionaryForestBuilder;
 import org.apache.kylin.dict.StringBytesConverter;
 import org.apache.kylin.dict.TrieDictionaryBuilder;
 import org.apache.kylin.dimension.DictionaryDimEnc;
@@ -441,19 +441,61 @@ public class DictGridTableTest extends LocalFileMetadataTestCase {
         TblColRef extColA = TblColRef.mockup(extTable, 1, "A", "timestamp");
         TblColRef extColB = TblColRef.mockup(extTable, 2, "B", "integer");
 
-        CompareTupleFilter fComp1 = compare(extColA, FilterOperatorEnum.GT, "2015-01-14");
-        CompareTupleFilter fComp2 = compare(extColB, FilterOperatorEnum.LT, "9");
-        LogicalTupleFilter filter = and(fComp1, fComp2);
-
         List<TblColRef> colMapping = Lists.newArrayList();
         colMapping.add(extColA);
         colMapping.add(extColB);
+        
+        CompareTupleFilter fComp1 = compare(extColA, FilterOperatorEnum.GT, "2015-01-14");
+        
+        // $1<"9" round down to FALSE
+        {
+            LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.LT, "9"));
+            TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
+            assertEquals(ConstantTupleFilter.FALSE, newFilter);
+        }
 
-        // $1<"9" round up to $1<"10"
-        TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
-        assertEquals(
-                "AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 LT [\\x00]]",
-                newFilter.toString());
+        // $1<"10" needs no rounding
+        {
+            LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.LT, "10"));
+            TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
+            assertEquals(
+                    "AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 LT [\\x00]]",
+                    newFilter.toString());
+        }
+        
+        // $1<"11" round down to <="10"
+        {
+            LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.LT, "11"));
+            TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
+            assertEquals(
+                    "AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 LTE [\\x00]]",
+                    newFilter.toString());
+        }
+        
+        // $1<="9" round down to FALSE
+        {
+            LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.LTE, "9"));
+            TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
+            assertEquals(ConstantTupleFilter.FALSE, newFilter);
+        }
+        
+        // $1<="10" needs no rounding
+        {
+            LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.LTE, "10"));
+            TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
+            assertEquals(
+                    "AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 LTE [\\x00]]",
+                    newFilter.toString());
+        }
+        
+        // $1<="11" round down to <="10"
+        {
+            LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.LTE, "11"));
+            TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
+            assertEquals(
+                    "AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 LTE [\\x00]]",
+                    newFilter.toString());
+        }
     }
 
     @Test
@@ -464,17 +506,61 @@ public class DictGridTableTest extends LocalFileMetadataTestCase {
         TblColRef extColA = TblColRef.mockup(extTable, 1, "A", "timestamp");
         TblColRef extColB = TblColRef.mockup(extTable, 2, "B", "integer");
 
-        CompareTupleFilter fComp1 = compare(extColA, FilterOperatorEnum.GT, "2015-01-14");
-        CompareTupleFilter fComp2 = compare(extColB, FilterOperatorEnum.LTE, "9");
-        LogicalTupleFilter filter = and(fComp1, fComp2);
-
         List<TblColRef> colMapping = Lists.newArrayList();
         colMapping.add(extColA);
         colMapping.add(extColB);
+        
+        CompareTupleFilter fComp1 = compare(extColA, FilterOperatorEnum.GT, "2015-01-14");
+        
+        // $1>"101" round up to FALSE
+        {
+            LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.GT, "101"));
+            TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
+            assertEquals(ConstantTupleFilter.FALSE, newFilter);
+        }
 
-        // $1<="9" round down to FALSE
-        TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
-        assertEquals(ConstantTupleFilter.FALSE, newFilter);
+        // $1>"100" needs no rounding
+        {
+            LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.GT, "100"));
+            TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
+            assertEquals(
+                    "AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 GT [\\x09]]",
+                    newFilter.toString());
+        }
+        
+        // $1>"99" round up to >="100"
+        {
+            LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.GT, "99"));
+            TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
+            assertEquals(
+                    "AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 GTE [\\x09]]",
+                    newFilter.toString());
+        }
+        
+        // $1>="101" round up to FALSE
+        {
+            LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.GTE, "101"));
+            TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
+            assertEquals(ConstantTupleFilter.FALSE, newFilter);
+        }
+        
+        // $1>="100" needs no rounding
+        {
+            LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.GTE, "100"));
+            TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
+            assertEquals(
+                    "AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 GTE [\\x09]]",
+                    newFilter.toString());
+        }
+        
+        // $1>="99" round up to >="100"
+        {
+            LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.GTE, "99"));
+            TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
+            assertEquals(
+                    "AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 GTE [\\x09]]",
+                    newFilter.toString());
+        }
     }
 
     @Test
@@ -639,7 +725,6 @@ public class DictGridTableTest extends LocalFileMetadataTestCase {
         return info;
     }
 
-    @SuppressWarnings("unchecked")
     private static CubeCodeSystem newDictCodeSystem() {
         DimensionEncoding[] dimEncs = new DimensionEncoding[3];
         dimEncs[1] = new DictionaryDimEnc(newDictionaryOfInteger());
@@ -647,7 +732,6 @@ public class DictGridTableTest extends LocalFileMetadataTestCase {
         return new CubeCodeSystem(dimEncs);
     }
 
-    @SuppressWarnings("rawtypes")
     private static Dictionary newDictionaryOfString() {
         TrieDictionaryBuilder<String> builder = new TrieDictionaryBuilder<>(new StringBytesConverter());
         builder.addValue("Dong");
@@ -663,9 +747,8 @@ public class DictGridTableTest extends LocalFileMetadataTestCase {
         return builder.build(0);
     }
 
-    @SuppressWarnings("rawtypes")
     private static Dictionary newDictionaryOfInteger() {
-        NumberDictionaryBuilder builder = new NumberDictionaryBuilder();
+        NumberDictionaryForestBuilder builder = new NumberDictionaryForestBuilder();
         builder.addValue("10");
         builder.addValue("20");
         builder.addValue("30");
@@ -676,7 +759,7 @@ public class DictGridTableTest extends LocalFileMetadataTestCase {
         builder.addValue("80");
         builder.addValue("90");
         builder.addValue("100");
-        return builder.build(0);
+        return builder.build();
     }
 
     public static ImmutableBitSet setOf(int... values) {

-- 
To stop receiving notification emails like this one, please contact
shaofengshi@apache.org.