You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2015/06/11 12:07:58 UTC

[1/2] incubator-kylin git commit: KYLIN-752 improve query performance for values that are not in dictionary

Repository: incubator-kylin
Updated Branches:
  refs/heads/0.7-staging f0f8521b8 -> aaec68018


KYLIN-752 improve query performance for values that are not in dictionary

A quick fix to drop the not-in-dictionary values before generating hbase
scan range.
And based on it, we can make a further optimization that as long as
there is at least one andFilter is empty(after we drop the
not-in-dictionary values), we can simply assert that the whole
andFilters is empty to avoid an unnecessary hbase scan.
However, this strategy only works for 'EQ', 'IN'. For 'LT', 'LTE', 'GT',
'GTE', we have to replace them with the some sort of closest smaller or
bigger values by calling "final public int getIdFromValue(T value, int
roundingFlag)" of "Dictionary" class.

Signed-off-by: Li, Yang <ya...@ebay.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/eaf66856
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/eaf66856
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/eaf66856

Branch: refs/heads/0.7-staging
Commit: eaf66856178feb79ebe55467808c476520da55c6
Parents: f0f8521
Author: superhua <hu...@mininglamp.com>
Authored: Thu May 21 17:56:48 2015 +0800
Committer: Li, Yang <ya...@ebay.com>
Committed: Thu Jun 11 17:55:50 2015 +0800

----------------------------------------------------------------------
 .../kylin/storage/hbase/CubeStorageEngine.java  |  20 ++-
 .../hbase/TupleFilterValueOptimizer.java        | 136 +++++++++++++++++++
 2 files changed, 152 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/eaf66856/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java b/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java
index 5c6be3b..06e1cd2 100644
--- a/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java
+++ b/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java
@@ -421,8 +421,10 @@ public class CubeStorageEngine implements IStorageEngine {
             }
 
             Collection<ColumnValueRange> andRanges = translateToAndDimRanges(andFilter.getChildren(), cubeSegment);
-
-            result.add(andRanges);
+            // ignore the empty-AND
+            if (andRanges != null && !andRanges.isEmpty()) {
+                result.add(andRanges);
+            }
         }
 
         return preprocessConstantConditions(result);
@@ -458,6 +460,7 @@ public class CubeStorageEngine implements IStorageEngine {
 
     private Collection<ColumnValueRange> translateToAndDimRanges(List<? extends TupleFilter> andFilters, CubeSegment cubeSegment) {
         Map<TblColRef, ColumnValueRange> rangeMap = new HashMap<TblColRef, ColumnValueRange>();
+        boolean isEmptyAnd = false;
         for (TupleFilter filter : andFilters) {
             if ((filter instanceof CompareTupleFilter) == false) {
                 continue;
@@ -468,11 +471,20 @@ public class CubeStorageEngine implements IStorageEngine {
                 continue;
             }
 
-            ColumnValueRange range = new ColumnValueRange(comp.getColumn(), comp.getValues(), comp.getOperator());
+            // optimize the values of tuple filter
+            Collection<String> newValues = TupleFilterValueOptimizer.doOptimization(cubeSegment, comp.getColumn(), comp);
+            // in case the current filter is an empty-AND, do not generate ColumnValueRange
+            // and also set isEmptyAnd to true so that an empty AND-list will be returned
+            if (TupleFilterValueOptimizer.isEmptyAnd(comp, newValues)) {
+                isEmptyAnd = true;
+                break;
+            }
+
+            ColumnValueRange range = new ColumnValueRange(comp.getColumn(), newValues, comp.getOperator());
             andMerge(range, rangeMap);
 
         }
-        return rangeMap.values();
+        return isEmptyAnd ? Collections.<ColumnValueRange> emptyList() : rangeMap.values();
     }
 
     private void andMerge(ColumnValueRange range, Map<TblColRef, ColumnValueRange> rangeMap) {

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/eaf66856/storage/src/main/java/org/apache/kylin/storage/hbase/TupleFilterValueOptimizer.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/hbase/TupleFilterValueOptimizer.java b/storage/src/main/java/org/apache/kylin/storage/hbase/TupleFilterValueOptimizer.java
new file mode 100644
index 0000000..49d3143
--- /dev/null
+++ b/storage/src/main/java/org/apache/kylin/storage/hbase/TupleFilterValueOptimizer.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.storage.hbase;
+
+import org.apache.kylin.cube.CubeSegment;
+import org.apache.kylin.cube.kv.RowKeyColumnIO;
+import org.apache.kylin.dict.Dictionary;
+import org.apache.kylin.metadata.model.TblColRef;
+import org.apache.kylin.metadata.filter.CompareTupleFilter;
+import org.apache.kylin.metadata.filter.TupleFilter;
+
+import java.util.Collection;
+import java.util.LinkedList;
+
+/**
+ * @author Huang, Hua
+ */
+public class TupleFilterValueOptimizer {
+
+    private static boolean isInDictionary(Dictionary<String> dict, String value) {
+        boolean inFlag = true;
+        try {
+            int id = dict.getIdFromValue(value, 0);
+        } catch (IllegalArgumentException ex) {
+            inFlag = false;
+        }
+
+        return inFlag;
+    }
+
+    private static Collection<String> removeNonDictionaryValues(CubeSegment cubeSegment, TblColRef column, Collection<String> values) {
+        RowKeyColumnIO rowKeyColumnIO = new RowKeyColumnIO(cubeSegment);
+
+        Dictionary<String> dict = rowKeyColumnIO.getDictionary(column);
+        // in case that dict is null, just return values
+        if (dict == null) return values;
+
+        Collection<String> newValues = new LinkedList<String>();
+        for (String value : values) {
+            if (isInDictionary(dict, value)) newValues.add(value);
+        }
+
+        return newValues;
+    }
+
+    private static Collection<String> roundDictionaryValues(CubeSegment cubeSegment, TblColRef column, Collection<String> values, int roundingFlag) {
+        RowKeyColumnIO rowKeyColumnIO = new RowKeyColumnIO(cubeSegment);
+
+        Dictionary<String> dict = rowKeyColumnIO.getDictionary(column);
+        // in case that dict is null, just return values
+        if (dict == null) return values;
+
+        Collection<String> newValues = new LinkedList<String>();
+        for (String value : values) {
+            if (isInDictionary(dict, value)) {
+                newValues.add(value);
+            }
+            else {
+                try {
+                    int id = dict.getIdFromValue(value, roundingFlag);
+                    String newValue = dict.getValueFromId(id);
+                    newValues.add(newValue);
+                } catch (IllegalArgumentException ex) {
+                }
+            }
+        }
+
+        return newValues;
+    }
+
+    private static Collection<String> optimizeCompareTupleFilter(CubeSegment cubeSegment, TblColRef column, CompareTupleFilter comp) {
+        Collection<String> newValues = comp.getValues();
+        switch (comp.getOperator()) {
+            case EQ:
+            case IN:
+                newValues = removeNonDictionaryValues(cubeSegment, column, comp.getValues());
+                break;
+            case LT:
+            case LTE:
+                newValues = roundDictionaryValues(cubeSegment, column, comp.getValues(), -1);
+                break;
+            case GT:
+            case GTE:
+                newValues = roundDictionaryValues(cubeSegment, column, comp.getValues(), 1);
+                break;
+            default:
+                break;
+        }
+
+        return newValues;
+    }
+
+    public static Collection<String> doOptimization(CubeSegment cubeSegment, TblColRef column, TupleFilter filter) {
+        if (filter instanceof CompareTupleFilter) {
+            return optimizeCompareTupleFilter(cubeSegment, column, (CompareTupleFilter)filter);
+        }
+
+        return filter.getValues();
+    }
+
+    public static boolean isEmptyAnd(TupleFilter filter, Collection<String> values) {
+        boolean isEmptyAnd = false;
+        switch (filter.getOperator()) {
+            case EQ:
+            case IN:
+            case LT:
+            case LTE:
+            case GT:
+            case GTE:
+                if (values == null || values.isEmpty()) {
+                    isEmptyAnd = true;
+                }
+                break;
+            default:
+                break;
+        }
+
+        return isEmptyAnd;
+    }
+}


[2/2] incubator-kylin git commit: KYLIN-752 Improved IN clause performance

Posted by li...@apache.org.
KYLIN-752 Improved IN clause performance


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/aaec6801
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/aaec6801
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/aaec6801

Branch: refs/heads/0.7-staging
Commit: aaec680182712ed4906d1548213688de92cc5538
Parents: eaf6685
Author: Li, Yang <ya...@ebay.com>
Authored: Thu Jun 11 17:55:23 2015 +0800
Committer: Li, Yang <ya...@ebay.com>
Committed: Thu Jun 11 18:07:13 2015 +0800

----------------------------------------------------------------------
 .../java/org/apache/kylin/dict/Dictionary.java  |  32 +++--
 .../kylin/storage/hbase/ColumnValueRange.java   |  45 +++++-
 .../kylin/storage/hbase/CubeStorageEngine.java  |  38 +++---
 .../hbase/TupleFilterValueOptimizer.java        | 136 -------------------
 .../storage/hbase/ColumnValueRangeTest.java     | 117 ++++++++++++++++
 5 files changed, 196 insertions(+), 172 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/aaec6801/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
----------------------------------------------------------------------
diff --git a/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java b/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
index 6a06ed2..3b5e88d 100644
--- a/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
+++ b/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
@@ -66,22 +66,24 @@ abstract public class Dictionary<T> implements Writable {
     /**
      * Convenient form of <code>getIdFromValue(value, 0)</code>
      */
-    final public int getIdFromValue(T value) {
+    final public int getIdFromValue(T value) throws IllegalArgumentException {
         return getIdFromValue(value, 0);
     }
 
     /**
-     * Returns the ID integer of given value. In case of not found - if
-     * roundingFlag=0, throw IllegalArgumentException; - if roundingFlag<0, the
-     * closest smaller ID integer if exist; - if roundingFlag>0, the closest
-     * bigger ID integer if exist. The implementation often has cache, thus
-     * faster than the byte[] version getIdFromValueBytes()
+     * Returns the ID integer of given value. In case of not found
+     * <p>
+     * - if roundingFlag=0, throw IllegalArgumentException; <br>
+     * - if roundingFlag<0, the closest smaller ID integer if exist; <br>
+     * - if roundingFlag>0, the closest bigger ID integer if exist. <br>
+     * <p>
+     * The implementation often has cache, thus faster than the byte[] version getIdFromValueBytes()
      * 
      * @throws IllegalArgumentException
      *             if value is not found in dictionary and rounding is off or
      *             failed
      */
-    final public int getIdFromValue(T value, int roundingFlag) {
+    final public int getIdFromValue(T value, int roundingFlag) throws IllegalArgumentException {
         if (isNullObjectForm(value))
             return nullId();
         else
@@ -117,16 +119,16 @@ abstract public class Dictionary<T> implements Writable {
     }
 
     /**
-     * A lower level API, return ID integer from raw value bytes. In case of not
-     * found - if roundingFlag=0, throw IllegalArgumentException; - if
-     * roundingFlag<0, the closest smaller ID integer if exist; - if
-     * roundingFlag>0, the closest bigger ID integer if exist. Bypassing the
-     * cache layer, this could be significantly slower than getIdFromValue(T
-     * value).
+     * A lower level API, return ID integer from raw value bytes. In case of not found
+     * <p>
+     * - if roundingFlag=0, throw IllegalArgumentException; <br>
+     * - if roundingFlag<0, the closest smaller ID integer if exist; <br> 
+     * - if roundingFlag>0, the closest bigger ID integer if exist. <br>
+     * <p>
+     * Bypassing the cache layer, this could be significantly slower than getIdFromValue(T value).
      * 
      * @throws IllegalArgumentException
-     *             if value is not found in dictionary and rounding is off or
-     *             failed
+     *             if value is not found in dictionary and rounding is off or failed
      */
     final public int getIdFromValueBytes(byte[] value, int offset, int len, int roundingFlag) {
         if (isNullByteForm(value, offset, len))

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/aaec6801/storage/src/main/java/org/apache/kylin/storage/hbase/ColumnValueRange.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/hbase/ColumnValueRange.java b/storage/src/main/java/org/apache/kylin/storage/hbase/ColumnValueRange.java
index 831bf3d..d5224c5 100644
--- a/storage/src/main/java/org/apache/kylin/storage/hbase/ColumnValueRange.java
+++ b/storage/src/main/java/org/apache/kylin/storage/hbase/ColumnValueRange.java
@@ -20,17 +20,17 @@ package org.apache.kylin.storage.hbase;
 
 import java.util.Collection;
 import java.util.HashSet;
+import java.util.Iterator;
 import java.util.Set;
 
 import com.google.common.collect.Sets;
+
 import org.apache.kylin.cube.kv.RowKeyColumnOrder;
+import org.apache.kylin.dict.Dictionary;
 import org.apache.kylin.metadata.model.TblColRef;
 import org.apache.kylin.metadata.filter.TupleFilter.FilterOperatorEnum;
 
 /**
- * 
- * @author xjiang
- * 
  */
 public class ColumnValueRange {
     private TblColRef column;
@@ -163,6 +163,44 @@ public class ColumnValueRange {
         return (beginValue == null || order.compare(beginValue, v) <= 0) && (endValue == null || order.compare(v, endValue) <= 0);
     }
 
+    // remove invalid EQ/IN values and round start/end according to dictionary
+    public void preEvaluateWithDict(Dictionary<String> dict) {
+        if (dict == null)
+            return;
+        
+        if (equalValues != null) {
+            Iterator<String> it = equalValues.iterator();
+            while (it.hasNext()) {
+                String v = it.next();
+                try {
+                    dict.getIdFromValue(v);
+                } catch (IllegalArgumentException e) {
+                    // value not in dictionary
+                    it.remove();
+                }
+            }
+            refreshBeginEndFromEquals();
+        }
+        
+        if (beginValue != null) {
+            try {
+                beginValue = dict.getValueFromId(dict.getIdFromValue(beginValue, -1));
+            } catch (IllegalArgumentException e) {
+                // value is less than the smallest in dictionary
+                beginValue = null;
+            }
+        }
+        
+        if (endValue != null) {
+            try {
+                endValue = dict.getValueFromId(dict.getIdFromValue(endValue, 1));
+            } catch (IllegalArgumentException e) {
+                // value is greater than the biggest in dictionary
+                endValue = null;
+            }
+        }
+    }
+
     public String toString() {
         if (equalValues == null) {
             return column.getName() + " between " + beginValue + " and " + endValue;
@@ -170,4 +208,5 @@ public class ColumnValueRange {
             return column.getName() + " in " + equalValues;
         }
     }
+
 }

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/aaec6801/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java b/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java
index 06e1cd2..9b11756 100644
--- a/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java
+++ b/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java
@@ -26,7 +26,6 @@ import org.apache.kylin.metadata.model.SegmentStatusEnum;
 import org.apache.kylin.metadata.model.TblColRef;
 import org.apache.kylin.metadata.realization.SQLDigest;
 import org.apache.kylin.storage.hbase.coprocessor.observer.ObserverEnabler;
-
 import org.apache.hadoop.hbase.client.HConnection;
 import org.apache.kylin.common.util.Bytes;
 import org.apache.kylin.common.util.Pair;
@@ -37,6 +36,7 @@ import org.slf4j.LoggerFactory;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
+
 import org.apache.kylin.common.persistence.HBaseConnection;
 import org.apache.kylin.cube.CubeInstance;
 import org.apache.kylin.cube.CubeManager;
@@ -47,6 +47,7 @@ import org.apache.kylin.cube.model.CubeDesc;
 import org.apache.kylin.cube.model.HBaseColumnDesc;
 import org.apache.kylin.cube.model.HBaseMappingDesc;
 import org.apache.kylin.cube.model.CubeDesc.DeriveInfo;
+import org.apache.kylin.dict.Dictionary;
 import org.apache.kylin.dict.lookup.LookupStringTable;
 import org.apache.kylin.storage.IStorageEngine;
 import org.apache.kylin.metadata.filter.ColumnTupleFilter;
@@ -384,8 +385,7 @@ public class CubeStorageEngine implements IStorageEngine {
         // build row key range for each cube segment
         for (CubeSegment cubeSeg : cubeInstance.getSegments(SegmentStatusEnum.READY)) {
 
-            // consider derived (lookup snapshot), filter on dimension may
-            // differ per segment
+            // consider derived (lookup snapshot), filter on dimension may differ per segment
             List<Collection<ColumnValueRange>> orAndDimRanges = translateToOrAndDimRanges(flatFilter, cubeSeg);
             if (orAndDimRanges == null) { // has conflict
                 continue;
@@ -421,8 +421,7 @@ public class CubeStorageEngine implements IStorageEngine {
             }
 
             Collection<ColumnValueRange> andRanges = translateToAndDimRanges(andFilter.getChildren(), cubeSegment);
-            // ignore the empty-AND
-            if (andRanges != null && !andRanges.isEmpty()) {
+            if (andRanges != null) {
                 result.add(andRanges);
             }
         }
@@ -458,9 +457,10 @@ public class CubeStorageEngine implements IStorageEngine {
         return orAndRanges;
     }
 
+    // return empty collection to mean true; return null to mean false
+    @SuppressWarnings("unchecked")
     private Collection<ColumnValueRange> translateToAndDimRanges(List<? extends TupleFilter> andFilters, CubeSegment cubeSegment) {
         Map<TblColRef, ColumnValueRange> rangeMap = new HashMap<TblColRef, ColumnValueRange>();
-        boolean isEmptyAnd = false;
         for (TupleFilter filter : andFilters) {
             if ((filter instanceof CompareTupleFilter) == false) {
                 continue;
@@ -471,20 +471,22 @@ public class CubeStorageEngine implements IStorageEngine {
                 continue;
             }
 
-            // optimize the values of tuple filter
-            Collection<String> newValues = TupleFilterValueOptimizer.doOptimization(cubeSegment, comp.getColumn(), comp);
-            // in case the current filter is an empty-AND, do not generate ColumnValueRange
-            // and also set isEmptyAnd to true so that an empty AND-list will be returned
-            if (TupleFilterValueOptimizer.isEmptyAnd(comp, newValues)) {
-                isEmptyAnd = true;
-                break;
-            }
-
-            ColumnValueRange range = new ColumnValueRange(comp.getColumn(), newValues, comp.getOperator());
+            ColumnValueRange range = new ColumnValueRange(comp.getColumn(), comp.getValues(), comp.getOperator());
             andMerge(range, rangeMap);
-
         }
-        return isEmptyAnd ? Collections.<ColumnValueRange> emptyList() : rangeMap.values();
+        
+        // a little pre-evaluation to remove invalid EQ/IN values and round start/end according to dictionary
+        Iterator<ColumnValueRange> it = rangeMap.values().iterator();
+        while (it.hasNext()) {
+            ColumnValueRange range = it.next();
+            range.preEvaluateWithDict((Dictionary<String>) cubeSegment.getDictionary(range.getColumn()));
+            if (range.satisfyAll())
+                it.remove();
+            else if (range.satisfyNone())
+                return null;
+        }
+        
+        return rangeMap.values();
     }
 
     private void andMerge(ColumnValueRange range, Map<TblColRef, ColumnValueRange> rangeMap) {

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/aaec6801/storage/src/main/java/org/apache/kylin/storage/hbase/TupleFilterValueOptimizer.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/hbase/TupleFilterValueOptimizer.java b/storage/src/main/java/org/apache/kylin/storage/hbase/TupleFilterValueOptimizer.java
deleted file mode 100644
index 49d3143..0000000
--- a/storage/src/main/java/org/apache/kylin/storage/hbase/TupleFilterValueOptimizer.java
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-package org.apache.kylin.storage.hbase;
-
-import org.apache.kylin.cube.CubeSegment;
-import org.apache.kylin.cube.kv.RowKeyColumnIO;
-import org.apache.kylin.dict.Dictionary;
-import org.apache.kylin.metadata.model.TblColRef;
-import org.apache.kylin.metadata.filter.CompareTupleFilter;
-import org.apache.kylin.metadata.filter.TupleFilter;
-
-import java.util.Collection;
-import java.util.LinkedList;
-
-/**
- * @author Huang, Hua
- */
-public class TupleFilterValueOptimizer {
-
-    private static boolean isInDictionary(Dictionary<String> dict, String value) {
-        boolean inFlag = true;
-        try {
-            int id = dict.getIdFromValue(value, 0);
-        } catch (IllegalArgumentException ex) {
-            inFlag = false;
-        }
-
-        return inFlag;
-    }
-
-    private static Collection<String> removeNonDictionaryValues(CubeSegment cubeSegment, TblColRef column, Collection<String> values) {
-        RowKeyColumnIO rowKeyColumnIO = new RowKeyColumnIO(cubeSegment);
-
-        Dictionary<String> dict = rowKeyColumnIO.getDictionary(column);
-        // in case that dict is null, just return values
-        if (dict == null) return values;
-
-        Collection<String> newValues = new LinkedList<String>();
-        for (String value : values) {
-            if (isInDictionary(dict, value)) newValues.add(value);
-        }
-
-        return newValues;
-    }
-
-    private static Collection<String> roundDictionaryValues(CubeSegment cubeSegment, TblColRef column, Collection<String> values, int roundingFlag) {
-        RowKeyColumnIO rowKeyColumnIO = new RowKeyColumnIO(cubeSegment);
-
-        Dictionary<String> dict = rowKeyColumnIO.getDictionary(column);
-        // in case that dict is null, just return values
-        if (dict == null) return values;
-
-        Collection<String> newValues = new LinkedList<String>();
-        for (String value : values) {
-            if (isInDictionary(dict, value)) {
-                newValues.add(value);
-            }
-            else {
-                try {
-                    int id = dict.getIdFromValue(value, roundingFlag);
-                    String newValue = dict.getValueFromId(id);
-                    newValues.add(newValue);
-                } catch (IllegalArgumentException ex) {
-                }
-            }
-        }
-
-        return newValues;
-    }
-
-    private static Collection<String> optimizeCompareTupleFilter(CubeSegment cubeSegment, TblColRef column, CompareTupleFilter comp) {
-        Collection<String> newValues = comp.getValues();
-        switch (comp.getOperator()) {
-            case EQ:
-            case IN:
-                newValues = removeNonDictionaryValues(cubeSegment, column, comp.getValues());
-                break;
-            case LT:
-            case LTE:
-                newValues = roundDictionaryValues(cubeSegment, column, comp.getValues(), -1);
-                break;
-            case GT:
-            case GTE:
-                newValues = roundDictionaryValues(cubeSegment, column, comp.getValues(), 1);
-                break;
-            default:
-                break;
-        }
-
-        return newValues;
-    }
-
-    public static Collection<String> doOptimization(CubeSegment cubeSegment, TblColRef column, TupleFilter filter) {
-        if (filter instanceof CompareTupleFilter) {
-            return optimizeCompareTupleFilter(cubeSegment, column, (CompareTupleFilter)filter);
-        }
-
-        return filter.getValues();
-    }
-
-    public static boolean isEmptyAnd(TupleFilter filter, Collection<String> values) {
-        boolean isEmptyAnd = false;
-        switch (filter.getOperator()) {
-            case EQ:
-            case IN:
-            case LT:
-            case LTE:
-            case GT:
-            case GTE:
-                if (values == null || values.isEmpty()) {
-                    isEmptyAnd = true;
-                }
-                break;
-            default:
-                break;
-        }
-
-        return isEmptyAnd;
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/aaec6801/storage/src/test/java/org/apache/kylin/storage/hbase/ColumnValueRangeTest.java
----------------------------------------------------------------------
diff --git a/storage/src/test/java/org/apache/kylin/storage/hbase/ColumnValueRangeTest.java b/storage/src/test/java/org/apache/kylin/storage/hbase/ColumnValueRangeTest.java
new file mode 100644
index 0000000..47b81d3
--- /dev/null
+++ b/storage/src/test/java/org/apache/kylin/storage/hbase/ColumnValueRangeTest.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.storage.hbase;
+
+import static org.junit.Assert.*;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.kylin.dict.Dictionary;
+import org.apache.kylin.dict.StringBytesConverter;
+import org.apache.kylin.dict.TrieDictionaryBuilder;
+import org.apache.kylin.metadata.filter.TupleFilter.FilterOperatorEnum;
+import org.apache.kylin.metadata.model.ColumnDesc;
+import org.apache.kylin.metadata.model.TableDesc;
+import org.apache.kylin.metadata.model.TblColRef;
+import org.junit.Test;
+
+public class ColumnValueRangeTest {
+
+    @Test
+    public void testPreEvaluateWithDict() {
+        TblColRef col = mockupTblColRef();
+        Dictionary<String> dict = mockupDictionary(col, "CN", "US");
+
+        ColumnValueRange r1 = new ColumnValueRange(col, set("CN", "US", "Other"), FilterOperatorEnum.EQ);
+        r1.preEvaluateWithDict(dict);
+        assertEquals(set("CN", "US"), r1.getEqualValues());
+
+        {
+            ColumnValueRange r2 = new ColumnValueRange(col, set("CN"), FilterOperatorEnum.LT);
+            r2.preEvaluateWithDict(dict);
+            assertEquals(null, r2.getBeginValue());
+            assertEquals("CN", r2.getEndValue());
+
+            ColumnValueRange r3 = new ColumnValueRange(col, set("Other"), FilterOperatorEnum.LT);
+            r3.preEvaluateWithDict(dict);
+            assertEquals(null, r3.getBeginValue());
+            assertEquals("US", r3.getEndValue());
+
+            ColumnValueRange r4 = new ColumnValueRange(col, set("UT"), FilterOperatorEnum.LT);
+            r4.preEvaluateWithDict(dict);
+            assertEquals(null, r4.getBeginValue());
+            assertEquals(null, r4.getEndValue());
+        }
+
+        {
+            ColumnValueRange r2 = new ColumnValueRange(col, set("CN"), FilterOperatorEnum.GTE);
+            r2.preEvaluateWithDict(dict);
+            assertEquals("CN", r2.getBeginValue());
+            assertEquals(null, r2.getEndValue());
+            
+            ColumnValueRange r3 = new ColumnValueRange(col, set("Other"), FilterOperatorEnum.GTE);
+            r3.preEvaluateWithDict(dict);
+            assertEquals("CN", r3.getBeginValue());
+            assertEquals(null, r3.getEndValue());
+            
+            ColumnValueRange r4 = new ColumnValueRange(col, set("CI"), FilterOperatorEnum.GTE);
+            r4.preEvaluateWithDict(dict);
+            assertEquals(null, r4.getBeginValue());
+            assertEquals(null, r4.getEndValue());
+        }
+    }
+
+    public static Dictionary<String> mockupDictionary(TblColRef col, String... values) {
+        TrieDictionaryBuilder<String> builder = new TrieDictionaryBuilder<String>(new StringBytesConverter());
+        for (String v : values) {
+            builder.addValue(v);
+        }
+        return builder.build(0);
+    }
+
+    private static Set<String> set(String... values) {
+        HashSet<String> list = new HashSet<String>();
+        list.addAll(Arrays.asList(values));
+        return list;
+    }
+
+    public static TblColRef mockupTblColRef() {
+        TableDesc t = mockupTableDesc("table_a");
+        ColumnDesc c = mockupColumnDesc(t, 1, "col_1", "string");
+        return new TblColRef(c);
+    }
+
+    private static TableDesc mockupTableDesc(String tableName) {
+        TableDesc mockup = new TableDesc();
+        mockup.setName(tableName);
+        return mockup;
+    }
+
+    private static ColumnDesc mockupColumnDesc(TableDesc table, int oneBasedColumnIndex, String name, String datatype) {
+        ColumnDesc desc = new ColumnDesc();
+        String id = "" + oneBasedColumnIndex;
+        desc.setId(id);
+        desc.setName(name);
+        desc.setDatatype(datatype);
+        desc.init(table);
+        return desc;
+    }
+}