You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2015/06/11 12:23:57 UTC

incubator-kylin git commit: KYLIN-752 Improved IN clause performance

Repository: incubator-kylin
Updated Branches:
  refs/heads/0.8 80cccad50 -> 3e3aebb58


KYLIN-752 Improved IN clause performance


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/3e3aebb5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/3e3aebb5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/3e3aebb5

Branch: refs/heads/0.8
Commit: 3e3aebb589199a30e7201c8780edfc0e1cf5cb53
Parents: 80cccad
Author: Li, Yang <ya...@ebay.com>
Authored: Thu Jun 11 18:23:45 2015 +0800
Committer: Li, Yang <ya...@ebay.com>
Committed: Thu Jun 11 18:23:45 2015 +0800

----------------------------------------------------------------------
 .../java/org/apache/kylin/dict/Dictionary.java  | 30 +++++++------
 .../kylin/storage/hbase/ColumnValueRange.java   | 45 +++++++++++++++++++-
 .../kylin/storage/hbase/CubeStorageEngine.java  | 36 +++++++++++++---
 3 files changed, 89 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/3e3aebb5/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
----------------------------------------------------------------------
diff --git a/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java b/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
index cdc42b4..be2429c 100644
--- a/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
+++ b/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
@@ -67,23 +67,24 @@ abstract public class Dictionary<T> implements Writable {
     /**
      * Convenient form of <code>getIdFromValue(value, 0)</code>
      */
-    final public int getIdFromValue(T value) {
+    final public int getIdFromValue(T value) throws IllegalArgumentException {
         return getIdFromValue(value, 0);
     }
 
     /**
      * Returns the ID integer of given value. In case of not found
-     * - if roundingFlag=0, throw IllegalArgumentException;
-     * - if roundingFlag<0, the closest smaller ID integer if exist;
-     * - if roundingFlag>0, the closest bigger ID integer if exist.
-     * 
+     * <p>
+     * - if roundingFlag=0, throw IllegalArgumentException; <br>
+     * - if roundingFlag<0, the closest smaller ID integer if exist; <br>
+     * - if roundingFlag>0, the closest bigger ID integer if exist. <br>
+     * <p>
      * The implementation often has cache, thus faster than the byte[] version getIdFromValueBytes()
      * 
      * @throws IllegalArgumentException
      *             if value is not found in dictionary and rounding is off;
      *             or if rounding cannot find a smaller or bigger ID
      */
-    final public int getIdFromValue(T value, int roundingFlag) {
+    final public int getIdFromValue(T value, int roundingFlag) throws IllegalArgumentException {
         if (isNullObjectForm(value))
             return nullId();
         else
@@ -101,7 +102,7 @@ abstract public class Dictionary<T> implements Writable {
      * @throws IllegalArgumentException
      *             if ID is not found in dictionary
      */
-    final public T getValueFromId(int id) {
+    final public T getValueFromId(int id) throws IllegalArgumentException {
         if (isNullId(id))
             return null;
         else
@@ -114,23 +115,24 @@ abstract public class Dictionary<T> implements Writable {
      * Convenient form of
      * <code>getIdFromValueBytes(value, offset, len, 0)</code>
      */
-    final public int getIdFromValueBytes(byte[] value, int offset, int len) {
+    final public int getIdFromValueBytes(byte[] value, int offset, int len) throws IllegalArgumentException {
         return getIdFromValueBytes(value, offset, len, 0);
     }
 
     /**
      * A lower level API, return ID integer from raw value bytes. In case of not found 
-     * - if roundingFlag=0, throw IllegalArgumentException; 
-     * - if roundingFlag<0, the closest smaller ID integer if exist; 
-     * - if roundingFlag>0, the closest bigger ID integer if exist.
-     * 
+     * <p>
+     * - if roundingFlag=0, throw IllegalArgumentException; <br>
+     * - if roundingFlag<0, the closest smaller ID integer if exist; <br>
+     * - if roundingFlag>0, the closest bigger ID integer if exist. <br>
+     * <p>
      * Bypassing the cache layer, this could be significantly slower than getIdFromValue(T value).
      * 
      * @throws IllegalArgumentException
      *             if value is not found in dictionary and rounding is off;
      *             or if rounding cannot find a smaller or bigger ID
      */
-    final public int getIdFromValueBytes(byte[] value, int offset, int len, int roundingFlag) {
+    final public int getIdFromValueBytes(byte[] value, int offset, int len, int roundingFlag) throws IllegalArgumentException {
         if (isNullByteForm(value, offset, len))
             return nullId();
         else
@@ -162,7 +164,7 @@ abstract public class Dictionary<T> implements Writable {
      * @throws IllegalArgumentException
      *             if ID is not found in dictionary
      */
-    final public int getValueBytesFromId(int id, byte[] returnValue, int offset) {
+    final public int getValueBytesFromId(int id, byte[] returnValue, int offset) throws IllegalArgumentException {
         if (isNullId(id))
             return -1;
         else

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/3e3aebb5/storage/src/main/java/org/apache/kylin/storage/hbase/ColumnValueRange.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/hbase/ColumnValueRange.java b/storage/src/main/java/org/apache/kylin/storage/hbase/ColumnValueRange.java
index 831bf3d..1a594e1 100644
--- a/storage/src/main/java/org/apache/kylin/storage/hbase/ColumnValueRange.java
+++ b/storage/src/main/java/org/apache/kylin/storage/hbase/ColumnValueRange.java
@@ -20,12 +20,15 @@ package org.apache.kylin.storage.hbase;
 
 import java.util.Collection;
 import java.util.HashSet;
+import java.util.Iterator;
 import java.util.Set;
 
-import com.google.common.collect.Sets;
 import org.apache.kylin.cube.kv.RowKeyColumnOrder;
-import org.apache.kylin.metadata.model.TblColRef;
+import org.apache.kylin.dict.Dictionary;
 import org.apache.kylin.metadata.filter.TupleFilter.FilterOperatorEnum;
+import org.apache.kylin.metadata.model.TblColRef;
+
+import com.google.common.collect.Sets;
 
 /**
  * 
@@ -163,6 +166,44 @@ public class ColumnValueRange {
         return (beginValue == null || order.compare(beginValue, v) <= 0) && (endValue == null || order.compare(v, endValue) <= 0);
     }
 
+    // remove invalid EQ/IN values and round start/end according to dictionary
+    public void preEvaluateWithDict(Dictionary<String> dict) {
+        if (dict == null)
+            return;
+
+        if (equalValues != null) {
+            Iterator<String> it = equalValues.iterator();
+            while (it.hasNext()) {
+                String v = it.next();
+                try {
+                    dict.getIdFromValue(v);
+                } catch (IllegalArgumentException e) {
+                    // value not in dictionary
+                    it.remove();
+                }
+            }
+            refreshBeginEndFromEquals();
+        }
+
+        if (beginValue != null) {
+            try {
+                beginValue = dict.getValueFromId(dict.getIdFromValue(beginValue, -1));
+            } catch (IllegalArgumentException e) {
+                // value is less than the smallest in dictionary
+                beginValue = null;
+            }
+        }
+
+        if (endValue != null) {
+            try {
+                endValue = dict.getValueFromId(dict.getIdFromValue(endValue, 1));
+            } catch (IllegalArgumentException e) {
+                // value is greater than the biggest in dictionary
+                endValue = null;
+            }
+        }
+    }
+
     public String toString() {
         if (equalValues == null) {
             return column.getName() + " between " + beginValue + " and " + endValue;

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/3e3aebb5/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java b/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java
index 4d143d5..d5132bb 100644
--- a/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java
+++ b/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java
@@ -18,10 +18,18 @@
 
 package org.apache.kylin.storage.hbase;
 
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Range;
-import com.google.common.collect.Sets;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
 import org.apache.hadoop.hbase.client.HConnection;
 import org.apache.kylin.common.persistence.HBaseConnection;
 import org.apache.kylin.common.util.Bytes;
@@ -35,6 +43,7 @@ import org.apache.kylin.cube.model.CubeDesc;
 import org.apache.kylin.cube.model.CubeDesc.DeriveInfo;
 import org.apache.kylin.cube.model.HBaseColumnDesc;
 import org.apache.kylin.cube.model.HBaseMappingDesc;
+import org.apache.kylin.dict.Dictionary;
 import org.apache.kylin.dict.lookup.LookupStringTable;
 import org.apache.kylin.metadata.filter.ColumnTupleFilter;
 import org.apache.kylin.metadata.filter.CompareTupleFilter;
@@ -54,7 +63,10 @@ import org.apache.kylin.storage.tuple.TupleInfo;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.util.*;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Range;
+import com.google.common.collect.Sets;
 
 /**
  * @author xjiang, yangli9
@@ -479,6 +491,8 @@ public class CubeStorageEngine implements ICachableStorageEngine {
         return orAndRanges;
     }
 
+    // return empty collection to mean true; return null to mean false
+    @SuppressWarnings("unchecked")
     private Collection<ColumnValueRange> translateToAndDimRanges(List<? extends TupleFilter> andFilters, CubeSegment cubeSegment) {
         Map<TblColRef, ColumnValueRange> rangeMap = new HashMap<TblColRef, ColumnValueRange>();
         for (TupleFilter filter : andFilters) {
@@ -491,11 +505,21 @@ public class CubeStorageEngine implements ICachableStorageEngine {
                 continue;
             }
 
-            @SuppressWarnings("unchecked")
             ColumnValueRange range = new ColumnValueRange(comp.getColumn(), (Collection<String>) comp.getValues(), comp.getOperator());
             andMerge(range, rangeMap);
+        }
 
+        // a little pre-evaluation to remove invalid EQ/IN values and round start/end according to dictionary
+        Iterator<ColumnValueRange> it = rangeMap.values().iterator();
+        while (it.hasNext()) {
+            ColumnValueRange range = it.next();
+            range.preEvaluateWithDict((Dictionary<String>) cubeSegment.getDictionary(range.getColumn()));
+            if (range.satisfyAll())
+                it.remove();
+            else if (range.satisfyNone())
+                return null;
         }
+
         return rangeMap.values();
     }