You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2015/06/11 12:23:57 UTC
incubator-kylin git commit: KYLIN-752 Improved IN clause performance
Repository: incubator-kylin
Updated Branches:
refs/heads/0.8 80cccad50 -> 3e3aebb58
KYLIN-752 Improved IN clause performance
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/3e3aebb5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/3e3aebb5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/3e3aebb5
Branch: refs/heads/0.8
Commit: 3e3aebb589199a30e7201c8780edfc0e1cf5cb53
Parents: 80cccad
Author: Li, Yang <ya...@ebay.com>
Authored: Thu Jun 11 18:23:45 2015 +0800
Committer: Li, Yang <ya...@ebay.com>
Committed: Thu Jun 11 18:23:45 2015 +0800
----------------------------------------------------------------------
.../java/org/apache/kylin/dict/Dictionary.java | 30 +++++++------
.../kylin/storage/hbase/ColumnValueRange.java | 45 +++++++++++++++++++-
.../kylin/storage/hbase/CubeStorageEngine.java | 36 +++++++++++++---
3 files changed, 89 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/3e3aebb5/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
----------------------------------------------------------------------
diff --git a/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java b/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
index cdc42b4..be2429c 100644
--- a/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
+++ b/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
@@ -67,23 +67,24 @@ abstract public class Dictionary<T> implements Writable {
/**
* Convenient form of <code>getIdFromValue(value, 0)</code>
*/
- final public int getIdFromValue(T value) {
+ final public int getIdFromValue(T value) throws IllegalArgumentException {
return getIdFromValue(value, 0);
}
/**
* Returns the ID integer of given value. In case of not found
- * - if roundingFlag=0, throw IllegalArgumentException;
- * - if roundingFlag<0, the closest smaller ID integer if exist;
- * - if roundingFlag>0, the closest bigger ID integer if exist.
- *
+ * <p>
+ * - if roundingFlag=0, throw IllegalArgumentException; <br>
+ * - if roundingFlag<0, the closest smaller ID integer if exist; <br>
+ * - if roundingFlag>0, the closest bigger ID integer if exist. <br>
+ * <p>
* The implementation often has cache, thus faster than the byte[] version getIdFromValueBytes()
*
* @throws IllegalArgumentException
* if value is not found in dictionary and rounding is off;
* or if rounding cannot find a smaller or bigger ID
*/
- final public int getIdFromValue(T value, int roundingFlag) {
+ final public int getIdFromValue(T value, int roundingFlag) throws IllegalArgumentException {
if (isNullObjectForm(value))
return nullId();
else
@@ -101,7 +102,7 @@ abstract public class Dictionary<T> implements Writable {
* @throws IllegalArgumentException
* if ID is not found in dictionary
*/
- final public T getValueFromId(int id) {
+ final public T getValueFromId(int id) throws IllegalArgumentException {
if (isNullId(id))
return null;
else
@@ -114,23 +115,24 @@ abstract public class Dictionary<T> implements Writable {
* Convenient form of
* <code>getIdFromValueBytes(value, offset, len, 0)</code>
*/
- final public int getIdFromValueBytes(byte[] value, int offset, int len) {
+ final public int getIdFromValueBytes(byte[] value, int offset, int len) throws IllegalArgumentException {
return getIdFromValueBytes(value, offset, len, 0);
}
/**
* A lower level API, return ID integer from raw value bytes. In case of not found
- * - if roundingFlag=0, throw IllegalArgumentException;
- * - if roundingFlag<0, the closest smaller ID integer if exist;
- * - if roundingFlag>0, the closest bigger ID integer if exist.
- *
+ * <p>
+ * - if roundingFlag=0, throw IllegalArgumentException; <br>
+ * - if roundingFlag<0, the closest smaller ID integer if exist; <br>
+ * - if roundingFlag>0, the closest bigger ID integer if exist. <br>
+ * <p>
* Bypassing the cache layer, this could be significantly slower than getIdFromValue(T value).
*
* @throws IllegalArgumentException
* if value is not found in dictionary and rounding is off;
* or if rounding cannot find a smaller or bigger ID
*/
- final public int getIdFromValueBytes(byte[] value, int offset, int len, int roundingFlag) {
+ final public int getIdFromValueBytes(byte[] value, int offset, int len, int roundingFlag) throws IllegalArgumentException {
if (isNullByteForm(value, offset, len))
return nullId();
else
@@ -162,7 +164,7 @@ abstract public class Dictionary<T> implements Writable {
* @throws IllegalArgumentException
* if ID is not found in dictionary
*/
- final public int getValueBytesFromId(int id, byte[] returnValue, int offset) {
+ final public int getValueBytesFromId(int id, byte[] returnValue, int offset) throws IllegalArgumentException {
if (isNullId(id))
return -1;
else
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/3e3aebb5/storage/src/main/java/org/apache/kylin/storage/hbase/ColumnValueRange.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/hbase/ColumnValueRange.java b/storage/src/main/java/org/apache/kylin/storage/hbase/ColumnValueRange.java
index 831bf3d..1a594e1 100644
--- a/storage/src/main/java/org/apache/kylin/storage/hbase/ColumnValueRange.java
+++ b/storage/src/main/java/org/apache/kylin/storage/hbase/ColumnValueRange.java
@@ -20,12 +20,15 @@ package org.apache.kylin.storage.hbase;
import java.util.Collection;
import java.util.HashSet;
+import java.util.Iterator;
import java.util.Set;
-import com.google.common.collect.Sets;
import org.apache.kylin.cube.kv.RowKeyColumnOrder;
-import org.apache.kylin.metadata.model.TblColRef;
+import org.apache.kylin.dict.Dictionary;
import org.apache.kylin.metadata.filter.TupleFilter.FilterOperatorEnum;
+import org.apache.kylin.metadata.model.TblColRef;
+
+import com.google.common.collect.Sets;
/**
*
@@ -163,6 +166,44 @@ public class ColumnValueRange {
return (beginValue == null || order.compare(beginValue, v) <= 0) && (endValue == null || order.compare(v, endValue) <= 0);
}
+ // remove invalid EQ/IN values and round start/end according to dictionary
+ public void preEvaluateWithDict(Dictionary<String> dict) {
+ if (dict == null)
+ return;
+
+ if (equalValues != null) {
+ Iterator<String> it = equalValues.iterator();
+ while (it.hasNext()) {
+ String v = it.next();
+ try {
+ dict.getIdFromValue(v);
+ } catch (IllegalArgumentException e) {
+ // value not in dictionary
+ it.remove();
+ }
+ }
+ refreshBeginEndFromEquals();
+ }
+
+ if (beginValue != null) {
+ try {
+ beginValue = dict.getValueFromId(dict.getIdFromValue(beginValue, -1));
+ } catch (IllegalArgumentException e) {
+ // value is less than the smallest in dictionary
+ beginValue = null;
+ }
+ }
+
+ if (endValue != null) {
+ try {
+ endValue = dict.getValueFromId(dict.getIdFromValue(endValue, 1));
+ } catch (IllegalArgumentException e) {
+ // value is greater than the biggest in dictionary
+ endValue = null;
+ }
+ }
+ }
+
public String toString() {
if (equalValues == null) {
return column.getName() + " between " + beginValue + " and " + endValue;
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/3e3aebb5/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java b/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java
index 4d143d5..d5132bb 100644
--- a/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java
+++ b/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java
@@ -18,10 +18,18 @@
package org.apache.kylin.storage.hbase;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Range;
-import com.google.common.collect.Sets;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.kylin.common.persistence.HBaseConnection;
import org.apache.kylin.common.util.Bytes;
@@ -35,6 +43,7 @@ import org.apache.kylin.cube.model.CubeDesc;
import org.apache.kylin.cube.model.CubeDesc.DeriveInfo;
import org.apache.kylin.cube.model.HBaseColumnDesc;
import org.apache.kylin.cube.model.HBaseMappingDesc;
+import org.apache.kylin.dict.Dictionary;
import org.apache.kylin.dict.lookup.LookupStringTable;
import org.apache.kylin.metadata.filter.ColumnTupleFilter;
import org.apache.kylin.metadata.filter.CompareTupleFilter;
@@ -54,7 +63,10 @@ import org.apache.kylin.storage.tuple.TupleInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.util.*;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Range;
+import com.google.common.collect.Sets;
/**
* @author xjiang, yangli9
@@ -479,6 +491,8 @@ public class CubeStorageEngine implements ICachableStorageEngine {
return orAndRanges;
}
+ // return empty collection to mean true; return null to mean false
+ @SuppressWarnings("unchecked")
private Collection<ColumnValueRange> translateToAndDimRanges(List<? extends TupleFilter> andFilters, CubeSegment cubeSegment) {
Map<TblColRef, ColumnValueRange> rangeMap = new HashMap<TblColRef, ColumnValueRange>();
for (TupleFilter filter : andFilters) {
@@ -491,11 +505,21 @@ public class CubeStorageEngine implements ICachableStorageEngine {
continue;
}
- @SuppressWarnings("unchecked")
ColumnValueRange range = new ColumnValueRange(comp.getColumn(), (Collection<String>) comp.getValues(), comp.getOperator());
andMerge(range, rangeMap);
+ }
+ // a little pre-evaluation to remove invalid EQ/IN values and round start/end according to dictionary
+ Iterator<ColumnValueRange> it = rangeMap.values().iterator();
+ while (it.hasNext()) {
+ ColumnValueRange range = it.next();
+ range.preEvaluateWithDict((Dictionary<String>) cubeSegment.getDictionary(range.getColumn()));
+ if (range.satisfyAll())
+ it.remove();
+ else if (range.satisfyNone())
+ return null;
}
+
return rangeMap.values();
}