You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by lu...@apache.org on 2015/09/06 09:59:51 UTC
[27/50] [abbrv] incubator-kylin git commit: KYLIN-740 fix many IN clause performance issue
KYLIN-740 fix many IN clause performance issue
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/92b111c6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/92b111c6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/92b111c6
Branch: refs/heads/0.7
Commit: 92b111c61c05e70920a505036a51c1112fb861a5
Parents: ec41bf0
Author: honma <ho...@ebay.com>
Authored: Wed Aug 26 15:30:13 2015 +0800
Committer: Luke Han <lu...@apache.org>
Committed: Sun Sep 6 14:37:58 2015 +0800
----------------------------------------------------------------------
.../kylin/storage/hbase/CubeStorageEngine.java | 17 +++++++++++--
.../storage/hbase/FuzzyValueCombination.java | 26 +++++++++-----------
.../kylin/storage/hbase/HBaseKeyRange.java | 2 +-
.../hbase/FuzzyValueCombinationTest.java | 2 +-
4 files changed, 28 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/92b111c6/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java b/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java
index 5fb6f0c..8eb7bcb 100644
--- a/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java
+++ b/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java
@@ -22,6 +22,7 @@ import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collection;
import java.util.Collections;
+import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
@@ -29,6 +30,7 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.TreeSet;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.kylin.common.persistence.HBaseConnection;
@@ -557,8 +559,19 @@ public class CubeStorageEngine implements IStorageEngine {
byte[] stopKey = keyRange.getStopKey();
long partitionColumnStartDate = Long.MAX_VALUE;
long partitionColumnEndDate = 0;
- List<Pair<byte[], byte[]>> newFuzzyKeys = new ArrayList<Pair<byte[], byte[]>>(mergeSize);
+
List<Collection<ColumnValueRange>> newFlatOrAndFilter = Lists.newLinkedList();
+ TreeSet<Pair<byte[], byte[]>> newFuzzyKeys = new TreeSet<>(new Comparator<Pair<byte[], byte[]>>() {
+ @Override
+ public int compare(Pair<byte[], byte[]> o1, Pair<byte[], byte[]> o2) {
+ int partialResult = Bytes.compareTo(o1.getFirst(), o2.getFirst());
+ if (partialResult != 0) {
+ return partialResult;
+ } else {
+ return Bytes.compareTo(o1.getSecond(), o2.getSecond());
+ }
+ }
+ });
boolean hasNonFuzzyRange = false;
for (int k = from; k <= to; k++) {
@@ -584,7 +597,7 @@ public class CubeStorageEngine implements IStorageEngine {
partitionColumnStartDate = (partitionColumnStartDate == Long.MAX_VALUE) ? 0 : partitionColumnStartDate;
partitionColumnEndDate = (partitionColumnEndDate == 0) ? Long.MAX_VALUE : partitionColumnEndDate;
- keyRange = new HBaseKeyRange(cubeSegment, cuboid, startKey, stopKey, newFuzzyKeys, newFlatOrAndFilter, partitionColumnStartDate, partitionColumnEndDate);
+ keyRange = new HBaseKeyRange(cubeSegment, cuboid, startKey, stopKey, Lists.newArrayList(newFuzzyKeys), newFlatOrAndFilter, partitionColumnStartDate, partitionColumnEndDate);
}
return keyRange;
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/92b111c6/storage/src/main/java/org/apache/kylin/storage/hbase/FuzzyValueCombination.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/hbase/FuzzyValueCombination.java b/storage/src/main/java/org/apache/kylin/storage/hbase/FuzzyValueCombination.java
index d0208bd..616a232 100644
--- a/storage/src/main/java/org/apache/kylin/storage/hbase/FuzzyValueCombination.java
+++ b/storage/src/main/java/org/apache/kylin/storage/hbase/FuzzyValueCombination.java
@@ -52,8 +52,14 @@ public class FuzzyValueCombination {
public static List<Map<TblColRef, String>> calculate(Map<TblColRef, Set<String>> fuzzyValues, long cap) {
Dim[] dims = toDims(fuzzyValues);
- capDims(dims, cap);
- return combination(dims);
+ // If a query has many IN clauses and each IN clause has many values, it can easily generate
+ // thousands of fuzzy keys. With that many fuzzy keys, scan performance becomes bottlenecked
+ // on them, so simply abandon all fuzzy keys in this case.
+ if (exceedCap(dims, cap)) {
+ return Lists.newArrayList();
+ } else {
+ return combination(dims);
+ }
}
@SuppressWarnings("unchecked")
@@ -115,21 +121,11 @@ public class FuzzyValueCombination {
return dims;
}
- private static void capDims(Dim[] dims, long cap) {
- Arrays.sort(dims, new Comparator<Dim>() {
- @Override
- public int compare(Dim o1, Dim o2) {
- return -(o1.values.size() - o2.values.size());
- }
- });
-
- for (Dim dim : dims) {
- if (combCount(dims) < cap)
- break;
- dim.values = Collections.emptySet();
- }
+ private static boolean exceedCap(Dim[] dims, long cap) {
+ return combCount(dims) > cap;
}
+
private static long combCount(Dim[] dims) {
long count = 1;
for (Dim dim : dims) {
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/92b111c6/storage/src/main/java/org/apache/kylin/storage/hbase/HBaseKeyRange.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/hbase/HBaseKeyRange.java b/storage/src/main/java/org/apache/kylin/storage/hbase/HBaseKeyRange.java
index 1db277d..cdfafaf 100644
--- a/storage/src/main/java/org/apache/kylin/storage/hbase/HBaseKeyRange.java
+++ b/storage/src/main/java/org/apache/kylin/storage/hbase/HBaseKeyRange.java
@@ -52,7 +52,7 @@ public class HBaseKeyRange implements Comparable<HBaseKeyRange> {
private static final Logger logger = LoggerFactory.getLogger(HBaseKeyRange.class);
- private static final int FUZZY_VALUE_CAP = 20;
+ private static final int FUZZY_VALUE_CAP = 100;
private static final byte[] ZERO_TAIL_BYTES = new byte[] { 0 };
private final CubeSegment cubeSeg;
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/92b111c6/storage/src/test/java/org/apache/kylin/storage/hbase/FuzzyValueCombinationTest.java
----------------------------------------------------------------------
diff --git a/storage/src/test/java/org/apache/kylin/storage/hbase/FuzzyValueCombinationTest.java b/storage/src/test/java/org/apache/kylin/storage/hbase/FuzzyValueCombinationTest.java
index 46f6431..e803116 100644
--- a/storage/src/test/java/org/apache/kylin/storage/hbase/FuzzyValueCombinationTest.java
+++ b/storage/src/test/java/org/apache/kylin/storage/hbase/FuzzyValueCombinationTest.java
@@ -100,7 +100,7 @@ public class FuzzyValueCombinationTest {
for (Map<TblColRef, String> item : result) {
System.out.println(item);
}
- assertEquals(9, result.size());
+ assertEquals(0, result.size());
}
private static TblColRef col(int i, TableDesc t) {