You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2016/03/29 12:35:45 UTC
[1/2] kylin git commit: KYLIN-1543 Add FilterResultCache of last
record
Repository: kylin
Updated Branches:
refs/heads/master be30f4bef -> 094510cf3
KYLIN-1543 Add FilterResultCache of last record
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/094510cf
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/094510cf
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/094510cf
Branch: refs/heads/master
Commit: 094510cf3aded8021e73de631e614ac735f1497e
Parents: 6e2afbb
Author: Li Yang <li...@apache.org>
Authored: Tue Mar 29 18:32:40 2016 +0800
Committer: Li Yang <li...@apache.org>
Committed: Tue Mar 29 18:35:03 2016 +0800
----------------------------------------------------------------------
.../apache/kylin/gridtable/GTFilterScanner.java | 95 ++++++++++++++++++-
.../kylin/gridtable/DictGridTableTest.java | 96 ++++++++++++++++++--
2 files changed, 179 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/094510cf/core-cube/src/main/java/org/apache/kylin/gridtable/GTFilterScanner.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/gridtable/GTFilterScanner.java b/core-cube/src/main/java/org/apache/kylin/gridtable/GTFilterScanner.java
index 8017ca0..69e18ed 100644
--- a/core-cube/src/main/java/org/apache/kylin/gridtable/GTFilterScanner.java
+++ b/core-cube/src/main/java/org/apache/kylin/gridtable/GTFilterScanner.java
@@ -19,10 +19,15 @@
package org.apache.kylin.gridtable;
import java.io.IOException;
+import java.util.BitSet;
+import java.util.HashSet;
import java.util.Iterator;
import java.util.NoSuchElementException;
+import java.util.Set;
import org.apache.kylin.common.util.ByteArray;
+import org.apache.kylin.common.util.BytesUtil;
+import org.apache.kylin.common.util.ImmutableBitSet;
import org.apache.kylin.metadata.filter.IFilterCodeSystem;
import org.apache.kylin.metadata.filter.TupleFilter;
import org.apache.kylin.metadata.model.TblColRef;
@@ -32,6 +37,7 @@ public class GTFilterScanner implements IGTScanner {
final private IGTScanner inputScanner;
final private TupleFilter filter;
+ final private IFilterCodeSystem<ByteArray> filterCodeSystem;
final private IEvaluatableTuple oneTuple; // avoid instance creation
private GTRecord next = null;
@@ -39,6 +45,7 @@ public class GTFilterScanner implements IGTScanner {
public GTFilterScanner(IGTScanner inputScanner, GTScanRequest req) throws IOException {
this.inputScanner = inputScanner;
this.filter = req.getFilterPushDown();
+ this.filterCodeSystem = GTUtil.wrap(getInfo().codeSystem.getComparator());
this.oneTuple = new IEvaluatableTuple() {
@Override
public Object getValue(TblColRef col) {
@@ -70,17 +77,16 @@ public class GTFilterScanner implements IGTScanner {
return new Iterator<GTRecord>() {
private Iterator<GTRecord> inputIterator = inputScanner.iterator();
- private IFilterCodeSystem<ByteArray> filterCodeSystem = GTUtil.wrap(getInfo().codeSystem.getComparator());
+ private FilterResultCache resultCache = new FilterResultCache(getInfo(), filter);
@Override
public boolean hasNext() {
if (next != null)
return true;
-
while (inputIterator.hasNext()) {
next = inputIterator.next();
- if (filter != null && filter.evaluate(oneTuple, filterCodeSystem) == false) {
+ if (!evaluate()) {
continue;
}
return true;
@@ -89,6 +95,20 @@ public class GTFilterScanner implements IGTScanner {
return false;
}
+ private boolean evaluate() {
+ if (filter == null)
+ return true;
+
+ // 'next' and 'oneTuple' are referring to the same record
+ boolean[] cachedResult = resultCache.checkCache(next);
+ if (cachedResult != null)
+ return cachedResult[0];
+
+ boolean result = filter.evaluate(oneTuple, filterCodeSystem);
+ resultCache.setLastResult(result);
+ return result;
+ }
+
@Override
public GTRecord next() {
// fetch next record
@@ -110,4 +130,73 @@ public class GTFilterScanner implements IGTScanner {
};
}
+
+ // cache the last one input and result, can reuse because rowkey are ordered, and same input could come in small group
+ static class FilterResultCache {
+ static final int CHECKPOINT = 10000;
+ static final double HIT_RATE_THRESHOLD = 0.5;
+ static boolean ENABLED = false; // for debug
+
+ boolean enabled = ENABLED;
+ ImmutableBitSet colsInFilter;
+ int count;
+ int hit;
+ byte[] lastValues;
+ boolean[] lastResult;
+
+ public FilterResultCache(GTInfo info, TupleFilter filter) {
+ colsInFilter = collectColumnsInFilter(filter);
+ lastValues = new byte[info.getMaxColumnLength(colsInFilter)];
+ lastResult = new boolean[1];
+ }
+
+ public boolean[] checkCache(GTRecord record) {
+ if (!enabled)
+ return null;
+
+ count++;
+
+ // disable the cache if the hit rate is bad
+ if (count == CHECKPOINT) {
+ if ((double) hit / (double) count < HIT_RATE_THRESHOLD) {
+ enabled = false;
+ }
+ }
+
+ boolean match = count > 1;
+ int p = 0;
+ for (int i = 0; i < colsInFilter.trueBitCount(); i++) {
+ int c = colsInFilter.trueBitAt(i);
+ ByteArray col = record.get(c);
+ if (match) {
+ match = BytesUtil.compareBytes(col.array(), col.offset(), lastValues, p, col.length()) == 0;
+ }
+ if (!match) {
+ System.arraycopy(col.array(), col.offset(), lastValues, p, col.length());
+ }
+ p += col.length();
+ }
+
+ if (match) {
+ hit++;
+ return lastResult;
+ } else {
+ return null;
+ }
+ }
+
+ public void setLastResult(boolean evalResult) {
+ lastResult[0] = evalResult;
+ }
+
+ private ImmutableBitSet collectColumnsInFilter(TupleFilter filter) {
+ Set<TblColRef> columnsInFilter = new HashSet<TblColRef>();
+ TupleFilter.collectColumns(filter, columnsInFilter);
+ BitSet result = new BitSet();
+ for (TblColRef col : columnsInFilter)
+ result.set(col.getColumnDesc().getZeroBasedIndex());
+ return new ImmutableBitSet(result);
+ }
+
+ }
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/094510cf/core-cube/src/test/java/org/apache/kylin/gridtable/DictGridTableTest.java
----------------------------------------------------------------------
diff --git a/core-cube/src/test/java/org/apache/kylin/gridtable/DictGridTableTest.java b/core-cube/src/test/java/org/apache/kylin/gridtable/DictGridTableTest.java
index 517299f..b1b5ee9 100644
--- a/core-cube/src/test/java/org/apache/kylin/gridtable/DictGridTableTest.java
+++ b/core-cube/src/test/java/org/apache/kylin/gridtable/DictGridTableTest.java
@@ -37,6 +37,7 @@ import org.apache.kylin.dict.TrieDictionaryBuilder;
import org.apache.kylin.dimension.Dictionary;
import org.apache.kylin.dimension.DictionaryDimEnc;
import org.apache.kylin.dimension.DimensionEncoding;
+import org.apache.kylin.gridtable.GTFilterScanner.FilterResultCache;
import org.apache.kylin.gridtable.GTInfo.Builder;
import org.apache.kylin.gridtable.memstore.GTSimpleMemStore;
import org.apache.kylin.metadata.datatype.DataType;
@@ -232,15 +233,15 @@ public class DictGridTableTest {
@Test
public void verifyFirstRow() throws IOException {
- doScanAndVerify(table, new GTScanRequest(table.getInfo(), null, null, null), "[1421193600000, 30, Yang, 10, 10.5]",//
- "[1421193600000, 30, Luke, 10, 10.5]",//
- "[1421280000000, 20, Dong, 10, 10.5]",//
- "[1421280000000, 20, Jason, 10, 10.5]",//
- "[1421280000000, 30, Xu, 10, 10.5]",//
- "[1421366400000, 20, Mahone, 10, 10.5]",//
- "[1421366400000, 20, Qianhao, 10, 10.5]",//
- "[1421366400000, 30, George, 10, 10.5]",//
- "[1421366400000, 30, Shaofeng, 10, 10.5]",//
+ doScanAndVerify(table, new GTScanRequest(table.getInfo(), null, null, null), "[1421193600000, 30, Yang, 10, 10.5]", //
+ "[1421193600000, 30, Luke, 10, 10.5]", //
+ "[1421280000000, 20, Dong, 10, 10.5]", //
+ "[1421280000000, 20, Jason, 10, 10.5]", //
+ "[1421280000000, 30, Xu, 10, 10.5]", //
+ "[1421366400000, 20, Mahone, 10, 10.5]", //
+ "[1421366400000, 20, Qianhao, 10, 10.5]", //
+ "[1421366400000, 30, George, 10, 10.5]", //
+ "[1421366400000, 30, Shaofeng, 10, 10.5]", //
"[1421452800000, 10, Kejia, 10, 10.5]");
}
@@ -289,6 +290,39 @@ public class DictGridTableTest {
}
@Test
+ public void testFilterScannerPerf() throws IOException {
+ GridTable table = newTestPerfTable();
+ GTInfo info = table.getInfo();
+
+ CompareTupleFilter fComp1 = compare(info.colRef(0), FilterOperatorEnum.GT, enc(info, 0, "2015-01-14"));
+ CompareTupleFilter fComp2 = compare(info.colRef(1), FilterOperatorEnum.GT, enc(info, 1, "10"));
+ LogicalTupleFilter filter = and(fComp1, fComp2);
+
+ FilterResultCache.ENABLED = false;
+ testFilterScannerPerfInner(table, info, filter);
+ FilterResultCache.ENABLED = true;
+ testFilterScannerPerfInner(table, info, filter);
+ FilterResultCache.ENABLED = false;
+ testFilterScannerPerfInner(table, info, filter);
+ FilterResultCache.ENABLED = true;
+ testFilterScannerPerfInner(table, info, filter);
+ }
+
+ @SuppressWarnings("unused")
+ private void testFilterScannerPerfInner(GridTable table, GTInfo info, LogicalTupleFilter filter) throws IOException {
+ long start = System.currentTimeMillis();
+ GTScanRequest req = new GTScanRequest(info, null, null, filter);
+ IGTScanner scanner = table.scan(req);
+ int i = 0;
+ for (GTRecord r : scanner) {
+ i++;
+ }
+ scanner.close();
+ long end = System.currentTimeMillis();
+ System.out.println((end - start) + "ms with filter cache enabled=" + FilterResultCache.ENABLED + ", " + i + " rows");
+ }
+
+ @Test
public void verifyConvertFilterConstants1() {
GTInfo info = table.getInfo();
@@ -448,6 +482,50 @@ public class DictGridTableTest {
return table;
}
+ static GridTable newTestPerfTable() throws IOException {
+ GTInfo info = newInfo();
+ GTSimpleMemStore store = new GTSimpleMemStore(info);
+ GridTable table = new GridTable(info, store);
+
+ GTRecord r = new GTRecord(table.getInfo());
+ GTBuilder builder = table.rebuild();
+
+ for (int i = 0; i < 100000; i++) {
+ for (int j = 0; j < 10; j++)
+ builder.write(r.setValues("2015-01-14", "30", "Yang", new LongMutable(10), new BigDecimal("10.5")));
+
+ for (int j = 0; j < 10; j++)
+ builder.write(r.setValues("2015-01-14", "30", "Luke", new LongMutable(10), new BigDecimal("10.5")));
+
+ for (int j = 0; j < 10; j++)
+ builder.write(r.setValues("2015-01-15", "20", "Dong", new LongMutable(10), new BigDecimal("10.5")));
+
+ for (int j = 0; j < 10; j++)
+ builder.write(r.setValues("2015-01-15", "20", "Jason", new LongMutable(10), new BigDecimal("10.5")));
+
+ for (int j = 0; j < 10; j++)
+ builder.write(r.setValues("2015-01-15", "30", "Xu", new LongMutable(10), new BigDecimal("10.5")));
+
+ for (int j = 0; j < 10; j++)
+ builder.write(r.setValues("2015-01-16", "20", "Mahone", new LongMutable(10), new BigDecimal("10.5")));
+
+ for (int j = 0; j < 10; j++)
+ builder.write(r.setValues("2015-01-16", "20", "Qianhao", new LongMutable(10), new BigDecimal("10.5")));
+
+ for (int j = 0; j < 10; j++)
+ builder.write(r.setValues("2015-01-16", "30", "George", new LongMutable(10), new BigDecimal("10.5")));
+
+ for (int j = 0; j < 10; j++)
+ builder.write(r.setValues("2015-01-16", "30", "Shaofeng", new LongMutable(10), new BigDecimal("10.5")));
+
+ for (int j = 0; j < 10; j++)
+ builder.write(r.setValues("2015-01-17", "10", "Kejia", new LongMutable(10), new BigDecimal("10.5")));
+ }
+ builder.close();
+
+ return table;
+ }
+
static GTInfo newInfo() {
Builder builder = GTInfo.builder();
builder.setCodeSystem(newDictCodeSystem());
[2/2] kylin git commit: KYLIN-1543 Minor refactor
Posted by li...@apache.org.
KYLIN-1543 Minor refactor
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/6e2afbb9
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/6e2afbb9
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/6e2afbb9
Branch: refs/heads/master
Commit: 6e2afbb98d7670dc67ca7c6d38a3e199523da3ff
Parents: be30f4b
Author: Li Yang <li...@apache.org>
Authored: Tue Mar 29 14:36:01 2016 +0800
Committer: Li Yang <li...@apache.org>
Committed: Tue Mar 29 18:35:03 2016 +0800
----------------------------------------------------------------------
.../src/main/java/org/apache/kylin/gridtable/GTFilterScanner.java | 2 +-
.../org/apache/kylin/storage/hbase/cube/v2/HBaseReadonlyStore.java | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/6e2afbb9/core-cube/src/main/java/org/apache/kylin/gridtable/GTFilterScanner.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/gridtable/GTFilterScanner.java b/core-cube/src/main/java/org/apache/kylin/gridtable/GTFilterScanner.java
index 59f1df7..8017ca0 100644
--- a/core-cube/src/main/java/org/apache/kylin/gridtable/GTFilterScanner.java
+++ b/core-cube/src/main/java/org/apache/kylin/gridtable/GTFilterScanner.java
@@ -70,13 +70,13 @@ public class GTFilterScanner implements IGTScanner {
return new Iterator<GTRecord>() {
private Iterator<GTRecord> inputIterator = inputScanner.iterator();
+ private IFilterCodeSystem<ByteArray> filterCodeSystem = GTUtil.wrap(getInfo().codeSystem.getComparator());
@Override
public boolean hasNext() {
if (next != null)
return true;
- IFilterCodeSystem<ByteArray> filterCodeSystem = GTUtil.wrap(getInfo().codeSystem.getComparator());
while (inputIterator.hasNext()) {
next = inputIterator.next();
http://git-wip-us.apache.org/repos/asf/kylin/blob/6e2afbb9/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/HBaseReadonlyStore.java
----------------------------------------------------------------------
diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/HBaseReadonlyStore.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/HBaseReadonlyStore.java
index ff7498b..850b3d9 100644
--- a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/HBaseReadonlyStore.java
+++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/HBaseReadonlyStore.java
@@ -114,7 +114,7 @@ public class HBaseReadonlyStore implements IGTStore {
// metrics
for (int i = 0; i < hbaseColumns.size(); i++) {
Pair<byte[], byte[]> hbaseColumn = hbaseColumns.get(i);
- Cell cell = HBaseReadonlyStore.findCell(oneRow, hbaseColumn.getFirst(), hbaseColumn.getSecond());
+ Cell cell = findCell(oneRow, hbaseColumn.getFirst(), hbaseColumn.getSecond());
Preconditions.checkNotNull(cell);
buf = byteBuffer(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
oneRecord.loadColumns(hbaseColumnsToGT.get(i), buf);