You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2016/03/29 12:35:45 UTC

[1/2] kylin git commit: KYLIN-1543 Add FilterResultCache of last record

Repository: kylin
Updated Branches:
  refs/heads/master be30f4bef -> 094510cf3


KYLIN-1543 Add FilterResultCache of last record


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/094510cf
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/094510cf
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/094510cf

Branch: refs/heads/master
Commit: 094510cf3aded8021e73de631e614ac735f1497e
Parents: 6e2afbb
Author: Li Yang <li...@apache.org>
Authored: Tue Mar 29 18:32:40 2016 +0800
Committer: Li Yang <li...@apache.org>
Committed: Tue Mar 29 18:35:03 2016 +0800

----------------------------------------------------------------------
 .../apache/kylin/gridtable/GTFilterScanner.java | 95 ++++++++++++++++++-
 .../kylin/gridtable/DictGridTableTest.java      | 96 ++++++++++++++++++--
 2 files changed, 179 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/094510cf/core-cube/src/main/java/org/apache/kylin/gridtable/GTFilterScanner.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/gridtable/GTFilterScanner.java b/core-cube/src/main/java/org/apache/kylin/gridtable/GTFilterScanner.java
index 8017ca0..69e18ed 100644
--- a/core-cube/src/main/java/org/apache/kylin/gridtable/GTFilterScanner.java
+++ b/core-cube/src/main/java/org/apache/kylin/gridtable/GTFilterScanner.java
@@ -19,10 +19,15 @@
 package org.apache.kylin.gridtable;
 
 import java.io.IOException;
+import java.util.BitSet;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.NoSuchElementException;
+import java.util.Set;
 
 import org.apache.kylin.common.util.ByteArray;
+import org.apache.kylin.common.util.BytesUtil;
+import org.apache.kylin.common.util.ImmutableBitSet;
 import org.apache.kylin.metadata.filter.IFilterCodeSystem;
 import org.apache.kylin.metadata.filter.TupleFilter;
 import org.apache.kylin.metadata.model.TblColRef;
@@ -32,6 +37,7 @@ public class GTFilterScanner implements IGTScanner {
 
     final private IGTScanner inputScanner;
     final private TupleFilter filter;
+    final private IFilterCodeSystem<ByteArray> filterCodeSystem;
     final private IEvaluatableTuple oneTuple; // avoid instance creation
 
     private GTRecord next = null;
@@ -39,6 +45,7 @@ public class GTFilterScanner implements IGTScanner {
     public GTFilterScanner(IGTScanner inputScanner, GTScanRequest req) throws IOException {
         this.inputScanner = inputScanner;
         this.filter = req.getFilterPushDown();
+        this.filterCodeSystem = GTUtil.wrap(getInfo().codeSystem.getComparator());
         this.oneTuple = new IEvaluatableTuple() {
             @Override
             public Object getValue(TblColRef col) {
@@ -70,17 +77,16 @@ public class GTFilterScanner implements IGTScanner {
         return new Iterator<GTRecord>() {
 
             private Iterator<GTRecord> inputIterator = inputScanner.iterator();
-            private IFilterCodeSystem<ByteArray> filterCodeSystem = GTUtil.wrap(getInfo().codeSystem.getComparator());
+            private FilterResultCache resultCache = new FilterResultCache(getInfo(), filter);
 
             @Override
             public boolean hasNext() {
                 if (next != null)
                     return true;
 
-
                 while (inputIterator.hasNext()) {
                     next = inputIterator.next();
-                    if (filter != null && filter.evaluate(oneTuple, filterCodeSystem) == false) {
+                    if (!evaluate()) {
                         continue;
                     }
                     return true;
@@ -89,6 +95,20 @@ public class GTFilterScanner implements IGTScanner {
                 return false;
             }
 
+            private boolean evaluate() {
+                if (filter == null)
+                    return true;
+                
+                // 'next' and 'oneTuple' refer to the same record
+                boolean[] cachedResult = resultCache.checkCache(next);
+                if (cachedResult != null)
+                    return cachedResult[0];
+                
+                boolean result = filter.evaluate(oneTuple, filterCodeSystem);
+                resultCache.setLastResult(result);
+                return result;
+            }
+
             @Override
             public GTRecord next() {
                 // fetch next record
@@ -110,4 +130,73 @@ public class GTFilterScanner implements IGTScanner {
 
         };
     }
+
+    // caches the last input and its result; reusable because rowkeys are ordered, so identical inputs tend to arrive in small groups
+    static class FilterResultCache {
+        static final int CHECKPOINT = 10000;
+        static final double HIT_RATE_THRESHOLD = 0.5;
+        static boolean ENABLED = false; // for debugging: initial 'enabled' value of each new cache instance
+
+        boolean enabled = ENABLED;
+        ImmutableBitSet colsInFilter;
+        int count;
+        int hit;
+        byte[] lastValues;
+        boolean[] lastResult;
+
+        public FilterResultCache(GTInfo info, TupleFilter filter) {
+            colsInFilter = collectColumnsInFilter(filter);
+            lastValues = new byte[info.getMaxColumnLength(colsInFilter)];
+            lastResult = new boolean[1];
+        }
+
+        public boolean[] checkCache(GTRecord record) {
+            if (!enabled)
+                return null;
+            
+            count++;
+            
+            // disable the cache if the hit rate is bad
+            if (count == CHECKPOINT) {
+                if ((double) hit / (double) count < HIT_RATE_THRESHOLD) {
+                    enabled = false;
+                }
+            }
+            
+            boolean match = count > 1;
+            int p = 0;
+            for (int i = 0; i < colsInFilter.trueBitCount(); i++) {
+                int c = colsInFilter.trueBitAt(i);
+                ByteArray col = record.get(c);
+                if (match) {
+                    match = BytesUtil.compareBytes(col.array(), col.offset(), lastValues, p, col.length()) == 0;
+                }
+                if (!match) {
+                    System.arraycopy(col.array(), col.offset(), lastValues, p, col.length());
+                }
+                p += col.length();
+            }
+            
+            if (match) {
+                hit++;
+                return lastResult;
+            } else {
+                return null;
+            }
+        }
+
+        public void setLastResult(boolean evalResult) {
+            lastResult[0] = evalResult;
+        }
+
+        private ImmutableBitSet collectColumnsInFilter(TupleFilter filter) {
+            Set<TblColRef> columnsInFilter = new HashSet<TblColRef>();
+            TupleFilter.collectColumns(filter, columnsInFilter);
+            BitSet result = new BitSet();
+            for (TblColRef col : columnsInFilter)
+                result.set(col.getColumnDesc().getZeroBasedIndex());
+            return new ImmutableBitSet(result);
+        }
+
+    }
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/094510cf/core-cube/src/test/java/org/apache/kylin/gridtable/DictGridTableTest.java
----------------------------------------------------------------------
diff --git a/core-cube/src/test/java/org/apache/kylin/gridtable/DictGridTableTest.java b/core-cube/src/test/java/org/apache/kylin/gridtable/DictGridTableTest.java
index 517299f..b1b5ee9 100644
--- a/core-cube/src/test/java/org/apache/kylin/gridtable/DictGridTableTest.java
+++ b/core-cube/src/test/java/org/apache/kylin/gridtable/DictGridTableTest.java
@@ -37,6 +37,7 @@ import org.apache.kylin.dict.TrieDictionaryBuilder;
 import org.apache.kylin.dimension.Dictionary;
 import org.apache.kylin.dimension.DictionaryDimEnc;
 import org.apache.kylin.dimension.DimensionEncoding;
+import org.apache.kylin.gridtable.GTFilterScanner.FilterResultCache;
 import org.apache.kylin.gridtable.GTInfo.Builder;
 import org.apache.kylin.gridtable.memstore.GTSimpleMemStore;
 import org.apache.kylin.metadata.datatype.DataType;
@@ -232,15 +233,15 @@ public class DictGridTableTest {
 
     @Test
     public void verifyFirstRow() throws IOException {
-        doScanAndVerify(table, new GTScanRequest(table.getInfo(), null, null, null), "[1421193600000, 30, Yang, 10, 10.5]",//
-                "[1421193600000, 30, Luke, 10, 10.5]",//
-                "[1421280000000, 20, Dong, 10, 10.5]",//
-                "[1421280000000, 20, Jason, 10, 10.5]",//
-                "[1421280000000, 30, Xu, 10, 10.5]",//
-                "[1421366400000, 20, Mahone, 10, 10.5]",//
-                "[1421366400000, 20, Qianhao, 10, 10.5]",//
-                "[1421366400000, 30, George, 10, 10.5]",//
-                "[1421366400000, 30, Shaofeng, 10, 10.5]",//
+        doScanAndVerify(table, new GTScanRequest(table.getInfo(), null, null, null), "[1421193600000, 30, Yang, 10, 10.5]", //
+                "[1421193600000, 30, Luke, 10, 10.5]", //
+                "[1421280000000, 20, Dong, 10, 10.5]", //
+                "[1421280000000, 20, Jason, 10, 10.5]", //
+                "[1421280000000, 30, Xu, 10, 10.5]", //
+                "[1421366400000, 20, Mahone, 10, 10.5]", //
+                "[1421366400000, 20, Qianhao, 10, 10.5]", //
+                "[1421366400000, 30, George, 10, 10.5]", //
+                "[1421366400000, 30, Shaofeng, 10, 10.5]", //
                 "[1421452800000, 10, Kejia, 10, 10.5]");
     }
 
@@ -289,6 +290,39 @@ public class DictGridTableTest {
     }
 
     @Test
+    public void testFilterScannerPerf() throws IOException {
+        GridTable table = newTestPerfTable();
+        GTInfo info = table.getInfo();
+
+        CompareTupleFilter fComp1 = compare(info.colRef(0), FilterOperatorEnum.GT, enc(info, 0, "2015-01-14"));
+        CompareTupleFilter fComp2 = compare(info.colRef(1), FilterOperatorEnum.GT, enc(info, 1, "10"));
+        LogicalTupleFilter filter = and(fComp1, fComp2);
+
+        FilterResultCache.ENABLED = false;
+        testFilterScannerPerfInner(table, info, filter);
+        FilterResultCache.ENABLED = true;
+        testFilterScannerPerfInner(table, info, filter);
+        FilterResultCache.ENABLED = false;
+        testFilterScannerPerfInner(table, info, filter);
+        FilterResultCache.ENABLED = true;
+        testFilterScannerPerfInner(table, info, filter);
+    }
+
+    @SuppressWarnings("unused")
+    private void testFilterScannerPerfInner(GridTable table, GTInfo info, LogicalTupleFilter filter) throws IOException {
+        long start = System.currentTimeMillis();
+        GTScanRequest req = new GTScanRequest(info, null, null, filter);
+        IGTScanner scanner = table.scan(req);
+        int i = 0;
+        for (GTRecord r : scanner) {
+            i++;
+        }
+        scanner.close();
+        long end = System.currentTimeMillis();
+        System.out.println((end - start) + "ms with filter cache enabled=" + FilterResultCache.ENABLED + ", " + i + " rows");
+    }
+
+    @Test
     public void verifyConvertFilterConstants1() {
         GTInfo info = table.getInfo();
 
@@ -448,6 +482,50 @@ public class DictGridTableTest {
         return table;
     }
 
+    static GridTable newTestPerfTable() throws IOException {
+        GTInfo info = newInfo();
+        GTSimpleMemStore store = new GTSimpleMemStore(info);
+        GridTable table = new GridTable(info, store);
+
+        GTRecord r = new GTRecord(table.getInfo());
+        GTBuilder builder = table.rebuild();
+
+        for (int i = 0; i < 100000; i++) {
+            for (int j = 0; j < 10; j++)
+                builder.write(r.setValues("2015-01-14", "30", "Yang", new LongMutable(10), new BigDecimal("10.5")));
+
+            for (int j = 0; j < 10; j++)
+                builder.write(r.setValues("2015-01-14", "30", "Luke", new LongMutable(10), new BigDecimal("10.5")));
+
+            for (int j = 0; j < 10; j++)
+                builder.write(r.setValues("2015-01-15", "20", "Dong", new LongMutable(10), new BigDecimal("10.5")));
+
+            for (int j = 0; j < 10; j++)
+                builder.write(r.setValues("2015-01-15", "20", "Jason", new LongMutable(10), new BigDecimal("10.5")));
+
+            for (int j = 0; j < 10; j++)
+                builder.write(r.setValues("2015-01-15", "30", "Xu", new LongMutable(10), new BigDecimal("10.5")));
+
+            for (int j = 0; j < 10; j++)
+                builder.write(r.setValues("2015-01-16", "20", "Mahone", new LongMutable(10), new BigDecimal("10.5")));
+
+            for (int j = 0; j < 10; j++)
+                builder.write(r.setValues("2015-01-16", "20", "Qianhao", new LongMutable(10), new BigDecimal("10.5")));
+
+            for (int j = 0; j < 10; j++)
+                builder.write(r.setValues("2015-01-16", "30", "George", new LongMutable(10), new BigDecimal("10.5")));
+
+            for (int j = 0; j < 10; j++)
+                builder.write(r.setValues("2015-01-16", "30", "Shaofeng", new LongMutable(10), new BigDecimal("10.5")));
+
+            for (int j = 0; j < 10; j++)
+                builder.write(r.setValues("2015-01-17", "10", "Kejia", new LongMutable(10), new BigDecimal("10.5")));
+        }
+        builder.close();
+
+        return table;
+    }
+
     static GTInfo newInfo() {
         Builder builder = GTInfo.builder();
         builder.setCodeSystem(newDictCodeSystem());


[2/2] kylin git commit: KYLIN-1543 Minor refactor

Posted by li...@apache.org.
KYLIN-1543 Minor refactor


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/6e2afbb9
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/6e2afbb9
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/6e2afbb9

Branch: refs/heads/master
Commit: 6e2afbb98d7670dc67ca7c6d38a3e199523da3ff
Parents: be30f4b
Author: Li Yang <li...@apache.org>
Authored: Tue Mar 29 14:36:01 2016 +0800
Committer: Li Yang <li...@apache.org>
Committed: Tue Mar 29 18:35:03 2016 +0800

----------------------------------------------------------------------
 .../src/main/java/org/apache/kylin/gridtable/GTFilterScanner.java  | 2 +-
 .../org/apache/kylin/storage/hbase/cube/v2/HBaseReadonlyStore.java | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/6e2afbb9/core-cube/src/main/java/org/apache/kylin/gridtable/GTFilterScanner.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/gridtable/GTFilterScanner.java b/core-cube/src/main/java/org/apache/kylin/gridtable/GTFilterScanner.java
index 59f1df7..8017ca0 100644
--- a/core-cube/src/main/java/org/apache/kylin/gridtable/GTFilterScanner.java
+++ b/core-cube/src/main/java/org/apache/kylin/gridtable/GTFilterScanner.java
@@ -70,13 +70,13 @@ public class GTFilterScanner implements IGTScanner {
         return new Iterator<GTRecord>() {
 
             private Iterator<GTRecord> inputIterator = inputScanner.iterator();
+            private IFilterCodeSystem<ByteArray> filterCodeSystem = GTUtil.wrap(getInfo().codeSystem.getComparator());
 
             @Override
             public boolean hasNext() {
                 if (next != null)
                     return true;
 
-                IFilterCodeSystem<ByteArray> filterCodeSystem = GTUtil.wrap(getInfo().codeSystem.getComparator());
 
                 while (inputIterator.hasNext()) {
                     next = inputIterator.next();

http://git-wip-us.apache.org/repos/asf/kylin/blob/6e2afbb9/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/HBaseReadonlyStore.java
----------------------------------------------------------------------
diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/HBaseReadonlyStore.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/HBaseReadonlyStore.java
index ff7498b..850b3d9 100644
--- a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/HBaseReadonlyStore.java
+++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/HBaseReadonlyStore.java
@@ -114,7 +114,7 @@ public class HBaseReadonlyStore implements IGTStore {
                         // metrics
                         for (int i = 0; i < hbaseColumns.size(); i++) {
                             Pair<byte[], byte[]> hbaseColumn = hbaseColumns.get(i);
-                            Cell cell = HBaseReadonlyStore.findCell(oneRow, hbaseColumn.getFirst(), hbaseColumn.getSecond());
+                            Cell cell = findCell(oneRow, hbaseColumn.getFirst(), hbaseColumn.getSecond());
                             Preconditions.checkNotNull(cell);
                             buf = byteBuffer(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
                             oneRecord.loadColumns(hbaseColumnsToGT.get(i), buf);