You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2015/10/12 07:22:54 UTC

incubator-kylin git commit: minor: calculate the in-memory base cuboid size from heap usage alone; ignore the row-by-row estimate, which is usually several times larger and very inaccurate

Repository: incubator-kylin
Updated Branches:
  refs/heads/2.x-staging 17e637a09 -> 18940e022


minor: calculate the in-memory base cuboid size from heap usage alone; ignore the row-by-row estimate, which is usually several times larger and very inaccurate


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/18940e02
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/18940e02
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/18940e02

Branch: refs/heads/2.x-staging
Commit: 18940e02271801d6dff59faf74457e2e3bc3cead
Parents: 17e637a
Author: Li, Yang <ya...@ebay.com>
Authored: Mon Oct 12 13:22:09 2015 +0800
Committer: Li, Yang <ya...@ebay.com>
Committed: Mon Oct 12 13:22:42 2015 +0800

----------------------------------------------------------------------
 .../cube/inmemcubing/DoggedCubeBuilder.java     |  4 ++--
 .../cube/inmemcubing/InMemCubeBuilder.java      | 20 +++++++++-----------
 2 files changed, 11 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/18940e02/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilder.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilder.java b/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilder.java
index 6882eb9..c05bfaf 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilder.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilder.java
@@ -69,7 +69,7 @@ public class DoggedCubeBuilder extends AbstractInMemCubeBuilder {
     }
 
     private class BuildOnce {
-
+        
         public void build(BlockingQueue<List<String>> input, ICuboidWriter output) throws IOException {
             final List<SplitThread> splits = new ArrayList<SplitThread>();
             final Merger merger = new Merger();
@@ -244,7 +244,7 @@ public class DoggedCubeBuilder extends AbstractInMemCubeBuilder {
 
             logger.debug(splitRowCount + " records went into split #" + nSplit + "; " + systemAvailMB + " MB left, " + reserveMemoryMB + " MB threshold");
 
-            return splitRowCount >= splitRowThreshold || systemAvailMB <= reserveMemoryMB;
+            return splitRowCount >= splitRowThreshold || systemAvailMB <= reserveMemoryMB * 1.5;
         }
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/18940e02/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilder.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilder.java b/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilder.java
index 01cf494..bc01caf 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilder.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilder.java
@@ -86,10 +86,9 @@ public class InMemCubeBuilder extends AbstractInMemCubeBuilder {
 
     private CuboidResult baseResult;
     private Object[] totalSumForSanityCheck;
-    private ICuboidCollector resultCollector;    
+    private ICuboidCollector resultCollector;
     private Map<Integer, Dictionary<String>> topNDisplayColDictMap;
 
-
     public InMemCubeBuilder(CubeDesc cubeDesc, Map<TblColRef, Dictionary<?>> dictionaryMap) {
         super(cubeDesc, dictionaryMap);
         this.cuboidScheduler = new CuboidScheduler(cubeDesc);
@@ -100,7 +99,7 @@ public class InMemCubeBuilder extends AbstractInMemCubeBuilder {
 
         this.measureCount = cubeDesc.getMeasures().size();
         this.measureDescs = cubeDesc.getMeasures().toArray(new MeasureDesc[measureCount]);
-        
+
         Map<String, Integer> measureIndexMap = Maps.newHashMap();
         List<String> metricsAggrFuncsList = Lists.newArrayList();
 
@@ -123,13 +122,14 @@ public class InMemCubeBuilder extends AbstractInMemCubeBuilder {
                 int[] flatTableIdx = intermediateTableDesc.getMeasureColumnIndexes()[measureIdx];
                 int displayColIdx = flatTableIdx[flatTableIdx.length - 1];
                 TblColRef displayCol = func.getParameter().getColRefs().get(flatTableIdx.length - 1);
-                Dictionary<String> dictionary = (Dictionary<String>)dictionaryMap.get(displayCol);
+                @SuppressWarnings("unchecked")
+                Dictionary<String> dictionary = (Dictionary<String>) dictionaryMap.get(displayCol);
                 assert dictionary != null;
                 topNDisplayColDictMap.put(displayColIdx, dictionary);
             }
         }
     }
-    
+
     private GridTable newGridTableByCuboidID(long cuboidID) throws IOException {
         GTInfo info = CubeGridTable.newGTInfo(cubeDesc, cuboidID, dictionaryMap);
 
@@ -346,7 +346,7 @@ public class InMemCubeBuilder extends AbstractInMemCubeBuilder {
     private void makeMemoryBudget() {
         int systemAvailMB = getSystemAvailMB();
         logger.info("System avail " + systemAvailMB + " MB");
-        int reserve = Math.max(reserveMemoryMB, baseResult.aggrCacheMB / 3);
+        int reserve = reserveMemoryMB;
         logger.info("Reserve " + reserve + " MB for system basics");
 
         int budget = systemAvailMB - reserve;
@@ -391,9 +391,8 @@ public class InMemCubeBuilder extends AbstractInMemCubeBuilder {
 
         int mbBaseAggrCacheOnHeap = mbAfter == 0 ? 0 : mbBefore - mbAfter;
         int mbEstimateBaseAggrCache = (int) (aggregationScanner.getEstimateSizeOfAggrCache() / MemoryBudgetController.ONE_MB);
-        int mbBaseAggrCache = Math.max((int) (mbBaseAggrCacheOnHeap * 1.1), mbEstimateBaseAggrCache);
+        int mbBaseAggrCache = mbBaseAggrCacheOnHeap;
         mbBaseAggrCache = Math.max(mbBaseAggrCache, 10); // let it be at least 10 MB
-        mbBaseAggrCache = Math.min(mbBaseAggrCache, mbBaseAggrCacheOnHeap * 2); // let it be at most heap * 2, estimate like topn can be very wild..
         logger.info("Base aggr cache is " + mbBaseAggrCache + " MB (heap " + mbBaseAggrCacheOnHeap + " MB, estimate " + mbEstimateBaseAggrCache + " MB)");
 
         return updateCuboidResult(baseCuboidId, baseCuboid, count, timeSpent, mbBaseAggrCache);
@@ -482,7 +481,7 @@ public class InMemCubeBuilder extends AbstractInMemCubeBuilder {
             }
 
             // disable sanity check for performance
-//            sanityCheck(scanner.getTotalSumForSanityCheck());
+            sanityCheck(scanner.getTotalSumForSanityCheck());
         } finally {
             scanner.close();
             builder.close();
@@ -623,8 +622,7 @@ public class InMemCubeBuilder extends AbstractInMemCubeBuilder {
                 FunctionDesc function = cubeDesc.getMeasures().get(i).getFunction();
                 if (flatTableIdx == null) {
                     value = measureCodec.getSerializer(i).valueOf(measureDesc.getFunction().getParameter().getValue());
-                }
-                else if (function.isCount() || function.isHolisticCountDistinct()) {
+                } else if (function.isCount() || function.isHolisticCountDistinct()) {
                     // note for holistic count distinct, this value will be ignored
                     value = ONE;
                 } else if (function.isTopN()) {