You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2016/12/20 11:27:30 UTC

[05/50] [abbrv] kylin git commit: KYLIN-2244 "kylin.job.cuboid.size.memhungry.ratio" shouldn't be applied to measures like TopN

KYLIN-2244 "kylin.job.cuboid.size.memhungry.ratio" shouldn't be applied to measures like TopN


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/8ffb0e71
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/8ffb0e71
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/8ffb0e71

Branch: refs/heads/master-cdh5.7
Commit: 8ffb0e7103d63d2c0f5d093f3afde1a0490eb8a0
Parents: 4408579
Author: shaofengshi <sh...@apache.org>
Authored: Mon Dec 12 14:19:55 2016 +0800
Committer: shaofengshi <sh...@apache.org>
Committed: Mon Dec 12 14:19:55 2016 +0800

----------------------------------------------------------------------
 .../apache/kylin/common/KylinConfigBase.java    |  5 +++
 .../kylin/engine/mr/common/CubeStatsReader.java | 36 ++++++++------------
 2 files changed, 20 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/8ffb0e71/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 2b35c70..610c2af 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -261,10 +261,15 @@ abstract public class KylinConfigBase implements Serializable {
         return Double.parseDouble(getOptional("kylin.cube.size-estimate-ratio", "0.25"));
     }
 
+    @Deprecated
     public double getJobCuboidSizeMemHungryRatio() {
         return Double.parseDouble(getOptional("kylin.cube.size-estimate-memhungry-ratio", "0.05"));
     }
 
+    public double getJobCuboidSizeCountDistinctRatio() {
+        return Double.parseDouble(getOptional("kylin.cube.size-estimate-countdistinct-ratio", "0.05"));
+    }
+
     public String getCubeAlgorithm() {
         return getOptional("kylin.cube.algorithm", "auto");
     }

http://git-wip-us.apache.org/repos/asf/kylin/blob/8ffb0e71/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
index 1cf5da6..21af1e6 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
@@ -55,6 +55,7 @@ import org.apache.kylin.cube.model.CubeDesc;
 import org.apache.kylin.engine.mr.HadoopUtil;
 import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter;
 import org.apache.kylin.metadata.datatype.DataType;
+import org.apache.kylin.metadata.model.FunctionDesc;
 import org.apache.kylin.metadata.model.MeasureDesc;
 import org.apache.kylin.metadata.model.TblColRef;
 import org.slf4j.Logger;
@@ -196,41 +197,34 @@ public class CubeStatsReader {
      */
     private static double estimateCuboidStorageSize(CubeSegment cubeSegment, long cuboidId, long rowCount, long baseCuboidId, List<Integer> rowKeyColumnLength) {
 
-        int bytesLength = cubeSegment.getRowKeyPreambleSize();
+        int rowkeyLength = cubeSegment.getRowKeyPreambleSize();
         KylinConfig kylinConf = cubeSegment.getConfig();
 
         long mask = Long.highestOneBit(baseCuboidId);
         long parentCuboidIdActualLength = Long.SIZE - Long.numberOfLeadingZeros(baseCuboidId);
         for (int i = 0; i < parentCuboidIdActualLength; i++) {
             if ((mask & cuboidId) > 0) {
-                bytesLength += rowKeyColumnLength.get(i); //colIO.getColumnLength(columnList.get(i));
+                rowkeyLength += rowKeyColumnLength.get(i); //colIO.getColumnLength(columnList.get(i));
             }
             mask = mask >> 1;
         }
 
         // add the measure length
-        int space = 0;
-        boolean isMemoryHungry = false;
+        int normalSpace = rowkeyLength;
+        int countDistinctSpace = 0;
         for (MeasureDesc measureDesc : cubeSegment.getCubeDesc().getMeasures()) {
-            if (measureDesc.getFunction().getMeasureType().isMemoryHungry()) {
-                isMemoryHungry = true;
-            }
             DataType returnType = measureDesc.getFunction().getReturnDataType();
-            space += returnType.getStorageBytesEstimate();
-        }
-        bytesLength += space;
-
-        double ret = 1.0 * bytesLength * rowCount / (1024L * 1024L);
-        if (isMemoryHungry) {
-            double cuboidSizeMemHungryRatio = kylinConf.getJobCuboidSizeMemHungryRatio();
-            logger.info("Cube is memory hungry, storage size estimation multiply " + cuboidSizeMemHungryRatio);
-            ret *= cuboidSizeMemHungryRatio;
-        } else {
-            double cuboidSizeRatio = kylinConf.getJobCuboidSizeRatio();
-            logger.info("Cube is not memory hungry, storage size estimation multiply " + cuboidSizeRatio);
-            ret *= cuboidSizeRatio;
+            if (measureDesc.getFunction().getExpression().equals(FunctionDesc.FUNC_COUNT_DISTINCT)) {
+                countDistinctSpace += returnType.getStorageBytesEstimate();
+            } else {
+                normalSpace += returnType.getStorageBytesEstimate();
+            }
         }
-        logger.info("Cuboid " + cuboidId + " has " + rowCount + " rows, each row size is " + bytesLength + " bytes." + " Total size is " + ret + "M.");
+
+        double cuboidSizeRatio = kylinConf.getJobCuboidSizeRatio();
+        double cuboidSizeMemHungryRatio = kylinConf.getJobCuboidSizeCountDistinctRatio();
+        double ret = (1.0 * normalSpace * rowCount * cuboidSizeRatio + 1.0 * countDistinctSpace * rowCount * cuboidSizeMemHungryRatio) / (1024L * 1024L);
+        logger.info("Cuboid " + cuboidId + " has " + rowCount + " rows, each row size is " + (normalSpace + countDistinctSpace) + " bytes." + " Total size is " + ret + "M.");
         return ret;
     }