You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2016/12/20 11:27:30 UTC
[05/50] [abbrv] kylin git commit: KYLIN-2244 "kylin.job.cuboid.size.memhungry.ratio" shouldn't be applied on measures like TopN
KYLIN-2244 "kylin.job.cuboid.size.memhungry.ratio" shouldn't be applied on measures like TopN
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/8ffb0e71
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/8ffb0e71
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/8ffb0e71
Branch: refs/heads/master-cdh5.7
Commit: 8ffb0e7103d63d2c0f5d093f3afde1a0490eb8a0
Parents: 4408579
Author: shaofengshi <sh...@apache.org>
Authored: Mon Dec 12 14:19:55 2016 +0800
Committer: shaofengshi <sh...@apache.org>
Committed: Mon Dec 12 14:19:55 2016 +0800
----------------------------------------------------------------------
.../apache/kylin/common/KylinConfigBase.java | 5 +++
.../kylin/engine/mr/common/CubeStatsReader.java | 36 ++++++++------------
2 files changed, 20 insertions(+), 21 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/8ffb0e71/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 2b35c70..610c2af 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -261,10 +261,15 @@ abstract public class KylinConfigBase implements Serializable {
return Double.parseDouble(getOptional("kylin.cube.size-estimate-ratio", "0.25"));
}
+ @Deprecated
public double getJobCuboidSizeMemHungryRatio() {
return Double.parseDouble(getOptional("kylin.cube.size-estimate-memhungry-ratio", "0.05"));
}
+ public double getJobCuboidSizeCountDistinctRatio() {
+ return Double.parseDouble(getOptional("kylin.cube.size-estimate-countdistinct-ratio", "0.05"));
+ }
+
public String getCubeAlgorithm() {
return getOptional("kylin.cube.algorithm", "auto");
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/8ffb0e71/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
index 1cf5da6..21af1e6 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
@@ -55,6 +55,7 @@ import org.apache.kylin.cube.model.CubeDesc;
import org.apache.kylin.engine.mr.HadoopUtil;
import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter;
import org.apache.kylin.metadata.datatype.DataType;
+import org.apache.kylin.metadata.model.FunctionDesc;
import org.apache.kylin.metadata.model.MeasureDesc;
import org.apache.kylin.metadata.model.TblColRef;
import org.slf4j.Logger;
@@ -196,41 +197,34 @@ public class CubeStatsReader {
*/
private static double estimateCuboidStorageSize(CubeSegment cubeSegment, long cuboidId, long rowCount, long baseCuboidId, List<Integer> rowKeyColumnLength) {
- int bytesLength = cubeSegment.getRowKeyPreambleSize();
+ int rowkeyLength = cubeSegment.getRowKeyPreambleSize();
KylinConfig kylinConf = cubeSegment.getConfig();
long mask = Long.highestOneBit(baseCuboidId);
long parentCuboidIdActualLength = Long.SIZE - Long.numberOfLeadingZeros(baseCuboidId);
for (int i = 0; i < parentCuboidIdActualLength; i++) {
if ((mask & cuboidId) > 0) {
- bytesLength += rowKeyColumnLength.get(i); //colIO.getColumnLength(columnList.get(i));
+ rowkeyLength += rowKeyColumnLength.get(i); //colIO.getColumnLength(columnList.get(i));
}
mask = mask >> 1;
}
// add the measure length
- int space = 0;
- boolean isMemoryHungry = false;
+ int normalSpace = rowkeyLength;
+ int countDistinctSpace = 0;
for (MeasureDesc measureDesc : cubeSegment.getCubeDesc().getMeasures()) {
- if (measureDesc.getFunction().getMeasureType().isMemoryHungry()) {
- isMemoryHungry = true;
- }
DataType returnType = measureDesc.getFunction().getReturnDataType();
- space += returnType.getStorageBytesEstimate();
- }
- bytesLength += space;
-
- double ret = 1.0 * bytesLength * rowCount / (1024L * 1024L);
- if (isMemoryHungry) {
- double cuboidSizeMemHungryRatio = kylinConf.getJobCuboidSizeMemHungryRatio();
- logger.info("Cube is memory hungry, storage size estimation multiply " + cuboidSizeMemHungryRatio);
- ret *= cuboidSizeMemHungryRatio;
- } else {
- double cuboidSizeRatio = kylinConf.getJobCuboidSizeRatio();
- logger.info("Cube is not memory hungry, storage size estimation multiply " + cuboidSizeRatio);
- ret *= cuboidSizeRatio;
+ if (measureDesc.getFunction().getExpression().equals(FunctionDesc.FUNC_COUNT_DISTINCT)) {
+ countDistinctSpace += returnType.getStorageBytesEstimate();
+ } else {
+ normalSpace += returnType.getStorageBytesEstimate();
+ }
}
- logger.info("Cuboid " + cuboidId + " has " + rowCount + " rows, each row size is " + bytesLength + " bytes." + " Total size is " + ret + "M.");
+
+ double cuboidSizeRatio = kylinConf.getJobCuboidSizeRatio();
+ double cuboidSizeMemHungryRatio = kylinConf.getJobCuboidSizeCountDistinctRatio();
+ double ret = (1.0 * normalSpace * rowCount * cuboidSizeRatio + 1.0 * countDistinctSpace * rowCount * cuboidSizeMemHungryRatio) / (1024L * 1024L);
+ logger.info("Cuboid " + cuboidId + " has " + rowCount + " rows, each row size is " + (normalSpace + countDistinctSpace) + " bytes." + " Total size is " + ret + "M.");
return ret;
}