You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by ma...@apache.org on 2016/11/21 07:45:05 UTC
[2/3] kylin git commit: KYLIN-2210 call CubeStatsReader.print at SaveStatisticsStep
KYLIN-2210 call CubeStatsReader.print at SaveStatisticsStep
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/3ab966b6
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/3ab966b6
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/3ab966b6
Branch: refs/heads/master
Commit: 3ab966b650debb83eb219c9ed8d357d141466776
Parents: a179f5e
Author: Hongbin Ma <ma...@apache.org>
Authored: Fri Nov 18 14:56:30 2016 +0800
Committer: Hongbin Ma <ma...@apache.org>
Committed: Mon Nov 21 13:37:58 2016 +0800
----------------------------------------------------------------------
.../kylin/engine/mr/common/CubeStatsReader.java | 2 +-
.../kylin/engine/mr/steps/SaveStatisticsStep.java | 17 ++++++++++++++---
2 files changed, 15 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/3ab966b6/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
index c917cfb..c6839d6 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
@@ -227,7 +227,7 @@ public class CubeStatsReader {
return ret;
}
- private void print(PrintWriter out) {
+ public void print(PrintWriter out) {
Map<Long, Long> cuboidRows = getCuboidRowEstimatesHLL();
Map<Long, Double> cuboidSizes = getCuboidSizeMap();
List<Long> cuboids = new ArrayList<Long>(cuboidRows.keySet());
http://git-wip-us.apache.org/repos/asf/kylin/blob/3ab966b6/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/SaveStatisticsStep.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/SaveStatisticsStep.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/SaveStatisticsStep.java
index 23e81bc..7718bfb 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/SaveStatisticsStep.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/SaveStatisticsStep.java
@@ -19,6 +19,8 @@
package org.apache.kylin.engine.mr.steps;
import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.StringWriter;
import java.util.Random;
import org.apache.hadoop.fs.FSDataInputStream;
@@ -29,8 +31,8 @@ import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.persistence.ResourceStore;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.engine.mr.CubingJob;
-import org.apache.kylin.engine.mr.HadoopUtil;
import org.apache.kylin.engine.mr.CubingJob.AlgorithmEnum;
+import org.apache.kylin.engine.mr.HadoopUtil;
import org.apache.kylin.engine.mr.common.BatchConstants;
import org.apache.kylin.engine.mr.common.CubeStatsReader;
import org.apache.kylin.job.exception.ExecuteException;
@@ -85,6 +87,16 @@ public class SaveStatisticsStep extends AbstractExecutable {
private void decideCubingAlgorithm(CubeSegment seg, KylinConfig kylinConf) throws IOException {
String algPref = kylinConf.getCubeAlgorithm();
+
+ CubeStatsReader cubeStats = new CubeStatsReader(seg, kylinConf);
+ StringWriter sw = new StringWriter();
+ PrintWriter pw = new PrintWriter(sw);
+ cubeStats.print(pw);
+ pw.flush();
+ pw.close();
+ logger.info("Cube Stats Estimation for segment {} :", seg.toString());
+ logger.info(sw.toString());
+
AlgorithmEnum alg;
if (AlgorithmEnum.INMEM.name().equalsIgnoreCase(algPref)) {
alg = AlgorithmEnum.INMEM;
@@ -104,14 +116,13 @@ public class SaveStatisticsStep extends AbstractExecutable {
} else if ("random".equalsIgnoreCase(algPref)) { // for testing
alg = new Random().nextBoolean() ? AlgorithmEnum.INMEM : AlgorithmEnum.LAYER;
} else { // the default
- CubeStatsReader cubeStats = new CubeStatsReader(seg, kylinConf);
int mapperNumber = cubeStats.getMapperNumberOfFirstBuild();
int mapperNumLimit = kylinConf.getCubeAlgorithmAutoMapperLimit();
double mapperOverlapRatio = cubeStats.getMapperOverlapRatioOfFirstBuild();
double overlapThreshold = kylinConf.getCubeAlgorithmAutoThreshold();
logger.info("mapperNumber for " + seg + " is " + mapperNumber + " and threshold is " + mapperNumLimit);
logger.info("mapperOverlapRatio for " + seg + " is " + mapperOverlapRatio + " and threshold is " + overlapThreshold);
-
+
// in-mem cubing is good when
// 1) the cluster has enough mapper slots to run in parallel
// 2) the mapper overlap ratio is small, meaning the shuffle of in-mem MR has advantage