You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by ma...@apache.org on 2016/11/21 07:45:05 UTC

[2/3] kylin git commit: KYLIN-2210 call CubeStatsReader.print at SaveStatisticsStep

KYLIN-2210 call CubeStatsReader.print at SaveStatisticsStep


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/3ab966b6
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/3ab966b6
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/3ab966b6

Branch: refs/heads/master
Commit: 3ab966b650debb83eb219c9ed8d357d141466776
Parents: a179f5e
Author: Hongbin Ma <ma...@apache.org>
Authored: Fri Nov 18 14:56:30 2016 +0800
Committer: Hongbin Ma <ma...@apache.org>
Committed: Mon Nov 21 13:37:58 2016 +0800

----------------------------------------------------------------------
 .../kylin/engine/mr/common/CubeStatsReader.java    |  2 +-
 .../kylin/engine/mr/steps/SaveStatisticsStep.java  | 17 ++++++++++++++---
 2 files changed, 15 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/3ab966b6/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
index c917cfb..c6839d6 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
@@ -227,7 +227,7 @@ public class CubeStatsReader {
         return ret;
     }
 
-    private void print(PrintWriter out) {
+    public void print(PrintWriter out) {
         Map<Long, Long> cuboidRows = getCuboidRowEstimatesHLL();
         Map<Long, Double> cuboidSizes = getCuboidSizeMap();
         List<Long> cuboids = new ArrayList<Long>(cuboidRows.keySet());

http://git-wip-us.apache.org/repos/asf/kylin/blob/3ab966b6/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/SaveStatisticsStep.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/SaveStatisticsStep.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/SaveStatisticsStep.java
index 23e81bc..7718bfb 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/SaveStatisticsStep.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/SaveStatisticsStep.java
@@ -19,6 +19,8 @@
 package org.apache.kylin.engine.mr.steps;
 
 import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.StringWriter;
 import java.util.Random;
 
 import org.apache.hadoop.fs.FSDataInputStream;
@@ -29,8 +31,8 @@ import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.persistence.ResourceStore;
 import org.apache.kylin.cube.CubeSegment;
 import org.apache.kylin.engine.mr.CubingJob;
-import org.apache.kylin.engine.mr.HadoopUtil;
 import org.apache.kylin.engine.mr.CubingJob.AlgorithmEnum;
+import org.apache.kylin.engine.mr.HadoopUtil;
 import org.apache.kylin.engine.mr.common.BatchConstants;
 import org.apache.kylin.engine.mr.common.CubeStatsReader;
 import org.apache.kylin.job.exception.ExecuteException;
@@ -85,6 +87,16 @@ public class SaveStatisticsStep extends AbstractExecutable {
 
     private void decideCubingAlgorithm(CubeSegment seg, KylinConfig kylinConf) throws IOException {
         String algPref = kylinConf.getCubeAlgorithm();
+
+        CubeStatsReader cubeStats = new CubeStatsReader(seg, kylinConf);
+        StringWriter sw = new StringWriter();
+        PrintWriter pw = new PrintWriter(sw);
+        cubeStats.print(pw);
+        pw.flush();
+        pw.close();
+        logger.info("Cube Stats Estimation for segment {} :", seg.toString());
+        logger.info(sw.toString());
+
         AlgorithmEnum alg;
         if (AlgorithmEnum.INMEM.name().equalsIgnoreCase(algPref)) {
             alg = AlgorithmEnum.INMEM;
@@ -104,14 +116,13 @@ public class SaveStatisticsStep extends AbstractExecutable {
             } else if ("random".equalsIgnoreCase(algPref)) { // for testing
                 alg = new Random().nextBoolean() ? AlgorithmEnum.INMEM : AlgorithmEnum.LAYER;
             } else { // the default
-                CubeStatsReader cubeStats = new CubeStatsReader(seg, kylinConf);
                 int mapperNumber = cubeStats.getMapperNumberOfFirstBuild();
                 int mapperNumLimit = kylinConf.getCubeAlgorithmAutoMapperLimit();
                 double mapperOverlapRatio = cubeStats.getMapperOverlapRatioOfFirstBuild();
                 double overlapThreshold = kylinConf.getCubeAlgorithmAutoThreshold();
                 logger.info("mapperNumber for " + seg + " is " + mapperNumber + " and threshold is " + mapperNumLimit);
                 logger.info("mapperOverlapRatio for " + seg + " is " + mapperOverlapRatio + " and threshold is " + overlapThreshold);
- 
+
                 // in-mem cubing is good when
                 // 1) the cluster has enough mapper slots to run in parallel
                 // 2) the mapper overlap ratio is small, meaning the shuffle of in-mem MR has advantage