You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2017/11/23 05:53:20 UTC

[18/18] kylin git commit: APACHE-KYLIN-2734: regard the imported hot cuboids as mandatory cuboids for cube planner phase one

APACHE-KYLIN-2734: regard the imported hot cuboids as mandatory cuboids for cube planner phase one


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/950e3dba
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/950e3dba
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/950e3dba

Branch: refs/heads/ci-dong
Commit: 950e3dba0d5fd1a8f4df7f3c8944d7cfb3253b24
Parents: 0aec755
Author: Zhong <nj...@apache.org>
Authored: Fri Sep 1 09:42:17 2017 +0800
Committer: lidongsjtu <li...@apache.org>
Committed: Thu Nov 23 13:31:34 2017 +0800

----------------------------------------------------------------------
 .../cuboid/algorithm/CuboidRecommender.java     |  6 ++++
 .../org/apache/kylin/cube/model/CubeDesc.java   | 26 +++++++++++-----
 .../engine/mr/common/CuboidRecommenderUtil.java |  8 +++--
 .../mr/common/StatisticsDecisionUtil.java       | 32 +++++++++++++++-----
 .../mr/steps/FactDistinctColumnsMapper.java     |  9 +++++-
 5 files changed, 63 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/950e3dba/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidRecommender.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidRecommender.java b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidRecommender.java
index 43b2318..057f7e8 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidRecommender.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidRecommender.java
@@ -75,6 +75,9 @@ public class CuboidRecommender {
         return instance;
     }
 
+    /**
+     * Get the recommended cuboids together with their row count stats, using the cache
+     */
     public Map<Long, Long> getRecommendCuboidList(final CuboidStats cuboidStats, final KylinConfig kylinConfig) {
         if (cuboidStats == null) {
             return null;
@@ -113,6 +116,9 @@ public class CuboidRecommender {
         return results;
     }
 
+    /**
+     * Get the recommended cuboids together with their row count stats, without using the cache
+     */
     public Map<Long, Long> getRecommendCuboidList(CuboidStats cuboidStats, KylinConfig kylinConf,
             boolean ifForceRecommend) {
         long Threshold1 = 1L << kylinConf.getCubePlannerAgreedyAlgorithmAutoThreshold();

http://git-wip-us.apache.org/repos/asf/kylin/blob/950e3dba/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
index 9c0a7cc..79116a8 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
@@ -189,6 +189,8 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware {
     @JsonInclude(JsonInclude.Include.NON_NULL)
     private List<Set<String>> mandatoryDimensionSetList = Collections.emptyList();
 
+    private Set<Long> mandatoryCuboids = Sets.newHashSet();
+
     private LinkedHashSet<TblColRef> allColumns = new LinkedHashSet<>();
     private LinkedHashSet<ColumnDesc> allColumnDescs = new LinkedHashSet<>();
     private LinkedHashSet<TblColRef> dimensionColumns = new LinkedHashSet<>();
@@ -458,6 +460,10 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware {
         this.mandatoryDimensionSetList = mandatoryDimensionSetList;
     }
 
+    public Set<Long> getMandatoryCuboids() {
+        return mandatoryCuboids;
+    }
+
     @Override
     public boolean equals(Object o) {
         if (this == o)
@@ -651,24 +657,30 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware {
         initDictionaryDesc();
         amendAllColumns();
 
-        // check if mandatory dimension set list is valid
-        validateMandatoryDimensionSetList();
+        // initialize mandatory cuboids based on mandatoryDimensionSetList
+        initMandatoryCuboids();
     }
 
-    public void validateMandatoryDimensionSetList() {
-        Set<String> rowKeyColumns = Sets.newHashSet();
+    private void initMandatoryCuboids() {
+        Map<String, RowKeyColDesc> rowKeyColDescMap = Maps.newHashMap();
         for (RowKeyColDesc entry : getRowkey().getRowKeyColumns()) {
-            rowKeyColumns.add(entry.getColumn());
+            rowKeyColDescMap.put(entry.getColumn(), entry);
         }
 
         for (Set<String> mandatoryDimensionSet : this.mandatoryDimensionSetList) {
+            long cuboid = 0L;
             for (String columnName : mandatoryDimensionSet) {
-                if (!rowKeyColumns.contains(columnName)) {
-                    logger.info("Column " + columnName + " in " + mandatoryDimensionSet + " does not exist");
+                TblColRef tblColRef = model.findColumn(columnName);
+                RowKeyColDesc rowKeyColDesc = rowKeyColDescMap.get(tblColRef.getIdentity());
+                // check if mandatory dimension set list is valid
+                if (rowKeyColDesc == null) {
+                    logger.warn("Column " + columnName + " in " + mandatoryDimensionSet + " does not exist");
                     throw new IllegalStateException(
                             "Column " + columnName + " in " + mandatoryDimensionSet + " does not exist");
                 }
+                cuboid |= 1L << rowKeyColDesc.getBitIndex();
             }
+            mandatoryCuboids.add(cuboid);
         }
     }
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/950e3dba/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java
index 649eeb6..05458b6 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java
@@ -20,6 +20,7 @@ package org.apache.kylin.engine.mr.common;
 
 import java.io.IOException;
 import java.util.Map;
+import java.util.Set;
 
 import org.apache.kylin.common.util.Pair;
 import org.apache.kylin.cube.CubeInstance;
@@ -53,10 +54,13 @@ public class CuboidRecommenderUtil {
             return null;
         }
 
+        Set<Long> mandatoryCuboids = segment.getCubeDesc().getMandatoryCuboids();
+
         String key = cube.getName();
         CuboidStats cuboidStats = new CuboidStats.Builder(key, baseCuboid, cubeStatsReader.getCuboidRowEstimatesHLL(),
-                cubeStatsReader.getCuboidSizeMap()).build();
-        return CuboidRecommender.getInstance().getRecommendCuboidList(cuboidStats, segment.getConfig(), false);
+                cubeStatsReader.getCuboidSizeMap()).setMandatoryCuboids(mandatoryCuboids).build();
+        return CuboidRecommender.getInstance().getRecommendCuboidList(cuboidStats, segment.getConfig(),
+                !mandatoryCuboids.isEmpty());
     }
 
     /** Trigger cube planner phase two for optimization */

http://git-wip-us.apache.org/repos/asf/kylin/blob/950e3dba/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/StatisticsDecisionUtil.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/StatisticsDecisionUtil.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/StatisticsDecisionUtil.java
index a5a1ba8..4efcb96 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/StatisticsDecisionUtil.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/StatisticsDecisionUtil.java
@@ -88,15 +88,9 @@ public class StatisticsDecisionUtil {
         cubingJob.setAlgorithm(alg);
     }
 
+    // For triggering cube planner phase one
     public static void optimizeCubingPlan(CubeSegment segment) throws IOException {
-        CubeInstance cube = segment.getCubeInstance();
-        
-        if (cube.getConfig().isCubePlannerEnabled() == false)
-            return;
-        
-        List<CubeSegment> readySegments = cube.getSegments(SegmentStatusEnum.READY);
-        if (readySegments.size() == 0 || (cube.getConfig().isCubePlannerEnabledForExistingCube()
-                && readySegments.size() == 1 && (readySegments.get(0).getSegRange().equals(segment.getSegRange())))) {
+        if (isAbleToOptimizeCubingPlan(segment)) {
             logger.info("It's able to trigger cuboid planner algorithm.");
         } else {
             return;
@@ -107,8 +101,30 @@ public class StatisticsDecisionUtil {
             return;
         }
 
+        CubeInstance cube = segment.getCubeInstance();
         CubeUpdate cubeBuilder = new CubeUpdate(cube);
         cubeBuilder.setCuboids(recommendCuboidsWithStats);
         CubeManager.getInstance(cube.getConfig()).updateCube(cubeBuilder);
     }
+
+    public static boolean isAbleToOptimizeCubingPlan(CubeSegment segment) {
+        CubeInstance cube = segment.getCubeInstance();
+        if (!cube.getConfig().isCubePlannerEnabled())
+            return false;
+
+        if (cube.getSegments(SegmentStatusEnum.READY_PENDING).size() > 0) {
+            logger.info("Has read pending segments and will not enable cube planner.");
+            return false;
+        }
+        List<CubeSegment> readySegments = cube.getSegments(SegmentStatusEnum.READY);
+        List<CubeSegment> newSegments = cube.getSegments(SegmentStatusEnum.NEW);
+        if (newSegments.size() <= 1 && //
+                (readySegments.size() == 0 || //
+                        (cube.getConfig().isCubePlannerEnabledForExistingCube() && readySegments.size() == 1
+                                && readySegments.get(0).getSegRange().equals(segment.getSegRange())))) {
+            return true;
+        } else {
+            return false;
+        }
+    }
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/950e3dba/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java
index e9fd3bd..ace16a5 100755
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java
@@ -29,6 +29,7 @@ import org.apache.kylin.common.util.Bytes;
 import org.apache.kylin.common.util.StringUtil;
 import org.apache.kylin.cube.cuboid.CuboidUtil;
 import org.apache.kylin.engine.mr.common.BatchConstants;
+import org.apache.kylin.engine.mr.common.StatisticsDecisionUtil;
 import org.apache.kylin.measure.BufferedMeasureCodec;
 import org.apache.kylin.measure.hllc.HLLCounter;
 import org.apache.kylin.measure.hllc.RegisterType;
@@ -37,6 +38,7 @@ import org.apache.kylin.metadata.model.TblColRef;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.google.common.collect.Sets;
 import com.google.common.hash.HashFunction;
 import com.google.common.hash.Hasher;
 import com.google.common.hash.Hashing;
@@ -86,7 +88,12 @@ public class FactDistinctColumnsMapper<KEYIN> extends FactDistinctColumnsMapperB
             samplingPercentage = Integer.parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT));
             nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length;
 
-            Set<Long> cuboidIdSet = cubeSeg.getCuboidScheduler().getAllCuboidIds();
+            Set<Long> cuboidIdSet = Sets.newHashSet(cubeSeg.getCuboidScheduler().getAllCuboidIds());
+            if (StatisticsDecisionUtil.isAbleToOptimizeCubingPlan(cubeSeg)) {
+                // For the cube planner, row count stats should be calculated for every prebuilt cuboid.
+                // If the precondition for triggering cube planner phase one is satisfied, row count stats must also be calculated for the mandatory cuboids.
+                cuboidIdSet.addAll(cubeSeg.getCubeDesc().getMandatoryCuboids());
+            }
             cuboidIds = cuboidIdSet.toArray(new Long[cuboidIdSet.size()]);
             allCuboidsBitSet = CuboidUtil.getCuboidBitSet(cuboidIds, nRowKey);