You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2017/11/23 05:53:20 UTC
[18/18] kylin git commit: APACHE-KYLIN-2734: regard the imported hot cuboids as mandatory cuboids for cube planner phase one
APACHE-KYLIN-2734: regard the imported hot cuboids as mandatory cuboids for cube planner phase one
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/950e3dba
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/950e3dba
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/950e3dba
Branch: refs/heads/ci-dong
Commit: 950e3dba0d5fd1a8f4df7f3c8944d7cfb3253b24
Parents: 0aec755
Author: Zhong <nj...@apache.org>
Authored: Fri Sep 1 09:42:17 2017 +0800
Committer: lidongsjtu <li...@apache.org>
Committed: Thu Nov 23 13:31:34 2017 +0800
----------------------------------------------------------------------
.../cuboid/algorithm/CuboidRecommender.java | 6 ++++
.../org/apache/kylin/cube/model/CubeDesc.java | 26 +++++++++++-----
.../engine/mr/common/CuboidRecommenderUtil.java | 8 +++--
.../mr/common/StatisticsDecisionUtil.java | 32 +++++++++++++++-----
.../mr/steps/FactDistinctColumnsMapper.java | 9 +++++-
5 files changed, 63 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/950e3dba/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidRecommender.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidRecommender.java b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidRecommender.java
index 43b2318..057f7e8 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidRecommender.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidRecommender.java
@@ -75,6 +75,9 @@ public class CuboidRecommender {
return instance;
}
+ /**
+ * Get the recommended cuboids and their row count stats, using the cache
+ */
public Map<Long, Long> getRecommendCuboidList(final CuboidStats cuboidStats, final KylinConfig kylinConfig) {
if (cuboidStats == null) {
return null;
@@ -113,6 +116,9 @@ public class CuboidRecommender {
return results;
}
+ /**
+ * Get the recommended cuboids and their row count stats, without using the cache
+ */
public Map<Long, Long> getRecommendCuboidList(CuboidStats cuboidStats, KylinConfig kylinConf,
boolean ifForceRecommend) {
long Threshold1 = 1L << kylinConf.getCubePlannerAgreedyAlgorithmAutoThreshold();
http://git-wip-us.apache.org/repos/asf/kylin/blob/950e3dba/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
index 9c0a7cc..79116a8 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
@@ -189,6 +189,8 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware {
@JsonInclude(JsonInclude.Include.NON_NULL)
private List<Set<String>> mandatoryDimensionSetList = Collections.emptyList();
+ private Set<Long> mandatoryCuboids = Sets.newHashSet();
+
private LinkedHashSet<TblColRef> allColumns = new LinkedHashSet<>();
private LinkedHashSet<ColumnDesc> allColumnDescs = new LinkedHashSet<>();
private LinkedHashSet<TblColRef> dimensionColumns = new LinkedHashSet<>();
@@ -458,6 +460,10 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware {
this.mandatoryDimensionSetList = mandatoryDimensionSetList;
}
+ public Set<Long> getMandatoryCuboids() {
+ return mandatoryCuboids;
+ }
+
@Override
public boolean equals(Object o) {
if (this == o)
@@ -651,24 +657,30 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware {
initDictionaryDesc();
amendAllColumns();
- // check if mandatory dimension set list is valid
- validateMandatoryDimensionSetList();
+ // initialize mandatory cuboids based on mandatoryDimensionSetList
+ initMandatoryCuboids();
}
- public void validateMandatoryDimensionSetList() {
- Set<String> rowKeyColumns = Sets.newHashSet();
+ private void initMandatoryCuboids() {
+ Map<String, RowKeyColDesc> rowKeyColDescMap = Maps.newHashMap();
for (RowKeyColDesc entry : getRowkey().getRowKeyColumns()) {
- rowKeyColumns.add(entry.getColumn());
+ rowKeyColDescMap.put(entry.getColumn(), entry);
}
for (Set<String> mandatoryDimensionSet : this.mandatoryDimensionSetList) {
+ long cuboid = 0L;
for (String columnName : mandatoryDimensionSet) {
- if (!rowKeyColumns.contains(columnName)) {
- logger.info("Column " + columnName + " in " + mandatoryDimensionSet + " does not exist");
+ TblColRef tblColRef = model.findColumn(columnName);
+ RowKeyColDesc rowKeyColDesc = rowKeyColDescMap.get(tblColRef.getIdentity());
+ // check if mandatory dimension set list is valid
+ if (rowKeyColDesc == null) {
+ logger.warn("Column " + columnName + " in " + mandatoryDimensionSet + " does not exist");
throw new IllegalStateException(
"Column " + columnName + " in " + mandatoryDimensionSet + " does not exist");
}
+ cuboid |= 1L << rowKeyColDesc.getBitIndex();
}
+ mandatoryCuboids.add(cuboid);
}
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/950e3dba/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java
index 649eeb6..05458b6 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java
@@ -20,6 +20,7 @@ package org.apache.kylin.engine.mr.common;
import java.io.IOException;
import java.util.Map;
+import java.util.Set;
import org.apache.kylin.common.util.Pair;
import org.apache.kylin.cube.CubeInstance;
@@ -53,10 +54,13 @@ public class CuboidRecommenderUtil {
return null;
}
+ Set<Long> mandatoryCuboids = segment.getCubeDesc().getMandatoryCuboids();
+
String key = cube.getName();
CuboidStats cuboidStats = new CuboidStats.Builder(key, baseCuboid, cubeStatsReader.getCuboidRowEstimatesHLL(),
- cubeStatsReader.getCuboidSizeMap()).build();
- return CuboidRecommender.getInstance().getRecommendCuboidList(cuboidStats, segment.getConfig(), false);
+ cubeStatsReader.getCuboidSizeMap()).setMandatoryCuboids(mandatoryCuboids).build();
+ return CuboidRecommender.getInstance().getRecommendCuboidList(cuboidStats, segment.getConfig(),
+ !mandatoryCuboids.isEmpty());
}
/** Trigger cube planner phase two for optimization */
http://git-wip-us.apache.org/repos/asf/kylin/blob/950e3dba/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/StatisticsDecisionUtil.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/StatisticsDecisionUtil.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/StatisticsDecisionUtil.java
index a5a1ba8..4efcb96 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/StatisticsDecisionUtil.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/StatisticsDecisionUtil.java
@@ -88,15 +88,9 @@ public class StatisticsDecisionUtil {
cubingJob.setAlgorithm(alg);
}
+ // For triggering cube planner phase one
public static void optimizeCubingPlan(CubeSegment segment) throws IOException {
- CubeInstance cube = segment.getCubeInstance();
-
- if (cube.getConfig().isCubePlannerEnabled() == false)
- return;
-
- List<CubeSegment> readySegments = cube.getSegments(SegmentStatusEnum.READY);
- if (readySegments.size() == 0 || (cube.getConfig().isCubePlannerEnabledForExistingCube()
- && readySegments.size() == 1 && (readySegments.get(0).getSegRange().equals(segment.getSegRange())))) {
+ if (isAbleToOptimizeCubingPlan(segment)) {
logger.info("It's able to trigger cuboid planner algorithm.");
} else {
return;
@@ -107,8 +101,30 @@ public class StatisticsDecisionUtil {
return;
}
+ CubeInstance cube = segment.getCubeInstance();
CubeUpdate cubeBuilder = new CubeUpdate(cube);
cubeBuilder.setCuboids(recommendCuboidsWithStats);
CubeManager.getInstance(cube.getConfig()).updateCube(cubeBuilder);
}
+
+ public static boolean isAbleToOptimizeCubingPlan(CubeSegment segment) {
+ CubeInstance cube = segment.getCubeInstance();
+ if (!cube.getConfig().isCubePlannerEnabled())
+ return false;
+
+ if (cube.getSegments(SegmentStatusEnum.READY_PENDING).size() > 0) {
+ logger.info("Has read pending segments and will not enable cube planner.");
+ return false;
+ }
+ List<CubeSegment> readySegments = cube.getSegments(SegmentStatusEnum.READY);
+ List<CubeSegment> newSegments = cube.getSegments(SegmentStatusEnum.NEW);
+ if (newSegments.size() <= 1 && //
+ (readySegments.size() == 0 || //
+ (cube.getConfig().isCubePlannerEnabledForExistingCube() && readySegments.size() == 1
+ && readySegments.get(0).getSegRange().equals(segment.getSegRange())))) {
+ return true;
+ } else {
+ return false;
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/950e3dba/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java
index e9fd3bd..ace16a5 100755
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java
@@ -29,6 +29,7 @@ import org.apache.kylin.common.util.Bytes;
import org.apache.kylin.common.util.StringUtil;
import org.apache.kylin.cube.cuboid.CuboidUtil;
import org.apache.kylin.engine.mr.common.BatchConstants;
+import org.apache.kylin.engine.mr.common.StatisticsDecisionUtil;
import org.apache.kylin.measure.BufferedMeasureCodec;
import org.apache.kylin.measure.hllc.HLLCounter;
import org.apache.kylin.measure.hllc.RegisterType;
@@ -37,6 +38,7 @@ import org.apache.kylin.metadata.model.TblColRef;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import com.google.common.collect.Sets;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hasher;
import com.google.common.hash.Hashing;
@@ -86,7 +88,12 @@ public class FactDistinctColumnsMapper<KEYIN> extends FactDistinctColumnsMapperB
samplingPercentage = Integer.parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT));
nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length;
- Set<Long> cuboidIdSet = cubeSeg.getCuboidScheduler().getAllCuboidIds();
+ Set<Long> cuboidIdSet = Sets.newHashSet(cubeSeg.getCuboidScheduler().getAllCuboidIds());
+ if (StatisticsDecisionUtil.isAbleToOptimizeCubingPlan(cubeSeg)) {
+ // For the cube planner, row count stats must be calculated for every prebuilt cuboid.
+ // If the precondition for triggering cube planner phase one is satisfied, we also need to calculate row count stats for the mandatory cuboids.
+ cuboidIdSet.addAll(cubeSeg.getCubeDesc().getMandatoryCuboids());
+ }
cuboidIds = cuboidIdSet.toArray(new Long[cuboidIdSet.size()]);
allCuboidsBitSet = CuboidUtil.getCuboidBitSet(cuboidIds, nRowKey);