You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by ma...@apache.org on 2016/04/14 06:07:18 UTC
kylin git commit: KYLIN-1584 Specify region cut size in cubedesc and leave the RealizationCapacity in model as a hint
Repository: kylin
Updated Branches:
refs/heads/master 444302b2a -> 5dfda003e
KYLIN-1584 Specify region cut size in cubedesc and leave the RealizationCapacity in model as a hint
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/5dfda003
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/5dfda003
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/5dfda003
Branch: refs/heads/master
Commit: 5dfda003e665b3dd3ea65a1fb8fc0cadec77c2d5
Parents: 444302b
Author: Hongbin Ma <ma...@apache.org>
Authored: Thu Apr 14 12:03:19 2016 +0800
Committer: Hongbin Ma <ma...@apache.org>
Committed: Thu Apr 14 12:06:57 2016 +0800
----------------------------------------------------------------------
.../apache/kylin/common/KylinConfigBase.java | 33 +++++--------
.../org/apache/kylin/cube/model/CubeDesc.java | 8 +++
.../storage/hbase/steps/CreateHTableJob.java | 8 +--
.../hbase/steps/RangeKeyDistributionJob.java | 3 +-
.../steps/RangeKeyDistributionReducer.java | 4 +-
.../kylin/storage/hbase/steps/RegionSize.java | 51 ++++++++++++++++++++
6 files changed, 76 insertions(+), 31 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/5dfda003/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 286c20c..712bcf7 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -400,25 +400,6 @@ abstract public class KylinConfigBase implements Serializable {
return Integer.parseInt(getOptional("kylin.table.snapshot.max_mb", "300"));
}
- public int getHBaseRegionCut(String capacity) {
- String cut;
- switch (capacity) {
- case "SMALL":
- cut = getOptional("kylin.hbase.region.cut.small", "10");
- break;
- case "MEDIUM":
- cut = getOptional("kylin.hbase.region.cut.medium", "20");
- break;
- case "LARGE":
- cut = getOptional("kylin.hbase.region.cut.large", "100");
- break;
- default:
- throw new IllegalArgumentException("Capacity not recognized: " + capacity);
- }
-
- return Integer.valueOf(cut);
- }
-
public int getHBaseRegionCountMin() {
return Integer.parseInt(getOptional("kylin.hbase.region.count.min", "1"));
}
@@ -523,10 +504,20 @@ abstract public class KylinConfigBase implements Serializable {
return Boolean.parseBoolean(this.getOptional("kylin.dict.growing.enabled", "false"));
}
- public int getHBaseScanMaxResultSize() {
+ public float getKylinHBaseRegionCutSmall() {
+ return Float.valueOf(getOptional("kylin.hbase.region.cut.small", "10"));
+ }
- return Integer.parseInt(this.getOptional("kylin.hbase.scan.max_result_size", "" + (5 * 1024 * 1024))); // 5 MB
+ public float getKylinHBaseRegionCutMedium() {
+ return Float.valueOf(getOptional("kylin.hbase.region.cut.medium", "20"));
+ }
+
+ public float getKylinHBaseRegionCutLarge() {
+ return Float.valueOf(getOptional("kylin.hbase.region.cut.large", "100"));
+ }
+ public int getHBaseScanMaxResultSize() {
+ return Integer.parseInt(this.getOptional("kylin.hbase.scan.max_result_size", "" + (5 * 1024 * 1024))); // 5 MB
}
public int getCubingInMemSamplingPercent() {
http://git-wip-us.apache.org/repos/asf/kylin/blob/5dfda003/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
index 65ba0a5..20d4277 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
@@ -145,6 +145,9 @@ public class CubeDesc extends RootPersistentEntity {
private long[] autoMergeTimeRanges;
@JsonProperty("retention_range")
private long retentionRange = 0;
+
+ @JsonProperty("region_size")
+ private float regionSize = 0;//https://issues.apache.org/jira/browse/KYLIN-1584
@JsonProperty("engine_type")
private int engineType = IEngineAware.ID_MR_V1;
@@ -410,6 +413,11 @@ public class CubeDesc extends RootPersistentEntity {
this.overrideKylinProps = overrideKylinProps;
}
+ public float getRegionSize() {
+ return regionSize;
+ }
+
+
@Override
public boolean equals(Object o) {
if (this == o)
http://git-wip-us.apache.org/repos/asf/kylin/blob/5dfda003/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CreateHTableJob.java
----------------------------------------------------------------------
diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CreateHTableJob.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CreateHTableJob.java
index 7c738e2..ef5cc38 100644
--- a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CreateHTableJob.java
+++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CreateHTableJob.java
@@ -48,8 +48,6 @@ import org.apache.kylin.cube.model.CubeDesc;
import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
import org.apache.kylin.engine.mr.common.CubeStatsReader;
import org.apache.kylin.engine.mr.common.CuboidShardUtil;
-import org.apache.kylin.engine.mr.steps.InMemCuboidJob;
-import org.apache.kylin.metadata.model.DataModelDesc;
import org.apache.kylin.metadata.model.SegmentStatusEnum;
import org.apache.kylin.storage.hbase.HBaseConnection;
import org.slf4j.Logger;
@@ -159,11 +157,9 @@ public class CreateHTableJob extends AbstractHadoopJob {
public static byte[][] getSplitsFromCuboidStatistics(final Map<Long, Double> cubeSizeMap, KylinConfig kylinConfig, CubeSegment cubeSegment) throws IOException {
final CubeDesc cubeDesc = cubeSegment.getCubeDesc();
+ float cut = RegionSize.getReionSize(kylinConfig, cubeDesc);
- DataModelDesc.RealizationCapacity cubeCapacity = cubeDesc.getModel().getCapacity();
- int cut = kylinConfig.getHBaseRegionCut(cubeCapacity.toString());
-
- logger.info("Cube capacity " + cubeCapacity.toString() + ", chosen cut for HTable is " + cut + "GB");
+ logger.info("chosen cut for HTable is " + cut + "GB");
double totalSizeInM = 0;
for (Double cuboidSize : cubeSizeMap.values()) {
http://git-wip-us.apache.org/repos/asf/kylin/blob/5dfda003/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionJob.java
----------------------------------------------------------------------
diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionJob.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionJob.java
index 4a6ddad..36a5732 100644
--- a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionJob.java
+++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionJob.java
@@ -96,8 +96,7 @@ public class RangeKeyDistributionJob extends AbstractHadoopJob {
CubeInstance cube = cubeMgr.getCube(cubeName);
KylinConfig config = cube.getConfig();
int hfileSizeGB = config.getHBaseHFileSizeGB();
- DataModelDesc.RealizationCapacity cubeCapacity = cube.getDescriptor().getModel().getCapacity();
- int regionSplitSize = config.getHBaseRegionCut(cubeCapacity.toString());
+ float regionSplitSize = RegionSize.getReionSize(config,cube.getDescriptor());
int maxRegionCount = config.getHBaseRegionCountMax();
int minRegionCount = config.getHBaseRegionCountMin();
job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());
http://git-wip-us.apache.org/repos/asf/kylin/blob/5dfda003/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionReducer.java
----------------------------------------------------------------------
diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionReducer.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionReducer.java
index 4e53ca4..c927ceb 100644
--- a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionReducer.java
+++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionReducer.java
@@ -47,7 +47,7 @@ public class RangeKeyDistributionReducer extends KylinReducer<Text, LongWritable
private int minRegionCount = 1;
private int maxRegionCount = 500;
- private int cut = 10;
+ private float cut = 10.0f;
private int hfileSizeGB = 1;
private long bytesRead = 0;
private List<Text> gbPoints = new ArrayList<Text>();
@@ -98,7 +98,7 @@ public class RangeKeyDistributionReducer extends KylinReducer<Text, LongWritable
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
- int nRegion = Math.round((float) gbPoints.size() / (float) cut);
+ int nRegion = Math.round((float) gbPoints.size() / cut);
nRegion = Math.max(minRegionCount, nRegion);
nRegion = Math.min(maxRegionCount, nRegion);
http://git-wip-us.apache.org/repos/asf/kylin/blob/5dfda003/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RegionSize.java
----------------------------------------------------------------------
diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RegionSize.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RegionSize.java
new file mode 100644
index 0000000..20f3d73
--- /dev/null
+++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RegionSize.java
@@ -0,0 +1,51 @@
+/*
+ *
+ * * Licensed to the Apache Software Foundation (ASF) under one
+ * * or more contributor license agreements. See the NOTICE file
+ * * distributed with this work for additional information
+ * * regarding copyright ownership. The ASF licenses this file
+ * * to you under the Apache License, Version 2.0 (the
+ * * "License"); you may not use this file except in compliance
+ * * with the License. You may obtain a copy of the License at
+ * *
+ * * http://www.apache.org/licenses/LICENSE-2.0
+ * *
+ * * Unless required by applicable law or agreed to in writing, software
+ * * distributed under the License is distributed on an "AS IS" BASIS,
+ * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * * See the License for the specific language governing permissions and
+ * * limitations under the License.
+ * /
+ */
+
+package org.apache.kylin.storage.hbase.steps;
+
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.cube.model.CubeDesc;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class RegionSize {
+ protected static final Logger logger = LoggerFactory.getLogger(CreateHTableJob.class);
+
+ public static float getReionSize(KylinConfig config, CubeDesc cubeDesc) {
+ if (cubeDesc.getRegionSize() != 0) {
+ logger.info("Region size specified in Cube desc will be used");
+ return cubeDesc.getRegionSize();
+ } else {
+ logger.info("Region size specified in Model desc will be used");
+
+ switch (cubeDesc.getModel().getCapacity().toString()) {
+ case "SMALL":
+ return config.getKylinHBaseRegionCutSmall();
+ case "MEDIUM":
+ return config.getKylinHBaseRegionCutMedium();
+ case "LARGE":
+ return config.getKylinHBaseRegionCutLarge();
+ default:
+ throw new IllegalArgumentException("Capacity not recognized: " + cubeDesc.getModel().getCapacity().toString());
+ }
+ }
+ }
+
+}