You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by ma...@apache.org on 2016/04/14 06:07:18 UTC

kylin git commit: KYLIN-1584 Specify region cut size in CubeDesc and leave the RealizationCapacity in the model as a hint

Repository: kylin
Updated Branches:
  refs/heads/master 444302b2a -> 5dfda003e


KYLIN-1584 Specify region cut size in CubeDesc and leave the RealizationCapacity in the model as a hint


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/5dfda003
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/5dfda003
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/5dfda003

Branch: refs/heads/master
Commit: 5dfda003e665b3dd3ea65a1fb8fc0cadec77c2d5
Parents: 444302b
Author: Hongbin Ma <ma...@apache.org>
Authored: Thu Apr 14 12:03:19 2016 +0800
Committer: Hongbin Ma <ma...@apache.org>
Committed: Thu Apr 14 12:06:57 2016 +0800

----------------------------------------------------------------------
 .../apache/kylin/common/KylinConfigBase.java    | 33 +++++--------
 .../org/apache/kylin/cube/model/CubeDesc.java   |  8 +++
 .../storage/hbase/steps/CreateHTableJob.java    |  8 +--
 .../hbase/steps/RangeKeyDistributionJob.java    |  3 +-
 .../steps/RangeKeyDistributionReducer.java      |  4 +-
 .../kylin/storage/hbase/steps/RegionSize.java   | 51 ++++++++++++++++++++
 6 files changed, 76 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/5dfda003/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 286c20c..712bcf7 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -400,25 +400,6 @@ abstract public class KylinConfigBase implements Serializable {
         return Integer.parseInt(getOptional("kylin.table.snapshot.max_mb", "300"));
     }
 
-    public int getHBaseRegionCut(String capacity) {
-        String cut;
-        switch (capacity) {
-        case "SMALL":
-            cut = getOptional("kylin.hbase.region.cut.small", "10");
-            break;
-        case "MEDIUM":
-            cut = getOptional("kylin.hbase.region.cut.medium", "20");
-            break;
-        case "LARGE":
-            cut = getOptional("kylin.hbase.region.cut.large", "100");
-            break;
-        default:
-            throw new IllegalArgumentException("Capacity not recognized: " + capacity);
-        }
-
-        return Integer.valueOf(cut);
-    }
-
     public int getHBaseRegionCountMin() {
         return Integer.parseInt(getOptional("kylin.hbase.region.count.min", "1"));
     }
@@ -523,10 +504,20 @@ abstract public class KylinConfigBase implements Serializable {
         return Boolean.parseBoolean(this.getOptional("kylin.dict.growing.enabled", "false"));
     }
 
-    public int getHBaseScanMaxResultSize() {
+    public float getKylinHBaseRegionCutSmall() {
+        return Float.valueOf(getOptional("kylin.hbase.region.cut.small", "10"));
+    }
 
-        return Integer.parseInt(this.getOptional("kylin.hbase.scan.max_result_size", "" + (5 * 1024 * 1024))); // 5 MB
+    public float getKylinHBaseRegionCutMedium() {
+        return Float.valueOf(getOptional("kylin.hbase.region.cut.medium", "20"));
+    }
+
+    public float getKylinHBaseRegionCutLarge() {
+        return Float.valueOf(getOptional("kylin.hbase.region.cut.large", "100"));
+    }
 
+    public int getHBaseScanMaxResultSize() {
+        return Integer.parseInt(this.getOptional("kylin.hbase.scan.max_result_size", "" + (5 * 1024 * 1024))); // 5 MB
     }
 
     public int getCubingInMemSamplingPercent() {

http://git-wip-us.apache.org/repos/asf/kylin/blob/5dfda003/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
index 65ba0a5..20d4277 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
@@ -145,6 +145,9 @@ public class CubeDesc extends RootPersistentEntity {
     private long[] autoMergeTimeRanges;
     @JsonProperty("retention_range")
     private long retentionRange = 0;
+    
+    @JsonProperty("region_size")
+    private float regionSize = 0;//https://issues.apache.org/jira/browse/KYLIN-1584
 
     @JsonProperty("engine_type")
     private int engineType = IEngineAware.ID_MR_V1;
@@ -410,6 +413,11 @@ public class CubeDesc extends RootPersistentEntity {
         this.overrideKylinProps = overrideKylinProps;
     }
 
+    public float getRegionSize() {
+        return regionSize;
+    }
+
+
     @Override
     public boolean equals(Object o) {
         if (this == o)

http://git-wip-us.apache.org/repos/asf/kylin/blob/5dfda003/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CreateHTableJob.java
----------------------------------------------------------------------
diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CreateHTableJob.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CreateHTableJob.java
index 7c738e2..ef5cc38 100644
--- a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CreateHTableJob.java
+++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CreateHTableJob.java
@@ -48,8 +48,6 @@ import org.apache.kylin.cube.model.CubeDesc;
 import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
 import org.apache.kylin.engine.mr.common.CubeStatsReader;
 import org.apache.kylin.engine.mr.common.CuboidShardUtil;
-import org.apache.kylin.engine.mr.steps.InMemCuboidJob;
-import org.apache.kylin.metadata.model.DataModelDesc;
 import org.apache.kylin.metadata.model.SegmentStatusEnum;
 import org.apache.kylin.storage.hbase.HBaseConnection;
 import org.slf4j.Logger;
@@ -159,11 +157,9 @@ public class CreateHTableJob extends AbstractHadoopJob {
     public static byte[][] getSplitsFromCuboidStatistics(final Map<Long, Double> cubeSizeMap, KylinConfig kylinConfig, CubeSegment cubeSegment) throws IOException {
 
         final CubeDesc cubeDesc = cubeSegment.getCubeDesc();
+        float cut = RegionSize.getReionSize(kylinConfig, cubeDesc);
 
-        DataModelDesc.RealizationCapacity cubeCapacity = cubeDesc.getModel().getCapacity();
-        int cut = kylinConfig.getHBaseRegionCut(cubeCapacity.toString());
-
-        logger.info("Cube capacity " + cubeCapacity.toString() + ", chosen cut for HTable is " + cut + "GB");
+        logger.info("chosen cut for HTable is " + cut + "GB");
 
         double totalSizeInM = 0;
         for (Double cuboidSize : cubeSizeMap.values()) {

http://git-wip-us.apache.org/repos/asf/kylin/blob/5dfda003/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionJob.java
----------------------------------------------------------------------
diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionJob.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionJob.java
index 4a6ddad..36a5732 100644
--- a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionJob.java
+++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionJob.java
@@ -96,8 +96,7 @@ public class RangeKeyDistributionJob extends AbstractHadoopJob {
             CubeInstance cube = cubeMgr.getCube(cubeName);
             KylinConfig config = cube.getConfig();
             int hfileSizeGB = config.getHBaseHFileSizeGB();
-            DataModelDesc.RealizationCapacity cubeCapacity = cube.getDescriptor().getModel().getCapacity();
-            int regionSplitSize = config.getHBaseRegionCut(cubeCapacity.toString());
+            float regionSplitSize = RegionSize.getReionSize(config,cube.getDescriptor());
             int maxRegionCount = config.getHBaseRegionCountMax();
             int minRegionCount = config.getHBaseRegionCountMin();
             job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());

http://git-wip-us.apache.org/repos/asf/kylin/blob/5dfda003/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionReducer.java
----------------------------------------------------------------------
diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionReducer.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionReducer.java
index 4e53ca4..c927ceb 100644
--- a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionReducer.java
+++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionReducer.java
@@ -47,7 +47,7 @@ public class RangeKeyDistributionReducer extends KylinReducer<Text, LongWritable
 
     private int minRegionCount = 1;
     private int maxRegionCount = 500;
-    private int cut = 10;
+    private float cut = 10.0f;
     private int hfileSizeGB = 1;
     private long bytesRead = 0;
     private List<Text> gbPoints = new ArrayList<Text>();
@@ -98,7 +98,7 @@ public class RangeKeyDistributionReducer extends KylinReducer<Text, LongWritable
 
     @Override
     protected void cleanup(Context context) throws IOException, InterruptedException {
-        int nRegion = Math.round((float) gbPoints.size() / (float) cut);
+        int nRegion = Math.round((float) gbPoints.size() / cut);
         nRegion = Math.max(minRegionCount, nRegion);
         nRegion = Math.min(maxRegionCount, nRegion);
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/5dfda003/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RegionSize.java
----------------------------------------------------------------------
diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RegionSize.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RegionSize.java
new file mode 100644
index 0000000..20f3d73
--- /dev/null
+++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RegionSize.java
@@ -0,0 +1,51 @@
+/*
+ *
+ *  * Licensed to the Apache Software Foundation (ASF) under one
+ *  * or more contributor license agreements.  See the NOTICE file
+ *  * distributed with this work for additional information
+ *  * regarding copyright ownership.  The ASF licenses this file
+ *  * to you under the Apache License, Version 2.0 (the
+ *  * "License"); you may not use this file except in compliance
+ *  * with the License.  You may obtain a copy of the License at
+ *  * 
+ *  *     http://www.apache.org/licenses/LICENSE-2.0
+ *  * 
+ *  * Unless required by applicable law or agreed to in writing, software
+ *  * distributed under the License is distributed on an "AS IS" BASIS,
+ *  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  * See the License for the specific language governing permissions and
+ *  * limitations under the License.
+ * /
+ */
+
+package org.apache.kylin.storage.hbase.steps;
+
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.cube.model.CubeDesc;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class RegionSize {
+    protected static final Logger logger = LoggerFactory.getLogger(CreateHTableJob.class);
+
+    public static float getReionSize(KylinConfig config, CubeDesc cubeDesc) {
+        if (cubeDesc.getRegionSize() != 0) { 
+            logger.info("Region size specified in Cube desc will be used");
+            return cubeDesc.getRegionSize();
+        } else {
+            logger.info("Region size specified in Model desc will be used");
+            
+            switch (cubeDesc.getModel().getCapacity().toString()) {
+            case "SMALL":
+                return config.getKylinHBaseRegionCutSmall();
+            case "MEDIUM":
+                return config.getKylinHBaseRegionCutMedium();
+            case "LARGE":
+                return config.getKylinHBaseRegionCutLarge();
+            default:
+                throw new IllegalArgumentException("Capacity not recognized: " + cubeDesc.getModel().getCapacity().toString());
+            }
+        }
+    }
+
+}