Posted to commits@kylin.apache.org by sh...@apache.org on 2015/05/25 11:14:03 UTC
incubator-kylin git commit: KYLIN-750 make CreateHTableJob.getSplitsFromCuboidStatistics() public for streaming cubing to reuse.
Repository: incubator-kylin
Updated Branches:
refs/heads/0.8.0 cd7b91869 -> 9c0b3a954
KYLIN-750 make CreateHTableJob.getSplitsFromCuboidStatistics() public for streaming cubing to reuse.
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/9c0b3a95
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/9c0b3a95
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/9c0b3a95
Branch: refs/heads/0.8.0
Commit: 9c0b3a954e69d607be804890c83ca29f99304729
Parents: cd7b918
Author: shaofengshi <sh...@apache.org>
Authored: Mon May 25 17:13:34 2015 +0800
Committer: shaofengshi <sh...@apache.org>
Committed: Mon May 25 17:13:34 2015 +0800
----------------------------------------------------------------------
.../kylin/job/hadoop/hbase/CreateHTableJob.java | 34 +++++++++++---------
1 file changed, 19 insertions(+), 15 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9c0b3a95/job/src/main/java/org/apache/kylin/job/hadoop/hbase/CreateHTableJob.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/hbase/CreateHTableJob.java b/job/src/main/java/org/apache/kylin/job/hadoop/hbase/CreateHTableJob.java
index 8f802b7..e2d97e8 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/hbase/CreateHTableJob.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/hbase/CreateHTableJob.java
@@ -102,6 +102,7 @@ public class CreateHTableJob extends AbstractHadoopJob {
cube = cubeMgr.getCube(cubeName);
cubeDesc = cube.getDescriptor();
segmentName = getOptionValue(OPTION_SEGMENT_NAME);
+ CubeSegment cubeSegment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);
String tableName = getOptionValue(OPTION_HTABLE_NAME).toUpperCase();
HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(tableName));
@@ -137,7 +138,18 @@ public class CreateHTableJob extends AbstractHadoopJob {
byte[][] splitKeys;
if (statistics_enabled) {
- splitKeys = getSplitsFromCuboidStatistics(conf);
+
+ List<Integer> rowkeyColumnSize = Lists.newArrayList();
+ long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
+ Cuboid baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId);
+ List<TblColRef> columnList = baseCuboid.getColumns();
+
+ for (int i = 0; i < columnList.size(); i++) {
+ logger.info("Rowkey column " + i + " length " + cubeSegment.getColumnLength(columnList.get(i)));
+ rowkeyColumnSize.add(cubeSegment.getColumnLength(columnList.get(i)));
+ }
+
+ splitKeys = getSplitsFromCuboidStatistics(conf, kylinConfig, rowkeyColumnSize, cubeSegment);
} else {
splitKeys = getSplits(conf, partitionFilePath);
}
@@ -200,19 +212,9 @@ public class CreateHTableJob extends AbstractHadoopJob {
@SuppressWarnings("deprecation")
- protected byte[][] getSplitsFromCuboidStatistics(Configuration conf) throws IOException {
-
- List<Integer> rowkeyColumnSize = Lists.newArrayList();
- CubeSegment cubeSegment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);
- long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
- Cuboid baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId);
- List<TblColRef> columnList = baseCuboid.getColumns();
-
- for (int i = 0; i < columnList.size(); i++) {
- logger.info("Rowkey column " + i + " length " + cubeSegment.getColumnLength(columnList.get(i)));
- rowkeyColumnSize.add(cubeSegment.getColumnLength(columnList.get(i)));
- }
+ public static byte[][] getSplitsFromCuboidStatistics(Configuration conf, KylinConfig kylinConfig, List<Integer> rowkeyColumnSize, CubeSegment cubeSegment) throws IOException {
+ CubeDesc cubeDesc = cubeSegment.getCubeDesc();
DataModelDesc.RealizationCapacity cubeCapacity = cubeDesc.getModel().getCapacity();
int cut = kylinConfig.getHBaseRegionCut(cubeCapacity.toString());
@@ -265,8 +267,9 @@ public class CreateHTableJob extends AbstractHadoopJob {
allCuboids.addAll(cuboidSizeMap.keySet());
Collections.sort(allCuboids);
+ long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
for (long cuboidId : allCuboids) {
- long cuboidSize = estimateCuboidStorageSize(cuboidId, cuboidSizeMap.get(cuboidId), baseCuboidId, rowkeyColumnSize);
+ long cuboidSize = estimateCuboidStorageSize(cubeDesc, cuboidId, cuboidSizeMap.get(cuboidId), baseCuboidId, rowkeyColumnSize);
cuboidSizeMap.put(cuboidId, cuboidSize);
totalSizeInM += cuboidSize;
}
@@ -314,11 +317,12 @@ public class CreateHTableJob extends AbstractHadoopJob {
/**
* Estimate the cuboid's size
*
+ * @param cubeDesc
* @param cuboidId
* @param rowCount
* @return the cuboid size in M bytes
*/
- private long estimateCuboidStorageSize(long cuboidId, long rowCount, long baseCuboidId, List<Integer> rowKeyColumnLength) {
+ private static long estimateCuboidStorageSize(CubeDesc cubeDesc, long cuboidId, long rowCount, long baseCuboidId, List<Integer> rowKeyColumnLength) {
int bytesLength = RowConstants.ROWKEY_CUBOIDID_LEN;
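
For reference, below is a minimal sketch of how a streaming-cubing caller might reuse the method after this change. The StreamingSplitHelper class and computeSplits method are hypothetical names, the rowkey-length loop mirrors the caller code added in the first hunk above, the getSplitsFromCuboidStatistics signature is the one introduced by this commit, and the import paths are assumed from the 0.8 source tree.

    import java.io.IOException;
    import java.util.List;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.kylin.common.KylinConfig;
    import org.apache.kylin.cube.CubeSegment;
    import org.apache.kylin.cube.cuboid.Cuboid;
    import org.apache.kylin.cube.model.CubeDesc;
    import org.apache.kylin.job.hadoop.hbase.CreateHTableJob;
    import org.apache.kylin.metadata.model.TblColRef;

    import com.google.common.collect.Lists;

    // Hypothetical streaming-side helper; only the getSplitsFromCuboidStatistics()
    // call reflects the API introduced by this commit.
    public class StreamingSplitHelper {

        public static byte[][] computeSplits(Configuration conf, KylinConfig kylinConfig, CubeSegment cubeSegment) throws IOException {
            CubeDesc cubeDesc = cubeSegment.getCubeDesc();

            // Collect the encoded length of each rowkey column of the base cuboid,
            // mirroring the caller code moved out of getSplitsFromCuboidStatistics().
            List<Integer> rowkeyColumnSize = Lists.newArrayList();
            long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
            Cuboid baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId);
            List<TblColRef> columnList = baseCuboid.getColumns();
            for (int i = 0; i < columnList.size(); i++) {
                rowkeyColumnSize.add(cubeSegment.getColumnLength(columnList.get(i)));
            }

            // HBase region split keys derived from cuboid statistics, now reusable here.
            return CreateHTableJob.getSplitsFromCuboidStatistics(conf, kylinConfig, rowkeyColumnSize, cubeSegment);
        }
    }

Passing the config and segment in as explicit parameters, rather than reading fields of the job instance, is what lets a streaming caller invoke the method without constructing a CreateHTableJob.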