You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2017/12/21 02:58:48 UTC
[03/16] kylin git commit: APACHE-KYLIN-2866: move hll shard base config to BatchConstants
APACHE-KYLIN-2866: move hll shard base config to BatchConstants
Signed-off-by: lidongsjtu <li...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/fba9d1d3
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/fba9d1d3
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/fba9d1d3
Branch: refs/heads/master
Commit: fba9d1d3dbe7d4dffbf8e0b87bf58b70baa7ffa6
Parents: 5425deb
Author: Zhong <nj...@apache.org>
Authored: Thu Sep 28 17:49:42 2017 +0800
Committer: lidongsjtu <li...@apache.org>
Committed: Wed Dec 20 23:20:11 2017 +0800
----------------------------------------------------------------------
.../apache/kylin/engine/mr/common/BatchConstants.java | 2 ++
.../apache/kylin/engine/mr/common/MapReduceUtil.java | 14 ++++++++++++++
.../mr/steps/FactDistinctColumnPartitioner.java | 11 ++++-------
.../kylin/engine/mr/steps/FactDistinctColumnsJob.java | 14 +++-----------
.../engine/mr/steps/FactDistinctColumnsReducer.java | 8 ++++----
5 files changed, 27 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/fba9d1d3/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java
index 129c6dd..50c589a 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java
@@ -70,6 +70,8 @@ public interface BatchConstants {
String CFG_SPARK_META_URL = "spark.meta.url";
String CFG_GLOBAL_DICT_BASE_DIR = "global.dict.base.dir";
+ String CFG_HLL_SHARD_BASE = "mapreduce.partition.hll.shard.base";
+
/**
* command line ARGuments
*/
http://git-wip-us.apache.org/repos/asf/kylin/blob/fba9d1d3/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/MapReduceUtil.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/MapReduceUtil.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/MapReduceUtil.java
index 0379f64..b249f12 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/MapReduceUtil.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/MapReduceUtil.java
@@ -35,6 +35,20 @@ public class MapReduceUtil {
private static final Logger logger = LoggerFactory.getLogger(MapReduceUtil.class);
/**
+ * @return reducer number for calculating hll
+ */
+ public static int getHLLShardBase(CubeSegment segment) {
+ int nCuboids = segment.getCuboidScheduler().getAllCuboidIds().size();
+ int shardBase = (nCuboids - 1) / segment.getConfig().getHadoopJobPerReducerHLLCuboidNumber() + 1;
+
+ int hllMaxReducerNumber = segment.getConfig().getHadoopJobHLLMaxReducerNumber();
+ if (shardBase > hllMaxReducerNumber) {
+ shardBase = hllMaxReducerNumber;
+ }
+ return shardBase;
+ }
+
+ /**
* @param cuboidScheduler specified can provide more flexibility
* */
public static int getLayeredCubingReduceTaskNum(CubeSegment cubeSegment, CuboidScheduler cuboidScheduler,
http://git-wip-us.apache.org/repos/asf/kylin/blob/fba9d1d3/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnPartitioner.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnPartitioner.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnPartitioner.java
index 7ac5d02..141ca99 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnPartitioner.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnPartitioner.java
@@ -24,6 +24,7 @@ import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.kylin.common.util.Bytes;
import org.apache.kylin.common.util.BytesUtil;
+import org.apache.kylin.engine.mr.common.BatchConstants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -32,12 +33,6 @@ import org.slf4j.LoggerFactory;
public class FactDistinctColumnPartitioner extends Partitioner<SelfDefineSortableKey, Text> implements Configurable {
private static final Logger logger = LoggerFactory.getLogger(FactDistinctColumnPartitioner.class);
- public static final String HLL_SHARD_BASE_PROPERTY_NAME = "mapreduce.partition.factdistinctcolumnpartitioner.hll.shard.base";
-
- public static void setHLLShard(Configuration conf, int hllShardBase) {
- conf.setInt(HLL_SHARD_BASE_PROPERTY_NAME, hllShardBase);
- }
-
private Configuration conf;
private int hllShardBase = 1;
@@ -60,12 +55,14 @@ public class FactDistinctColumnPartitioner extends Partitioner<SelfDefineSortabl
}
}
+ @Override
public void setConf(Configuration conf) {
this.conf = conf;
- hllShardBase = conf.getInt(HLL_SHARD_BASE_PROPERTY_NAME, 1);
+ hllShardBase = conf.getInt(BatchConstants.CFG_HLL_SHARD_BASE, 1);
logger.info("shard base for hll is " + hllShardBase);
}
+ @Override
public Configuration getConf() {
return conf;
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/fba9d1d3/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsJob.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsJob.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsJob.java
index dee384f..5200950 100755
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsJob.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsJob.java
@@ -42,6 +42,7 @@ import org.apache.kylin.engine.mr.IMRInput.IMRTableInputFormat;
import org.apache.kylin.engine.mr.MRUtil;
import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
import org.apache.kylin.engine.mr.common.BatchConstants;
+import org.apache.kylin.engine.mr.common.MapReduceUtil;
import org.apache.kylin.metadata.model.TblColRef;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -136,15 +137,6 @@ public class FactDistinctColumnsJob extends AbstractHadoopJob {
}
- private int getHLLShardBase(CubeSegment segment) {
- int nCuboids = segment.getCuboidScheduler().getAllCuboidIds().size();
- int shardBase = (nCuboids - 1) / segment.getConfig().getFactDistinctJobPerReducerHLLCuboidNumber() + 1;
- if (shardBase > segment.getConfig().getFactDistinctJobHLLMaxReducerNumber()) {
- shardBase = segment.getConfig().getFactDistinctJobHLLMaxReducerNumber();
- }
- return shardBase;
- }
-
private void setupMapper(CubeSegment cubeSeg) throws IOException {
IMRTableInputFormat flatTableInputFormat = MRUtil.getBatchCubingInputSide(cubeSeg).getFlatTableInputFormat();
flatTableInputFormat.configureJob(job);
@@ -159,8 +151,8 @@ public class FactDistinctColumnsJob extends AbstractHadoopJob {
throws IOException {
int numberOfReducers = reducerCount;
if ("true".equalsIgnoreCase(statistics_enabled)) {
- int hllShardBase = getHLLShardBase(cubeSeg);
- FactDistinctColumnPartitioner.setHLLShard(job.getConfiguration(), hllShardBase);
+ int hllShardBase = MapReduceUtil.getHLLShardBase(cubeSeg);
+ job.getConfiguration().setInt(BatchConstants.CFG_HLL_SHARD_BASE, hllShardBase);
numberOfReducers += (1 + hllShardBase);
}
job.setReducerClass(FactDistinctColumnsReducer.class);
http://git-wip-us.apache.org/repos/asf/kylin/blob/fba9d1d3/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
index a733430..37972c0 100755
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
@@ -109,11 +109,11 @@ public class FactDistinctColumnsReducer extends KylinReducer<SelfDefineSortableK
boolean ifCol = true;
if (collectStatistics) {
- int hllShardBase = conf.getInt(FactDistinctColumnPartitioner.HLL_SHARD_BASE_PROPERTY_NAME, 0);
+ int hllShardBase = conf.getInt(BatchConstants.CFG_HLL_SHARD_BASE, 0);
if (hllShardBase <= 0) {
- throw new IllegalArgumentException("In job configuration the value for property "
- + FactDistinctColumnPartitioner.HLL_SHARD_BASE_PROPERTY_NAME + " is " + hllShardBase
- + ". It should be set correctly!!!");
+ throw new IllegalArgumentException(
+ "In job configuration the value for property " + BatchConstants.CFG_HLL_SHARD_BASE
+ + " is " + hllShardBase + ". It should be set correctly!!!");
}
ifCol = false;
if (taskId >= numberOfTasks - hllShardBase) {