You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2017/12/21 02:58:48 UTC

[03/16] kylin git commit: APACHE-KYLIN-2866: move hll shard base config to BatchConstants

APACHE-KYLIN-2866: move hll shard base config to BatchConstants

Signed-off-by: lidongsjtu <li...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/fba9d1d3
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/fba9d1d3
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/fba9d1d3

Branch: refs/heads/master
Commit: fba9d1d3dbe7d4dffbf8e0b87bf58b70baa7ffa6
Parents: 5425deb
Author: Zhong <nj...@apache.org>
Authored: Thu Sep 28 17:49:42 2017 +0800
Committer: lidongsjtu <li...@apache.org>
Committed: Wed Dec 20 23:20:11 2017 +0800

----------------------------------------------------------------------
 .../apache/kylin/engine/mr/common/BatchConstants.java |  2 ++
 .../apache/kylin/engine/mr/common/MapReduceUtil.java  | 14 ++++++++++++++
 .../mr/steps/FactDistinctColumnPartitioner.java       | 11 ++++-------
 .../kylin/engine/mr/steps/FactDistinctColumnsJob.java | 14 +++-----------
 .../engine/mr/steps/FactDistinctColumnsReducer.java   |  8 ++++----
 5 files changed, 27 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/fba9d1d3/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java
index 129c6dd..50c589a 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java
@@ -70,6 +70,8 @@ public interface BatchConstants {
     String CFG_SPARK_META_URL = "spark.meta.url";
     String CFG_GLOBAL_DICT_BASE_DIR = "global.dict.base.dir";
 
+    String CFG_HLL_SHARD_BASE = "mapreduce.partition.hll.shard.base";
+
     /**
      * command line ARGuments
      */

http://git-wip-us.apache.org/repos/asf/kylin/blob/fba9d1d3/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/MapReduceUtil.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/MapReduceUtil.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/MapReduceUtil.java
index 0379f64..b249f12 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/MapReduceUtil.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/MapReduceUtil.java
@@ -35,6 +35,20 @@ public class MapReduceUtil {
     private static final Logger logger = LoggerFactory.getLogger(MapReduceUtil.class);
 
     /**
+     * @return reducer number for calculating hll
+     */
+    public static int getHLLShardBase(CubeSegment segment) {
+        int nCuboids = segment.getCuboidScheduler().getAllCuboidIds().size();
+        int shardBase = (nCuboids - 1) / segment.getConfig().getHadoopJobPerReducerHLLCuboidNumber() + 1;
+
+        int hllMaxReducerNumber = segment.getConfig().getHadoopJobHLLMaxReducerNumber();
+        if (shardBase > hllMaxReducerNumber) {
+            shardBase = hllMaxReducerNumber;
+        }
+        return shardBase;
+    }
+
+    /**
      * @param cuboidScheduler specified can provide more flexibility
      * */
     public static int getLayeredCubingReduceTaskNum(CubeSegment cubeSegment, CuboidScheduler cuboidScheduler,

http://git-wip-us.apache.org/repos/asf/kylin/blob/fba9d1d3/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnPartitioner.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnPartitioner.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnPartitioner.java
index 7ac5d02..141ca99 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnPartitioner.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnPartitioner.java
@@ -24,6 +24,7 @@ import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Partitioner;
 import org.apache.kylin.common.util.Bytes;
 import org.apache.kylin.common.util.BytesUtil;
+import org.apache.kylin.engine.mr.common.BatchConstants;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -32,12 +33,6 @@ import org.slf4j.LoggerFactory;
 public class FactDistinctColumnPartitioner extends Partitioner<SelfDefineSortableKey, Text> implements Configurable {
     private static final Logger logger = LoggerFactory.getLogger(FactDistinctColumnPartitioner.class);
 
-    public static final String HLL_SHARD_BASE_PROPERTY_NAME = "mapreduce.partition.factdistinctcolumnpartitioner.hll.shard.base";
-
-    public static void setHLLShard(Configuration conf, int hllShardBase) {
-        conf.setInt(HLL_SHARD_BASE_PROPERTY_NAME, hllShardBase);
-    }
-
     private Configuration conf;
     private int hllShardBase = 1;
 
@@ -60,12 +55,14 @@ public class FactDistinctColumnPartitioner extends Partitioner<SelfDefineSortabl
         }
     }
 
+    @Override
     public void setConf(Configuration conf) {
         this.conf = conf;
-        hllShardBase = conf.getInt(HLL_SHARD_BASE_PROPERTY_NAME, 1);
+        hllShardBase = conf.getInt(BatchConstants.CFG_HLL_SHARD_BASE, 1);
         logger.info("shard base for hll is " + hllShardBase);
     }
 
+    @Override
     public Configuration getConf() {
         return conf;
     }

http://git-wip-us.apache.org/repos/asf/kylin/blob/fba9d1d3/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsJob.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsJob.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsJob.java
index dee384f..5200950 100755
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsJob.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsJob.java
@@ -42,6 +42,7 @@ import org.apache.kylin.engine.mr.IMRInput.IMRTableInputFormat;
 import org.apache.kylin.engine.mr.MRUtil;
 import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
 import org.apache.kylin.engine.mr.common.BatchConstants;
+import org.apache.kylin.engine.mr.common.MapReduceUtil;
 import org.apache.kylin.metadata.model.TblColRef;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -136,15 +137,6 @@ public class FactDistinctColumnsJob extends AbstractHadoopJob {
 
     }
 
-    private int getHLLShardBase(CubeSegment segment) {
-        int nCuboids = segment.getCuboidScheduler().getAllCuboidIds().size();
-        int shardBase = (nCuboids - 1) / segment.getConfig().getFactDistinctJobPerReducerHLLCuboidNumber() + 1;
-        if (shardBase > segment.getConfig().getFactDistinctJobHLLMaxReducerNumber()) {
-            shardBase = segment.getConfig().getFactDistinctJobHLLMaxReducerNumber();
-        }
-        return shardBase;
-    }
-
     private void setupMapper(CubeSegment cubeSeg) throws IOException {
         IMRTableInputFormat flatTableInputFormat = MRUtil.getBatchCubingInputSide(cubeSeg).getFlatTableInputFormat();
         flatTableInputFormat.configureJob(job);
@@ -159,8 +151,8 @@ public class FactDistinctColumnsJob extends AbstractHadoopJob {
             throws IOException {
         int numberOfReducers = reducerCount;
         if ("true".equalsIgnoreCase(statistics_enabled)) {
-            int hllShardBase = getHLLShardBase(cubeSeg);
-            FactDistinctColumnPartitioner.setHLLShard(job.getConfiguration(), hllShardBase);
+            int hllShardBase = MapReduceUtil.getHLLShardBase(cubeSeg);
+            job.getConfiguration().setInt(BatchConstants.CFG_HLL_SHARD_BASE, hllShardBase);
             numberOfReducers += (1 + hllShardBase);
         }
         job.setReducerClass(FactDistinctColumnsReducer.class);

http://git-wip-us.apache.org/repos/asf/kylin/blob/fba9d1d3/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
index a733430..37972c0 100755
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
@@ -109,11 +109,11 @@ public class FactDistinctColumnsReducer extends KylinReducer<SelfDefineSortableK
 
         boolean ifCol = true;
         if (collectStatistics) {
-            int hllShardBase = conf.getInt(FactDistinctColumnPartitioner.HLL_SHARD_BASE_PROPERTY_NAME, 0);
+            int hllShardBase = conf.getInt(BatchConstants.CFG_HLL_SHARD_BASE, 0);
             if (hllShardBase <= 0) {
-                throw new IllegalArgumentException("In job configuration the value for property "
-                        + FactDistinctColumnPartitioner.HLL_SHARD_BASE_PROPERTY_NAME + " is " + hllShardBase
-                        + ". It should be set correctly!!!");
+                throw new IllegalArgumentException(
+                        "In job configuration the value for property " + BatchConstants.CFG_HLL_SHARD_BASE
+                        + " is " + hllShardBase + ". It should be set correctly!!!");
             }
             ifCol = false;
             if (taskId >= numberOfTasks - hllShardBase) {