You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by sh...@apache.org on 2016/04/26 12:45:23 UTC

kylin git commit: KYLIN-1623 Make the hll precision for data sampling configurable

Repository: kylin
Updated Branches:
  refs/heads/master a22200730 -> e4ff6ef8a


KYLIN-1623 Make the hll precision for data sampling configurable


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/e4ff6ef8
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/e4ff6ef8
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/e4ff6ef8

Branch: refs/heads/master
Commit: e4ff6ef8af7893910240f62f4b811c1c8f8bc017
Parents: a222007
Author: shaofengshi <sh...@apache.org>
Authored: Tue Apr 26 18:43:14 2016 +0800
Committer: shaofengshi <sh...@apache.org>
Committed: Tue Apr 26 18:43:14 2016 +0800

----------------------------------------------------------------------
 .../src/main/java/org/apache/kylin/common/KylinConfigBase.java  | 4 ++++
 .../src/main/java/org/apache/kylin/cube/util/CubingUtils.java   | 2 +-
 .../java/org/apache/kylin/engine/mr/common/CubeStatsReader.java | 5 ++---
 .../kylin/engine/mr/steps/FactDistinctColumnsReducer.java       | 4 +++-
 .../kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java    | 2 +-
 .../org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java   | 2 +-
 .../main/java/org/apache/kylin/engine/spark/SparkCubing.java    | 2 +-
 7 files changed, 13 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/e4ff6ef8/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 931fbba..0e162bf 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -634,4 +634,8 @@ abstract public class KylinConfigBase implements Serializable {
     public int getDimCountDistinctMaxCardinality() {
         return Integer.parseInt(getOptional("kylin.query.dim.distinct.max", "5000000"));
     }
+
+    public int getCubeStatsHLLPrecision() {
+        return Integer.parseInt(getOptional("kylin.job.cubing.inmem.sampling.hll.precision", "14"));
+    }
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/e4ff6ef8/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java b/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java
index c541326..ac81318 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java
@@ -102,7 +102,7 @@ public class CubingUtils {
         });
         final Map<Long, HyperLogLogPlusCounter> result = Maps.newHashMapWithExpectedSize(allCuboidIds.size());
         for (Long cuboidId : allCuboidIds) {
-            result.put(cuboidId, new HyperLogLogPlusCounter(14));
+            result.put(cuboidId, new HyperLogLogPlusCounter(cubeDesc.getConfig().getCubeStatsHLLPrecision()));
             Integer[] cuboidBitSet = new Integer[Long.bitCount(cuboidId)];
 
             long mask = Long.highestOneBit(baseCuboidId);

http://git-wip-us.apache.org/repos/asf/kylin/blob/e4ff6ef8/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
index bac074b..44d5ce1 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
@@ -42,7 +42,6 @@ import org.apache.kylin.cube.cuboid.CuboidScheduler;
 import org.apache.kylin.cube.kv.CubeDimEncMap;
 import org.apache.kylin.cube.model.CubeDesc;
 import org.apache.kylin.engine.mr.HadoopUtil;
-import org.apache.kylin.engine.mr.steps.InMemCuboidJob;
 import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter;
 import org.apache.kylin.metadata.datatype.DataType;
 import org.apache.kylin.metadata.model.MeasureDesc;
@@ -68,7 +67,7 @@ import java.util.Map;
  */
 public class CubeStatsReader {
 
-    private static final Logger logger = LoggerFactory.getLogger(InMemCuboidJob.class);
+    private static final Logger logger = LoggerFactory.getLogger(CubeStatsReader.class);
 
     final CubeSegment seg;
     final int samplingPercentage;
@@ -100,7 +99,7 @@ public class CubeStatsReader {
                 } else if (key.get() == -1) {
                     mapperOverlapRatio = Bytes.toDouble(value.getBytes());
                 } else {
-                    HyperLogLogPlusCounter hll = new HyperLogLogPlusCounter(14);
+                    HyperLogLogPlusCounter hll = new HyperLogLogPlusCounter(kylinConfig.getCubeStatsHLLPrecision());
                     ByteArray byteArray = new ByteArray(value.getBytes());
                     hll.readRegisters(byteArray.asBuffer());
                     counterMap.put(key.get(), hll);

http://git-wip-us.apache.org/repos/asf/kylin/blob/e4ff6ef8/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
index 126eebd..c07738d 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
@@ -61,6 +61,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<Text, Text, NullWri
     private TblColRef col = null;
     private boolean isStatistics = false;
     private boolean outputTouched = false;
+    private KylinConfig cubeConfig;
 
     @Override
     protected void setup(Context context) throws IOException {
@@ -70,6 +71,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<Text, Text, NullWri
         KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
         String cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
         CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
+        cubeConfig = cube.getConfig();
         cubeDesc = cube.getDescriptor();
         columnList =  CubeManager.getInstance(config).getAllDictColumnsOnFact(cubeDesc);
 
@@ -106,7 +108,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<Text, Text, NullWri
             // for hll
             long cuboidId = Bytes.toLong(key.getBytes(), 1, Bytes.SIZEOF_LONG);
             for (Text value : values) {
-                HyperLogLogPlusCounter hll = new HyperLogLogPlusCounter(14);
+                HyperLogLogPlusCounter hll = new HyperLogLogPlusCounter(cubeConfig.getCubeStatsHLLPrecision());
                 ByteBuffer bf = ByteBuffer.wrap(value.getBytes(), 0, value.getLength());
                 hll.readRegisters(bf);
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/e4ff6ef8/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
index 8a130a7..2688919 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
@@ -73,7 +73,7 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap
 
             allCuboidsHLL = new HyperLogLogPlusCounter[cuboidIds.length];
             for (int i = 0; i < cuboidIds.length; i++) {
-                allCuboidsHLL[i] = new HyperLogLogPlusCounter(14);
+                allCuboidsHLL[i] = new HyperLogLogPlusCounter(cubeDesc.getConfig().getCubeStatsHLLPrecision());
             }
 
             hf = Hashing.murmur3_32();

http://git-wip-us.apache.org/repos/asf/kylin/blob/e4ff6ef8/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java
index 8c37fec..fa6f9e2 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java
@@ -97,7 +97,7 @@ public class MergeStatisticsStep extends AbstractExecutable {
                             // sampling percentage;
                             averageSamplingPercentage += Bytes.toInt(value.getBytes());
                         } else if (key.get() > 0) {
-                            HyperLogLogPlusCounter hll = new HyperLogLogPlusCounter(14);
+                            HyperLogLogPlusCounter hll = new HyperLogLogPlusCounter(kylinConf.getCubeStatsHLLPrecision());
                             ByteArray byteArray = new ByteArray(value.getBytes());
                             hll.readRegisters(byteArray.asBuffer());
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/e4ff6ef8/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubing.java
----------------------------------------------------------------------
diff --git a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubing.java b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubing.java
index ef35067..70c1032 100644
--- a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubing.java
+++ b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubing.java
@@ -246,7 +246,7 @@ public class SparkCubing extends AbstractApplication {
         List<Long> allCuboidIds = cuboidScheduler.getAllCuboidIds();
         final HashMap<Long, HyperLogLogPlusCounter> zeroValue = Maps.newHashMap();
         for (Long id : allCuboidIds) {
-            zeroValue.put(id, new HyperLogLogPlusCounter(14));
+            zeroValue.put(id, new HyperLogLogPlusCounter(cubeDesc.getConfig().getCubeStatsHLLPrecision()));
         }
 
         CubeJoinedFlatTableDesc flatTableDesc = new CubeJoinedFlatTableDesc(cubeDesc, null);