You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by sh...@apache.org on 2016/04/26 12:45:23 UTC
kylin git commit: KYLIN-1623 Make the hll precision for data sampling
configurable
Repository: kylin
Updated Branches:
refs/heads/master a22200730 -> e4ff6ef8a
KYLIN-1623 Make the hll precision for data sampling configurable
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/e4ff6ef8
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/e4ff6ef8
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/e4ff6ef8
Branch: refs/heads/master
Commit: e4ff6ef8af7893910240f62f4b811c1c8f8bc017
Parents: a222007
Author: shaofengshi <sh...@apache.org>
Authored: Tue Apr 26 18:43:14 2016 +0800
Committer: shaofengshi <sh...@apache.org>
Committed: Tue Apr 26 18:43:14 2016 +0800
----------------------------------------------------------------------
.../src/main/java/org/apache/kylin/common/KylinConfigBase.java | 4 ++++
.../src/main/java/org/apache/kylin/cube/util/CubingUtils.java | 2 +-
.../java/org/apache/kylin/engine/mr/common/CubeStatsReader.java | 5 ++---
.../kylin/engine/mr/steps/FactDistinctColumnsReducer.java | 4 +++-
.../kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java | 2 +-
.../org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java | 2 +-
.../main/java/org/apache/kylin/engine/spark/SparkCubing.java | 2 +-
7 files changed, 13 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/e4ff6ef8/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 931fbba..0e162bf 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -634,4 +634,8 @@ abstract public class KylinConfigBase implements Serializable {
public int getDimCountDistinctMaxCardinality() {
return Integer.parseInt(getOptional("kylin.query.dim.distinct.max", "5000000"));
}
+
+ public int getCubeStatsHLLPrecision() {
+ return Integer.parseInt(getOptional("kylin.job.cubing.inmem.sampling.hll.precision", "14"));
+ }
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/e4ff6ef8/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java b/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java
index c541326..ac81318 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java
@@ -102,7 +102,7 @@ public class CubingUtils {
});
final Map<Long, HyperLogLogPlusCounter> result = Maps.newHashMapWithExpectedSize(allCuboidIds.size());
for (Long cuboidId : allCuboidIds) {
- result.put(cuboidId, new HyperLogLogPlusCounter(14));
+ result.put(cuboidId, new HyperLogLogPlusCounter(cubeDesc.getConfig().getCubeStatsHLLPrecision()));
Integer[] cuboidBitSet = new Integer[Long.bitCount(cuboidId)];
long mask = Long.highestOneBit(baseCuboidId);
http://git-wip-us.apache.org/repos/asf/kylin/blob/e4ff6ef8/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
index bac074b..44d5ce1 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
@@ -42,7 +42,6 @@ import org.apache.kylin.cube.cuboid.CuboidScheduler;
import org.apache.kylin.cube.kv.CubeDimEncMap;
import org.apache.kylin.cube.model.CubeDesc;
import org.apache.kylin.engine.mr.HadoopUtil;
-import org.apache.kylin.engine.mr.steps.InMemCuboidJob;
import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter;
import org.apache.kylin.metadata.datatype.DataType;
import org.apache.kylin.metadata.model.MeasureDesc;
@@ -68,7 +67,7 @@ import java.util.Map;
*/
public class CubeStatsReader {
- private static final Logger logger = LoggerFactory.getLogger(InMemCuboidJob.class);
+ private static final Logger logger = LoggerFactory.getLogger(CubeStatsReader.class);
final CubeSegment seg;
final int samplingPercentage;
@@ -100,7 +99,7 @@ public class CubeStatsReader {
} else if (key.get() == -1) {
mapperOverlapRatio = Bytes.toDouble(value.getBytes());
} else {
- HyperLogLogPlusCounter hll = new HyperLogLogPlusCounter(14);
+ HyperLogLogPlusCounter hll = new HyperLogLogPlusCounter(kylinConfig.getCubeStatsHLLPrecision());
ByteArray byteArray = new ByteArray(value.getBytes());
hll.readRegisters(byteArray.asBuffer());
counterMap.put(key.get(), hll);
http://git-wip-us.apache.org/repos/asf/kylin/blob/e4ff6ef8/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
index 126eebd..c07738d 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
@@ -61,6 +61,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<Text, Text, NullWri
private TblColRef col = null;
private boolean isStatistics = false;
private boolean outputTouched = false;
+ private KylinConfig cubeConfig;
@Override
protected void setup(Context context) throws IOException {
@@ -70,6 +71,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<Text, Text, NullWri
KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
String cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
+ cubeConfig = cube.getConfig();
cubeDesc = cube.getDescriptor();
columnList = CubeManager.getInstance(config).getAllDictColumnsOnFact(cubeDesc);
@@ -106,7 +108,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<Text, Text, NullWri
// for hll
long cuboidId = Bytes.toLong(key.getBytes(), 1, Bytes.SIZEOF_LONG);
for (Text value : values) {
- HyperLogLogPlusCounter hll = new HyperLogLogPlusCounter(14);
+ HyperLogLogPlusCounter hll = new HyperLogLogPlusCounter(cubeConfig.getCubeStatsHLLPrecision());
ByteBuffer bf = ByteBuffer.wrap(value.getBytes(), 0, value.getLength());
hll.readRegisters(bf);
http://git-wip-us.apache.org/repos/asf/kylin/blob/e4ff6ef8/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
index 8a130a7..2688919 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
@@ -73,7 +73,7 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap
allCuboidsHLL = new HyperLogLogPlusCounter[cuboidIds.length];
for (int i = 0; i < cuboidIds.length; i++) {
- allCuboidsHLL[i] = new HyperLogLogPlusCounter(14);
+ allCuboidsHLL[i] = new HyperLogLogPlusCounter(cubeDesc.getConfig().getCubeStatsHLLPrecision());
}
hf = Hashing.murmur3_32();
http://git-wip-us.apache.org/repos/asf/kylin/blob/e4ff6ef8/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java
index 8c37fec..fa6f9e2 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java
@@ -97,7 +97,7 @@ public class MergeStatisticsStep extends AbstractExecutable {
// sampling percentage;
averageSamplingPercentage += Bytes.toInt(value.getBytes());
} else if (key.get() > 0) {
- HyperLogLogPlusCounter hll = new HyperLogLogPlusCounter(14);
+ HyperLogLogPlusCounter hll = new HyperLogLogPlusCounter(kylinConf.getCubeStatsHLLPrecision());
ByteArray byteArray = new ByteArray(value.getBytes());
hll.readRegisters(byteArray.asBuffer());
http://git-wip-us.apache.org/repos/asf/kylin/blob/e4ff6ef8/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubing.java
----------------------------------------------------------------------
diff --git a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubing.java b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubing.java
index ef35067..70c1032 100644
--- a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubing.java
+++ b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubing.java
@@ -246,7 +246,7 @@ public class SparkCubing extends AbstractApplication {
List<Long> allCuboidIds = cuboidScheduler.getAllCuboidIds();
final HashMap<Long, HyperLogLogPlusCounter> zeroValue = Maps.newHashMap();
for (Long id : allCuboidIds) {
- zeroValue.put(id, new HyperLogLogPlusCounter(14));
+ zeroValue.put(id, new HyperLogLogPlusCounter(cubeDesc.getConfig().getCubeStatsHLLPrecision()));
}
CubeJoinedFlatTableDesc flatTableDesc = new CubeJoinedFlatTableDesc(cubeDesc, null);