You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2015/12/25 10:46:37 UTC
kylin git commit: minor, set default of kylin.job.cubing.inMem.sampling.percent to 30
Repository: kylin
Updated Branches:
refs/heads/2.0-rc 45d021b73 -> 07b1c6aaa
minor, set default of kylin.job.cubing.inMem.sampling.percent to 30
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/07b1c6aa
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/07b1c6aa
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/07b1c6aa
Branch: refs/heads/2.0-rc
Commit: 07b1c6aaa24ef93ebf8134383a774ec22ac9500a
Parents: 45d021b
Author: Li, Yang <ya...@ebay.com>
Authored: Fri Dec 25 17:46:30 2015 +0800
Committer: Li, Yang <ya...@ebay.com>
Committed: Fri Dec 25 17:46:30 2015 +0800
----------------------------------------------------------------------
build/conf/kylin_job_conf.xml | 2 ++
.../java/org/apache/kylin/common/KylinConfigBase.java | 2 +-
.../kylin/engine/mr/steps/FactDistinctColumnsReducer.java | 4 ++--
.../engine/mr/steps/FactDistinctHiveColumnsMapper.java | 10 +++-------
4 files changed, 8 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/07b1c6aa/build/conf/kylin_job_conf.xml
----------------------------------------------------------------------
diff --git a/build/conf/kylin_job_conf.xml b/build/conf/kylin_job_conf.xml
index 099605f..aab6e09 100644
--- a/build/conf/kylin_job_conf.xml
+++ b/build/conf/kylin_job_conf.xml
@@ -70,6 +70,7 @@ limitations under the License. See accompanying LICENSE file.
</property>
<!-- Properties for calculating cube by splits (in-mem), with which each Mapper need more mem to hold a full cube segment -->
+ <!--
<property>
<name>mapreduce.map.java.opts</name>
<value>-Xmx2500m</value>
@@ -88,5 +89,6 @@ limitations under the License. See accompanying LICENSE file.
<value>3600000</value>
<description>Set task timeout to 1 hour</description>
</property>
+ -->
</configuration>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/kylin/blob/07b1c6aa/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 302a2db..aee1bd8 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -445,7 +445,7 @@ public class KylinConfigBase implements Serializable {
}
public int getCubingInMemSamplingPercent() {
- int percent = Integer.parseInt(this.getOptional("kylin.job.cubing.inMem.sampling.percent", "100"));
+ int percent = Integer.parseInt(this.getOptional("kylin.job.cubing.inMem.sampling.percent", "30"));
percent = Math.max(percent, 1);
percent = Math.min(percent, 100);
return percent;
http://git-wip-us.apache.org/repos/asf/kylin/blob/07b1c6aa/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
index 5fa37fa..0416c3a 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
@@ -61,7 +61,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<LongWritable, Text,
protected long baseCuboidId;
protected CubeDesc cubeDesc;
private long totalRowsBeforeMerge = 0;
- private int samplingPercentage = 100;
+ private int samplingPercentage;
@Override
protected void setup(Context context) throws IOException {
@@ -80,7 +80,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<LongWritable, Text,
if (collectStatistics) {
baseCuboidRowCountInMappers = Lists.newArrayList();
cuboidHLLMap = Maps.newHashMap();
- samplingPercentage = Integer.parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT, "100"));
+ samplingPercentage = Integer.parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT));
}
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/07b1c6aa/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
index e43d5d1..22c4357 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
@@ -23,22 +23,18 @@ import java.nio.ByteBuffer;
import java.util.BitSet;
import java.util.Collection;
import java.util.List;
-import java.util.Map;
-import com.google.common.collect.Maps;
import org.apache.kylin.common.hll.HyperLogLogPlusCounter;
import org.apache.kylin.common.util.ByteArray;
import org.apache.kylin.common.util.Bytes;
import org.apache.kylin.cube.cuboid.CuboidScheduler;
import org.apache.kylin.cube.kv.RowConstants;
-import org.apache.kylin.cube.model.CubeJoinedFlatTableDesc;
import org.apache.kylin.engine.mr.common.BatchConstants;
import com.google.common.collect.Lists;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hasher;
import com.google.common.hash.Hashing;
-import org.apache.kylin.metadata.model.TblColRef;
/**
* @author yangli9
@@ -53,7 +49,7 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap
private Long[] cuboidIds;
private HashFunction hf = null;
private int rowCount = 0;
- private int SAMPING_PERCENTAGE = 5;
+ private int samplingPercentage;
private ByteArray[] row_hashcodes = null;
@Override
@@ -63,7 +59,7 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap
collectStatistics = Boolean.parseBoolean(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_ENABLED));
if (collectStatistics) {
- SAMPING_PERCENTAGE = Integer.parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT, "5"));
+ samplingPercentage = Integer.parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT));
cuboidScheduler = new CuboidScheduler(cubeDesc);
nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length;
@@ -126,7 +122,7 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap
handleErrorRecord(row, ex);
}
- if (collectStatistics && rowCount < SAMPING_PERCENTAGE) {
+ if (collectStatistics && rowCount < samplingPercentage) {
putRowKeyToHLL(row);
}