You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2015/12/25 10:46:37 UTC

kylin git commit: minor, set default of kylin.job.cubing.inMem.sampling.percent to 30

Repository: kylin
Updated Branches:
  refs/heads/2.0-rc 45d021b73 -> 07b1c6aaa


minor, set default of kylin.job.cubing.inMem.sampling.percent to 30


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/07b1c6aa
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/07b1c6aa
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/07b1c6aa

Branch: refs/heads/2.0-rc
Commit: 07b1c6aaa24ef93ebf8134383a774ec22ac9500a
Parents: 45d021b
Author: Li, Yang <ya...@ebay.com>
Authored: Fri Dec 25 17:46:30 2015 +0800
Committer: Li, Yang <ya...@ebay.com>
Committed: Fri Dec 25 17:46:30 2015 +0800

----------------------------------------------------------------------
 build/conf/kylin_job_conf.xml                             |  2 ++
 .../java/org/apache/kylin/common/KylinConfigBase.java     |  2 +-
 .../kylin/engine/mr/steps/FactDistinctColumnsReducer.java |  4 ++--
 .../engine/mr/steps/FactDistinctHiveColumnsMapper.java    | 10 +++-------
 4 files changed, 8 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/07b1c6aa/build/conf/kylin_job_conf.xml
----------------------------------------------------------------------
diff --git a/build/conf/kylin_job_conf.xml b/build/conf/kylin_job_conf.xml
index 099605f..aab6e09 100644
--- a/build/conf/kylin_job_conf.xml
+++ b/build/conf/kylin_job_conf.xml
@@ -70,6 +70,7 @@ limitations under the License. See accompanying LICENSE file.
     </property>
 
     <!-- Properties for calculating cube by splits (in-mem), with which each Mapper need more mem to hold a full cube segment -->
+    <!--
     <property>
         <name>mapreduce.map.java.opts</name>
         <value>-Xmx2500m</value>
@@ -88,5 +89,6 @@ limitations under the License. See accompanying LICENSE file.
         <value>3600000</value>
         <description>Set task timeout to 1 hour</description>
     </property>
+    -->
 
 </configuration>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/kylin/blob/07b1c6aa/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 302a2db..aee1bd8 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -445,7 +445,7 @@ public class KylinConfigBase implements Serializable {
     }
 
     public int getCubingInMemSamplingPercent() {
-        int percent = Integer.parseInt(this.getOptional("kylin.job.cubing.inMem.sampling.percent", "100"));
+        int percent = Integer.parseInt(this.getOptional("kylin.job.cubing.inMem.sampling.percent", "30"));
         percent = Math.max(percent, 1);
         percent = Math.min(percent, 100);
         return percent;

http://git-wip-us.apache.org/repos/asf/kylin/blob/07b1c6aa/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
index 5fa37fa..0416c3a 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
@@ -61,7 +61,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<LongWritable, Text,
     protected long baseCuboidId;
     protected CubeDesc cubeDesc;
     private long totalRowsBeforeMerge = 0;
-    private int samplingPercentage = 100;
+    private int samplingPercentage;
 
     @Override
     protected void setup(Context context) throws IOException {
@@ -80,7 +80,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<LongWritable, Text,
         if (collectStatistics) {
             baseCuboidRowCountInMappers = Lists.newArrayList();
             cuboidHLLMap = Maps.newHashMap();
-            samplingPercentage = Integer.parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT, "100"));
+            samplingPercentage = Integer.parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT));
         }
     }
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/07b1c6aa/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
index e43d5d1..22c4357 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
@@ -23,22 +23,18 @@ import java.nio.ByteBuffer;
 import java.util.BitSet;
 import java.util.Collection;
 import java.util.List;
-import java.util.Map;
 
-import com.google.common.collect.Maps;
 import org.apache.kylin.common.hll.HyperLogLogPlusCounter;
 import org.apache.kylin.common.util.ByteArray;
 import org.apache.kylin.common.util.Bytes;
 import org.apache.kylin.cube.cuboid.CuboidScheduler;
 import org.apache.kylin.cube.kv.RowConstants;
-import org.apache.kylin.cube.model.CubeJoinedFlatTableDesc;
 import org.apache.kylin.engine.mr.common.BatchConstants;
 
 import com.google.common.collect.Lists;
 import com.google.common.hash.HashFunction;
 import com.google.common.hash.Hasher;
 import com.google.common.hash.Hashing;
-import org.apache.kylin.metadata.model.TblColRef;
 
 /**
  * @author yangli9
@@ -53,7 +49,7 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap
     private Long[] cuboidIds;
     private HashFunction hf = null;
     private int rowCount = 0;
-    private int SAMPING_PERCENTAGE = 5;
+    private int samplingPercentage;
     private ByteArray[] row_hashcodes = null;
 
     @Override
@@ -63,7 +59,7 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap
         
         collectStatistics = Boolean.parseBoolean(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_ENABLED));
         if (collectStatistics) {
-            SAMPING_PERCENTAGE = Integer.parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT, "5"));
+            samplingPercentage = Integer.parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT));
             cuboidScheduler = new CuboidScheduler(cubeDesc);
             nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length;
 
@@ -126,7 +122,7 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap
             handleErrorRecord(row, ex);
         }
 
-        if (collectStatistics && rowCount < SAMPING_PERCENTAGE) {
+        if (collectStatistics && rowCount < samplingPercentage) {
             putRowKeyToHLL(row);
         }