You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2017/12/21 02:58:47 UTC

[02/16] kylin git commit: KYLIN-2866 minor refine

KYLIN-2866 minor refine


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/e7a32458
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/e7a32458
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/e7a32458

Branch: refs/heads/master
Commit: e7a32458c9f8b82345c029200db2658298a12815
Parents: ebafc76
Author: lidongsjtu <li...@apache.org>
Authored: Sun Dec 17 16:59:12 2017 +0800
Committer: lidongsjtu <li...@apache.org>
Committed: Wed Dec 20 23:20:11 2017 +0800

----------------------------------------------------------------------
 .../main/java/org/apache/kylin/common/KylinConfigBase.java  | 9 +++++----
 .../main/java/org/apache/kylin/common/util/HadoopUtil.java  | 4 ++--
 .../kylin/engine/mr/steps/MergeStatisticsWithOldStep.java   | 2 +-
 .../apache/kylin/engine/mr/steps/SaveStatisticsStep.java    | 4 ++--
 4 files changed, 10 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/e7a32458/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 23a2120..1b3aa03 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -1040,12 +1040,13 @@ abstract public class KylinConfigBase implements Serializable {
         return Integer.parseInt(getOptional("kylin.engine.mr.cuboid-number-per-stats-calculator", "100"));
     }
 
-    public int getFactDistinctJobPerReducerHLLCuboidNumber() {
-        return Integer.parseInt(getOptional("kylin.engine.mr.fact-distinct-per-reducer-hll-cuboid-number", "100"));
+    public int getHadoopJobPerReducerHLLCuboidNumber() {
+        return Integer.parseInt(getOptional("kylin.engine.mr.per-reducer-hll-cuboid-number", "100"));
     }
 
-    public int getFactDistinctJobHLLMaxReducerNumber() {
-        return Integer.parseInt(getOptional("kylin.engine.mr.fact-distinct-hll-max-reducer-number", "50"));
+    public int getHadoopJobHLLMaxReducerNumber() {
+        // by default multi-reducer hll calculation is disabled
+        return Integer.parseInt(getOptional("kylin.engine.mr.hll-max-reducer-number", "1"));
     }
 
     //UHC: ultra high cardinality columns, contain the ShardByColumns and the GlobalDictionaryColumns

http://git-wip-us.apache.org/repos/asf/kylin/blob/e7a32458/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java b/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java
index cafcaf2..64b9f46 100644
--- a/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java
+++ b/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java
@@ -161,7 +161,7 @@ public class HadoopUtil {
         }
     }
 
-    public static Path[] getFilterPath(FileSystem fs, Path baseDir, final String filter) throws IOException {
+    public static Path[] getFilteredPath(FileSystem fs, Path baseDir, final String prefix) throws IOException {
         if (fs.exists(baseDir) == false) {
             return null;
         }
@@ -169,7 +169,7 @@ public class HadoopUtil {
         FileStatus[] fileStatus = fs.listStatus(baseDir, new PathFilter() {
             @Override
             public boolean accept(Path path) {
-                return path.getName().startsWith(filter);
+                return path.getName().startsWith(prefix);
             }
         });
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/e7a32458/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsWithOldStep.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsWithOldStep.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsWithOldStep.java
index 7855c06..eca0499 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsWithOldStep.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsWithOldStep.java
@@ -85,7 +85,7 @@ public class MergeStatisticsWithOldStep extends AbstractExecutable {
                 throw new IOException("StatisticsFilePath " + statisticsDirPath + " is not a directory");
             }
 
-            Path[] statisticsFiles = HadoopUtil.getFilterPath(hdfs, statisticsDirPath,
+            Path[] statisticsFiles = HadoopUtil.getFilteredPath(hdfs, statisticsDirPath,
                     BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME);
             if (statisticsFiles == null) {
                 throw new IOException("fail to find the statistics file in base dir: " + statisticsDirPath);

http://git-wip-us.apache.org/repos/asf/kylin/blob/e7a32458/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/SaveStatisticsStep.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/SaveStatisticsStep.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/SaveStatisticsStep.java
index 99ebbef..cbf705c 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/SaveStatisticsStep.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/SaveStatisticsStep.java
@@ -70,7 +70,7 @@ public class SaveStatisticsStep extends AbstractExecutable {
             FileSystem fs = HadoopUtil.getWorkingFileSystem();
             Configuration hadoopConf = HadoopUtil.getCurrentConfiguration();
             Path statisticsDir = new Path(CubingExecutableUtil.getStatisticsPath(this.getParams()));
-            Path[] statisticsFiles = HadoopUtil.getFilterPath(fs, statisticsDir, BatchConstants.CFG_OUTPUT_STATISTICS);
+            Path[] statisticsFiles = HadoopUtil.getFilteredPath(fs, statisticsDir, BatchConstants.CFG_OUTPUT_STATISTICS);
             if (statisticsFiles == null) {
                 throw new IOException("fail to find the statistics file in base dir: " + statisticsDir);
             }
@@ -146,7 +146,7 @@ public class SaveStatisticsStep extends AbstractExecutable {
     private void logMapperAndCuboidStatistics(Map<Long, HLLCounter> cuboidHLLMap, int samplingPercentage,
             int mapperNumber, long grantTotal, long totalRowsBeforeMerge) throws IOException {
         logger.debug("Total cuboid number: \t" + cuboidHLLMap.size());
-        logger.debug("Samping percentage: \t" + samplingPercentage);
+        logger.debug("Sampling percentage: \t" + samplingPercentage);
         logger.debug("The following statistics are collected based on sampling data.");
         logger.debug("Number of Mappers: " + mapperNumber);