Posted to commits@kylin.apache.org by sh...@apache.org on 2016/11/08 06:19:00 UTC

kylin git commit: KYLIN-2167 FactDistinctColumnsReducer may get wrong max/min partition col value

Repository: kylin
Updated Branches:
  refs/heads/v1.6.0-rc1 cbee43ba3 -> 55bc8fb6e


KYLIN-2167 FactDistinctColumnsReducer may get wrong max/min partition col value


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/55bc8fb6
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/55bc8fb6
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/55bc8fb6

Branch: refs/heads/v1.6.0-rc1
Commit: 55bc8fb6e863760283a335b2b6e969a39f2c9aca
Parents: cbee43b
Author: shaofengshi <sh...@apache.org>
Authored: Tue Nov 8 14:18:49 2016 +0800
Committer: shaofengshi <sh...@apache.org>
Committed: Tue Nov 8 14:18:49 2016 +0800

----------------------------------------------------------------------
 .../mr/steps/FactDistinctColumnsReducer.java    |  8 ----
 .../mr/steps/UpdateCubeInfoAfterBuildStep.java  | 47 +++++++++-----------
 2 files changed, 22 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/55bc8fb6/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
index 2889ba8..97d3829 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
@@ -130,14 +130,6 @@ public class FactDistinctColumnsReducer extends KylinReducer<Text, Text, NullWri
                     cuboidHLLMap.put(cuboidId, hll);
                 }
             }
-        } else if (isPartitionCol == true) {
-            // for partition col min/max value
-            ByteArray value = new ByteArray(Bytes.copy(key.getBytes(), 1, key.getLength() - 1));
-            if (colValues.size() > 1) {
-                colValues.set(1, value);
-            } else {
-                colValues.add(value);
-            }
         } else {
             colValues.add(new ByteArray(Bytes.copy(key.getBytes(), 1, key.getLength() - 1)));
             if (colValues.size() == 1000000) { //spill every 1 million

http://git-wip-us.apache.org/repos/asf/kylin/blob/55bc8fb6/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UpdateCubeInfoAfterBuildStep.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UpdateCubeInfoAfterBuildStep.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UpdateCubeInfoAfterBuildStep.java
index 4e1be57..d6302b5 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UpdateCubeInfoAfterBuildStep.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UpdateCubeInfoAfterBuildStep.java
@@ -89,27 +89,8 @@ public class UpdateCubeInfoAfterBuildStep extends AbstractExecutable {
         final String outputPath = this.getParams().get(BatchConstants.CFG_OUTPUT_PATH);
         final Path outputFile = new Path(outputPath, partitionCol.getName());
 
-        String minValue = null, maxValue = null, currentValue = null;
-        FSDataInputStream inputStream = null;
-        BufferedReader bufferedReader = null;
-        try {
-            FileSystem fs = HadoopUtil.getFileSystem(outputPath);
-            inputStream = fs.open(outputFile);
-            bufferedReader = new BufferedReader(new InputStreamReader(inputStream));
-            minValue = currentValue = bufferedReader.readLine();
-            while (currentValue != null) {
-                maxValue = currentValue;
-                currentValue = bufferedReader.readLine();
-            }
-        } catch (IOException e) {
-            throw e;
-        } finally {
-            IOUtils.closeQuietly(bufferedReader);
-            IOUtils.closeQuietly(inputStream);
-        }
-
         final DataType partitionColType = partitionCol.getType();
-        FastDateFormat dateFormat;
+        final FastDateFormat dateFormat;
         if (partitionColType.isDate()) {
             dateFormat = DateFormat.getDateFormat(DateFormat.DEFAULT_DATE_PATTERN);
         } else if (partitionColType.isDatetime() || partitionColType.isTimestamp()) {
@@ -124,14 +105,30 @@ public class UpdateCubeInfoAfterBuildStep extends AbstractExecutable {
             throw new IllegalStateException("Type " + partitionColType + " is not valid partition column type");
         }
 
+        long minValue = Long.MAX_VALUE, maxValue = Long.MIN_VALUE;
+        String currentValue;
+        FSDataInputStream inputStream = null;
+        BufferedReader bufferedReader = null;
         try {
-            long startTime = dateFormat.parse(minValue).getTime();
-            long endTime = dateFormat.parse(maxValue).getTime();
-            segment.setDateRangeStart(startTime);
-            segment.setDateRangeEnd(endTime);
+            FileSystem fs = HadoopUtil.getFileSystem(outputPath);
+            inputStream = fs.open(outputFile);
+            bufferedReader = new BufferedReader(new InputStreamReader(inputStream));
+            currentValue = bufferedReader.readLine();
+            while (currentValue != null) {
+                long time = dateFormat.parse(currentValue).getTime();
+                minValue = Math.min(time, minValue);
+                maxValue = Math.max(time, maxValue);
+                currentValue = bufferedReader.readLine();
+            }
         } catch (Exception e) {
-            throw new IllegalStateException(e);
+            throw new IOException(e);
+        } finally {
+            IOUtils.closeQuietly(bufferedReader);
+            IOUtils.closeQuietly(inputStream);
         }
+
+        segment.setDateRangeStart(minValue);
+        segment.setDateRangeEnd(maxValue);
     }
 
 }
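
The rewritten step no longer trusts line order: it parses every line of the partition-column output file and folds the timestamps through Math.min/Math.max. A minimal, self-contained sketch of that fold, assuming a local file and java.text.SimpleDateFormat in place of HDFS and Kylin's DateFormat helper (the class name and date pattern here are illustrative, not from the patch):

import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.text.ParseException;
import java.text.SimpleDateFormat;

public class PartitionRangeScan {
    public static void main(String[] args) throws IOException, ParseException {
        // Stand-in for the dateFormat chosen from the partition column's type.
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
        long minValue = Long.MAX_VALUE, maxValue = Long.MIN_VALUE;

        // Parse every line and fold min/max; no reliance on the file being sorted.
        try (BufferedReader bufferedReader = Files.newBufferedReader(Paths.get(args[0]))) {
            String currentValue;
            while ((currentValue = bufferedReader.readLine()) != null) {
                long time = dateFormat.parse(currentValue).getTime();
                minValue = Math.min(time, minValue);
                maxValue = Math.max(time, maxValue);
            }
        }
        System.out.println("dateRangeStart=" + minValue + ", dateRangeEnd=" + maxValue);
    }
}

As in the patched step, an empty input file would leave minValue greater than maxValue, so a caller should only apply the values to the segment's date range once at least one line has been read.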