You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by sh...@apache.org on 2018/11/01 10:28:20 UTC

[kylin] branch master updated: KYLIN-3644 Fix SparkFactDistinct step NumberFormatException

This is an automated email from the ASF dual-hosted git repository.

shaofengshi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kylin.git


The following commit(s) were added to refs/heads/master by this push:
     new 6544143  KYLIN-3644 Fix SparkFactDistinct step NumberFormatException
6544143 is described below

commit 65441434e0aff410050009b4538ef3bf9e7af93f
Author: chao long <wa...@qq.com>
AuthorDate: Thu Nov 1 16:43:11 2018 +0800

    KYLIN-3644 Fix SparkFactDistinct step NumberFormatException
---
 .../src/main/java/org/apache/kylin/cube/util/KeyValueBuilder.java    | 2 +-
 .../main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java   | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/core-cube/src/main/java/org/apache/kylin/cube/util/KeyValueBuilder.java b/core-cube/src/main/java/org/apache/kylin/cube/util/KeyValueBuilder.java
index 0ba4fd8..0636a5c 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/util/KeyValueBuilder.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/util/KeyValueBuilder.java
@@ -56,7 +56,7 @@ public class KeyValueBuilder implements Serializable {
         }
     }
 
-    protected boolean isNull(String v) {
+    public boolean isNull(String v) {
         return nullStrs.contains(v);
     }
 
diff --git a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java
index 5cfd2d7..cdd0ac2 100644
--- a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java
+++ b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java
@@ -63,6 +63,7 @@ import org.apache.kylin.cube.DimensionRangeInfo;
 import org.apache.kylin.cube.cuboid.CuboidUtil;
 import org.apache.kylin.cube.model.CubeDesc;
 import org.apache.kylin.cube.model.CubeJoinedFlatTableEnrich;
+import org.apache.kylin.cube.util.KeyValueBuilder;
 import org.apache.kylin.dict.DictionaryGenerator;
 import org.apache.kylin.dict.IDictionaryBuilder;
 import org.apache.kylin.engine.EngineFactory;
@@ -250,6 +251,7 @@ public class SparkFactDistinct extends AbstractApplication implements Serializab
         private Map<Integer, DimensionRangeInfo> dimensionRangeInfoMap;
         private transient ByteBuffer tmpbuf;
         private LongAccumulator bytesWritten;
+        private KeyValueBuilder keyValueBuilder;
 
         public FlatOutputFucntion(String cubeName, String segmentId, String metaurl, SerializableConfiguration conf,
                 int samplingPercent, LongAccumulator bytesWritten) {
@@ -272,6 +274,7 @@ public class SparkFactDistinct extends AbstractApplication implements Serializab
                 CubeJoinedFlatTableEnrich intermediateTableDesc = new CubeJoinedFlatTableEnrich(
                         EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc);
 
+                keyValueBuilder = new KeyValueBuilder(intermediateTableDesc);
                 reducerMapping = new FactDistinctColumnsReducerMapping(cubeInstance);
                 tmpbuf = ByteBuffer.allocate(4096);
 
@@ -317,7 +320,7 @@ public class SparkFactDistinct extends AbstractApplication implements Serializab
 
                 for (int i = 0; i < allCols.size(); i++) {
                     String fieldValue = row[columnIndex[i]];
-                    if (fieldValue == null)
+                    if (fieldValue == null || keyValueBuilder.isNull(fieldValue))
                         continue;
 
                     final DataType type = allCols.get(i).getType();