You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2018/06/30 12:22:52 UTC

[kylin] 06/12: KYLIN-3370 refactor the check of Hive \N

This is an automated email from the ASF dual-hosted git repository.

liyang pushed a commit to branch sync
in repository https://gitbox.apache.org/repos/asf/kylin.git

commit 6cb90482b2909223046aa2f90be402fd4ca0ea22
Author: Li Yang <li...@apache.org>
AuthorDate: Wed May 23 22:36:26 2018 +0800

    KYLIN-3370 refactor the check of Hive \N
---
 .../cube/inmemcubing/InputConverterUnitForRawData.java      |  2 --
 .../apache/kylin/cube/model/CubeJoinedFlatTableDesc.java    | 13 +++----------
 .../apache/kylin/engine/mr/common/BaseCuboidBuilder.java    |  3 +--
 .../apache/kylin/engine/mr/steps/BaseCuboidMapperBase.java  |  9 ++++-----
 .../main/java/org/apache/kylin/source/hive/HiveMRInput.java |  1 -
 .../java/org/apache/kylin/source/hive/HiveTableReader.java  |  2 +-
 6 files changed, 9 insertions(+), 21 deletions(-)

diff --git a/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/InputConverterUnitForRawData.java b/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/InputConverterUnitForRawData.java
index fc34f37..2ff7ee0 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/InputConverterUnitForRawData.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/InputConverterUnitForRawData.java
@@ -43,7 +43,6 @@ public class InputConverterUnitForRawData implements InputConverterUnit<String[]
     @SuppressWarnings("unused")
     private static final Logger logger = LoggerFactory.getLogger(InputConverterUnitForRawData.class);
     
-    public static final byte[] HIVE_NULL = Bytes.toBytes("\\N");
     public static final String[] END_ROW = new String[0];
     public static final String[] CUT_ROW = { "" };
 
@@ -149,7 +148,6 @@ public class InputConverterUnitForRawData implements InputConverterUnit<String[]
 
     private void initNullBytes(CubeDesc cubeDesc) {
         nullBytes = Lists.newArrayList();
-        nullBytes.add(HIVE_NULL);
         String[] nullStrings = cubeDesc.getNullStrings();
         if (nullStrings != null) {
             for (String s : nullStrings) {
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeJoinedFlatTableDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeJoinedFlatTableDesc.java
index 2ab7aac..467a294 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeJoinedFlatTableDesc.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeJoinedFlatTableDesc.java
@@ -22,7 +22,6 @@ import java.io.Serializable;
 import java.util.List;
 import java.util.Map;
 
-import org.apache.kylin.common.util.BytesSplitter;
 import org.apache.kylin.cube.CubeSegment;
 import org.apache.kylin.metadata.MetadataConstants;
 import org.apache.kylin.metadata.model.DataModelDesc;
@@ -147,16 +146,10 @@ public class CubeJoinedFlatTableDesc implements IJoinedFlatTableDesc, Serializab
         return factColumns;
     }
 
-    // sanity check the input record (in bytes) matches what's expected
-    public void sanityCheck(BytesSplitter bytesSplitter) {
-        if (columnCount != bytesSplitter.getBufferSize()) {
-            throw new IllegalArgumentException("Expect " + columnCount + " columns, but see "
-                    + bytesSplitter.getBufferSize() + " -- " + bytesSplitter);
-        }
-
-        // TODO: check data types here
+    public int getColumnCount() {
+        return columnCount;
     }
-
+    
     @Override
     public String getTableName() {
         return tableName;
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BaseCuboidBuilder.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BaseCuboidBuilder.java
index 40f1ac5..5dd55b2 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BaseCuboidBuilder.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BaseCuboidBuilder.java
@@ -43,10 +43,10 @@ import com.google.common.collect.Sets;
 
 /**
  */
+@SuppressWarnings("serial")
 public class BaseCuboidBuilder implements java.io.Serializable {
 
     protected static final Logger logger = LoggerFactory.getLogger(BaseCuboidBuilder.class);
-    public static final String HIVE_NULL = "\\N";
     protected String cubeName;
     protected Cuboid baseCuboid;
     protected CubeDesc cubeDesc;
@@ -95,7 +95,6 @@ public class BaseCuboidBuilder implements java.io.Serializable {
 
     private void initNullBytes() {
         nullStrs = Sets.newHashSet();
-        nullStrs.add(HIVE_NULL);
         String[] nullStrings = cubeDesc.getNullStrings();
         if (nullStrings != null) {
             for (String s : nullStrings) {
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/BaseCuboidMapperBase.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/BaseCuboidMapperBase.java
index 0ad4b9e..091f9a2 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/BaseCuboidMapperBase.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/BaseCuboidMapperBase.java
@@ -18,6 +18,10 @@
 
 package org.apache.kylin.engine.mr.steps;
 
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+
 import org.apache.hadoop.io.Text;
 import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.util.Bytes;
@@ -34,15 +38,10 @@ import org.apache.kylin.engine.mr.common.BatchConstants;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-
 /**
  */
 abstract public class BaseCuboidMapperBase<KEYIN, VALUEIN> extends KylinMapper<KEYIN, VALUEIN, Text, Text> {
     protected static final Logger logger = LoggerFactory.getLogger(BaseCuboidMapperBase.class);
-    public static final String HIVE_NULL = "\\N";
     public static final byte[] ONE = Bytes.toBytes("1");
     protected String cubeName;
     protected String segmentID;
diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMRInput.java b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMRInput.java
index 8653500..2ecc3b1 100644
--- a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMRInput.java
+++ b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMRInput.java
@@ -63,7 +63,6 @@ import com.google.common.collect.Sets;
 
 public class HiveMRInput implements IMRInput {
 
-    @SuppressWarnings("unused")
     private static final Logger logger = LoggerFactory.getLogger(HiveMRInput.class);
 
     public static String getTableNameForHCat(TableDesc table) {
diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveTableReader.java b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveTableReader.java
index 75f322f..4ebdf3d 100644
--- a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveTableReader.java
+++ b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveTableReader.java
@@ -126,7 +126,7 @@ public class HiveTableReader implements TableReader {
         String[] arr = new String[record.size()];
         for (int i = 0; i < arr.length; i++) {
             Object o = record.get(i);
-            arr[i] = (o == null) ? null : o.toString();
+            arr[i] = (o == null || "\\N".equals(o)) ? null : o.toString();
         }
         return arr;
     }