You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2017/02/20 09:00:40 UTC

kylin git commit: minor, move utf8Length to class StringUtil

Repository: kylin
Updated Branches:
  refs/heads/master bf8134010 -> c52b6981f


minor, move utf8Length to class StringUtil

Signed-off-by: Li Yang <li...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/c52b6981
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/c52b6981
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/c52b6981

Branch: refs/heads/master
Commit: c52b6981f9404a03dfa988eec67fa2dd23b99d94
Parents: bf81340
Author: Cheng Wang <ch...@kyligence.io>
Authored: Mon Feb 20 16:57:04 2017 +0800
Committer: Li Yang <li...@apache.org>
Committed: Mon Feb 20 17:00:34 2017 +0800

----------------------------------------------------------------------
 .../apache/kylin/common/util/StringUtil.java    | 19 +++++++++++++
 .../mr/steps/FactDistinctColumnsMapper.java     | 30 +++++---------------
 2 files changed, 26 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/c52b6981/core-common/src/main/java/org/apache/kylin/common/util/StringUtil.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/util/StringUtil.java b/core-common/src/main/java/org/apache/kylin/common/util/StringUtil.java
index 96d294b..964bf0d 100644
--- a/core-common/src/main/java/org/apache/kylin/common/util/StringUtil.java
+++ b/core-common/src/main/java/org/apache/kylin/common/util/StringUtil.java
@@ -164,4 +164,23 @@ public class StringUtil {
         return r.toArray(new String[r.size()]);
     }
 
+    // Returns the UTF-8 byte length of a CharSequence without encoding it; a valid surrogate pair is 4 bytes, a lone surrogate 3.
+    public static int utf8Length(CharSequence sequence) {
+        int count = 0;
+        for (int i = 0, len = sequence.length(); i < len; i++) {
+            char ch = sequence.charAt(i);
+            if (ch <= 0x7F) {
+                count++;
+            } else if (ch <= 0x7FF) {
+                count += 2;
+            } else if (Character.isHighSurrogate(ch) && i + 1 < len && Character.isLowSurrogate(sequence.charAt(i + 1))) {
+                count += 4;
+                ++i; // the low surrogate was counted as part of the pair
+            } else {
+                count += 3;
+            }
+        }
+        return count;
+    }
+
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/c52b6981/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java
index 9d0ff10..d9c1309 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java
@@ -26,6 +26,7 @@ import java.util.List;
 import org.apache.hadoop.io.Text;
 import org.apache.kylin.common.util.ByteArray;
 import org.apache.kylin.common.util.Bytes;
+import org.apache.kylin.common.util.StringUtil;
 import org.apache.kylin.cube.cuboid.CuboidScheduler;
 import org.apache.kylin.engine.mr.common.BatchConstants;
 import org.apache.kylin.measure.BufferedMeasureCodec;
@@ -44,8 +45,10 @@ import com.google.common.hash.Hashing;
 public class FactDistinctColumnsMapper<KEYIN> extends FactDistinctColumnsMapperBase<KEYIN, Object> {
 
     private static final Logger logger = LoggerFactory.getLogger(FactDistinctColumnsMapper.class);
-    
-    public static enum RawDataCounter { BYTES };
+
+    public enum RawDataCounter { // nested enums are implicitly static, so the modifier is omitted
+        BYTES // incremented in doMap by countSizeInBytes(row) for each mapped record
+    }
 
     protected boolean collectStatistics = false;
     protected CuboidScheduler cuboidScheduler = null;
@@ -132,7 +135,7 @@ public class FactDistinctColumnsMapper<KEYIN> extends FactDistinctColumnsMapperB
     @Override
     public void doMap(KEYIN key, Object record, Context context) throws IOException, InterruptedException {
         String[] row = flatTableInputFormat.parseMapperInput(record);
-        
+
         context.getCounter(RawDataCounter.BYTES).increment(countSizeInBytes(row));
 
         for (int i = 0; i < factDictCols.size(); i++) {
@@ -188,30 +191,11 @@ public class FactDistinctColumnsMapper<KEYIN> extends FactDistinctColumnsMapperB
     private long countSizeInBytes(String[] row) {
         int size = 0;
         for (String s : row) {
-            size += s == null ? 1 : utf8Length(s);
+            size += s == null ? 1 : StringUtil.utf8Length(s);
             size++; // delimiter
         }
         return size;
     }
-    
-    // calculating length in UTF-8 of Java String without actually encoding it
-    public static int utf8Length(CharSequence sequence) {
-        int count = 0;
-        for (int i = 0, len = sequence.length(); i < len; i++) {
-            char ch = sequence.charAt(i);
-            if (ch <= 0x7F) {
-                count++;
-            } else if (ch <= 0x7FF) {
-                count += 2;
-            } else if (Character.isHighSurrogate(ch)) {
-                count += 4;
-                ++i;
-            } else {
-                count += 3;
-            }
-        }
-        return count;
-    }
 
     private void putRowKeyToHLL(String[] row) {