You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2017/02/20 09:00:40 UTC
kylin git commit: minor, move utf8Length to class StringUtil
Repository: kylin
Updated Branches:
refs/heads/master bf8134010 -> c52b6981f
minor, move utf8Length to class StringUtil
Signed-off-by: Li Yang <li...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/c52b6981
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/c52b6981
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/c52b6981
Branch: refs/heads/master
Commit: c52b6981f9404a03dfa988eec67fa2dd23b99d94
Parents: bf81340
Author: Cheng Wang <ch...@kyligence.io>
Authored: Mon Feb 20 16:57:04 2017 +0800
Committer: Li Yang <li...@apache.org>
Committed: Mon Feb 20 17:00:34 2017 +0800
----------------------------------------------------------------------
.../apache/kylin/common/util/StringUtil.java | 19 +++++++++++++
.../mr/steps/FactDistinctColumnsMapper.java | 30 +++++---------------
2 files changed, 26 insertions(+), 23 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/c52b6981/core-common/src/main/java/org/apache/kylin/common/util/StringUtil.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/util/StringUtil.java b/core-common/src/main/java/org/apache/kylin/common/util/StringUtil.java
index 96d294b..964bf0d 100644
--- a/core-common/src/main/java/org/apache/kylin/common/util/StringUtil.java
+++ b/core-common/src/main/java/org/apache/kylin/common/util/StringUtil.java
@@ -164,4 +164,23 @@ public class StringUtil {
return r.toArray(new String[r.size()]);
}
+ // calculating length in UTF-8 of Java String without actually encoding it
+ public static int utf8Length(CharSequence sequence) {
+ int count = 0;
+ for (int i = 0, len = sequence.length(); i < len; i++) {
+ char ch = sequence.charAt(i);
+ if (ch <= 0x7F) {
+ count++;
+ } else if (ch <= 0x7FF) {
+ count += 2;
+ } else if (Character.isHighSurrogate(ch)) {
+ count += 4;
+ ++i;
+ } else {
+ count += 3;
+ }
+ }
+ return count;
+ }
+
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/c52b6981/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java
index 9d0ff10..d9c1309 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java
@@ -26,6 +26,7 @@ import java.util.List;
import org.apache.hadoop.io.Text;
import org.apache.kylin.common.util.ByteArray;
import org.apache.kylin.common.util.Bytes;
+import org.apache.kylin.common.util.StringUtil;
import org.apache.kylin.cube.cuboid.CuboidScheduler;
import org.apache.kylin.engine.mr.common.BatchConstants;
import org.apache.kylin.measure.BufferedMeasureCodec;
@@ -44,8 +45,10 @@ import com.google.common.hash.Hashing;
public class FactDistinctColumnsMapper<KEYIN> extends FactDistinctColumnsMapperBase<KEYIN, Object> {
private static final Logger logger = LoggerFactory.getLogger(FactDistinctColumnsMapper.class);
-
- public static enum RawDataCounter { BYTES };
+
+ public static enum RawDataCounter {
+ BYTES
+ };
protected boolean collectStatistics = false;
protected CuboidScheduler cuboidScheduler = null;
@@ -132,7 +135,7 @@ public class FactDistinctColumnsMapper<KEYIN> extends FactDistinctColumnsMapperB
@Override
public void doMap(KEYIN key, Object record, Context context) throws IOException, InterruptedException {
String[] row = flatTableInputFormat.parseMapperInput(record);
-
+
context.getCounter(RawDataCounter.BYTES).increment(countSizeInBytes(row));
for (int i = 0; i < factDictCols.size(); i++) {
@@ -188,30 +191,11 @@ public class FactDistinctColumnsMapper<KEYIN> extends FactDistinctColumnsMapperB
private long countSizeInBytes(String[] row) {
int size = 0;
for (String s : row) {
- size += s == null ? 1 : utf8Length(s);
+ size += s == null ? 1 : StringUtil.utf8Length(s);
size++; // delimiter
}
return size;
}
-
- // calculating length in UTF-8 of Java String without actually encoding it
- public static int utf8Length(CharSequence sequence) {
- int count = 0;
- for (int i = 0, len = sequence.length(); i < len; i++) {
- char ch = sequence.charAt(i);
- if (ch <= 0x7F) {
- count++;
- } else if (ch <= 0x7FF) {
- count += 2;
- } else if (Character.isHighSurrogate(ch)) {
- count += 4;
- ++i;
- } else {
- count += 3;
- }
- }
- return count;
- }
private void putRowKeyToHLL(String[] row) {