You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2017/04/14 07:27:34 UTC
kylin git commit: KYLIN-2545 improve Number2BytesConverter to accept
malformed numbers
Repository: kylin
Updated Branches:
refs/heads/KYLIN-2545 [created] d896b26ee
KYLIN-2545 improve Number2BytesConverter to accept malformed numbers
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/d896b26e
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/d896b26e
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/d896b26e
Branch: refs/heads/KYLIN-2545
Commit: d896b26eee717cd7c8de154b2825203f4868c0db
Parents: 674410f
Author: Li Yang <li...@apache.org>
Authored: Fri Apr 14 15:26:32 2017 +0800
Committer: Li Yang <li...@apache.org>
Committed: Fri Apr 14 15:26:32 2017 +0800
----------------------------------------------------------------------
.../kylin/dict/Number2BytesConverter.java | 22 ++++-
.../mr/steps/NumberDictionaryForestTest.java | 86 ++++++++++++++++----
2 files changed, 88 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/d896b26e/core-dictionary/src/main/java/org/apache/kylin/dict/Number2BytesConverter.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/Number2BytesConverter.java b/core-dictionary/src/main/java/org/apache/kylin/dict/Number2BytesConverter.java
index 814c95a..397ca9f 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/Number2BytesConverter.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/Number2BytesConverter.java
@@ -17,9 +17,10 @@
*/
package org.apache.kylin.dict;
-import org.apache.kylin.common.util.Bytes;
-
import java.io.Serializable;
+import java.math.BigDecimal;
+
+import org.apache.kylin.common.util.Bytes;
/**
* Created by xiefan on 17-1-20.
@@ -59,12 +60,28 @@ public class Number2BytesConverter implements BytesConverter<String>, Serializab
@Override
public byte[] convertToBytes(String v) {
+ v = normalizeNumber(v);
NumberBytesCodec codec = getCodec(this.maxDigitsBeforeDecimalPoint);
byte[] num = Bytes.toBytes(v);
codec.encodeNumber(num, 0, num.length);
return Bytes.copy(codec.buf, codec.bufOffset, codec.bufLen);
}
+ public static String normalizeNumber(String v) {
+ boolean badBegin = (v.startsWith("0") && v.length() > 1 && v.charAt(1) != '.') //
+ || (v.startsWith("-0") && v.length() > 2 && v.charAt(2) != '.') //
+ || v.startsWith("+");
+ if (badBegin) {
+ v = new BigDecimal(v).toPlainString();
+ }
+
+ while (v.contains(".") && (v.endsWith("0") || v.endsWith("."))) {
+ v = v.substring(0, v.length() - 1);
+ }
+
+ return v;
+ }
+
@Override
public String convertFromBytes(byte[] b, int offset, int length) {
NumberBytesCodec codec = getCodec(this.maxDigitsBeforeDecimalPoint);
@@ -224,5 +241,4 @@ public class Number2BytesConverter implements BytesConverter<String>, Serializab
return out - offset;
}
}
-
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/kylin/blob/d896b26e/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java
index c31377c..414ab95 100644
--- a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java
+++ b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java
@@ -35,51 +35,56 @@ import java.util.Random;
import org.apache.hadoop.io.Text;
import org.apache.kylin.common.util.Bytes;
+import org.apache.kylin.dict.Number2BytesConverter;
import org.apache.kylin.dict.NumberDictionary;
import org.apache.kylin.dict.NumberDictionaryBuilder;
import org.apache.kylin.dict.NumberDictionaryForestBuilder;
import org.apache.kylin.dict.TrieDictionaryForest;
-import org.junit.Ignore;
import org.junit.Test;
/**
* Created by xiefan on 16-11-2.
*/
-
-
public class NumberDictionaryForestTest {
@Test
public void testNumberDictionaryForestLong() {
List<String> list = randomLongData(100);
- testData(list, SelfDefineSortableKey.TypeFlag.INTEGER_FAMILY_TYPE);
+ testData(list, list, SelfDefineSortableKey.TypeFlag.INTEGER_FAMILY_TYPE);
+ List<String> list2 = randomLongData(100);
+ testData(putInDregs(list2, false), list2, SelfDefineSortableKey.TypeFlag.INTEGER_FAMILY_TYPE);
}
@Test
public void testNumberDictionaryForestDouble() {
List<String> list = randomDoubleData(100);
- testData(list, SelfDefineSortableKey.TypeFlag.DOUBLE_FAMILY_TYPE);
+ testData(list, list, SelfDefineSortableKey.TypeFlag.DOUBLE_FAMILY_TYPE);
+ List<String> list2 = randomDoubleData(100);
+ testData(putInDregs(list2, true), list2, SelfDefineSortableKey.TypeFlag.DOUBLE_FAMILY_TYPE);
}
- private void testData(List<String> list, SelfDefineSortableKey.TypeFlag flag) {
+ private void testData(List<String> humanList, List<String> expectedList, SelfDefineSortableKey.TypeFlag flag) {
//stimulate map-reduce job
- ArrayList<SelfDefineSortableKey> keyList = createKeyList(list, (byte) flag.ordinal());
+ ArrayList<SelfDefineSortableKey> keyList = createKeyList(humanList, (byte) flag.ordinal());
Collections.sort(keyList);
+
//build tree
NumberDictionaryForestBuilder b = new NumberDictionaryForestBuilder(0, 0);
-
- for (SelfDefineSortableKey key : keyList) {
- String fieldValue = printKey(key);
- b.addValue(fieldValue);
+ expectedList = numberSort(expectedList);
+ for (String value : expectedList) {
+ b.addValue(value);
}
TrieDictionaryForest<String> dict = b.build();
dict.dump(System.out);
+
ArrayList<Integer> resultIds = new ArrayList<>();
- for (SelfDefineSortableKey key : keyList) {
+ for (int i = 0; i < keyList.size(); i++) {
+ SelfDefineSortableKey key = keyList.get(i);
String fieldValue = getFieldValue(key);
resultIds.add(dict.getIdFromValue(fieldValue));
- assertEquals(fieldValue, dict.getValueFromId(dict.getIdFromValue(fieldValue)));
+ assertEquals(expectedList.get(i), dict.getValueFromId(dict.getIdFromValue(fieldValue)));
}
+
assertTrue(isIncreasedOrder(resultIds, new Comparator<Integer>() {
@Override
public int compare(Integer o1, Integer o2) {
@@ -88,6 +93,18 @@ public class NumberDictionaryForestTest {
}));
}
+ private List<String> numberSort(List<String> list) {
+ ArrayList<String> result = new ArrayList<>(list);
+ Collections.sort(result, new Comparator<String>() {
+ @Override
+ public int compare(String o1, String o2) {
+ double d1 = Double.parseDouble(o1);
+ double d2 = Double.parseDouble(o2);
+ return Double.compare(d1, d2);
+ }});
+ return result;
+ }
+
@Test
public void serializeTest() {
List<String> testData = new ArrayList<>();
@@ -106,7 +123,6 @@ public class NumberDictionaryForestTest {
}
}
-
@Test
public void testVerySmallDouble() {
List<String> testData = new ArrayList<>();
@@ -148,8 +164,6 @@ public class NumberDictionaryForestTest {
assertTrue(dict1.getSizeOfId() == dict2.getSizeOfId());
assertTrue(dict1.getSizeOfValue() == dict2.getSizeOfValue());
- byte[] buf = new byte[dict1.getSizeOfValue()];
-
{
int newId = dict2.getIdFromValue(dict1.getValueFromId(0));
assertTrue(newId == 0);
@@ -165,7 +179,6 @@ public class NumberDictionaryForestTest {
}
}
- @Ignore
@Test
public void testDecimalsWithBeginZero() {
List<String> testData = new ArrayList<>();
@@ -221,6 +234,25 @@ public class NumberDictionaryForestTest {
return list;
}
+ private List<String> putInDregs(List<String> numbers, boolean isDouble) {
+ Random rand = new Random();
+ ArrayList<String> result = new ArrayList<>();
+ for (String s : numbers) {
+ if (rand.nextDouble() < 0.5) {
+ int cut = s.startsWith("-") ? 1 : 0;
+ s = s.substring(0, cut) + "0" + s.substring(cut);
+ }
+ if (isDouble && rand.nextDouble() < 0.5) {
+ if (s.contains(".") == false)
+ s = s + ".";
+ s = s + "0";
+ }
+ result.add(s);
+ }
+
+ return result;
+ }
+
private ArrayList<SelfDefineSortableKey> createKeyList(List<String> strNumList, byte typeFlag) {
int partationId = 0;
ArrayList<SelfDefineSortableKey> keyList = new ArrayList<>();
@@ -267,4 +299,24 @@ public class NumberDictionaryForestTest {
}
return true;
}
+
+ @Test
+ public void testNormalizeNumber() {
+ assertEquals("0", Number2BytesConverter.normalizeNumber("+0000.000"));
+ assertEquals("0", Number2BytesConverter.normalizeNumber("-0000.000"));
+ assertEquals("0", Number2BytesConverter.normalizeNumber("00.000"));
+ assertEquals("123", Number2BytesConverter.normalizeNumber("00123.000"));
+ assertEquals("-123", Number2BytesConverter.normalizeNumber("-0123"));
+ assertEquals("-123.78", Number2BytesConverter.normalizeNumber("-0123.780"));
+ assertEquals("200", Number2BytesConverter.normalizeNumber("200"));
+ assertEquals("200", Number2BytesConverter.normalizeNumber("200.00"));
+ assertEquals("200.01", Number2BytesConverter.normalizeNumber("200.010"));
+
+ for (int i = -100; i < 101; i++) {
+ String expected = "" + i;
+ int cut = expected.startsWith("-") ? 1 : 0;
+ String str = expected.substring(0, cut) + "00" + expected.substring(cut) + ".000";
+ assertEquals(expected, Number2BytesConverter.normalizeNumber(str));
+ }
+ }
}