You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2017/04/14 07:27:34 UTC

kylin git commit: KYLIN-2545 improve Number2BytesConverter to accept malformed numbers

Repository: kylin
Updated Branches:
  refs/heads/KYLIN-2545 [created] d896b26ee


KYLIN-2545 improve Number2BytesConverter to accept malformed numbers


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/d896b26e
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/d896b26e
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/d896b26e

Branch: refs/heads/KYLIN-2545
Commit: d896b26eee717cd7c8de154b2825203f4868c0db
Parents: 674410f
Author: Li Yang <li...@apache.org>
Authored: Fri Apr 14 15:26:32 2017 +0800
Committer: Li Yang <li...@apache.org>
Committed: Fri Apr 14 15:26:32 2017 +0800

----------------------------------------------------------------------
 .../kylin/dict/Number2BytesConverter.java       | 22 ++++-
 .../mr/steps/NumberDictionaryForestTest.java    | 86 ++++++++++++++++----
 2 files changed, 88 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/d896b26e/core-dictionary/src/main/java/org/apache/kylin/dict/Number2BytesConverter.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/Number2BytesConverter.java b/core-dictionary/src/main/java/org/apache/kylin/dict/Number2BytesConverter.java
index 814c95a..397ca9f 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/Number2BytesConverter.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/Number2BytesConverter.java
@@ -17,9 +17,10 @@
 */
 package org.apache.kylin.dict;
 
-import org.apache.kylin.common.util.Bytes;
-
 import java.io.Serializable;
+import java.math.BigDecimal;
+
+import org.apache.kylin.common.util.Bytes;
 
 /**
  * Created by xiefan on 17-1-20.
@@ -59,12 +60,28 @@ public class Number2BytesConverter implements BytesConverter<String>, Serializab
 
     @Override
     public byte[] convertToBytes(String v) {
+        v = normalizeNumber(v); // canonicalize first, so that e.g. "00123.000" is encoded exactly like "123"
         NumberBytesCodec codec = getCodec(this.maxDigitsBeforeDecimalPoint);
         byte[] num = Bytes.toBytes(v);
         codec.encodeNumber(num, 0, num.length);
         return Bytes.copy(codec.buf, codec.bufOffset, codec.bufLen);
     }
 
+    /** Canonicalizes a plain decimal: drops '+', leading zeros and trailing fraction zeros; "-0" becomes "0". */
+    public static String normalizeNumber(String v) {
+        boolean badBegin = (v.startsWith("0") && v.length() > 1 && v.charAt(1) != '.') //
+                || (v.startsWith("-0") && v.length() > 2 && v.charAt(2) != '.') //
+                || v.startsWith("+");
+        if (badBegin) {
+            v = new BigDecimal(v).toPlainString();
+        }
+        while (v.contains(".") && (v.endsWith("0") || v.endsWith("."))) {
+            v = v.substring(0, v.length() - 1);
+        }
+        // "-0" and "-0.0" bypass the BigDecimal path above; fold negative zero into "0" like "-0000.000"
+        return v.equals("-0") ? "0" : v;
+    }
+
     @Override
     public String convertFromBytes(byte[] b, int offset, int length) {
         NumberBytesCodec codec = getCodec(this.maxDigitsBeforeDecimalPoint);
@@ -224,5 +241,4 @@ public class Number2BytesConverter implements BytesConverter<String>, Serializab
             return out - offset;
         }
     }
-
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/kylin/blob/d896b26e/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java
index c31377c..414ab95 100644
--- a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java
+++ b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java
@@ -35,51 +35,56 @@ import java.util.Random;
 
 import org.apache.hadoop.io.Text;
 import org.apache.kylin.common.util.Bytes;
+import org.apache.kylin.dict.Number2BytesConverter;
 import org.apache.kylin.dict.NumberDictionary;
 import org.apache.kylin.dict.NumberDictionaryBuilder;
 import org.apache.kylin.dict.NumberDictionaryForestBuilder;
 import org.apache.kylin.dict.TrieDictionaryForest;
-import org.junit.Ignore;
 import org.junit.Test;
 
 /**
  * Created by xiefan on 16-11-2.
  */
-
-
 public class NumberDictionaryForestTest {
 
     @Test
     public void testNumberDictionaryForestLong() {
         List<String> list = randomLongData(100);
-        testData(list, SelfDefineSortableKey.TypeFlag.INTEGER_FAMILY_TYPE);
+        testData(list, list, SelfDefineSortableKey.TypeFlag.INTEGER_FAMILY_TYPE); // clean input: keys and expected values are the same list
+        List<String> list2 = randomLongData(100);
+        testData(putInDregs(list2, false), list2, SelfDefineSortableKey.TypeFlag.INTEGER_FAMILY_TYPE); // keys get random extra leading zeros; the clean values are still expected back
     }
 
     @Test
     public void testNumberDictionaryForestDouble() {
         List<String> list = randomDoubleData(100);
-        testData(list, SelfDefineSortableKey.TypeFlag.DOUBLE_FAMILY_TYPE);
+        testData(list, list, SelfDefineSortableKey.TypeFlag.DOUBLE_FAMILY_TYPE); // clean input: keys and expected values are the same list
+        List<String> list2 = randomDoubleData(100);
+        testData(putInDregs(list2, true), list2, SelfDefineSortableKey.TypeFlag.DOUBLE_FAMILY_TYPE); // keys get random leading and trailing zeros; the clean values are still expected back
     }
 
-    private void testData(List<String> list, SelfDefineSortableKey.TypeFlag flag) {
+    private void testData(List<String> humanList, List<String> expectedList, SelfDefineSortableKey.TypeFlag flag) {
         //stimulate map-reduce job
-        ArrayList<SelfDefineSortableKey> keyList = createKeyList(list, (byte) flag.ordinal());
+        ArrayList<SelfDefineSortableKey> keyList = createKeyList(humanList, (byte) flag.ordinal());
         Collections.sort(keyList);
+        
         //build tree
         NumberDictionaryForestBuilder b = new NumberDictionaryForestBuilder(0, 0);
-
-        for (SelfDefineSortableKey key : keyList) {
-            String fieldValue = printKey(key);
-            b.addValue(fieldValue);
+        expectedList = numberSort(expectedList);
+        for (String value : expectedList) {
+            b.addValue(value);
         }
         TrieDictionaryForest<String> dict = b.build();
         dict.dump(System.out);
+        
         ArrayList<Integer> resultIds = new ArrayList<>();
-        for (SelfDefineSortableKey key : keyList) {
+        for (int i = 0; i < keyList.size(); i++) {
+            SelfDefineSortableKey key = keyList.get(i);
             String fieldValue = getFieldValue(key);
             resultIds.add(dict.getIdFromValue(fieldValue));
-            assertEquals(fieldValue, dict.getValueFromId(dict.getIdFromValue(fieldValue)));
+            assertEquals(expectedList.get(i), dict.getValueFromId(dict.getIdFromValue(fieldValue)));
         }
+        
         assertTrue(isIncreasedOrder(resultIds, new Comparator<Integer>() {
             @Override
             public int compare(Integer o1, Integer o2) {
@@ -88,6 +93,18 @@ public class NumberDictionaryForestTest {
         }));
     }
 
+    /** Returns a copy of the given numeric strings, sorted ascending by their value parsed as a double. */
+    private List<String> numberSort(List<String> list) {
+        ArrayList<String> sorted = new ArrayList<>(list);
+        Collections.sort(sorted, new Comparator<String>() {
+            @Override
+            public int compare(String left, String right) {
+                return Double.compare(Double.parseDouble(left), Double.parseDouble(right));
+            }
+        });
+        return sorted;
+    }
+
     @Test
     public void serializeTest() {
         List<String> testData = new ArrayList<>();
@@ -106,7 +123,6 @@ public class NumberDictionaryForestTest {
         }
     }
 
-
     @Test
     public void testVerySmallDouble() {
         List<String> testData = new ArrayList<>();
@@ -148,8 +164,6 @@ public class NumberDictionaryForestTest {
         assertTrue(dict1.getSizeOfId() == dict2.getSizeOfId());
         assertTrue(dict1.getSizeOfValue() == dict2.getSizeOfValue());
 
-        byte[] buf = new byte[dict1.getSizeOfValue()];
-
         {
             int newId = dict2.getIdFromValue(dict1.getValueFromId(0));
             assertTrue(newId == 0);
@@ -165,7 +179,6 @@ public class NumberDictionaryForestTest {
         }
     }
 
-    @Ignore
     @Test
     public void testDecimalsWithBeginZero() {
         List<String> testData = new ArrayList<>();
@@ -221,6 +234,25 @@ public class NumberDictionaryForestTest {
         return list;
     }
 
+    /** Returns the numbers with redundant zeros randomly added: a leading "0" and, for doubles, a trailing one. */
+    private List<String> putInDregs(List<String> numbers, boolean isDouble) {
+        Random rand = new Random();
+        ArrayList<String> result = new ArrayList<>();
+        for (String value : numbers) {
+            if (rand.nextDouble() < 0.5) {
+                // the extra zero goes right after the sign, if there is one
+                int signLen = value.startsWith("-") ? 1 : 0;
+                value = value.substring(0, signLen) + "0" + value.substring(signLen);
+            }
+            if (isDouble && rand.nextDouble() < 0.5) {
+                // extend the fraction by one zero, adding the decimal point first when it is missing
+                value = (value.contains(".") ? value : value + ".") + "0";
+            }
+            result.add(value);
+        }
+        return result;
+    }
+
     private ArrayList<SelfDefineSortableKey> createKeyList(List<String> strNumList, byte typeFlag) {
         int partationId = 0;
         ArrayList<SelfDefineSortableKey> keyList = new ArrayList<>();
@@ -267,4 +299,24 @@ public class NumberDictionaryForestTest {
         }
         return true;
     }
+    
+    /** Checks normalizeNumber() on hand-written cases and on zero-padded forms of every integer in [-100, 100]. */
+    @Test
+    public void testNormalizeNumber() {
+        String[][] cases = { // { expected, input }
+                { "0", "+0000.000" }, { "0", "-0000.000" }, { "0", "00.000" }, //
+                { "123", "00123.000" }, { "-123", "-0123" }, { "-123.78", "-0123.780" }, //
+                { "200", "200" }, { "200", "200.00" }, { "200.01", "200.010" } };
+        for (String[] c : cases) {
+            assertEquals(c[0], Number2BytesConverter.normalizeNumber(c[1]));
+        }
+
+        for (int n = -100; n <= 100; n++) {
+            String expected = String.valueOf(n);
+            // pad with "00" right after the optional sign and append a zero-only fraction
+            String sign = n < 0 ? "-" : "";
+            String padded = sign + "00" + expected.substring(sign.length()) + ".000";
+            assertEquals(expected, Number2BytesConverter.normalizeNumber(padded));
+        }
+    }
 }