You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2017/11/03 09:22:32 UTC

kylin git commit: KYLIN-2794 MultipleDictionaryValueEnumerator consider value order based on data type

Repository: kylin
Updated Branches:
  refs/heads/master 3e21f104d -> 95268596b


KYLIN-2794 MultipleDictionaryValueEnumerator consider value order based on data type

Signed-off-by: Li Yang <li...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/95268596
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/95268596
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/95268596

Branch: refs/heads/master
Commit: 95268596bd4d315f2d2908a5a9e521f1ea5d0c0d
Parents: 3e21f10
Author: lptong <tl...@163.com>
Authored: Fri Nov 3 17:10:41 2017 +0800
Committer: Li Yang <li...@apache.org>
Committed: Fri Nov 3 17:22:24 2017 +0800

----------------------------------------------------------------------
 .../apache/kylin/dict/DictionaryGenerator.java  |  32 +++---
 .../dict/MultipleDictionaryValueEnumerator.java |  17 ++--
 .../MultipleDictionaryValueEnumeratorTest.java  | 100 +++++++++++++------
 .../kylin/metadata/datatype/DataType.java       |  19 ++++
 4 files changed, 116 insertions(+), 52 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/95268596/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
index 5fdecdb..db0c302 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
@@ -60,11 +60,13 @@ public class DictionaryGenerator {
         return builder;
     }
 
-    public static Dictionary<String> buildDictionary(DataType dataType, IDictionaryValueEnumerator valueEnumerator) throws IOException {
+    public static Dictionary<String> buildDictionary(DataType dataType, IDictionaryValueEnumerator valueEnumerator)
+            throws IOException {
         return buildDictionary(newDictionaryBuilder(dataType), null, valueEnumerator);
     }
 
-    static Dictionary<String> buildDictionary(IDictionaryBuilder builder, DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator) throws IOException {
+    static Dictionary<String> buildDictionary(IDictionaryBuilder builder, DictionaryInfo dictInfo,
+            IDictionaryValueEnumerator valueEnumerator) throws IOException {
         int baseId = 0; // always 0 for now
         int nSamples = 5;
         ArrayList<String> samples = new ArrayList<String>(nSamples);
@@ -101,7 +103,7 @@ public class DictionaryGenerator {
     }
 
     public static Dictionary mergeDictionaries(DataType dataType, List<DictionaryInfo> sourceDicts) throws IOException {
-        return buildDictionary(dataType, new MultipleDictionaryValueEnumerator(sourceDicts));
+        return buildDictionary(dataType, new MultipleDictionaryValueEnumerator(dataType, sourceDicts));
     }
 
     private static class DateDictBuilder implements IDictionaryBuilder {
@@ -119,7 +121,7 @@ public class DictionaryGenerator {
         public boolean addValue(String value) {
             if (StringUtils.isBlank(value)) // empty string is treated as null
                 return false;
-            
+
             // detect date pattern on the first value
             if (datePattern == null) {
                 for (String p : DATE_PATTERNS) {
@@ -134,7 +136,7 @@ public class DictionaryGenerator {
                 if (datePattern == null)
                     throw new IllegalArgumentException("Unknown date pattern for input value: " + value);
             }
-            
+
             // check the date format
             DateFormat.stringToDate(value, datePattern);
             return true;
@@ -174,28 +176,28 @@ public class DictionaryGenerator {
     private static class StringTrieDictBuilder implements IDictionaryBuilder {
         int baseId;
         TrieDictionaryBuilder builder;
-        
+
         @Override
         public void init(DictionaryInfo info, int baseId, String hdfsDir) throws IOException {
             this.baseId = baseId;
             this.builder = new TrieDictionaryBuilder(new StringBytesConverter());
         }
-        
+
         @Override
         public boolean addValue(String value) {
             if (value == null)
                 return false;
-            
+
             builder.addValue(value);
             return true;
         }
-        
+
         @Override
         public Dictionary<String> build() throws IOException {
             return builder.build(baseId);
         }
     }
-    
+
     private static class StringTrieDictForestBuilder implements IDictionaryBuilder {
         TrieDictionaryForestBuilder builder;
 
@@ -223,28 +225,28 @@ public class DictionaryGenerator {
     private static class NumberTrieDictBuilder implements IDictionaryBuilder {
         int baseId;
         NumberDictionaryBuilder builder;
-        
+
         @Override
         public void init(DictionaryInfo info, int baseId, String hdfsDir) throws IOException {
             this.baseId = baseId;
             this.builder = new NumberDictionaryBuilder();
         }
-        
+
         @Override
         public boolean addValue(String value) {
             if (StringUtils.isBlank(value)) // empty string is treated as null
                 return false;
-            
+
             builder.addValue(value);
             return true;
         }
-        
+
         @Override
         public Dictionary<String> build() throws IOException {
             return builder.build(baseId);
         }
     }
-    
+
     private static class NumberTrieDictForestBuilder implements IDictionaryBuilder {
         NumberDictionaryForestBuilder builder;
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/95268596/core-dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java b/core-dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java
index f0d4e34..c1686d3 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java
@@ -22,6 +22,7 @@ import java.io.IOException;
 import java.util.List;
 
 import org.apache.kylin.common.util.Dictionary;
+import org.apache.kylin.metadata.datatype.DataType;
 
 import com.google.common.collect.Lists;
 
@@ -33,8 +34,10 @@ public class MultipleDictionaryValueEnumerator implements IDictionaryValueEnumer
     private List<Integer> curKeys = Lists.newArrayList();
     private String curValue = null;
     private List<Dictionary<String>> dictionaryList;
+    private DataType dataType;
 
-    public MultipleDictionaryValueEnumerator(List<DictionaryInfo> dictionaryInfoList) {
+    public MultipleDictionaryValueEnumerator(DataType dataType, List<DictionaryInfo> dictionaryInfoList) {
+        this.dataType = dataType;
         dictionaryList = Lists.newArrayListWithCapacity(dictionaryInfoList.size());
         for (DictionaryInfo dictInfo : dictionaryInfoList) {
             Dictionary<String> dictionary = (Dictionary<String>) dictInfo.getDictionaryObject();
@@ -52,29 +55,29 @@ public class MultipleDictionaryValueEnumerator implements IDictionaryValueEnumer
     public boolean moveNext() throws IOException {
         String minValue = null;
         int curDictIndex = 0;
-        
+
         // multi-merge dictionary forest
         for (int i = 0; i < dictionaryList.size(); i++) {
             Dictionary<String> dict = dictionaryList.get(i);
             if (dict == null)
                 continue;
-            
+
             int curKey = curKeys.get(i);
             if (curKey > dict.getMaxId())
                 continue;
-            
+
             String curValue = dict.getValueFromId(curKey);
-            if (minValue == null || minValue.compareTo(curValue) > 0) {
+            if (minValue == null || dataType.compare(minValue, curValue) > 0) {
                 minValue = curValue;
                 curDictIndex = i;
             }
         }
-        
+
         if (minValue == null) {
             curValue = null;
             return false;
         }
-        
+
         curValue = minValue;
         curKeys.set(curDictIndex, curKeys.get(curDictIndex) + 1);
         return true;

http://git-wip-us.apache.org/repos/asf/kylin/blob/95268596/core-dictionary/src/test/java/org/apache/kylin/dict/MultipleDictionaryValueEnumeratorTest.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/MultipleDictionaryValueEnumeratorTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/MultipleDictionaryValueEnumeratorTest.java
index 3496c00..a5e4564 100644
--- a/core-dictionary/src/test/java/org/apache/kylin/dict/MultipleDictionaryValueEnumeratorTest.java
+++ b/core-dictionary/src/test/java/org/apache/kylin/dict/MultipleDictionaryValueEnumeratorTest.java
@@ -23,20 +23,33 @@ import static org.junit.Assert.assertEquals;
 
 import java.io.DataInput;
 import java.io.DataOutput;
+import java.io.File;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.util.ClassUtil;
 import org.apache.kylin.common.util.Dictionary;
+import org.apache.kylin.common.util.HBaseMetadataTestCase;
+import org.apache.kylin.metadata.datatype.DataType;
+import org.junit.BeforeClass;
 import org.junit.Test;
 
 /**
  * Created by sunyerui on 16/8/2.
  */
 public class MultipleDictionaryValueEnumeratorTest {
+    private MultipleDictionaryValueEnumerator enumerator;
 
-    private static DictionaryInfo createDictInfo(int[] values) {
+    @BeforeClass
+    public static void beforeClass() throws Exception {
+        ClassUtil.addClasspath(new File(HBaseMetadataTestCase.SANDBOX_TEST_DATA).getAbsolutePath());
+        System.setProperty(KylinConfig.KYLIN_CONF, HBaseMetadataTestCase.SANDBOX_TEST_DATA);
+    }
+
+    private static DictionaryInfo createDictInfo(String[] values) {
         MockDictionary mockDict = new MockDictionary();
         mockDict.values = values;
         DictionaryInfo info = new DictionaryInfo();
@@ -44,76 +57,103 @@ public class MultipleDictionaryValueEnumeratorTest {
         return info;
     }
 
-    private static Integer[] enumerateDictInfoList(List<DictionaryInfo> dictionaryInfoList) throws IOException {
-        MultipleDictionaryValueEnumerator enumerator = new MultipleDictionaryValueEnumerator(dictionaryInfoList);
-        List<Integer> values = new ArrayList<>();
+    private String[] enumerateDictInfoList(List<DictionaryInfo> dictionaryInfoList, String dataType) throws IOException {
+        enumerator = new MultipleDictionaryValueEnumerator(DataType.getType(dataType), dictionaryInfoList);
+        List<String> values = new ArrayList<>();
         while (enumerator.moveNext()) {
-            values.add(Integer.parseInt(enumerator.current()));
+            values.add(enumerator.current());
         }
-        return values.toArray(new Integer[0]);
+        return values.toArray(new String[0]);
     }
 
     @Test
     public void testNormalDicts() throws IOException {
         List<DictionaryInfo> dictionaryInfoList = new ArrayList<>(2);
-        dictionaryInfoList.add(createDictInfo(new int[] { 0, 1, 2 }));
-        dictionaryInfoList.add(createDictInfo(new int[] { 4, 5, 6 }));
+        dictionaryInfoList.add(createDictInfo(new String[] { "0", "11", "21" }));
+        dictionaryInfoList.add(createDictInfo(new String[] { "4", "5", "6" }));
+
+        String[] values = enumerateDictInfoList(dictionaryInfoList, "string");
+        assertEquals(6, values.length);
+        assertArrayEquals(new String[] { "0", "11", "21", "4", "5", "6" }, values);
+
+        String[] values2 = enumerateDictInfoList(dictionaryInfoList, "integer");
+        assertEquals(6, values2.length);
+        assertArrayEquals(new String[] { "0", "4", "5", "6", "11", "21" }, values2);
+    }
+
+    @Test
+    public void testNormalDictsWithDate() throws IOException {
+        List<DictionaryInfo> dictionaryInfoList = new ArrayList<>(2);
+        dictionaryInfoList.add(createDictInfo(new String[] { "2017-01-02", "2017-01-11", "2017-05-10" }));
+        dictionaryInfoList.add(createDictInfo(new String[] { "2017-01-21", "2017-03-01", "2017-04-12" }));
+
+        String[] values = enumerateDictInfoList(dictionaryInfoList, "date");
+        assertEquals(6, values.length);
+        assertArrayEquals(new String[] { "2017-01-02", "2017-01-11", "2017-01-21", "2017-03-01", "2017-04-12",
+                "2017-05-10" }, values);
+    }
+
+    @Test
+    public void testNormalDictsWithNumbers() throws IOException {
+        List<DictionaryInfo> dictionaryInfoList = new ArrayList<>(2);
+        dictionaryInfoList.add(createDictInfo(new String[] { "6.25", "11.25", "1000.25779" }));
+        dictionaryInfoList.add(createDictInfo(new String[] { "9.88", "1000.25778", "8765.456" }));
 
-        Integer[] values = enumerateDictInfoList(dictionaryInfoList);
+        String[] values = enumerateDictInfoList(dictionaryInfoList, "float");
         assertEquals(6, values.length);
-        assertArrayEquals(new Integer[] { 0, 1, 2, 4, 5, 6 }, values);
+        assertArrayEquals(new String[] { "6.25", "9.88", "11.25", "1000.25778", "1000.25779", "8765.456" }, values);
     }
 
     @Test
     public void testFirstEmptyDicts() throws IOException {
         List<DictionaryInfo> dictionaryInfoList = new ArrayList<>(2);
-        dictionaryInfoList.add(createDictInfo(new int[] {}));
-        dictionaryInfoList.add(createDictInfo(new int[] { 4, 5, 6 }));
+        dictionaryInfoList.add(createDictInfo(new String[] {}));
+        dictionaryInfoList.add(createDictInfo(new String[] { "4", "5", "6" }));
 
-        Integer[] values = enumerateDictInfoList(dictionaryInfoList);
+        String[] values = enumerateDictInfoList(dictionaryInfoList, "integer");
         assertEquals(3, values.length);
-        assertArrayEquals(new Integer[] { 4, 5, 6 }, values);
+        assertArrayEquals(new String[] { "4", "5", "6" }, values);
     }
 
     @Test
     public void testMiddleEmptyDicts() throws IOException {
         List<DictionaryInfo> dictionaryInfoList = new ArrayList<>(3);
-        dictionaryInfoList.add(createDictInfo(new int[] { 0, 1, 2 }));
-        dictionaryInfoList.add(createDictInfo(new int[] {}));
-        dictionaryInfoList.add(createDictInfo(new int[] { 7, 8, 9 }));
+        dictionaryInfoList.add(createDictInfo(new String[] { "0", "1", "2" }));
+        dictionaryInfoList.add(createDictInfo(new String[] {}));
+        dictionaryInfoList.add(createDictInfo(new String[] { "7", "8", "9" }));
 
-        Integer[] values = enumerateDictInfoList(dictionaryInfoList);
+        String[] values = enumerateDictInfoList(dictionaryInfoList, "integer");
         assertEquals(6, values.length);
-        assertArrayEquals(new Integer[] { 0, 1, 2, 7, 8, 9 }, values);
+        assertArrayEquals(new String[] { "0", "1", "2", "7", "8", "9" }, values);
     }
 
     @Test
     public void testLastEmptyDicts() throws IOException {
         List<DictionaryInfo> dictionaryInfoList = new ArrayList<>(3);
-        dictionaryInfoList.add(createDictInfo(new int[] { 0, 1, 2 }));
-        dictionaryInfoList.add(createDictInfo(new int[] { 6, 7, 8 }));
-        dictionaryInfoList.add(createDictInfo(new int[] {}));
+        dictionaryInfoList.add(createDictInfo(new String[] { "0", "1", "2" }));
+        dictionaryInfoList.add(createDictInfo(new String[] { "6", "7", "8" }));
+        dictionaryInfoList.add(createDictInfo(new String[] {}));
 
-        Integer[] values = enumerateDictInfoList(dictionaryInfoList);
+        String[] values = enumerateDictInfoList(dictionaryInfoList, "integer");
         assertEquals(6, values.length);
-        assertArrayEquals(new Integer[] { 0, 1, 2, 6, 7, 8 }, values);
+        assertArrayEquals(new String[] { "0", "1", "2", "6", "7", "8" }, values);
     }
 
     @Test
     public void testUnorderedDicts() throws IOException {
         List<DictionaryInfo> dictionaryInfoList = new ArrayList<>(3);
-        dictionaryInfoList.add(createDictInfo(new int[] { 0, 1, 6 }));
-        dictionaryInfoList.add(createDictInfo(new int[] { 3, 7, 8 }));
-        dictionaryInfoList.add(createDictInfo(new int[] { 2, 7, 9 }));
-        Integer[] values = enumerateDictInfoList(dictionaryInfoList);
+        dictionaryInfoList.add(createDictInfo(new String[] { "0", "1", "6" }));
+        dictionaryInfoList.add(createDictInfo(new String[] { "3", "7", "8" }));
+        dictionaryInfoList.add(createDictInfo(new String[] { "2", "7", "9" }));
+        String[] values = enumerateDictInfoList(dictionaryInfoList, "integer");
         assertEquals(9, values.length);
-        assertArrayEquals(new Integer[] { 0, 1, 2, 3, 6, 7, 7, 8, 9 }, values);
+        assertArrayEquals(new String[] { "0", "1", "2", "3", "6", "7", "7", "8", "9" }, values);
     }
 
     public static class MockDictionary extends Dictionary<String> {
         private static final long serialVersionUID = 1L;
 
-        public int[] values;
+        public String[] values;
 
         @Override
         public int getMinId() {

http://git-wip-us.apache.org/repos/asf/kylin/blob/95268596/core-metadata/src/main/java/org/apache/kylin/metadata/datatype/DataType.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/datatype/DataType.java b/core-metadata/src/main/java/org/apache/kylin/metadata/datatype/DataType.java
index 13cc2be..12f7411 100644
--- a/core-metadata/src/main/java/org/apache/kylin/metadata/datatype/DataType.java
+++ b/core-metadata/src/main/java/org/apache/kylin/metadata/datatype/DataType.java
@@ -19,6 +19,7 @@
 package org.apache.kylin.metadata.datatype;
 
 import java.io.Serializable;
+import java.math.BigDecimal;
 import java.nio.ByteBuffer;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -34,6 +35,7 @@ import org.apache.commons.lang.StringUtils;
 import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.util.BytesSerializer;
 import org.apache.kylin.common.util.BytesUtil;
+import org.apache.kylin.common.util.DateFormat;
 import org.apache.kylin.measure.MeasureTypeFactory;
 import org.apache.kylin.metadata.model.TblColRef.InnerDataTypeEnum;
 
@@ -196,6 +198,23 @@ public class DataType implements Serializable {
 
     }
 
+    public int compare(String value1, String value2) {
+        if (isDateTimeFamily()) {
+            Long millis1 = DateFormat.stringToMillis(value1);
+            Long millis2 = DateFormat.stringToMillis(value2);
+            return millis1.compareTo(millis2);
+        } else if (isIntegerFamily()) {
+            Long l1 = new Long(value1);
+            Long l2 = new Long(value2);
+            return l1.compareTo(l2);
+        } else if (isNumberFamily()) {
+            BigDecimal bigDecimal1 = new BigDecimal(value1);
+            BigDecimal bigDecimal2 = new BigDecimal(value2);
+            return bigDecimal1.compareTo(bigDecimal2);
+        }
+        return value1.compareTo(value2);
+    }
+
     private String replaceLegacy(String str) {
         String replace = LEGACY_TYPE_MAP.get(str);
         return replace == null ? str : replace;