You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2017/11/03 09:22:32 UTC
kylin git commit: KYLIN-2794 MultipleDictionaryValueEnumerator
consider value order based on data type
Repository: kylin
Updated Branches:
refs/heads/master 3e21f104d -> 95268596b
KYLIN-2794 MultipleDictionaryValueEnumerator consider value order based on data type
Signed-off-by: Li Yang <li...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/95268596
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/95268596
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/95268596
Branch: refs/heads/master
Commit: 95268596bd4d315f2d2908a5a9e521f1ea5d0c0d
Parents: 3e21f10
Author: lptong <tl...@163.com>
Authored: Fri Nov 3 17:10:41 2017 +0800
Committer: Li Yang <li...@apache.org>
Committed: Fri Nov 3 17:22:24 2017 +0800
----------------------------------------------------------------------
.../apache/kylin/dict/DictionaryGenerator.java | 32 +++---
.../dict/MultipleDictionaryValueEnumerator.java | 17 ++--
.../MultipleDictionaryValueEnumeratorTest.java | 100 +++++++++++++------
.../kylin/metadata/datatype/DataType.java | 19 ++++
4 files changed, 116 insertions(+), 52 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/95268596/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
index 5fdecdb..db0c302 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
@@ -60,11 +60,13 @@ public class DictionaryGenerator {
return builder;
}
- public static Dictionary<String> buildDictionary(DataType dataType, IDictionaryValueEnumerator valueEnumerator) throws IOException {
+ public static Dictionary<String> buildDictionary(DataType dataType, IDictionaryValueEnumerator valueEnumerator)
+ throws IOException {
return buildDictionary(newDictionaryBuilder(dataType), null, valueEnumerator);
}
- static Dictionary<String> buildDictionary(IDictionaryBuilder builder, DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator) throws IOException {
+ static Dictionary<String> buildDictionary(IDictionaryBuilder builder, DictionaryInfo dictInfo,
+ IDictionaryValueEnumerator valueEnumerator) throws IOException {
int baseId = 0; // always 0 for now
int nSamples = 5;
ArrayList<String> samples = new ArrayList<String>(nSamples);
@@ -101,7 +103,7 @@ public class DictionaryGenerator {
}
public static Dictionary mergeDictionaries(DataType dataType, List<DictionaryInfo> sourceDicts) throws IOException {
- return buildDictionary(dataType, new MultipleDictionaryValueEnumerator(sourceDicts));
+ return buildDictionary(dataType, new MultipleDictionaryValueEnumerator(dataType, sourceDicts));
}
private static class DateDictBuilder implements IDictionaryBuilder {
@@ -119,7 +121,7 @@ public class DictionaryGenerator {
public boolean addValue(String value) {
if (StringUtils.isBlank(value)) // empty string is treated as null
return false;
-
+
// detect date pattern on the first value
if (datePattern == null) {
for (String p : DATE_PATTERNS) {
@@ -134,7 +136,7 @@ public class DictionaryGenerator {
if (datePattern == null)
throw new IllegalArgumentException("Unknown date pattern for input value: " + value);
}
-
+
// check the date format
DateFormat.stringToDate(value, datePattern);
return true;
@@ -174,28 +176,28 @@ public class DictionaryGenerator {
private static class StringTrieDictBuilder implements IDictionaryBuilder {
int baseId;
TrieDictionaryBuilder builder;
-
+
@Override
public void init(DictionaryInfo info, int baseId, String hdfsDir) throws IOException {
this.baseId = baseId;
this.builder = new TrieDictionaryBuilder(new StringBytesConverter());
}
-
+
@Override
public boolean addValue(String value) {
if (value == null)
return false;
-
+
builder.addValue(value);
return true;
}
-
+
@Override
public Dictionary<String> build() throws IOException {
return builder.build(baseId);
}
}
-
+
private static class StringTrieDictForestBuilder implements IDictionaryBuilder {
TrieDictionaryForestBuilder builder;
@@ -223,28 +225,28 @@ public class DictionaryGenerator {
private static class NumberTrieDictBuilder implements IDictionaryBuilder {
int baseId;
NumberDictionaryBuilder builder;
-
+
@Override
public void init(DictionaryInfo info, int baseId, String hdfsDir) throws IOException {
this.baseId = baseId;
this.builder = new NumberDictionaryBuilder();
}
-
+
@Override
public boolean addValue(String value) {
if (StringUtils.isBlank(value)) // empty string is treated as null
return false;
-
+
builder.addValue(value);
return true;
}
-
+
@Override
public Dictionary<String> build() throws IOException {
return builder.build(baseId);
}
}
-
+
private static class NumberTrieDictForestBuilder implements IDictionaryBuilder {
NumberDictionaryForestBuilder builder;
http://git-wip-us.apache.org/repos/asf/kylin/blob/95268596/core-dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java b/core-dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java
index f0d4e34..c1686d3 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java
@@ -22,6 +22,7 @@ import java.io.IOException;
import java.util.List;
import org.apache.kylin.common.util.Dictionary;
+import org.apache.kylin.metadata.datatype.DataType;
import com.google.common.collect.Lists;
@@ -33,8 +34,10 @@ public class MultipleDictionaryValueEnumerator implements IDictionaryValueEnumer
private List<Integer> curKeys = Lists.newArrayList();
private String curValue = null;
private List<Dictionary<String>> dictionaryList;
+ private DataType dataType;
- public MultipleDictionaryValueEnumerator(List<DictionaryInfo> dictionaryInfoList) {
+ public MultipleDictionaryValueEnumerator(DataType dataType, List<DictionaryInfo> dictionaryInfoList) {
+ this.dataType = dataType;
dictionaryList = Lists.newArrayListWithCapacity(dictionaryInfoList.size());
for (DictionaryInfo dictInfo : dictionaryInfoList) {
Dictionary<String> dictionary = (Dictionary<String>) dictInfo.getDictionaryObject();
@@ -52,29 +55,29 @@ public class MultipleDictionaryValueEnumerator implements IDictionaryValueEnumer
public boolean moveNext() throws IOException {
String minValue = null;
int curDictIndex = 0;
-
+
// multi-merge dictionary forest
for (int i = 0; i < dictionaryList.size(); i++) {
Dictionary<String> dict = dictionaryList.get(i);
if (dict == null)
continue;
-
+
int curKey = curKeys.get(i);
if (curKey > dict.getMaxId())
continue;
-
+
String curValue = dict.getValueFromId(curKey);
- if (minValue == null || minValue.compareTo(curValue) > 0) {
+ if (minValue == null || dataType.compare(minValue, curValue) > 0) {
minValue = curValue;
curDictIndex = i;
}
}
-
+
if (minValue == null) {
curValue = null;
return false;
}
-
+
curValue = minValue;
curKeys.set(curDictIndex, curKeys.get(curDictIndex) + 1);
return true;
http://git-wip-us.apache.org/repos/asf/kylin/blob/95268596/core-dictionary/src/test/java/org/apache/kylin/dict/MultipleDictionaryValueEnumeratorTest.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/MultipleDictionaryValueEnumeratorTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/MultipleDictionaryValueEnumeratorTest.java
index 3496c00..a5e4564 100644
--- a/core-dictionary/src/test/java/org/apache/kylin/dict/MultipleDictionaryValueEnumeratorTest.java
+++ b/core-dictionary/src/test/java/org/apache/kylin/dict/MultipleDictionaryValueEnumeratorTest.java
@@ -23,20 +23,33 @@ import static org.junit.Assert.assertEquals;
import java.io.DataInput;
import java.io.DataOutput;
+import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.util.ClassUtil;
import org.apache.kylin.common.util.Dictionary;
+import org.apache.kylin.common.util.HBaseMetadataTestCase;
+import org.apache.kylin.metadata.datatype.DataType;
+import org.junit.BeforeClass;
import org.junit.Test;
/**
* Created by sunyerui on 16/8/2.
*/
public class MultipleDictionaryValueEnumeratorTest {
+ private MultipleDictionaryValueEnumerator enumerator;
- private static DictionaryInfo createDictInfo(int[] values) {
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ ClassUtil.addClasspath(new File(HBaseMetadataTestCase.SANDBOX_TEST_DATA).getAbsolutePath());
+ System.setProperty(KylinConfig.KYLIN_CONF, HBaseMetadataTestCase.SANDBOX_TEST_DATA);
+ }
+
+ private static DictionaryInfo createDictInfo(String[] values) {
MockDictionary mockDict = new MockDictionary();
mockDict.values = values;
DictionaryInfo info = new DictionaryInfo();
@@ -44,76 +57,103 @@ public class MultipleDictionaryValueEnumeratorTest {
return info;
}
- private static Integer[] enumerateDictInfoList(List<DictionaryInfo> dictionaryInfoList) throws IOException {
- MultipleDictionaryValueEnumerator enumerator = new MultipleDictionaryValueEnumerator(dictionaryInfoList);
- List<Integer> values = new ArrayList<>();
+ private String[] enumerateDictInfoList(List<DictionaryInfo> dictionaryInfoList, String dataType) throws IOException {
+ enumerator = new MultipleDictionaryValueEnumerator(DataType.getType(dataType), dictionaryInfoList);
+ List<String> values = new ArrayList<>();
while (enumerator.moveNext()) {
- values.add(Integer.parseInt(enumerator.current()));
+ values.add(enumerator.current());
}
- return values.toArray(new Integer[0]);
+ return values.toArray(new String[0]);
}
@Test
public void testNormalDicts() throws IOException {
List<DictionaryInfo> dictionaryInfoList = new ArrayList<>(2);
- dictionaryInfoList.add(createDictInfo(new int[] { 0, 1, 2 }));
- dictionaryInfoList.add(createDictInfo(new int[] { 4, 5, 6 }));
+ dictionaryInfoList.add(createDictInfo(new String[] { "0", "11", "21" }));
+ dictionaryInfoList.add(createDictInfo(new String[] { "4", "5", "6" }));
+
+ String[] values = enumerateDictInfoList(dictionaryInfoList, "string");
+ assertEquals(6, values.length);
+ assertArrayEquals(new String[] { "0", "11", "21", "4", "5", "6" }, values);
+
+ String[] values2 = enumerateDictInfoList(dictionaryInfoList, "integer");
+ assertEquals(6, values2.length);
+ assertArrayEquals(new String[] { "0", "4", "5", "6", "11", "21" }, values2);
+ }
+
+ @Test
+ public void testNormalDictsWithDate() throws IOException {
+ List<DictionaryInfo> dictionaryInfoList = new ArrayList<>(2);
+ dictionaryInfoList.add(createDictInfo(new String[] { "2017-01-02", "2017-01-11", "2017-05-10" }));
+ dictionaryInfoList.add(createDictInfo(new String[] { "2017-01-21", "2017-03-01", "2017-04-12" }));
+
+ String[] values = enumerateDictInfoList(dictionaryInfoList, "date");
+ assertEquals(6, values.length);
+ assertArrayEquals(new String[] { "2017-01-02", "2017-01-11", "2017-01-21", "2017-03-01", "2017-04-12",
+ "2017-05-10" }, values);
+ }
+
+ @Test
+ public void testNormalDictsWithNumbers() throws IOException {
+ List<DictionaryInfo> dictionaryInfoList = new ArrayList<>(2);
+ dictionaryInfoList.add(createDictInfo(new String[] { "6.25", "11.25", "1000.25779" }));
+ dictionaryInfoList.add(createDictInfo(new String[] { "9.88", "1000.25778", "8765.456" }));
- Integer[] values = enumerateDictInfoList(dictionaryInfoList);
+ String[] values = enumerateDictInfoList(dictionaryInfoList, "float");
assertEquals(6, values.length);
- assertArrayEquals(new Integer[] { 0, 1, 2, 4, 5, 6 }, values);
+ assertArrayEquals(new String[] { "6.25", "9.88", "11.25", "1000.25778", "1000.25779", "8765.456" }, values);
}
@Test
public void testFirstEmptyDicts() throws IOException {
List<DictionaryInfo> dictionaryInfoList = new ArrayList<>(2);
- dictionaryInfoList.add(createDictInfo(new int[] {}));
- dictionaryInfoList.add(createDictInfo(new int[] { 4, 5, 6 }));
+ dictionaryInfoList.add(createDictInfo(new String[] {}));
+ dictionaryInfoList.add(createDictInfo(new String[] { "4", "5", "6" }));
- Integer[] values = enumerateDictInfoList(dictionaryInfoList);
+ String[] values = enumerateDictInfoList(dictionaryInfoList, "integer");
assertEquals(3, values.length);
- assertArrayEquals(new Integer[] { 4, 5, 6 }, values);
+ assertArrayEquals(new String[] { "4", "5", "6" }, values);
}
@Test
public void testMiddleEmptyDicts() throws IOException {
List<DictionaryInfo> dictionaryInfoList = new ArrayList<>(3);
- dictionaryInfoList.add(createDictInfo(new int[] { 0, 1, 2 }));
- dictionaryInfoList.add(createDictInfo(new int[] {}));
- dictionaryInfoList.add(createDictInfo(new int[] { 7, 8, 9 }));
+ dictionaryInfoList.add(createDictInfo(new String[] { "0", "1", "2" }));
+ dictionaryInfoList.add(createDictInfo(new String[] {}));
+ dictionaryInfoList.add(createDictInfo(new String[] { "7", "8", "9" }));
- Integer[] values = enumerateDictInfoList(dictionaryInfoList);
+ String[] values = enumerateDictInfoList(dictionaryInfoList, "integer");
assertEquals(6, values.length);
- assertArrayEquals(new Integer[] { 0, 1, 2, 7, 8, 9 }, values);
+ assertArrayEquals(new String[] { "0", "1", "2", "7", "8", "9" }, values);
}
@Test
public void testLastEmptyDicts() throws IOException {
List<DictionaryInfo> dictionaryInfoList = new ArrayList<>(3);
- dictionaryInfoList.add(createDictInfo(new int[] { 0, 1, 2 }));
- dictionaryInfoList.add(createDictInfo(new int[] { 6, 7, 8 }));
- dictionaryInfoList.add(createDictInfo(new int[] {}));
+ dictionaryInfoList.add(createDictInfo(new String[] { "0", "1", "2" }));
+ dictionaryInfoList.add(createDictInfo(new String[] { "6", "7", "8" }));
+ dictionaryInfoList.add(createDictInfo(new String[] {}));
- Integer[] values = enumerateDictInfoList(dictionaryInfoList);
+ String[] values = enumerateDictInfoList(dictionaryInfoList, "integer");
assertEquals(6, values.length);
- assertArrayEquals(new Integer[] { 0, 1, 2, 6, 7, 8 }, values);
+ assertArrayEquals(new String[] { "0", "1", "2", "6", "7", "8" }, values);
}
@Test
public void testUnorderedDicts() throws IOException {
List<DictionaryInfo> dictionaryInfoList = new ArrayList<>(3);
- dictionaryInfoList.add(createDictInfo(new int[] { 0, 1, 6 }));
- dictionaryInfoList.add(createDictInfo(new int[] { 3, 7, 8 }));
- dictionaryInfoList.add(createDictInfo(new int[] { 2, 7, 9 }));
- Integer[] values = enumerateDictInfoList(dictionaryInfoList);
+ dictionaryInfoList.add(createDictInfo(new String[] { "0", "1", "6" }));
+ dictionaryInfoList.add(createDictInfo(new String[] { "3", "7", "8" }));
+ dictionaryInfoList.add(createDictInfo(new String[] { "2", "7", "9" }));
+ String[] values = enumerateDictInfoList(dictionaryInfoList, "integer");
assertEquals(9, values.length);
- assertArrayEquals(new Integer[] { 0, 1, 2, 3, 6, 7, 7, 8, 9 }, values);
+ assertArrayEquals(new String[] { "0", "1", "2", "3", "6", "7", "7", "8", "9" }, values);
}
public static class MockDictionary extends Dictionary<String> {
private static final long serialVersionUID = 1L;
- public int[] values;
+ public String[] values;
@Override
public int getMinId() {
http://git-wip-us.apache.org/repos/asf/kylin/blob/95268596/core-metadata/src/main/java/org/apache/kylin/metadata/datatype/DataType.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/datatype/DataType.java b/core-metadata/src/main/java/org/apache/kylin/metadata/datatype/DataType.java
index 13cc2be..12f7411 100644
--- a/core-metadata/src/main/java/org/apache/kylin/metadata/datatype/DataType.java
+++ b/core-metadata/src/main/java/org/apache/kylin/metadata/datatype/DataType.java
@@ -19,6 +19,7 @@
package org.apache.kylin.metadata.datatype;
import java.io.Serializable;
+import java.math.BigDecimal;
import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.HashSet;
@@ -34,6 +35,7 @@ import org.apache.commons.lang.StringUtils;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.BytesSerializer;
import org.apache.kylin.common.util.BytesUtil;
+import org.apache.kylin.common.util.DateFormat;
import org.apache.kylin.measure.MeasureTypeFactory;
import org.apache.kylin.metadata.model.TblColRef.InnerDataTypeEnum;
@@ -196,6 +198,23 @@ public class DataType implements Serializable {
}
+ public int compare(String value1, String value2) {
+ if (isDateTimeFamily()) {
+ Long millis1 = DateFormat.stringToMillis(value1);
+ Long millis2 = DateFormat.stringToMillis(value2);
+ return millis1.compareTo(millis2);
+ } else if (isIntegerFamily()) {
+ Long l1 = new Long(value1);
+ Long l2 = new Long(value2);
+ return l1.compareTo(l2);
+ } else if (isNumberFamily()) {
+ BigDecimal bigDecimal1 = new BigDecimal(value1);
+ BigDecimal bigDecimal2 = new BigDecimal(value2);
+ return bigDecimal1.compareTo(bigDecimal2);
+ }
+ return value1.compareTo(value2);
+ }
+
private String replaceLegacy(String str) {
String replace = LEGACY_TYPE_MAP.get(str);
return replace == null ? str : replace;