You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2016/11/17 03:27:13 UTC
kylin git commit: KYLIN-1851 Add unit test of unsorted data and
modify TrieDictionaryForest a little
Repository: kylin
Updated Branches:
refs/heads/master 4fcaa8ac3 -> 3b1850eae
KYLIN-1851 Add unit test of unsorted data and modify TrieDictionaryForest a little
Signed-off-by: Li Yang <li...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/3b1850ea
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/3b1850ea
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/3b1850ea
Branch: refs/heads/master
Commit: 3b1850eae6ff9ec2ea5cd2d03fb1fa4e05aaafed
Parents: 4fcaa8a
Author: xiefan46 <95...@qq.com>
Authored: Thu Nov 17 10:22:28 2016 +0800
Committer: Li Yang <li...@apache.org>
Committed: Thu Nov 17 11:26:45 2016 +0800
----------------------------------------------------------------------
.../apache/kylin/dict/TrieDictionaryForest.java | 59 +++++++++++++-------
.../kylin/dict/TrieDictionaryForestTest.java | 43 ++++++++++++++
2 files changed, 82 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/3b1850ea/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java
index 9a26c4c..eb33a5a 100755
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java
@@ -53,16 +53,19 @@ public class TrieDictionaryForest<T> extends Dictionary<T> {
private int baseId;
+ private ArrayList<ByteArray> maxValue;
+
public TrieDictionaryForest() { // default constructor for Writable interface
}
public TrieDictionaryForest(ArrayList<TrieDictionary<T>> trees, ArrayList<ByteArray> valueDivide, //
- ArrayList<Integer> accuOffset, BytesConverter<T> bytesConverter, int baseId) {
+ ArrayList<Integer> accuOffset, BytesConverter<T> bytesConverter, int baseId) {
this.trees = trees;
this.valueDivide = valueDivide;
this.accuOffset = accuOffset;
this.bytesConvert = bytesConverter;
this.baseId = baseId;
+ initMaxValue();
}
@Override
@@ -116,27 +119,30 @@ public class TrieDictionaryForest<T> extends Dictionary<T> {
// id = tree_inner_offset + accumulate_offset + baseId
protected int _getIdFromValueBytesImpl(byte[] value, int offset, int len, int roundingFlag) throws IllegalArgumentException {
- ByteArray search = new ByteArray(value, offset, len);
- int index = findIndexByValue(search);
- if (index < 0) {
+ int index;
+ if (trees.size() == 1) {
+ index = 0;
+ } else {
+ ByteArray search = new ByteArray(value, offset, len);
+ index = findIndexByValue(search);
+ if (index < 0) {
+ if (roundingFlag > 0) {
+ return getMinId(); //searching value smaller than the smallest value in dict
+ } else {
+ throw new IllegalArgumentException("Value '" + Bytes.toString(value, offset, len) + "' (" + Bytes.toStringBinary(value, offset, len) + ") not exists!");
+ }
+ }
+
if (roundingFlag > 0) {
- return getMinId(); //searching value smaller than the smallest value in dict
- } else {
- throw new IllegalArgumentException("Value '" + Bytes.toString(value, offset, len) + "' (" + Bytes.toStringBinary(value, offset, len) + ") not exists!");
+ ByteArray maxValueOfTree = maxValue.get(index);
+ if (search.compareTo(maxValueOfTree) > 0)
+ index++;
+ if (index >= trees.size())
+ throw new IllegalArgumentException("Value '" + Bytes.toString(value, offset, len) + "' (" + Bytes.toStringBinary(value, offset, len) + ") not exists!");
}
}
- int id;
- if (roundingFlag > 0) {
- T curTreeMax = trees.get(index).getValueFromId(trees.get(index).getMaxId());
- byte[] b1 = bytesConvert.convertToBytes(curTreeMax);
- ByteArray ba1 = new ByteArray(b1, 0, b1.length);
- if (search.compareTo(ba1) > 0)
- index++;
- if (index >= trees.size())
- throw new IllegalArgumentException("Value '" + Bytes.toString(value, offset, len) + "' (" + Bytes.toStringBinary(value, offset, len) + ") not exists!");
- }
TrieDictionary<T> tree = trees.get(index);
- id = tree.getIdFromValueBytes(value, offset, len, roundingFlag);
+ int id = tree.getIdFromValueBytes(value, offset, len, roundingFlag);
id = id + accuOffset.get(index);
id += baseId;
return id;
@@ -154,7 +160,7 @@ public class TrieDictionaryForest<T> extends Dictionary<T> {
@Override
protected int getValueBytesFromIdImpl(int id, byte[] returnValue, int offset) throws IllegalArgumentException {
- int index = findIndexById(id);
+ int index = (trees.size() == 1) ? 0 : findIndexById(id);
int treeInnerOffset = getTreeInnerOffset(id, index);
TrieDictionary<T> tree = trees.get(index);
int size = tree.getValueBytesFromIdImpl(treeInnerOffset, returnValue, offset);
@@ -163,7 +169,7 @@ public class TrieDictionaryForest<T> extends Dictionary<T> {
@Override
protected byte[] getValueBytesFromIdImpl(int id) throws IllegalArgumentException {
- int index = findIndexById(id);
+ int index = (trees.size() == 1) ? 0 : findIndexById(id);
int treeInnerOffset = getTreeInnerOffset(id, index);
TrieDictionary<T> tree = trees.get(index);
byte[] result = tree.getValueBytesFromId(treeInnerOffset);
@@ -264,6 +270,7 @@ public class TrieDictionaryForest<T> extends Dictionary<T> {
dict.readFields(in);
trees.add(dict);
}
+ initMaxValue();
} catch (Exception e) {
if (e instanceof RuntimeException)
throw (RuntimeException) e;
@@ -363,6 +370,18 @@ public class TrieDictionaryForest<T> extends Dictionary<T> {
}
}
+ private void initMaxValue() throws IllegalStateException {
+ this.maxValue = new ArrayList<>();
+ if (this.trees == null || trees.isEmpty())
+ throw new IllegalStateException("Trees not init yet. Could not init max value of each tree");
+ for (int i = 0; i < trees.size(); i++) {
+ T curTreeMax = trees.get(i).getValueFromId(trees.get(i).getMaxId());
+ byte[] b1 = bytesConvert.convertToBytes(curTreeMax);
+ ByteArray ba1 = new ByteArray(b1, 0, b1.length);
+ this.maxValue.add(ba1);
+ }
+ }
+
public static void main(String[] args) {
ArrayList<String> list = new ArrayList<>();
list.add("\u4e00");
http://git-wip-us.apache.org/repos/asf/kylin/blob/3b1850ea/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
index 3def7e0..07511d1 100755
--- a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
+++ b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
@@ -38,6 +38,7 @@ import static org.junit.Assert.assertEquals;
public class TrieDictionaryForestTest {
+
@Test
public void testBasicFound() {
ArrayList<String> strs = new ArrayList<String>();
@@ -432,6 +433,48 @@ public class TrieDictionaryForestTest {
}
}
+ @Test
+ public void testUnsortedData(){
+ ArrayList<String> strs = new ArrayList<>();
+ Iterator<String> it = new RandomStrings(10000).iterator();
+ int totalSize = 0;
+ final StringBytesConverter converter = new StringBytesConverter();
+ while (it.hasNext()) {
+ String str = it.next();
+ byte[] data = converter.convertToBytes(str);
+ if (data != null) {
+ totalSize += data.length;
+ }
+ strs.add(str);
+ }
+ Collections.shuffle(strs);
+ int baseId = 20;
+ int maxTreeSize = totalSize / 10;
+ System.out.println("data size:" + totalSize / 1024 + "KB max tree size:" + maxTreeSize / 1024 + "KB");
+ //test maintain one trie
+ TrieDictionaryForestBuilder<String> builder = new TrieDictionaryForestBuilder<String>(converter);
+ builder.setMaxTrieTreeSize(maxTreeSize);
+ for(String str : strs){
+ builder.addValue(str);
+ }
+ TrieDictionaryForest<String> dict = builder.build();
+ assertEquals(1,dict.getTrees().size());
+ //test throws Exception
+ Collections.sort(strs);
+ strs.add("f");
+ strs.add("a");
+ builder = new TrieDictionaryForestBuilder<String>(converter);
+ builder.setMaxTrieTreeSize(maxTreeSize);
+ try {
+ for (String str : strs)
+ builder.addValue(str);
+ dict = builder.build();
+ fail("Input data no sorted and builder have multi trees. Should throw IllegalStateException");
+ }catch (IllegalStateException e){
+ //correct
+ }
+ }
+
/*
can not pass cases like 1.7695564055819624E-4
*/