You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2017/01/03 07:43:34 UTC
kylin git commit: KYLIN-2322 fix TictionaryDictionaryForest cache bug
and add CacheDictionary interface
Repository: kylin
Updated Branches:
refs/heads/master d7971d9bd -> 1d6a36bf6
KYLIN-2322 fix TictionaryDictionaryForest cache bug and add CacheDictionary interface
Signed-off-by: Li Yang <li...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/1d6a36bf
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/1d6a36bf
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/1d6a36bf
Branch: refs/heads/master
Commit: 1d6a36bf66085e0bed79876379b21677ec55788f
Parents: d7971d9
Author: xiefan46 <95...@qq.com>
Authored: Wed Dec 28 18:10:07 2016 +0800
Committer: Li Yang <li...@apache.org>
Committed: Tue Jan 3 15:43:24 2017 +0800
----------------------------------------------------------------------
.../apache/kylin/common/KylinConfigBase.java | 2 +-
.../apache/kylin/common/util/Dictionary.java | 2 +
.../apache/kylin/dict/AppendTrieDictionary.java | 46 +----
.../org/apache/kylin/dict/CacheDictionary.java | 107 +++++++++++
.../apache/kylin/dict/DateStrDictionary.java | 1 +
.../org/apache/kylin/dict/NumberDictionary.java | 26 +--
.../apache/kylin/dict/TimeStrDictionary.java | 1 +
.../org/apache/kylin/dict/TrieDictionary.java | 137 +-------------
.../apache/kylin/dict/TrieDictionaryForest.java | 45 +----
.../kylin/dict/TrieDictionaryForestBuilder.java | 1 -
.../MultipleDictionaryValueEnumeratorTest.java | 1 +
.../dict/TrieDictionaryForestBenchmark.java | 180 +++++++++++++++++++
.../kylin/dict/TrieDictionaryForestTest.java | 35 ++--
13 files changed, 325 insertions(+), 259 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/1d6a36bf/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index d73b694..bb8880b 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -217,7 +217,7 @@ abstract public class KylinConfigBase implements Serializable {
// ============================================================================
public boolean isUseForestTrieDictionary() {
- return Boolean.parseBoolean(getOptional("kylin.dictionary.use-forest-trie", "false"));
+ return Boolean.parseBoolean(getOptional("kylin.dictionary.use-forest-trie", "true"));
}
public int getTrieDictionaryForestMaxTrieSizeMB() {
http://git-wip-us.apache.org/repos/asf/kylin/blob/1d6a36bf/core-common/src/main/java/org/apache/kylin/common/util/Dictionary.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/util/Dictionary.java b/core-common/src/main/java/org/apache/kylin/common/util/Dictionary.java
index 1e172bc..03996a7 100644
--- a/core-common/src/main/java/org/apache/kylin/common/util/Dictionary.java
+++ b/core-common/src/main/java/org/apache/kylin/common/util/Dictionary.java
@@ -254,4 +254,6 @@ abstract public class Dictionary<T> implements Serializable {
*/
public abstract void readFields(DataInput in) throws IOException;
+
+
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/1d6a36bf/core-dictionary/src/main/java/org/apache/kylin/dict/AppendTrieDictionary.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/AppendTrieDictionary.java b/core-dictionary/src/main/java/org/apache/kylin/dict/AppendTrieDictionary.java
index 32bfde6..503c29e 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/AppendTrieDictionary.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/AppendTrieDictionary.java
@@ -27,10 +27,8 @@ import java.io.DataOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
-import java.lang.ref.SoftReference;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.LinkedList;
@@ -76,7 +74,7 @@ import org.slf4j.LoggerFactory;
* @author sunyerui
*/
@SuppressWarnings({ "rawtypes", "unchecked", "serial" })
-public class AppendTrieDictionary<T> extends Dictionary<T> {
+public class AppendTrieDictionary<T> extends CacheDictionary<T> {
public static final byte[] HEAD_MAGIC = new byte[] { 0x41, 0x70, 0x70, 0x65, 0x63, 0x64, 0x54, 0x72, 0x69, 0x65, 0x44, 0x69, 0x63, 0x74 }; // "AppendTrieDict"
public static final int HEAD_SIZE_I = HEAD_MAGIC.length;
@@ -87,22 +85,16 @@ public class AppendTrieDictionary<T> extends Dictionary<T> {
private static final Logger logger = LoggerFactory.getLogger(AppendTrieDictionary.class);
transient private String baseDir;
- transient private int baseId;
transient private int maxId;
transient private int maxValueLength;
transient private int nValues;
- transient private BytesConverter<T> bytesConverter;
volatile private TreeMap<DictSliceKey, DictSlice> dictSliceMap;
- transient private boolean enableValueCache = true;
- transient private SoftReference<HashMap> valueToIdCache;
// Constructor both for build and deserialize
public AppendTrieDictionary() {
- if (enableValueCache) {
- valueToIdCache = new SoftReference<>(new HashMap());
- }
+ enableCache();
}
public void initParams(String baseDir, int baseId, int maxId, int maxValueLength, int nValues, BytesConverter bytesConverter) throws IOException {
@@ -111,7 +103,7 @@ public class AppendTrieDictionary<T> extends Dictionary<T> {
this.maxId = maxId;
this.maxValueLength = maxValueLength;
this.nValues = nValues;
- this.bytesConverter = bytesConverter;
+ this.bytesConvert = bytesConverter;
}
public void initDictSliceMap(CachedTreeMap dictMap) throws IOException {
@@ -893,7 +885,7 @@ public class AppendTrieDictionary<T> extends Dictionary<T> {
} else {
logger.info("GlobalDict {} exist, append value", resourcePath);
builder = new Builder<>(resourcePath, dictToUse, dictToUse.baseDir, dictToUse.maxId, dictToUse.maxValueLength,
- dictToUse.nValues, dictToUse.bytesConverter, dictToUse.writeDictMap());
+ dictToUse.nValues, dictToUse.bytesConvert, dictToUse.writeDictMap());
}
return builder;
@@ -1156,31 +1148,6 @@ public class AppendTrieDictionary<T> extends Dictionary<T> {
return maxValueLength;
}
- @Override
- final protected int getIdFromValueImpl(T value, int roundingFlag) {
- if (enableValueCache && roundingFlag == 0) {
- HashMap cache = valueToIdCache.get(); // SoftReference to skip cache gracefully when short of memory
- if (cache != null) {
- Integer id = null;
- id = (Integer) cache.get(value);
- if (id != null)
- return id.intValue();
-
- byte[] valueBytes = bytesConverter.convertToBytes(value);
- id = getIdFromValueBytes(valueBytes, 0, valueBytes.length, roundingFlag);
-
- cache.put(value, id);
- return id;
- }
- }
- byte[] valueBytes = bytesConverter.convertToBytes(value);
- return getIdFromValueBytes(valueBytes, 0, valueBytes.length, roundingFlag);
- }
-
- @Override
- final protected T getValueFromIdImpl(int id) {
- throw new UnsupportedOperationException("AppendTrieDictionary can't retrive value from id");
- }
@Override
protected byte[] getValueBytesFromIdImpl(int id) {
@@ -1198,7 +1165,7 @@ public class AppendTrieDictionary<T> extends Dictionary<T> {
indexOut.writeInt(maxId);
indexOut.writeInt(maxValueLength);
indexOut.writeInt(nValues);
- indexOut.writeUTF(bytesConverter.getClass().getName());
+ indexOut.writeUTF(bytesConvert.getClass().getName());
dictSliceMap.write(indexOut);
dictSliceMap.commit(keepAppend);
}
@@ -1208,7 +1175,7 @@ public class AppendTrieDictionary<T> extends Dictionary<T> {
public AppendTrieDictionary copyToAnotherMeta(KylinConfig srcConfig, KylinConfig dstConfig) throws IOException {
Configuration conf = new Configuration();
AppendTrieDictionary newDict = new AppendTrieDictionary();
- newDict.initParams(baseDir.replaceFirst(srcConfig.getHdfsWorkingDirectory(), dstConfig.getHdfsWorkingDirectory()), baseId, maxId, maxValueLength, nValues, bytesConverter);
+ newDict.initParams(baseDir.replaceFirst(srcConfig.getHdfsWorkingDirectory(), dstConfig.getHdfsWorkingDirectory()), baseId, maxId, maxValueLength, nValues, bytesConvert);
newDict.initDictSliceMap((CachedTreeMap)dictSliceMap);
logger.info("Copy AppendDict from {} to {}", this.baseDir, newDict.baseDir);
Path srcPath = new Path(this.baseDir);
@@ -1256,6 +1223,7 @@ public class AppendTrieDictionary<T> extends Dictionary<T> {
}
}
+
@Override
public void dump(PrintStream out) {
out.println("Total " + nValues + " values, " + (dictSliceMap == null ? 0 : dictSliceMap.size()) + " slice");
http://git-wip-us.apache.org/repos/asf/kylin/blob/1d6a36bf/core-dictionary/src/main/java/org/apache/kylin/dict/CacheDictionary.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/CacheDictionary.java b/core-dictionary/src/main/java/org/apache/kylin/dict/CacheDictionary.java
new file mode 100644
index 0000000..575358e
--- /dev/null
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/CacheDictionary.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.dict;
+
+import org.apache.kylin.common.util.Dictionary;
+
+import java.lang.ref.SoftReference;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+/**
+ * Created by xiefan on 16-12-30.
+ */
+abstract public class CacheDictionary<T> extends Dictionary<T> {
+ private static final long serialVersionUID = 1L;
+
+ transient protected boolean enableValueCache = true;
+
+ transient private SoftReference<Map> valueToIdCache;
+
+ transient private SoftReference<Object[]> idToValueCache;
+
+ transient protected int baseId;
+
+ transient protected BytesConverter<T> bytesConvert;
+
+ public CacheDictionary() {
+
+ }
+
+ //value --> id
+ @Override
+ final protected int getIdFromValueImpl(T value, int roundingFlag) {
+ if (enableValueCache && roundingFlag == 0) {
+ Map cache = valueToIdCache.get(); // SoftReference to skip cache gracefully when short of memory
+ if (cache != null) {
+ Integer id = null;
+ id = (Integer) cache.get(value);
+ if (id != null)
+ return id.intValue();
+ byte[] valueBytes = bytesConvert.convertToBytes(value);
+ id = getIdFromValueBytes(valueBytes, 0, valueBytes.length, roundingFlag);
+ cache.put(value, id);
+ return id;
+ }
+ }
+ byte[] valueBytes = bytesConvert.convertToBytes(value);
+ return getIdFromValueBytes(valueBytes, 0, valueBytes.length, roundingFlag);
+ }
+
+ //id --> value
+ @Override
+ final protected T getValueFromIdImpl(int id) {
+ if (enableValueCache) {
+ Object[] cache = idToValueCache.get();
+ if (cache != null) {
+ int seq = calcSeqNoFromId(id);
+ if (cache[seq] != null)
+ return (T) cache[seq];
+ byte[] valueBytes = getValueBytesFromIdImpl(id);
+ T value = bytesConvert.convertFromBytes(valueBytes, 0, valueBytes.length);
+ cache[seq] = value;
+ return value;
+ }
+ }
+ byte[] valueBytes = getValueBytesFromIdImpl(id);
+ return bytesConvert.convertFromBytes(valueBytes, 0, valueBytes.length);
+ }
+
+ final protected int calcSeqNoFromId(int id) {
+ int seq = id - baseId;
+ if (seq < 0 || seq >= getSize()) {
+ throw new IllegalArgumentException("Not a valid ID: " + id);
+ }
+ return seq;
+ }
+
+ final public void enableCache() {
+ this.enableValueCache = true;
+ if (this.valueToIdCache == null)
+ this.valueToIdCache = new SoftReference<Map>(new ConcurrentHashMap());
+ if (this.idToValueCache == null)
+ this.idToValueCache = new SoftReference<Object[]>(new Object[getSize()]);
+ }
+
+ final public void disableCache() {
+ this.enableValueCache = false;
+ this.valueToIdCache = null;
+ this.idToValueCache = null;
+ }
+}
http://git-wip-us.apache.org/repos/asf/kylin/blob/1d6a36bf/core-dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
index ee8534f..29bbee2 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
@@ -156,6 +156,7 @@ public class DateStrDictionary extends Dictionary<String> {
init(pattern, baseId);
}
+
@Override
public int hashCode() {
return 31 * baseId + pattern.hashCode();
http://git-wip-us.apache.org/repos/asf/kylin/blob/1d6a36bf/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionary.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionary.java b/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionary.java
index 12efbd3..f1b1b3d 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionary.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionary.java
@@ -194,30 +194,6 @@ public class NumberDictionary<T> extends TrieDictionary<T> {
return codec.decodeNumber(returnValue, offset);
}
- @Override
- public void enableIdToValueBytesCache() {
- enableIdToValueBytesCache(new EnableIdToValueBytesCacheVisitor() {
- NumberBytesCodec codec = getCodec();
- byte[] tmp = new byte[getSizeOfValue()];
-
- @Override
- public byte[] getBuffer() {
- return codec.buf;
- }
-
- @Override
- public byte[] makeValueBytes(byte[] buf, int length) {
- // the given buf is the codec buf, which we returned in getBuffer()
- codec.bufOffset = 0;
- codec.bufLen = length;
- int numLen = codec.decodeNumber(tmp, 0);
-
- byte[] result = new byte[numLen];
- System.arraycopy(tmp, 0, result, 0, numLen);
- return result;
- }
- });
- }
public static void main(String[] args) throws Exception {
NumberDictionaryBuilder<String> b = new NumberDictionaryBuilder<String>(new StringBytesConverter());
@@ -227,7 +203,7 @@ public class NumberDictionary<T> extends TrieDictionary<T> {
b.addValue("7");
TrieDictionary<String> dict = b.build(0);
- dict.enableIdToValueBytesCache();
+ //dict.enableIdToValueBytesCache();
for (int i = 0; i <= dict.getMaxId(); i++) {
System.out.println(Bytes.toString(dict.getValueBytesFromId(i)));
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/1d6a36bf/core-dictionary/src/main/java/org/apache/kylin/dict/TimeStrDictionary.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TimeStrDictionary.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TimeStrDictionary.java
index fc3db5f..eabc9f1 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/TimeStrDictionary.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TimeStrDictionary.java
@@ -147,4 +147,5 @@ public class TimeStrDictionary extends Dictionary<String> {
@Override
public void readFields(DataInput in) throws IOException {
}
+
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/1d6a36bf/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java
index c099de0..957207e 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java
@@ -27,10 +27,8 @@ import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.PrintStream;
-import java.lang.ref.SoftReference;
import java.util.Arrays;
-import java.util.Map;
-import java.util.concurrent.ConcurrentHashMap;
+
import org.apache.kylin.common.util.Bytes;
import org.apache.kylin.common.util.BytesUtil;
@@ -55,7 +53,7 @@ import com.google.common.base.Preconditions;
* @author yangli9
*/
@SuppressWarnings({ "rawtypes", "unchecked" })
-public class TrieDictionary<T> extends Dictionary<T> {
+public class TrieDictionary<T> extends CacheDictionary<T> {
private static final long serialVersionUID = 1L;
public static final byte[] MAGIC = new byte[] { 0x54, 0x72, 0x69, 0x65, 0x44, 0x69, 0x63, 0x74 }; // "TrieDict"
@@ -74,21 +72,13 @@ public class TrieDictionary<T> extends Dictionary<T> {
transient private int bodyLen;
transient private int sizeChildOffset;
transient private int sizeNoValuesBeneath;
- transient private int baseId;
transient private int maxValueLength;
- transient private BytesConverter<T> bytesConvert;
transient private int nValues;
transient private int sizeOfId;
transient private long childOffsetMask;
transient private int firstByteOffset;
- transient private boolean enableValueCache = true;
- transient private SoftReference<Map> valueToIdCache;
- transient private SoftReference<Object[]> idToValueCache;
-
- transient private boolean enableIdToValueBytesCache = false;
- transient private byte[][] idToValueBytesCache;
public TrieDictionary() { // default constructor for Writable interface
}
@@ -120,16 +110,13 @@ public class TrieDictionary<T> extends Dictionary<T> {
this.sizeOfId = BytesUtil.sizeForValue(baseId + nValues + 1); // note baseId could raise 1 byte in ID space, +1 to reserve all 0xFF for NULL case
this.childOffsetMask = ~((long) (BIT_IS_LAST_CHILD | BIT_IS_END_OF_VALUE) << ((sizeChildOffset - 1) * 8));
this.firstByteOffset = sizeChildOffset + sizeNoValuesBeneath + 1; // the offset from begin of node to its first value byte
+ enableCache();
} catch (Exception e) {
if (e instanceof RuntimeException)
throw (RuntimeException) e;
else
throw new RuntimeException(e);
}
- if (enableValueCache) {
- valueToIdCache = new SoftReference<Map>(new ConcurrentHashMap());
- idToValueCache = new SoftReference<Object[]>(new Object[nValues]);
- }
}
@Override
@@ -152,26 +139,6 @@ public class TrieDictionary<T> extends Dictionary<T> {
return maxValueLength;
}
- @Override
- final protected int getIdFromValueImpl(T value, int roundingFlag) {
- if (enableValueCache && roundingFlag == 0) {
- Map cache = valueToIdCache.get(); // SoftReference to skip cache gracefully when short of memory
- if (cache != null) {
- Integer id = null;
- id = (Integer) cache.get(value);
- if (id != null)
- return id.intValue();
-
- byte[] valueBytes = bytesConvert.convertToBytes(value);
- id = getIdFromValueBytes(valueBytes, 0, valueBytes.length, roundingFlag);
-
- cache.put(value, id);
- return id;
- }
- }
- byte[] valueBytes = bytesConvert.convertToBytes(value);
- return getIdFromValueBytes(valueBytes, 0, valueBytes.length, roundingFlag);
- }
@Override
protected int getIdFromValueBytesImpl(byte[] value, int offset, int len, int roundingFlag) {
@@ -267,35 +234,9 @@ public class TrieDictionary<T> extends Dictionary<T> {
return k;
}
- @Override
- final protected T getValueFromIdImpl(int id) {
- if (enableValueCache) {
- Object[] cache = idToValueCache.get(); // SoftReference to skip cache gracefully when short of memory
- if (cache != null) {
- int seq = calcSeqNoFromId(id);
- if (cache[seq] != null)
- return (T) cache[seq];
-
- byte[] value = new byte[getSizeOfValue()];
- int length = getValueBytesFromId(id, value, 0);
- T result = bytesConvert.convertFromBytes(value, 0, length);
-
- cache[seq] = result;
- return result;
- }
- }
- byte[] value = new byte[getSizeOfValue()];
- int length = getValueBytesFromId(id, value, 0);
- return bytesConvert.convertFromBytes(value, 0, length);
- }
@Override
protected byte[] getValueBytesFromIdImpl(int id) {
- if (enableIdToValueBytesCache) {
- int seq = calcSeqNoFromId(id);
- return idToValueBytesCache[seq];
- }
-
byte[] buf = new byte[maxValueLength];
int len = getValueBytesFromIdImpl(id, buf, 0);
@@ -363,68 +304,6 @@ public class TrieDictionary<T> extends Dictionary<T> {
}
}
- public void enableIdToValueBytesCache() {
- enableIdToValueBytesCache(new EnableIdToValueBytesCacheVisitor() {
- @Override
- public byte[] getBuffer() {
- return new byte[getSizeOfValue()];
- }
-
- @Override
- public byte[] makeValueBytes(byte[] buf, int length) {
- byte[] valueBytes = new byte[length];
- System.arraycopy(buf, 0, valueBytes, 0, length);
- return valueBytes;
- }
- });
- }
-
- interface EnableIdToValueBytesCacheVisitor {
- byte[] getBuffer();
-
- byte[] makeValueBytes(byte[] buf, int length);
- }
-
- protected void enableIdToValueBytesCache(EnableIdToValueBytesCacheVisitor visitor) {
- enableIdToValueBytesCache = true;
- idToValueBytesCache = new byte[nValues][];
- enableIdToValueBytesCache_recursion(headSize, 0, visitor.getBuffer(), 0, visitor);
- }
-
- private void enableIdToValueBytesCache_recursion(int n, int seq, byte[] buf, int tail, EnableIdToValueBytesCacheVisitor visitor) {
- // write current node value
- int p = n + firstByteOffset;
- int len = BytesUtil.readUnsigned(trieBytes, p - 1, 1);
- System.arraycopy(trieBytes, p, buf, tail, len);
- tail += len;
-
- // if the value is ended
- boolean isEndOfValue = checkFlag(n, BIT_IS_END_OF_VALUE);
- if (isEndOfValue) {
- idToValueBytesCache[seq] = visitor.makeValueBytes(buf, tail);
- seq++;
- }
-
- // find a child to continue
- int c = getChildOffset(n);
- if (c == headSize) // has no children
- return;
-
- // process each child
- while (true) {
- enableIdToValueBytesCache_recursion(c, seq, buf, tail, visitor);
-
- int nValuesBeneath = BytesUtil.readUnsigned(trieBytes, c + sizeChildOffset, sizeNoValuesBeneath);
- seq += nValuesBeneath;
-
- // go next child
- if (checkFlag(c, BIT_IS_LAST_CHILD))
- break; // no more child? we are done
- p = c + firstByteOffset;
- c = p + BytesUtil.readUnsigned(trieBytes, p - 1, 1);
- }
- }
-
private boolean checkFlag(int offset, int bit) {
return (trieBytes[offset] & bit) > 0;
}
@@ -436,14 +315,6 @@ public class TrieDictionary<T> extends Dictionary<T> {
return baseId + seq;
}
- private int calcSeqNoFromId(int id) {
- int seq = id - baseId;
- if (seq < 0 || seq >= nValues) {
- throw new IllegalArgumentException("Not a valid ID: " + id);
- }
- return seq;
- }
-
@Override
public void write(DataOutput out) throws IOException {
out.write(trieBytes);
@@ -552,7 +423,7 @@ public class TrieDictionary<T> extends Dictionary<T> {
Preconditions.checkArgument(dict2.contains(dict));
Preconditions.checkArgument(dict.equals(dict2));
- dict2.enableIdToValueBytesCache();
+ //dict2.enableIdToValueBytesCache();
for (int i = 0; i <= dict.getMaxId(); i++) {
System.out.println(Bytes.toString(dict.getValueBytesFromId(i)));
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/1d6a36bf/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java
index e746348..c655854 100755
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java
@@ -27,6 +27,7 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
+
import org.apache.kylin.common.util.ByteArray;
import org.apache.kylin.common.util.Bytes;
import org.apache.kylin.common.util.BytesUtil;
@@ -40,18 +41,14 @@ import org.apache.kylin.common.util.Dictionary;
* <p>
* Created by xiefan on 16-10-26.
*/
-public class TrieDictionaryForest<T> extends Dictionary<T> {
+public class TrieDictionaryForest<T> extends CacheDictionary<T> {
private static final long serialVersionUID = 1L;
private ArrayList<TrieDictionary<T>> trees;
private ArrayList<ByteArray> valueDivide;
- private ArrayList<Integer> accuOffset; //find tree
-
- private BytesConverter<T> bytesConvert;
-
- private int baseId;
+ private ArrayList<Integer> accuOffset;
private ArrayList<ByteArray> maxValue;
@@ -66,6 +63,7 @@ public class TrieDictionaryForest<T> extends Dictionary<T> {
this.bytesConvert = bytesConverter;
this.baseId = baseId;
initMaxValue();
+ enableCache();
}
@Override
@@ -102,12 +100,6 @@ public class TrieDictionaryForest<T> extends Dictionary<T> {
return maxValue;
}
- // value --> id
- @Override
- protected int getIdFromValueImpl(T value, int roundingFlag) throws IllegalArgumentException {
- byte[] valueBytes = bytesConvert.convertToBytes(value);
- return getIdFromValueBytesImpl(valueBytes, 0, valueBytes.length, roundingFlag);
- }
@Override
protected int getIdFromValueBytesImpl(byte[] value, int offset, int len, int roundingFlag) throws IllegalArgumentException {
@@ -148,15 +140,7 @@ public class TrieDictionaryForest<T> extends Dictionary<T> {
return id;
}
- @Override
- protected T getValueFromIdImpl(int id) throws IllegalArgumentException {
- byte[] data = getValueBytesFromIdImpl(id);
- if (data != null) {
- return bytesConvert.convertFromBytes(data, 0, data.length);
- } else {
- return null;
- }
- }
+
@Override
protected int getValueBytesFromIdImpl(int id, byte[] returnValue, int offset) throws IllegalArgumentException {
@@ -271,6 +255,7 @@ public class TrieDictionaryForest<T> extends Dictionary<T> {
trees.add(dict);
}
initMaxValue();
+ enableCache();
} catch (Exception e) {
if (e instanceof RuntimeException)
throw (RuntimeException) e;
@@ -383,22 +368,4 @@ public class TrieDictionaryForest<T> extends Dictionary<T> {
}
}
- public static void main(String[] args) {
- ArrayList<String> list = new ArrayList<>();
- list.add("\u4e00");
- list.add("\u4e8c");
- list.add("\u4e09");
- list.add("");
- list.add("part");
- list.add("par");
- list.add("partition");
- list.add("party");
- list.add("parties");
- list.add("paint");
- Collections.sort(list);
- for (String str : list) {
- System.out.println("found value:" + str + " index:" + lowerBound(str, list));
- }
- }
-
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/1d6a36bf/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java
index 4ee30f0..af2e302 100755
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java
@@ -111,7 +111,6 @@ public class TrieDictionaryForestBuilder<T> {
reset();
}
TrieDictionaryForest<T> forest = new TrieDictionaryForest<T>(this.trees, this.valueDivide, this.accuOffset, this.bytesConverter, baseId);
-
// if input values are not in ascending order and tree num>1,TrieDictionaryForest can not work correctly.
if (forest.getTrees().size() > 1 && !isOrdered) {
throw new IllegalStateException("Invalid input data. Unordered data can not be split into multi trees");
http://git-wip-us.apache.org/repos/asf/kylin/blob/1d6a36bf/core-dictionary/src/test/java/org/apache/kylin/dict/MultipleDictionaryValueEnumeratorTest.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/MultipleDictionaryValueEnumeratorTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/MultipleDictionaryValueEnumeratorTest.java
index ad166c2..73e0935 100644
--- a/core-dictionary/src/test/java/org/apache/kylin/dict/MultipleDictionaryValueEnumeratorTest.java
+++ b/core-dictionary/src/test/java/org/apache/kylin/dict/MultipleDictionaryValueEnumeratorTest.java
@@ -158,6 +158,7 @@ public class MultipleDictionaryValueEnumeratorTest {
@Override
public void readFields(DataInput in) throws IOException {}
+
@Override
public boolean contains(Dictionary another) {
return false;
http://git-wip-us.apache.org/repos/asf/kylin/blob/1d6a36bf/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestBenchmark.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestBenchmark.java b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestBenchmark.java
new file mode 100644
index 0000000..0b4c0e3
--- /dev/null
+++ b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestBenchmark.java
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.dict;
+
+import org.apache.kylin.common.util.Dictionary;
+import org.junit.Before;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Random;
+import java.util.UUID;
+
+/**
+ * Created by xiefan on 16-12-28.
+ */
+@Ignore
+public class TrieDictionaryForestBenchmark {
+
+ private static final Random rand = new Random(System.currentTimeMillis());
+
+ private CacheDictionary<String> oldDict;
+
+ private CacheDictionary<String> newDict;
+
+ private ArrayList<String> rawData;
+
+ private int cardnality = 100;
+
+ private int testTimes = 100000;
+
+ @Before
+ public void before() {
+ int dataSize = 100 * 10000;
+ TrieDictionaryBuilder<String> b1 = new TrieDictionaryBuilder<>(new StringBytesConverter());
+ TrieDictionaryForestBuilder<String> b2 = new TrieDictionaryForestBuilder<String>(new StringBytesConverter(), 0, 5);
+ this.rawData = genStringDataSet(dataSize);
+ for (String str : this.rawData) {
+ b1.addValue(str);
+ b2.addValue(str);
+ }
+ this.oldDict = b1.build(0);
+ this.newDict = b2.build();
+ System.out.println("new dict split tree size : " + ((TrieDictionaryForest<String>) newDict).getTrees().size());
+ }
+
+ @Test
+ public void testAll() {
+ benchmarkWithoutCache();
+ benchmarkWithCache();
+ }
+
+ @Test
+ public void benchmarkWithoutCache() {
+ oldDict.disableCache();
+ newDict.disableCache();
+ runBenchmark("benchmarkWithoutCache");
+ }
+
+ @Test
+ public void benchmarkWithCache() {
+ oldDict.enableCache();
+ newDict.enableCache();
+ runBenchmark("benchmarkWithCache");
+ }
+
+ private void runBenchmark(String testName) {
+ long oldTime = runQueryValue(oldDict, cardnality, testTimes);
+ long oldTime2 = runQueryId(rawData, oldDict, cardnality, testTimes);
+ long oldTime3 = runQueryValueBytes(oldDict, cardnality, testTimes);
+ long oldTime4 = runQueryValueBytes2(oldDict, cardnality, testTimes);
+ long oldTime5 = runQueryIdByValueBytes(rawData, oldDict, cardnality, testTimes);
+ long newTime = runQueryValue(newDict, cardnality, testTimes);
+ long newTime2 = runQueryId(rawData, newDict, cardnality, testTimes);
+ long newTime3 = runQueryValueBytes(newDict, cardnality, testTimes);
+ long newTime4 = runQueryValueBytes2(newDict, cardnality, testTimes);
+ long newTime5 = runQueryIdByValueBytes(rawData, newDict, cardnality, testTimes);
+ System.out.println(testName);
+ System.out.println("old dict value --> id : " + oldTime2);
+ System.out.println("new dict value --> id :" + newTime2);
+ System.out.println("old dict value bytes --> id : " + oldTime5);
+ System.out.println("new dict value bytes--> id :" + newTime5);
+ System.out.println("old dict id --> value : " + oldTime);
+ System.out.println("new dict id --> value : " + newTime);
+ System.out.println("old dict id --> value bytes : " + oldTime3);
+ System.out.println("new dict id --> value bytes : " + newTime3);
+ System.out.println("old dict id --> value bytes (method 2): " + oldTime4);
+ System.out.println("new dict id --> value bytes (method 2): " + newTime4);
+ }
+
+ //id -- value
+ private long runQueryValue(Dictionary<String> dict, int cardnality, int testTimes) {
+ long startTime = System.currentTimeMillis();
+ int step = 1;
+ for (int i = 0; i < testTimes; i++) {
+ for (int j = 0; j < cardnality; j++) {
+ step |= dict.getValueFromId(j).length();
+ }
+ }
+ return System.currentTimeMillis() - startTime;
+ }
+
+ private long runQueryValueBytes(Dictionary<String> dict, int cardnality, int testTimes) {
+ long startTime = System.currentTimeMillis();
+ int step = 1;
+ for (int i = 0; i < testTimes; i++) {
+ for (int j = 0; j < cardnality; j++) {
+ step |= dict.getValueBytesFromId(j).length;
+ }
+ }
+ return System.currentTimeMillis() - startTime;
+ }
+
+ private long runQueryValueBytes2(Dictionary<String> dict, int cardnality, int testTimes) {
+ long startTime = System.currentTimeMillis();
+ int step = 1;
+ byte[] returnValue = new byte[2048];
+ for (int i = 0; i < testTimes; i++) {
+ for (int j = 0; j < cardnality; j++) {
+ int size = dict.getValueBytesFromId(j, returnValue, 0);
+ step |= size;
+ }
+ }
+ return System.currentTimeMillis() - startTime;
+ }
+
+ private long runQueryId(ArrayList<String> rawData, Dictionary<String> dict, int cardnality, int testTimes) {
+ long startTime = System.currentTimeMillis();
+ int step = 1;
+ for (int i = 0; i < testTimes; i++) {
+ for (int j = 0; j < cardnality; j++) {
+ step |= dict.getIdFromValue(rawData.get(j));
+ }
+ }
+ return System.currentTimeMillis() - startTime;
+ }
+
+ private long runQueryIdByValueBytes(ArrayList<String> rawData, Dictionary<String> dict, int cardnality, int testTimes) {
+ List<byte[]> testBytes = new ArrayList<>();
+ StringBytesConverter converter = new StringBytesConverter();
+ for (int i = 0; i < cardnality; i++) {
+ testBytes.add(converter.convertToBytes(rawData.get(i)));
+ }
+ long startTime = System.currentTimeMillis();
+ int step = 1;
+ for (int i = 0; i < testTimes; i++) {
+ for (int j = 0; j < cardnality; j++) {
+ step |= dict.getIdFromValueBytes(testBytes.get(j), 0, testBytes.get(j).length);
+ }
+ }
+ return System.currentTimeMillis() - startTime;
+ }
+
+ private ArrayList<String> genStringDataSet(int totalSize) {
+ ArrayList<String> data = new ArrayList<>();
+ for (int i = 0; i < totalSize; i++) {
+ data.add(UUID.randomUUID().toString());
+ }
+ Collections.sort(data);
+ return data;
+ }
+}
http://git-wip-us.apache.org/repos/asf/kylin/blob/1d6a36bf/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
index ee092c9..68cf301 100755
--- a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
+++ b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
@@ -58,7 +58,7 @@ public class TrieDictionaryForestTest {
TrieDictionaryForest<String> dict = builder.build();
assertSameBehaviorAsTrie(dict, strs, 0);
}
-
+
@Test
public void testBasicFound() {
ArrayList<String> strs = new ArrayList<String>();
@@ -106,7 +106,7 @@ public class TrieDictionaryForestTest {
assertEquals(expectId, dict.getIdFromValue(s));
expectId++;
}
-
+
assertSameBehaviorAsTrie(dict, strs, baseId);
}
@@ -128,7 +128,7 @@ public class TrieDictionaryForestTest {
assertEquals(255, id);
id = dict.getIdFromValue(null, -1);
assertEquals(255, id);
-
+
assertSameBehaviorAsTrie(dict, strs, 0);
}
@@ -263,7 +263,6 @@ public class TrieDictionaryForestTest {
}
@Test
- @Ignore
public void categoryNamesTest() throws Exception {
InputStream is = new FileInputStream("src/test/resources/dict/dw_category_grouping_names.dat");
ArrayList<String> str = loadStrings(is);
@@ -284,19 +283,19 @@ public class TrieDictionaryForestTest {
}
@Test
- public void emptyDictTest() throws Exception{
+ public void emptyDictTest() throws Exception {
TrieDictionaryForestBuilder<String> b = new TrieDictionaryForestBuilder<String>(new StringBytesConverter());
TrieDictionaryForest<String> dict = b.build();
- try{
+ try {
int id = dict.getIdFromValue("123", 0);
fail("id should not exist");
- }catch (IllegalArgumentException e){
+ } catch (IllegalArgumentException e) {
//right
}
- try{
+ try {
String value = dict.getValueFromIdImpl(123);
fail("value should not exist");
- }catch (IllegalArgumentException e){
+ } catch (IllegalArgumentException e) {
//right
}
}
@@ -732,12 +731,6 @@ public class TrieDictionaryForestTest {
System.out.println("compare build time. Old trie : " + oldDictTotalBuildTime / 1000.0 + "s.New trie : " + newDictTotalBuildTime / 1000.0 + "s");
}
- @Test
- public void queryTimeBenchmarkTest() throws Exception {
- int count = (int) (Integer.MAX_VALUE * 0.8 / 640);
- benchmarkStringDictionary(new RandomStrings(count));
- }
-
private void evaluateDataSize(ArrayList<String> list) {
long size = 0;
for (String str : list)
@@ -867,7 +860,6 @@ public class TrieDictionaryForestTest {
int baseId = new Random().nextInt(100);
TrieDictionaryForestBuilder<String> b = newDictBuilder(str, baseId, 2);
TrieDictionaryForest<String> dict = b.build();
- //dict.dump(System.out);
TreeSet<String> set = new TreeSet<String>();
for (String s : str) {
set.add(s);
@@ -881,7 +873,7 @@ public class TrieDictionaryForestTest {
int id = baseId;
for (; it.hasNext(); id++) {
String value = it.next();
- // System.out.println("checking " + id + " <==> " + value);
+ //System.out.println("checking " + id + " <==> " + value);
assertEquals(id, dict.getIdFromValue(value));
assertEquals(value, dict.getValueFromId(id));
@@ -892,7 +884,7 @@ public class TrieDictionaryForestTest {
for (String s : notFound) {
try {
int nullId = dict.getIdFromValue(s);
- System.out.println("null value id:" + nullId);
+ //System.out.println("null value id:" + nullId);
fail("For not found value '" + s + "', IllegalArgumentException is expected");
} catch (IllegalArgumentException e) {
// good
@@ -938,8 +930,9 @@ public class TrieDictionaryForestTest {
public static TrieDictionaryForestBuilder<String> newDictBuilder(Iterable<String> strs, int baseId, int treeSize) {
TrieDictionaryForestBuilder<String> b = new TrieDictionaryForestBuilder<String>(new StringBytesConverter(), baseId);
b.setMaxTrieTreeSize(treeSize);
- for (String s : strs)
+ for (String s : strs) {
b.addValue(s);
+ }
return b;
}
@@ -950,7 +943,7 @@ public class TrieDictionaryForestTest {
b.addValue(strs.next());
return b;
}
-
+
private static class RandomStrings implements Iterable<String> {
final private int size;
@@ -1039,7 +1032,7 @@ public class TrieDictionaryForestTest {
trieBuilder.addValue(s);
}
TrieDictionary<String> trie = trieBuilder.build(baseId);
-
+
assertEquals(trie.getMaxId(), dict.getMaxId());
assertEquals(trie.getMinId(), dict.getMinId());
assertEquals(trie.getSize(), dict.getSize());