You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2015/10/29 10:36:11 UTC
[2/2] incubator-kylin git commit: KYLIN-1099 Code review
KYLIN-1099 Code review
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/9b0d0c97
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/9b0d0c97
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/9b0d0c97
Branch: refs/heads/1.x-staging
Commit: 9b0d0c9797c6c092ba777692861475f239a81c31
Parents: 41477e6
Author: Li, Yang <ya...@ebay.com>
Authored: Thu Oct 29 17:35:24 2015 +0800
Committer: Li, Yang <ya...@ebay.com>
Committed: Thu Oct 29 17:35:59 2015 +0800
----------------------------------------------------------------------
.../java/org/apache/kylin/dict/Dictionary.java | 4 +++
.../apache/kylin/dict/DictionaryGenerator.java | 29 +++++++-------------
.../apache/kylin/dict/DictionaryManager.java | 1 +
.../kylin/dict/IDictionaryValueEnumerator.java | 3 --
.../dict/MultipleDictionaryValueEnumerator.java | 2 ++
5 files changed, 17 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9b0d0c97/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
----------------------------------------------------------------------
diff --git a/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java b/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
index 14eb400..e99a553 100644
--- a/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
+++ b/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
@@ -49,6 +49,10 @@ abstract public class Dictionary<T> implements Writable {
abstract public int getMinId();
abstract public int getMaxId();
+
+ public int getSize() {
+ return getMaxId() - getMinId() + 1;
+ }
/**
* @return the size of an ID in bytes, determined by the cardinality of
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9b0d0c97/dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
----------------------------------------------------------------------
diff --git a/dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java b/dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
index 0021c6e..e399a70 100644
--- a/dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
+++ b/dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
@@ -49,7 +49,7 @@ public class DictionaryGenerator {
try {
return KylinConfig.getInstanceFromEnv().getDictionaryMaxCardinality();
} catch (Throwable e) {
- return 2000000; // some test case does not KylinConfig setup properly
+ return 5000000; // some test case does not KylinConfig setup properly
}
}
@@ -62,11 +62,11 @@ public class DictionaryGenerator {
// build dict, case by data type
DataType dataType = DataType.getInstance(info.getDataType());
if (dataType.isDateTimeFamily())
- dict = buildDateStrDict(info, valueEnumerator, baseId, nSamples, samples);
+ dict = buildDateStrDict(valueEnumerator, baseId, nSamples, samples);
else if (dataType.isNumberFamily())
- dict = buildNumberDict(info, valueEnumerator, baseId, nSamples, samples);
+ dict = buildNumberDict(valueEnumerator, baseId, nSamples, samples);
else
- dict = buildStringDict(info, valueEnumerator, baseId, nSamples, samples);
+ dict = buildStringDict(valueEnumerator, baseId, nSamples, samples);
// log a few samples
StringBuilder buf = new StringBuilder();
@@ -76,10 +76,10 @@ public class DictionaryGenerator {
buf.append(s.toString()).append("=>").append(dict.getIdFromValue(s));
}
logger.info("Dictionary value samples: " + buf.toString());
- logger.info("Dictionary cardinality: " + info.getCardinality());
+ logger.info("Dictionary cardinality: " + dict.getSize());
- if (dict instanceof TrieDictionary && info.getCardinality() > DICT_MAX_CARDINALITY)
- throw new IllegalArgumentException("Too high cardinality is not suitable for dictionary -- " + info.getSourceTable() + "." + info.getSourceColumn() + " cardinality: " + info.getCardinality());
+ if (dict instanceof TrieDictionary && dict.getSize() > DICT_MAX_CARDINALITY)
+ throw new IllegalArgumentException("Too high cardinality is not suitable for dictionary -- " + info.getSourceTable() + "." + info.getSourceColumn() + " cardinality: " + dict.getSize());
return dict;
}
@@ -105,7 +105,7 @@ public class DictionaryGenerator {
}
}
- private static Dictionary buildDateStrDict(DictionaryInfo info, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList samples) throws IOException {
+ private static Dictionary buildDateStrDict(IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList samples) throws IOException {
final int BAD_THRESHOLD = 2;
String matchPattern = null;
byte[] value;
@@ -113,7 +113,6 @@ public class DictionaryGenerator {
for (String ptn : DATE_PATTERNS) {
matchPattern = ptn; // be optimistic
int badCount = 0;
- int totalCount = 0;
SimpleDateFormat sdf = new SimpleDateFormat(ptn);
while (valueEnumerator.moveNext()) {
@@ -126,7 +125,6 @@ public class DictionaryGenerator {
sdf.parse(str);
if (samples.size() < nSamples && !samples.contains(str))
samples.add(str);
- totalCount++;
} catch (ParseException e) {
logger.info("Unrecognized datetime value: " + str);
badCount++;
@@ -137,17 +135,15 @@ public class DictionaryGenerator {
}
}
if (matchPattern != null) {
- info.setCardinality(totalCount);
return new DateStrDictionary(matchPattern, baseId);
}
}
throw new IllegalStateException("Unrecognized datetime value");
}
- private static Dictionary buildStringDict(DictionaryInfo info, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList samples) throws IOException {
+ private static Dictionary buildStringDict(IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList samples) throws IOException {
TrieDictionaryBuilder builder = new TrieDictionaryBuilder(new StringBytesConverter());
byte[] value;
- int totalCount = 0;
while (valueEnumerator.moveNext()) {
value = valueEnumerator.current();
if (value == null)
@@ -156,16 +152,13 @@ public class DictionaryGenerator {
builder.addValue(v);
if (samples.size() < nSamples && !samples.contains(v))
samples.add(v);
- totalCount++;
}
- info.setCardinality(totalCount);
return builder.build(baseId);
}
- private static Dictionary buildNumberDict(DictionaryInfo info, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList samples) throws IOException {
+ private static Dictionary buildNumberDict(IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList samples) throws IOException {
NumberDictionaryBuilder builder = new NumberDictionaryBuilder(new StringBytesConverter());
byte[] value;
- int totalCount = 0;
while (valueEnumerator.moveNext()) {
value = valueEnumerator.current();
if (value == null)
@@ -177,9 +170,7 @@ public class DictionaryGenerator {
builder.addValue(v);
if (samples.size() < nSamples && !samples.contains(v))
samples.add(v);
- totalCount++;
}
- info.setCardinality(totalCount);
return builder.build(baseId);
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9b0d0c97/dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
----------------------------------------------------------------------
diff --git a/dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java b/dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
index 13da78e..1c492c4 100644
--- a/dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
+++ b/dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
@@ -122,6 +122,7 @@ public class DictionaryManager {
return getDictionaryInfo(dupDict);
}
+ newDictInfo.setCardinality(newDict.getSize());
newDictInfo.setDictionaryObject(newDict);
newDictInfo.setDictionaryClass(newDict.getClass().getName());
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9b0d0c97/dictionary/src/main/java/org/apache/kylin/dict/IDictionaryValueEnumerator.java
----------------------------------------------------------------------
diff --git a/dictionary/src/main/java/org/apache/kylin/dict/IDictionaryValueEnumerator.java b/dictionary/src/main/java/org/apache/kylin/dict/IDictionaryValueEnumerator.java
index b297d70..ecf980a 100644
--- a/dictionary/src/main/java/org/apache/kylin/dict/IDictionaryValueEnumerator.java
+++ b/dictionary/src/main/java/org/apache/kylin/dict/IDictionaryValueEnumerator.java
@@ -18,9 +18,6 @@
package org.apache.kylin.dict;
-import org.apache.calcite.linq4j.Enumerator;
-import org.apache.hadoop.fs.RemoteIterator;
-
import java.io.IOException;
/**
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9b0d0c97/dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java
----------------------------------------------------------------------
diff --git a/dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java b/dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java
index 7321bde..4cf72d3 100644
--- a/dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java
+++ b/dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java
@@ -20,6 +20,7 @@ package org.apache.kylin.dict;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
+
import org.apache.kylin.common.util.Bytes;
import java.io.IOException;
@@ -29,6 +30,7 @@ import java.util.List;
/**
* Created by dongli on 10/28/15.
*/
+@SuppressWarnings("rawtypes")
public class MultipleDictionaryValueEnumerator implements IDictionaryValueEnumerator {
private HashSet<byte[]> dedup = Sets.newHashSet();
private int curDictIndex = 0;