You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2015/10/29 10:36:11 UTC

[2/2] incubator-kylin git commit: KYLIN-1099 Code review

KYLIN-1099 Code review


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/9b0d0c97
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/9b0d0c97
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/9b0d0c97

Branch: refs/heads/1.x-staging
Commit: 9b0d0c9797c6c092ba777692861475f239a81c31
Parents: 41477e6
Author: Li, Yang <ya...@ebay.com>
Authored: Thu Oct 29 17:35:24 2015 +0800
Committer: Li, Yang <ya...@ebay.com>
Committed: Thu Oct 29 17:35:59 2015 +0800

----------------------------------------------------------------------
 .../java/org/apache/kylin/dict/Dictionary.java  |  4 +++
 .../apache/kylin/dict/DictionaryGenerator.java  | 29 +++++++-------------
 .../apache/kylin/dict/DictionaryManager.java    |  1 +
 .../kylin/dict/IDictionaryValueEnumerator.java  |  3 --
 .../dict/MultipleDictionaryValueEnumerator.java |  2 ++
 5 files changed, 17 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9b0d0c97/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
----------------------------------------------------------------------
diff --git a/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java b/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
index 14eb400..e99a553 100644
--- a/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
+++ b/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
@@ -49,6 +49,10 @@ abstract public class Dictionary<T> implements Writable {
     abstract public int getMinId();
 
     abstract public int getMaxId();
+    
+    public int getSize() {
+        return getMaxId() - getMinId() + 1;
+    }
 
     /**
      * @return the size of an ID in bytes, determined by the cardinality of

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9b0d0c97/dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
----------------------------------------------------------------------
diff --git a/dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java b/dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
index 0021c6e..e399a70 100644
--- a/dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
+++ b/dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
@@ -49,7 +49,7 @@ public class DictionaryGenerator {
         try {
             return KylinConfig.getInstanceFromEnv().getDictionaryMaxCardinality();
         } catch (Throwable e) {
-            return 2000000; // some test case does not KylinConfig setup properly
+            return 5000000; // some test case does not KylinConfig setup properly
         }
     }
 
@@ -62,11 +62,11 @@ public class DictionaryGenerator {
         // build dict, case by data type
         DataType dataType = DataType.getInstance(info.getDataType());
         if (dataType.isDateTimeFamily())
-            dict = buildDateStrDict(info, valueEnumerator, baseId, nSamples, samples);
+            dict = buildDateStrDict(valueEnumerator, baseId, nSamples, samples);
         else if (dataType.isNumberFamily())
-            dict = buildNumberDict(info, valueEnumerator, baseId, nSamples, samples);
+            dict = buildNumberDict(valueEnumerator, baseId, nSamples, samples);
         else
-            dict = buildStringDict(info, valueEnumerator, baseId, nSamples, samples);
+            dict = buildStringDict(valueEnumerator, baseId, nSamples, samples);
 
         // log a few samples
         StringBuilder buf = new StringBuilder();
@@ -76,10 +76,10 @@ public class DictionaryGenerator {
             buf.append(s.toString()).append("=>").append(dict.getIdFromValue(s));
         }
         logger.info("Dictionary value samples: " + buf.toString());
-        logger.info("Dictionary cardinality: " + info.getCardinality());
+        logger.info("Dictionary cardinality: " + dict.getSize());
 
-        if (dict instanceof TrieDictionary && info.getCardinality() > DICT_MAX_CARDINALITY)
-            throw new IllegalArgumentException("Too high cardinality is not suitable for dictionary -- " + info.getSourceTable() + "." + info.getSourceColumn() + " cardinality: " + info.getCardinality());
+        if (dict instanceof TrieDictionary && dict.getSize() > DICT_MAX_CARDINALITY)
+            throw new IllegalArgumentException("Too high cardinality is not suitable for dictionary -- " + info.getSourceTable() + "." + info.getSourceColumn() + " cardinality: " + dict.getSize());
 
         return dict;
     }
@@ -105,7 +105,7 @@ public class DictionaryGenerator {
         }
     }
 
-    private static Dictionary buildDateStrDict(DictionaryInfo info, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList samples) throws IOException {
+    private static Dictionary buildDateStrDict(IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList samples) throws IOException {
         final int BAD_THRESHOLD = 2;
         String matchPattern = null;
         byte[] value;
@@ -113,7 +113,6 @@ public class DictionaryGenerator {
         for (String ptn : DATE_PATTERNS) {
             matchPattern = ptn; // be optimistic
             int badCount = 0;
-            int totalCount = 0;
             SimpleDateFormat sdf = new SimpleDateFormat(ptn);
 
             while (valueEnumerator.moveNext()) {
@@ -126,7 +125,6 @@ public class DictionaryGenerator {
                     sdf.parse(str);
                     if (samples.size() < nSamples && !samples.contains(str))
                         samples.add(str);
-                    totalCount++;
                 } catch (ParseException e) {
                     logger.info("Unrecognized datetime value: " + str);
                     badCount++;
@@ -137,17 +135,15 @@ public class DictionaryGenerator {
                 }
             }
             if (matchPattern != null) {
-                info.setCardinality(totalCount);
                 return new DateStrDictionary(matchPattern, baseId);
             }
         }
         throw new IllegalStateException("Unrecognized datetime value");
     }
 
-    private static Dictionary buildStringDict(DictionaryInfo info, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList samples) throws IOException {
+    private static Dictionary buildStringDict(IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList samples) throws IOException {
         TrieDictionaryBuilder builder = new TrieDictionaryBuilder(new StringBytesConverter());
         byte[] value;
-        int totalCount = 0;
         while (valueEnumerator.moveNext()) {
             value = valueEnumerator.current();
             if (value == null)
@@ -156,16 +152,13 @@ public class DictionaryGenerator {
             builder.addValue(v);
             if (samples.size() < nSamples && !samples.contains(v))
                 samples.add(v);
-            totalCount++;
         }
-        info.setCardinality(totalCount);
         return builder.build(baseId);
     }
 
-    private static Dictionary buildNumberDict(DictionaryInfo info, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList samples) throws IOException {
+    private static Dictionary buildNumberDict(IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList samples) throws IOException {
         NumberDictionaryBuilder builder = new NumberDictionaryBuilder(new StringBytesConverter());
         byte[] value;
-        int totalCount = 0;
         while (valueEnumerator.moveNext()) {
             value = valueEnumerator.current();
             if (value == null)
@@ -177,9 +170,7 @@ public class DictionaryGenerator {
             builder.addValue(v);
             if (samples.size() < nSamples && !samples.contains(v))
                 samples.add(v);
-            totalCount++;
         }
-        info.setCardinality(totalCount);
         return builder.build(baseId);
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9b0d0c97/dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
----------------------------------------------------------------------
diff --git a/dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java b/dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
index 13da78e..1c492c4 100644
--- a/dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
+++ b/dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
@@ -122,6 +122,7 @@ public class DictionaryManager {
             return getDictionaryInfo(dupDict);
         }
 
+        newDictInfo.setCardinality(newDict.getSize());
         newDictInfo.setDictionaryObject(newDict);
         newDictInfo.setDictionaryClass(newDict.getClass().getName());
 

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9b0d0c97/dictionary/src/main/java/org/apache/kylin/dict/IDictionaryValueEnumerator.java
----------------------------------------------------------------------
diff --git a/dictionary/src/main/java/org/apache/kylin/dict/IDictionaryValueEnumerator.java b/dictionary/src/main/java/org/apache/kylin/dict/IDictionaryValueEnumerator.java
index b297d70..ecf980a 100644
--- a/dictionary/src/main/java/org/apache/kylin/dict/IDictionaryValueEnumerator.java
+++ b/dictionary/src/main/java/org/apache/kylin/dict/IDictionaryValueEnumerator.java
@@ -18,9 +18,6 @@
 
 package org.apache.kylin.dict;
 
-import org.apache.calcite.linq4j.Enumerator;
-import org.apache.hadoop.fs.RemoteIterator;
-
 import java.io.IOException;
 
 /**

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9b0d0c97/dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java
----------------------------------------------------------------------
diff --git a/dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java b/dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java
index 7321bde..4cf72d3 100644
--- a/dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java
+++ b/dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java
@@ -20,6 +20,7 @@ package org.apache.kylin.dict;
 
 import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
+
 import org.apache.kylin.common.util.Bytes;
 
 import java.io.IOException;
@@ -29,6 +30,7 @@ import java.util.List;
 /**
  * Created by dongli on 10/28/15.
  */
+@SuppressWarnings("rawtypes")
 public class MultipleDictionaryValueEnumerator implements IDictionaryValueEnumerator {
     private HashSet<byte[]> dedup = Sets.newHashSet();
     private int curDictIndex = 0;