You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2016/11/30 07:32:09 UTC

[5/5] kylin git commit: cv+

cv+


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/109772d9
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/109772d9
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/109772d9

Branch: refs/heads/KYLIN-2217-2
Commit: 109772d917ced8025717a3421e9debc84a64aabd
Parents: f6b1167
Author: Li Yang <li...@apache.org>
Authored: Wed Nov 30 15:31:11 2016 +0800
Committer: Li Yang <li...@apache.org>
Committed: Wed Nov 30 15:31:58 2016 +0800

----------------------------------------------------------------------
 .../kylin/cube/cli/DictionaryGeneratorCLI.java  |   2 +-
 .../apache/kylin/dict/DictionaryGenerator.java  | 165 ++++++++++++-------
 .../apache/kylin/dict/DictionaryManager.java    |  16 +-
 .../apache/kylin/dict/DictionaryProvider.java   |   4 +-
 .../kylin/dict/GlobalDictionaryBuilder.java     |  36 ++--
 .../apache/kylin/dict/IDictionaryBuilder.java   |  13 +-
 .../kylin/dict/DictionaryProviderTest.java      |  56 +++----
 .../engine/mr/steps/CreateDictionaryJob.java    |  42 ++---
 .../mr/steps/FactDistinctColumnsReducer.java    |  11 +-
 9 files changed, 190 insertions(+), 155 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/109772d9/core-cube/src/main/java/org/apache/kylin/cube/cli/DictionaryGeneratorCLI.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/cli/DictionaryGeneratorCLI.java b/core-cube/src/main/java/org/apache/kylin/cube/cli/DictionaryGeneratorCLI.java
index a4e1df0..163c6ca 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/cli/DictionaryGeneratorCLI.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/cli/DictionaryGeneratorCLI.java
@@ -60,7 +60,7 @@ public class DictionaryGeneratorCLI {
         for (TblColRef col : cubeSeg.getCubeDesc().getAllColumnsNeedDictionaryBuilt()) {
             logger.info("Building dictionary for " + col);
             ReadableTable inpTable = decideInputTable(cubeSeg.getModel(), col, factTableValueProvider);
-            if (config.isReducerLocalBuildDict() && dictProvider != null) {
+            if (dictProvider != null) {
                 Dictionary<String> dict = dictProvider.getDictionary(col);
                 if (dict != null) {
                     cubeMgr.saveDictionary(cubeSeg, col, inpTable, dict);

http://git-wip-us.apache.org/repos/asf/kylin/blob/109772d9/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
index 810a392..cd13d59 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
@@ -19,12 +19,11 @@
 package org.apache.kylin.dict;
 
 import java.io.IOException;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.commons.lang.StringUtils;
+import org.apache.kylin.common.util.DateFormat;
 import org.apache.kylin.common.util.Dictionary;
 import org.apache.kylin.metadata.datatype.DataType;
 import org.slf4j.Logger;
@@ -40,9 +39,7 @@ public class DictionaryGenerator {
 
     private static final Logger logger = LoggerFactory.getLogger(DictionaryGenerator.class);
 
-    private static final String[] DATE_PATTERNS = new String[] { "yyyy-MM-dd", "yyyyMMdd" };
-
-    public static Dictionary<String> buildDictionary(DataType dataType, IDictionaryValueEnumerator valueEnumerator) throws IOException {
+    public static IDictionaryBuilder newDictionaryBuilder(DataType dataType) {
         Preconditions.checkNotNull(dataType, "dataType cannot be null");
 
         // build dict, case by data type
@@ -57,16 +54,33 @@ public class DictionaryGenerator {
         } else {
             builder = new StringDictBuilder();
         }
+        return builder;
+    }
 
-        return buildDictionary(builder, null, valueEnumerator);
+    public static Dictionary<String> buildDictionary(DataType dataType, IDictionaryValueEnumerator valueEnumerator) throws IOException {
+        return buildDictionary(newDictionaryBuilder(dataType), null, valueEnumerator);
     }
 
-    public static Dictionary<String> buildDictionary(IDictionaryBuilder builder, DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator) throws IOException {
+    static Dictionary<String> buildDictionary(IDictionaryBuilder builder, DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator) throws IOException {
         int baseId = 0; // always 0 for now
         int nSamples = 5;
         ArrayList<String> samples = new ArrayList<String>(nSamples);
 
-        Dictionary<String> dict = builder.build(dictInfo, valueEnumerator, baseId, nSamples, samples);
+        // init the builder
+        builder.init(dictInfo, baseId);
+
+        // add values
+        while (valueEnumerator.moveNext()) {
+            String value = valueEnumerator.current();
+
+            boolean accept = builder.addValue(value);
+
+            if (accept && samples.size() < nSamples && samples.contains(value) == false)
+                samples.add(value);
+        }
+
+        // build
+        Dictionary<String> dict = builder.build();
 
         // log a few samples
         StringBuilder buf = new StringBuilder();
@@ -88,81 +102,114 @@ public class DictionaryGenerator {
     }
 
     private static class DateDictBuilder implements IDictionaryBuilder {
+        private static final String[] DATE_PATTERNS = new String[] { "yyyy-MM-dd", "yyyyMMdd" };
+
+        private int baseId;
+        private String datePattern;
+
         @Override
-        public Dictionary<String> build(DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList<String> returnSamples) throws IOException {
-            final int BAD_THRESHOLD = 0;
-            String matchPattern = null;
-            String value;
-
-            for (String ptn : DATE_PATTERNS) {
-                matchPattern = ptn; // be optimistic
-                int badCount = 0;
-                SimpleDateFormat sdf = new SimpleDateFormat(ptn);
-                while (valueEnumerator.moveNext()) {
-                    value = valueEnumerator.current();
-                    if (value == null || value.length() == 0)
-                        continue;
+        public void init(DictionaryInfo info, int baseId) throws IOException {
+            this.baseId = baseId;
+        }
 
+        @Override
+        public boolean addValue(String value) {
+            if (StringUtils.isBlank(value)) // empty string is treated as null
+                return false;
+            
+            // detect date pattern on the first value
+            if (datePattern == null) {
+                for (String p : DATE_PATTERNS) {
                     try {
-                        sdf.parse(value);
-                        if (returnSamples.size() < nSamples && returnSamples.contains(value) == false)
-                            returnSamples.add(value);
-                    } catch (ParseException e) {
-                        logger.info("Unrecognized date value: " + value);
-                        badCount++;
-                        if (badCount > BAD_THRESHOLD) {
-                            matchPattern = null;
-                            break;
-                        }
+                        DateFormat.stringToDate(value, p);
+                        datePattern = p;
+                        break;
+                    } catch (Exception e) {
+                        // continue;
                     }
                 }
-                if (matchPattern != null) {
-                    return new DateStrDictionary(matchPattern, baseId);
-                }
+                if (datePattern == null)
+                    throw new IllegalArgumentException("Unknown date pattern for input value: " + value);
             }
+            
+            // check the date format
+            DateFormat.stringToDate(value, datePattern);
+            return true;
+        }
+
+        @Override
+        public Dictionary<String> build() throws IOException {
+            if (datePattern == null)
+                datePattern = DATE_PATTERNS[0];
 
-            throw new IllegalStateException("Unrecognized datetime value");
+            return new DateStrDictionary(datePattern, baseId);
         }
     }
 
     private static class TimeDictBuilder implements IDictionaryBuilder {
+
         @Override
-        public Dictionary<String> build(DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList<String> returnSamples) throws IOException {
+        public void init(DictionaryInfo info, int baseId) throws IOException {
+        }
+
+        @Override
+        public boolean addValue(String value) {
+            if (StringUtils.isBlank(value)) // empty string is treated as null
+                return false;
+
+            // check the time format
+            DateFormat.stringToMillis(value);
+            return true;
+        }
+
+        @Override
+        public Dictionary<String> build() throws IOException {
             return new TimeStrDictionary(); // base ID is always 0
         }
     }
 
     private static class StringDictBuilder implements IDictionaryBuilder {
+        TrieDictionaryForestBuilder builder;
+
         @Override
-        public Dictionary<String> build(DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList<String> returnSamples) throws IOException {
-            TrieDictionaryForestBuilder builder = new TrieDictionaryForestBuilder(new StringBytesConverter(), baseId);
-            String value;
-            while (valueEnumerator.moveNext()) {
-                value = valueEnumerator.current();
-                if (value == null)
-                    continue;
-                builder.addValue(value);
-                if (returnSamples.size() < nSamples && returnSamples.contains(value) == false)
-                    returnSamples.add(value);
-            }
+        public void init(DictionaryInfo info, int baseId) throws IOException {
+            builder = new TrieDictionaryForestBuilder(new StringBytesConverter(), baseId);
+        }
+
+        @Override
+        public boolean addValue(String value) {
+            if (value == null)
+                return false;
+
+            builder.addValue(value);
+            return true;
+        }
+
+        @Override
+        public Dictionary<String> build() throws IOException {
             return builder.build();
         }
     }
 
     private static class NumberDictBuilder implements IDictionaryBuilder {
+        NumberDictionaryForestBuilder builder;
+
         @Override
-        public Dictionary<String> build(DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList<String> returnSamples) throws IOException {
-            NumberDictionaryForestBuilder builder = new NumberDictionaryForestBuilder(baseId);
-            String value;
-            while (valueEnumerator.moveNext()) {
-                value = valueEnumerator.current();
-                if (StringUtils.isBlank(value)) // empty string is null for numbers
-                    continue;
-
-                builder.addValue(value);
-                if (returnSamples.size() < nSamples && returnSamples.contains(value) == false)
-                    returnSamples.add(value);
-            }
+        public void init(DictionaryInfo info, int baseId) throws IOException {
+            builder = new NumberDictionaryForestBuilder(baseId);
+        }
+
+        @Override
+        public boolean addValue(String value) {
+            if (StringUtils.isBlank(value)) // empty string is treated as null
+                return false;
+
+            builder.addValue(value);
+            return true;
+        }
+
+        @Override
+        public Dictionary<String> build() throws IOException {
             return builder.build();
         }
     }

http://git-wip-us.apache.org/repos/asf/kylin/blob/109772d9/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
index 0caef14..54bc1c4 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
@@ -87,10 +87,6 @@ public class DictionaryManager {
     private KylinConfig config;
     private LoadingCache<String, DictionaryInfo> dictCache; // resource
 
-
-    // path ==>
-    // DictionaryInfo
-
     private DictionaryManager(KylinConfig config) {
         this.config = config;
         this.dictCache = CacheBuilder.newBuilder().removalListener(new RemovalListener<String, DictionaryInfo>() {
@@ -276,12 +272,10 @@ public class DictionaryManager {
         return buildDictionary(model, col, inpTable, null);
     }
 
-
     public DictionaryInfo buildDictionary(DataModelDesc model, TblColRef col, ReadableTable inpTable, String builderClass) throws IOException {
         if (inpTable.exists() == false)
             return null;
 
-
         logger.info("building dictionary for " + col);
 
         DictionaryInfo dictInfo = createDictionaryInfo(model, col, inpTable);
@@ -303,10 +297,12 @@ public class DictionaryManager {
         IDictionaryValueEnumerator columnValueEnumerator = null;
         try {
             columnValueEnumerator = new TableColumnValueEnumerator(inpTable.getReader(), dictInfo.getSourceColumnIndex());
-            if (builderClass == null)
+            if (builderClass == null) {
                 dictionary = DictionaryGenerator.buildDictionary(DataType.getType(dictInfo.getDataType()), columnValueEnumerator);
-            else
-                dictionary = DictionaryGenerator.buildDictionary((IDictionaryBuilder) ClassUtil.newInstance(builderClass), dictInfo, columnValueEnumerator);
+            } else {
+                IDictionaryBuilder builder = (IDictionaryBuilder) ClassUtil.newInstance(builderClass);
+                dictionary = DictionaryGenerator.buildDictionary(builder, dictInfo, columnValueEnumerator);
+            }
         } catch (Exception ex) {
             throw new RuntimeException("Failed to create dictionary on " + col, ex);
         } finally {
@@ -365,7 +361,7 @@ public class DictionaryManager {
         while (join != null) {
             if (join.isInnerJoin() == false)
                 return false;
-            
+
             TableRef table = join.getFKSide();
             join = model.getJoinByPKSide(table);
         }

http://git-wip-us.apache.org/repos/asf/kylin/blob/109772d9/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryProvider.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryProvider.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryProvider.java
index 6387535..8476f5c 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryProvider.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryProvider.java
@@ -17,6 +17,8 @@
 */
 package org.apache.kylin.dict;
 
+import java.io.IOException;
+
 import org.apache.kylin.common.util.Dictionary;
 import org.apache.kylin.metadata.model.TblColRef;
 
@@ -24,5 +26,5 @@ import org.apache.kylin.metadata.model.TblColRef;
  * Created by xiefan on 16-11-23.
  */
 public interface DictionaryProvider {
-    public Dictionary<String> getDictionary(TblColRef col);
+    public Dictionary<String> getDictionary(TblColRef col) throws IOException;
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/109772d9/core-dictionary/src/main/java/org/apache/kylin/dict/GlobalDictionaryBuilder.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/GlobalDictionaryBuilder.java b/core-dictionary/src/main/java/org/apache/kylin/dict/GlobalDictionaryBuilder.java
index 7adc262..b2a3664 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/GlobalDictionaryBuilder.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/GlobalDictionaryBuilder.java
@@ -36,8 +36,11 @@ import org.slf4j.LoggerFactory;
 public class GlobalDictionaryBuilder implements IDictionaryBuilder {
     private static final Logger logger = LoggerFactory.getLogger(GlobalDictionaryBuilder.class);
 
+    AppendTrieDictionary.Builder<String> builder;
+    int baseId;
+    
     @Override
-    public Dictionary<String> build(DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList<String> returnSamples) throws IOException {
+    public void init(DictionaryInfo dictInfo, int baseId) throws IOException {
         if (dictInfo == null) {
             throw new IllegalArgumentException("GlobalDictinaryBuilder must used with an existing DictionaryInfo");
         }
@@ -55,28 +58,31 @@ public class GlobalDictionaryBuilder implements IDictionaryBuilder {
             }
         }
 
-        AppendTrieDictionary.Builder<String> builder;
         if (appendDicts.isEmpty()) {
             logger.info("GlobalDict {} is empty, create new one", dictInfo.getResourceDir());
-            builder = AppendTrieDictionary.Builder.create(dictDir);
+            this.builder = AppendTrieDictionary.Builder.create(dictDir);
         } else if (appendDicts.size() == 1) {
             logger.info("GlobalDict {} exist, append value", appendDicts.get(0));
             AppendTrieDictionary dict = (AppendTrieDictionary) DictionaryManager.getInstance(KylinConfig.getInstanceFromEnv()).getDictionary(appendDicts.get(0));
-            builder = AppendTrieDictionary.Builder.create(dict);
+            this.builder = AppendTrieDictionary.Builder.create(dict);
         } else {
             throw new IllegalStateException(String.format("GlobalDict %s should have 0 or 1 append dict but %d", dictInfo.getResourceDir(), appendDicts.size()));
         }
-
-        String value;
-        while (valueEnumerator.moveNext()) {
-            value = valueEnumerator.current();
-            if (value == null) {
-                continue;
-            }
-            builder.addValue(value);
-            if (returnSamples.size() < nSamples && returnSamples.contains(value) == false)
-                returnSamples.add(value);
-        }
+        
+        this.baseId = baseId;
+    }
+    
+    @Override
+    public boolean addValue(String value) {
+        if (value == null)
+            return false;
+        
+        builder.addValue(value);
+        return true;
+    }
+    
+    @Override
+    public Dictionary<String> build() throws IOException {
         return builder.build(baseId);
     }
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/109772d9/core-dictionary/src/main/java/org/apache/kylin/dict/IDictionaryBuilder.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/IDictionaryBuilder.java b/core-dictionary/src/main/java/org/apache/kylin/dict/IDictionaryBuilder.java
index 8f95a2a..0934a7d 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/IDictionaryBuilder.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/IDictionaryBuilder.java
@@ -19,11 +19,20 @@
 package org.apache.kylin.dict;
 
 import java.io.IOException;
-import java.util.ArrayList;
 
 import org.apache.kylin.common.util.Dictionary;
 
+/**
+ * An once-only builder for dictionary.
+ */
 public interface IDictionaryBuilder {
 
-    Dictionary<String> build(DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList<String> returnSamples) throws IOException;
+    /** Sets the dictionary info for the dictionary being built. Mainly for GlobalDictionaryBuilder. */
+    void init(DictionaryInfo info, int baseId) throws IOException;
+    
+    /** Add a new value into dictionary, returns it is accepted (not null) or not. */
+    boolean addValue(String value);
+    
+    /** Build the dictionary */
+    Dictionary<String> build() throws IOException;
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/109772d9/core-dictionary/src/test/java/org/apache/kylin/dict/DictionaryProviderTest.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/DictionaryProviderTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/DictionaryProviderTest.java
index 0225737..84b1080 100644
--- a/core-dictionary/src/test/java/org/apache/kylin/dict/DictionaryProviderTest.java
+++ b/core-dictionary/src/test/java/org/apache/kylin/dict/DictionaryProviderTest.java
@@ -1,26 +1,21 @@
 package org.apache.kylin.dict;
 
-import org.apache.kylin.common.util.ClassUtil;
-import org.apache.kylin.common.util.Dictionary;
-import org.apache.kylin.metadata.datatype.DataType;
-import org.apache.kylin.metadata.model.TblColRef;
-import org.junit.Test;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
-import java.io.BufferedOutputStream;
-import java.io.BufferedWriter;
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.lang.reflect.ParameterizedType;
 import java.util.Arrays;
 import java.util.Iterator;
 
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
+import org.apache.kylin.common.util.ClassUtil;
+import org.apache.kylin.common.util.Dictionary;
+import org.apache.kylin.metadata.datatype.DataType;
+import org.junit.Test;
 
 /**
  * Created by xiefan on 16-11-23.
@@ -28,53 +23,48 @@ import static org.junit.Assert.fail;
 public class DictionaryProviderTest {
 
     @Test
-    public void testReadWrite() throws Exception{
+    public void testReadWrite() throws Exception {
         //string dict
-        Dictionary<String> dict = getDict(DataType.getType("string"),
-                Arrays.asList(new String[]{"a","b"}).iterator());
+        Dictionary<String> dict = getDict(DataType.getType("string"), Arrays.asList(new String[] { "a", "b" }).iterator());
         readWriteTest(dict);
         //number dict
-        Dictionary<String> dict2 = getDict(DataType.getType("long"),
-                Arrays.asList(new String[]{"1","2"}).iterator());
+        Dictionary<String> dict2 = getDict(DataType.getType("long"), Arrays.asList(new String[] { "1", "2" }).iterator());
         readWriteTest(dict2);
 
         //date dict
-        Dictionary<String> dict3 = getDict(DataType.getType("datetime"),
-                Arrays.asList(new String[]{"20161122","20161123"}).iterator());
+        Dictionary<String> dict3 = getDict(DataType.getType("datetime"), Arrays.asList(new String[] { "20161122", "20161123" }).iterator());
         readWriteTest(dict3);
 
         //date dict
-        Dictionary<String> dict4 = getDict(DataType.getType("datetime"),
-                Arrays.asList(new String[]{"2016-11-22","2016-11-23"}).iterator());
+        Dictionary<String> dict4 = getDict(DataType.getType("datetime"), Arrays.asList(new String[] { "2016-11-22", "2016-11-23" }).iterator());
         readWriteTest(dict4);
 
         //date dict
         try {
-            Dictionary<String> dict5 = getDict(DataType.getType("date"),
-                    Arrays.asList(new String[]{"2016-11-22", "20161122"}).iterator());
+            Dictionary<String> dict5 = getDict(DataType.getType("date"), Arrays.asList(new String[] { "2016-11-22", "20161122" }).iterator());
             readWriteTest(dict5);
             fail("Date format not correct.Should throw exception");
-        }catch (IllegalStateException e){
+        } catch (IllegalArgumentException e) {
             //correct
         }
     }
 
     @Test
-    public void testReadWriteTime(){
+    public void testReadWriteTime() {
         System.out.println(Long.MAX_VALUE);
         System.out.println(Long.MIN_VALUE);
     }
 
-
-    private Dictionary<String> getDict(DataType type, Iterator<String> values) throws Exception{
-        IDictionaryReducerLocalBuilder builder = DictionaryReducerLocalGenerator.getBuilder(type);
-        while(values.hasNext()){
+    private Dictionary<String> getDict(DataType type, Iterator<String> values) throws Exception {
+        IDictionaryBuilder builder = DictionaryGenerator.newDictionaryBuilder(type);
+        builder.init(null, 0);
+        while (values.hasNext()) {
             builder.addValue(values.next());
         }
-        return builder.build(0);
+        return builder.build();
     }
 
-    private void readWriteTest(Dictionary<String> dict) throws Exception{
+    private void readWriteTest(Dictionary<String> dict) throws Exception {
         final String path = "src/test/resources/dict/tmp_dict";
         File f = new File(path);
         f.deleteOnExit();
@@ -93,10 +83,10 @@ public class DictionaryProviderTest {
             String dictClassName2 = in.readUTF();
             dict2 = (Dictionary<String>) ClassUtil.newInstance(dictClassName2);
             dict2.readFields(in);
-        }catch(IOException e){
+        } catch (IOException e) {
             e.printStackTrace();
-        }finally {
-            if(in != null){
+        } finally {
+            if (in != null) {
                 try {
                     in.close();
                 } catch (IOException e) {

http://git-wip-us.apache.org/repos/asf/kylin/blob/109772d9/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/CreateDictionaryJob.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/CreateDictionaryJob.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/CreateDictionaryJob.java
index 63005f9..4985503 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/CreateDictionaryJob.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/CreateDictionaryJob.java
@@ -18,8 +18,10 @@
 
 package org.apache.kylin.engine.mr.steps;
 
+import java.io.IOException;
+
 import org.apache.commons.cli.Options;
-import org.apache.hadoop.conf.Configuration;
+import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -36,16 +38,8 @@ import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
 import org.apache.kylin.metadata.model.TblColRef;
 import org.apache.kylin.source.ReadableTable;
 
-import java.io.IOException;
-
-/**
- * @author ysong1
- */
-
 public class CreateDictionaryJob extends AbstractHadoopJob {
 
-    private int returnCode = 0;
-
     @Override
     public int run(String[] args) throws Exception {
         Options options = new Options();
@@ -68,38 +62,28 @@ public class CreateDictionaryJob extends AbstractHadoopJob {
         }, new DictionaryProvider() {
 
             @Override
-            public Dictionary<String> getDictionary(TblColRef col) {
-                if (!config.isReducerLocalBuildDict()) {
+            public Dictionary<String> getDictionary(TblColRef col) throws IOException {
+                Path colDir = new Path(factColumnsInputPath, col.getName());
+                Path dictFile = new Path(colDir, col.getName() + FactDistinctColumnsReducer.DICT_FILE_POSTFIX);
+                FileSystem fs = HadoopUtil.getFileSystem(dictFile.toString());
+                if (fs.exists(dictFile) == false)
                     return null;
-                }
+                
                 FSDataInputStream is = null;
                 try {
-                    Path colDir = new Path(factColumnsInputPath, col.getName());
-                    Path outputFile = new Path(colDir, col.getName() + FactDistinctColumnsReducer.DICT_FILE_POSTFIX);
-                    Configuration conf = HadoopUtil.getCurrentConfiguration();
-                    FileSystem fs = HadoopUtil.getFileSystem(outputFile.getName());
-                    is = fs.open(outputFile);
+                    is = fs.open(dictFile);
                     String dictClassName = is.readUTF();
                     Dictionary<String> dict = (Dictionary<String>) ClassUtil.newInstance(dictClassName);
                     dict.readFields(is);
-                    logger.info("DictionaryProvider read dict form file : " + outputFile.getName());
+                    logger.info("DictionaryProvider read dict from file: " + dictFile);
                     return dict;
-                } catch (Exception e) {
-                    e.printStackTrace();
-                    return null;
                 } finally {
-                    if (is != null) {
-                        try {
-                            is.close();
-                        } catch (IOException e) {
-                            e.printStackTrace();
-                        }
-                    }
+                    IOUtils.closeQuietly(is);
                 }
             }
         });
 
-        return returnCode;
+        return 0;
     }
 
     public static void main(String[] args) throws Exception {

http://git-wip-us.apache.org/repos/asf/kylin/blob/109772d9/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
index 5511626..59532e8 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
@@ -43,8 +43,8 @@ import org.apache.kylin.common.util.Dictionary;
 import org.apache.kylin.cube.CubeInstance;
 import org.apache.kylin.cube.CubeManager;
 import org.apache.kylin.cube.model.CubeDesc;
-import org.apache.kylin.dict.DictionaryReducerLocalGenerator;
-import org.apache.kylin.dict.IDictionaryReducerLocalBuilder;
+import org.apache.kylin.dict.DictionaryGenerator;
+import org.apache.kylin.dict.IDictionaryBuilder;
 import org.apache.kylin.engine.mr.KylinReducer;
 import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
 import org.apache.kylin.engine.mr.common.BatchConstants;
@@ -82,7 +82,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<SelfDefineSortableK
 
     //local build dict
     private boolean isReducerLocalBuildDict;
-    private IDictionaryReducerLocalBuilder builder;
+    private IDictionaryBuilder builder;
     private FastDateFormat dateFormat;
     private long timeMaxValue = Long.MIN_VALUE;
     private long timeMinValue = Long.MAX_VALUE;
@@ -145,7 +145,8 @@ public class FactDistinctColumnsReducer extends KylinReducer<SelfDefineSortableK
         //local build dict
         isReducerLocalBuildDict = config.isReducerLocalBuildDict();
         if (col != null && isReducerLocalBuildDict) {
-            builder = DictionaryReducerLocalGenerator.getBuilder(col.getType());
+            builder = DictionaryGenerator.newDictionaryBuilder(col.getType());
+            builder.init(null, 0);
         }
 
     }
@@ -292,7 +293,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<SelfDefineSortableK
                     if (isPartitionCol) {
                         outputPartitionInfo(context);
                     }
-                    Dictionary<String> dict = builder.build(0);
+                    Dictionary<String> dict = builder.build();
                     outputDict(col, dict, context);
                 } catch (Exception e) {
                     e.printStackTrace();