Posted to commits@kylin.apache.org by li...@apache.org on 2016/11/16 10:47:12 UTC

[1/3] kylin git commit: KYLIN-1851 Change TrieDictionary to TrieDictionaryForest to reduce the peak memory usage

Repository: kylin
Updated Branches:
  refs/heads/master 9410b015b -> 350547e6e


http://git-wip-us.apache.org/repos/asf/kylin/blob/734a4f98/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/fdc2/FactDistinctColumnsReducer2.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/fdc2/FactDistinctColumnsReducer2.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/fdc2/FactDistinctColumnsReducer2.java
new file mode 100644
index 0000000..b5aeef6
--- /dev/null
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/fdc2/FactDistinctColumnsReducer2.java
@@ -0,0 +1,254 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr.steps.fdc2;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.util.ByteArray;
+import org.apache.kylin.common.util.Bytes;
+import org.apache.kylin.cube.CubeInstance;
+import org.apache.kylin.cube.CubeManager;
+import org.apache.kylin.cube.model.CubeDesc;
+import org.apache.kylin.engine.mr.KylinReducer;
+import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
+import org.apache.kylin.engine.mr.common.BatchConstants;
+import org.apache.kylin.engine.mr.common.CubeStatsWriter;
+import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter;
+import org.apache.kylin.metadata.model.TblColRef;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+/**
+ */
+public class FactDistinctColumnsReducer2 extends KylinReducer<SelfDefineSortableKey, Text, NullWritable, Text> {
+
+    private List<TblColRef> columnList;
+    private String statisticsOutput = null;
+    private List<Long> baseCuboidRowCountInMappers;
+    protected Map<Long, HyperLogLogPlusCounter> cuboidHLLMap = null;
+    protected long baseCuboidId;
+    protected CubeDesc cubeDesc;
+    private long totalRowsBeforeMerge = 0;
+    private int samplingPercentage;
+    private List<ByteArray> colValues;
+    private TblColRef col = null;
+    private boolean isStatistics = false;
+    private boolean isPartitionCol = false;
+    private KylinConfig cubeConfig;
+    protected static final Logger logger = LoggerFactory.getLogger(FactDistinctColumnsReducer2.class);
+
+    @Override
+    protected void setup(Context context) throws IOException {
+        super.bindCurrentConfiguration(context.getConfiguration());
+
+        Configuration conf = context.getConfiguration();
+        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
+        String cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
+        CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
+        cubeConfig = cube.getConfig();
+        cubeDesc = cube.getDescriptor();
+        columnList = CubeManager.getInstance(config).getAllDictColumnsOnFact(cubeDesc);
+
+        boolean collectStatistics = Boolean.parseBoolean(conf.get(BatchConstants.CFG_STATISTICS_ENABLED));
+        int numberOfTasks = context.getNumReduceTasks();
+        int taskId = context.getTaskAttemptID().getTaskID().getId();
+
+        if (collectStatistics && (taskId == numberOfTasks - 1)) {
+            // hll
+            isStatistics = true;
+            statisticsOutput = conf.get(BatchConstants.CFG_STATISTICS_OUTPUT);
+            baseCuboidRowCountInMappers = Lists.newArrayList();
+            cuboidHLLMap = Maps.newHashMap();
+            samplingPercentage = Integer.parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT));
+        } else if (collectStatistics && (taskId == numberOfTasks - 2)) {
+            // partition col
+            isStatistics = false;
+            isPartitionCol = true;
+            col = cubeDesc.getModel().getPartitionDesc().getPartitionDateColumnRef();
+            colValues = Lists.newLinkedList();
+        } else {
+            // col
+            isStatistics = false;
+            isPartitionCol = false;
+            col = columnList.get(taskId);
+            colValues = Lists.newLinkedList();
+        }
+    }
+
+    @Override
+    protected void doReduce(SelfDefineSortableKey skey, Iterable<Text> values, Context context) throws IOException, InterruptedException {
+        Text key = skey.getText();
+        if (isStatistics == true) {
+            // for hll
+            long cuboidId = Bytes.toLong(key.getBytes(), 1, Bytes.SIZEOF_LONG);
+            for (Text value : values) {
+                HyperLogLogPlusCounter hll = new HyperLogLogPlusCounter(cubeConfig.getCubeStatsHLLPrecision());
+                ByteBuffer bf = ByteBuffer.wrap(value.getBytes(), 0, value.getLength());
+                hll.readRegisters(bf);
+
+                totalRowsBeforeMerge += hll.getCountEstimate();
+
+                if (cuboidId == baseCuboidId) {
+                    baseCuboidRowCountInMappers.add(hll.getCountEstimate());
+                }
+
+                if (cuboidHLLMap.get(cuboidId) != null) {
+                    cuboidHLLMap.get(cuboidId).merge(hll);
+                } else {
+                    cuboidHLLMap.put(cuboidId, hll);
+                }
+            }
+        } else if (isPartitionCol == true) {
+            // for partition col min/max value
+            ByteArray value = new ByteArray(Bytes.copy(key.getBytes(), 1, key.getLength() - 1));
+            if (colValues.size() > 1) {
+                colValues.set(1, value);
+            } else {
+                colValues.add(value);
+            }
+        } else {
+            colValues.add(new ByteArray(Bytes.copy(key.getBytes(), 1, key.getLength() - 1)));
+            if (colValues.size() == 1000000) { //spill every 1 million
+                logger.info("spill values to disk...");
+                outputDistinctValues(col, colValues, context);
+                colValues.clear();
+            }
+        }
+    }
+
+    private void outputDistinctValues(TblColRef col, Collection<ByteArray> values, Context context) throws IOException {
+        final Configuration conf = context.getConfiguration();
+        final FileSystem fs = FileSystem.get(conf);
+        final String outputPath = conf.get(BatchConstants.CFG_OUTPUT_PATH);
+        final Path outputFile = new Path(outputPath, col.getName());
+
+        FSDataOutputStream out = null;
+        try {
+            if (fs.exists(outputFile)) {
+                out = fs.append(outputFile);
+                logger.info("append file " + outputFile);
+            } else {
+                out = fs.create(outputFile);
+                logger.info("create file " + outputFile);
+            }
+
+            for (ByteArray value : values) {
+                out.write(value.array(), value.offset(), value.length());
+                out.write('\n');
+            }
+        } finally {
+            IOUtils.closeQuietly(out);
+        }
+    }
+
+    @Override
+    protected void doCleanup(Context context) throws IOException, InterruptedException {
+
+        if (isStatistics == false) {
+            if (colValues.size() > 0) {
+                outputDistinctValues(col, colValues, context);
+                colValues.clear();
+            }
+        } else {
+            //output the hll info;
+            long grandTotal = 0;
+            for (HyperLogLogPlusCounter hll : cuboidHLLMap.values()) {
+                grandTotal += hll.getCountEstimate();
+            }
+            double mapperOverlapRatio = grandTotal == 0 ? 0 : (double) totalRowsBeforeMerge / grandTotal;
+            
+            int mapperNumber = baseCuboidRowCountInMappers.size();
+
+            writeMapperAndCuboidStatistics(context); // for human check
+            CubeStatsWriter.writeCuboidStatistics(context.getConfiguration(), new Path(statisticsOutput), //
+                    cuboidHLLMap, samplingPercentage, mapperNumber, mapperOverlapRatio);
+        }
+    }
+
+    private void writeMapperAndCuboidStatistics(Context context) throws IOException {
+        Configuration conf = context.getConfiguration();
+        FileSystem fs = FileSystem.get(conf);
+        FSDataOutputStream out = fs.create(new Path(statisticsOutput, BatchConstants.CFG_STATISTICS_CUBE_ESTIMATION_FILENAME));
+
+        try {
+            String msg;
+
+            List<Long> allCuboids = Lists.newArrayList();
+            allCuboids.addAll(cuboidHLLMap.keySet());
+            Collections.sort(allCuboids);
+
+            msg = "Total cuboid number: \t" + allCuboids.size();
+            writeLine(out, msg);
+            msg = "Samping percentage: \t" + samplingPercentage;
+            writeLine(out, msg);
+
+            writeLine(out, "The following statistics are collected based on sampling data.");
+            writeLine(out, "Number of Mappers: " + baseCuboidRowCountInMappers.size());
+            for (int i = 0; i < baseCuboidRowCountInMappers.size(); i++) {
+                if (baseCuboidRowCountInMappers.get(i) > 0) {
+                    msg = "Base Cuboid in Mapper " + i + " row count: \t " + baseCuboidRowCountInMappers.get(i);
+                    writeLine(out, msg);
+                }
+            }
+
+            long grantTotal = 0;
+            for (long i : allCuboids) {
+                grantTotal += cuboidHLLMap.get(i).getCountEstimate();
+                msg = "Cuboid " + i + " row count is: \t " + cuboidHLLMap.get(i).getCountEstimate();
+                writeLine(out, msg);
+            }
+
+            msg = "Sum of all the cube segments (before merge) is: \t " + totalRowsBeforeMerge;
+            writeLine(out, msg);
+
+            msg = "After merge, the cube has row count: \t " + grantTotal;
+            writeLine(out, msg);
+
+            if (grantTotal > 0) {
+                msg = "The mapper overlap ratio is: \t" + totalRowsBeforeMerge / grantTotal;
+                writeLine(out, msg);
+            }
+
+        } finally {
+            IOUtils.closeQuietly(out);
+        }
+    }
+
+    private void writeLine(FSDataOutputStream out, String msg) throws IOException {
+        out.write(msg.getBytes());
+        out.write('\n');
+
+    }
+
+}

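For readers skimming the new reducer above: the reducer task id decides what each task does. A condensed view of the setup() branching, paraphrased from the code (taskId and numberOfTasks come from the MR context):

    // taskId == numberOfTasks - 1  -> cuboid HLL statistics (only when statistics collection is enabled)
    // taskId == numberOfTasks - 2  -> partition column min/max values
    // any other taskId             -> distinct values of columnList.get(taskId), spilled to HDFS every 1M values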
http://git-wip-us.apache.org/repos/asf/kylin/blob/734a4f98/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/fdc2/SelfDefineSortableKey.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/fdc2/SelfDefineSortableKey.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/fdc2/SelfDefineSortableKey.java
index cadbcbf..a3351fa 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/fdc2/SelfDefineSortableKey.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/fdc2/SelfDefineSortableKey.java
@@ -95,7 +95,8 @@ public class SelfDefineSortableKey implements WritableComparable<SelfDefineSorta
 
     @Override
     public void readFields(DataInput dataInput) throws IOException {
-        dataInput.readByte();
+        this.typeId = dataInput.readByte();
+        this.text = new Text();
         text.readFields(dataInput);
     }
 

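The readFields() fix above matters because Hadoop deserializes keys by calling readFields() on a (possibly reused) instance: every field has to be restored from the stream, not just skipped. A minimal round-trip sketch of the intended behavior, assuming SelfDefineSortableKey keeps the no-arg constructor that Hadoop's Writable machinery requires (hypothetical test code, not part of the commit):

    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    SelfDefineSortableKey written = new SelfDefineSortableKey(
            (byte) TypeFlag.INTEGER_FAMILY_TYPE.ordinal(), new Text("123"));
    written.write(new DataOutputStream(bos));

    SelfDefineSortableKey read = new SelfDefineSortableKey();   // no-arg constructor assumed
    read.readFields(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));

    // before this patch, typeId was silently dropped and text could be left null
    assert read.getTypeId() == written.getTypeId();
    assert read.getText().equals(written.getText());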
http://git-wip-us.apache.org/repos/asf/kylin/blob/734a4f98/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/MergeCuboidMapperTest.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/MergeCuboidMapperTest.java b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/MergeCuboidMapperTest.java
index 01d47b8..df68f76 100644
--- a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/MergeCuboidMapperTest.java
+++ b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/MergeCuboidMapperTest.java
@@ -78,7 +78,7 @@ public class MergeCuboidMapperTest extends LocalFileMetadataTestCase {
         values.add(new byte[] { 102, 102, 102 });
         Dictionary<?> dict = DictionaryGenerator.buildDictionary(DataType.getType(newDictInfo.getDataType()), new IterableDictionaryValueEnumerator(values));
         dictionaryManager.trySaveNewDict(dict, newDictInfo);
-        ((TrieDictionary) dict).dump(System.out);
+        dict.dump(System.out);
 
         return newDictInfo;
     }
@@ -130,7 +130,7 @@ public class MergeCuboidMapperTest extends LocalFileMetadataTestCase {
                 values.add(new byte[] { 98, 98, 98 });
             Dictionary<?> dict = DictionaryGenerator.buildDictionary(DataType.getType(newDictInfo.getDataType()), new IterableDictionaryValueEnumerator(values));
             dictionaryManager.trySaveNewDict(dict, newDictInfo);
-            ((TrieDictionary) dict).dump(System.out);
+            dict.dump(System.out);
 
             segment.putDictResPath(lfn, newDictInfo.getResourcePath());
             segment.putDictResPath(lsi, sharedDict.getResourcePath());

http://git-wip-us.apache.org/repos/asf/kylin/blob/734a4f98/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java
index 70197ac..554ee9c 100644
--- a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java
+++ b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java
@@ -35,37 +35,37 @@ import static org.junit.Assert.assertTrue;
 
 public class NumberDictionaryForestTest {
     @Test
-    public void testNumberDictionaryForestLong(){
+    public void testNumberDictionaryForestLong() {
         List<String> list = randomLongData(10);
-        testData(list,TypeFlag.INTEGER_FAMILY_TYPE);
+        testData(list, TypeFlag.INTEGER_FAMILY_TYPE);
     }
 
     @Test
-    public void testNumberDictionaryForestDouble(){
+    public void testNumberDictionaryForestDouble() {
         List<String> list = randomDoubleData(10);
 
-        testData(list,TypeFlag.DOUBLE_FAMILY_TYPE);
+        testData(list, TypeFlag.DOUBLE_FAMILY_TYPE);
     }
 
-    private void testData(List<String> list,TypeFlag flag){
+    private void testData(List<String> list, TypeFlag flag) {
        //simulate map-reduce job
-        ArrayList<SelfDefineSortableKey> keyList = createKeyList(list,(byte)flag.ordinal());
+        ArrayList<SelfDefineSortableKey> keyList = createKeyList(list, (byte) flag.ordinal());
         Collections.sort(keyList);
         //build tree
         NumberDictionaryForestBuilder<String> b = new NumberDictionaryForestBuilder<String>(
-                new StringBytesConverter(),0);
-        TrieDictionaryForestBuilder.MaxTrieTreeSize = 0;
-        for(SelfDefineSortableKey key : keyList){
+                new StringBytesConverter(), 0, 0);
+
+        for (SelfDefineSortableKey key : keyList) {
             String fieldValue = printKey(key);
             b.addValue(fieldValue);
         }
         NumberDictionaryForest<String> dict = b.build();
         dict.dump(System.out);
         ArrayList<Integer> resultIds = new ArrayList<>();
-        for(SelfDefineSortableKey key : keyList){
+        for (SelfDefineSortableKey key : keyList) {
             String fieldValue = getFieldValue(key);
             resultIds.add(dict.getIdFromValue(fieldValue));
-            assertEquals(fieldValue,dict.getValueFromId(dict.getIdFromValue(fieldValue)));
+            assertEquals(fieldValue, dict.getValueFromId(dict.getIdFromValue(fieldValue)));
         }
         assertTrue(isIncreasedOrder(resultIds, new Comparator<Integer>() {
             @Override
@@ -83,7 +83,7 @@ public class NumberDictionaryForestTest {
         testData.add("100");
         //TrieDictionaryForestBuilder.MaxTrieTreeSize = 0;
         NumberDictionaryForestBuilder<String> b = new NumberDictionaryForestBuilder<String>(new StringBytesConverter());
-        for(String str : testData)
+        for (String str : testData)
             b.addValue(str);
         NumberDictionaryForest<String> dict = b.build();
         dict = testSerialize(dict);
@@ -94,20 +94,20 @@ public class NumberDictionaryForestTest {
     }
 
     @Test
-    public void testVerySmallDouble(){
+    public void testVerySmallDouble() {
         List<String> testData = new ArrayList<>();
-        testData.add(-1.0+"");
-        testData.add(Double.MIN_VALUE+"");
+        testData.add(-1.0 + "");
+        testData.add(Double.MIN_VALUE + "");
         testData.add("1.01");
         testData.add("2.0");
         NumberDictionaryForestBuilder<String> b = new NumberDictionaryForestBuilder<String>(new StringBytesConverter());
-        for(String str : testData)
+        for (String str : testData)
             b.addValue(str);
         NumberDictionaryForest<String> dict = b.build();
         dict.dump(System.out);
 
         NumberDictionaryBuilder<String> b2 = new NumberDictionaryBuilder<>(new StringBytesConverter());
-        for(String str : testData)
+        for (String str : testData)
             b2.addValue(str);
         NumberDictionary<String> dict2 = b2.build(0);
         dict2.dump(System.out);
@@ -133,31 +133,31 @@ public class NumberDictionaryForestTest {
         }
     }
 
-    private List<String> randomLongData(int count){
+    private List<String> randomLongData(int count) {
         Random rand = new Random(System.currentTimeMillis());
         ArrayList<String> list = new ArrayList<>();
-        for(int i=0;i<count;i++){
-            list.add(rand.nextLong()+"");
+        for (int i = 0; i < count; i++) {
+            list.add(rand.nextLong() + "");
         }
-        list.add(Long.MAX_VALUE+"");
-        list.add(Long.MIN_VALUE+"");
+        list.add(Long.MAX_VALUE + "");
+        list.add(Long.MIN_VALUE + "");
         return list;
     }
 
-    private List<String> randomDoubleData(int count){
+    private List<String> randomDoubleData(int count) {
         Random rand = new Random(System.currentTimeMillis());
         ArrayList<String> list = new ArrayList<>();
-        for(int i=0;i<count;i++){
-            list.add(rand.nextDouble()+"");
+        for (int i = 0; i < count; i++) {
+            list.add(rand.nextDouble() + "");
         }
         list.add("-1");
         return list;
     }
 
-    private List<String> randomStringData(int count){
+    private List<String> randomStringData(int count) {
         Random rand = new Random(System.currentTimeMillis());
         ArrayList<String> list = new ArrayList<>();
-        for(int i=0;i<count;i++){
+        for (int i = 0; i < count; i++) {
             list.add(UUID.randomUUID().toString());
         }
         list.add("123");
@@ -165,47 +165,47 @@ public class NumberDictionaryForestTest {
         return list;
     }
 
-    private ArrayList<SelfDefineSortableKey> createKeyList(List<String> strNumList,byte typeFlag){
+    private ArrayList<SelfDefineSortableKey> createKeyList(List<String> strNumList, byte typeFlag) {
         int partationId = 0;
         ArrayList<SelfDefineSortableKey> keyList = new ArrayList<>();
-        for(String str : strNumList){
+        for (String str : strNumList) {
             ByteBuffer keyBuffer = ByteBuffer.allocate(4096);
             int offset = keyBuffer.position();
             keyBuffer.put(Bytes.toBytes(partationId)[3]);
             keyBuffer.put(Bytes.toBytes(str));
             //System.out.println(Arrays.toString(keyBuffer.array()));
-            byte[] valueField = Bytes.copy(keyBuffer.array(),1,keyBuffer.position()-offset-1);
+            byte[] valueField = Bytes.copy(keyBuffer.array(), 1, keyBuffer.position() - offset - 1);
             //System.out.println("new string:"+new String(valueField));
             //System.out.println("arrays toString:"+Arrays.toString(valueField));
             Text outputKey = new Text();
-            outputKey.set(keyBuffer.array(),offset,keyBuffer.position()-offset);
-            SelfDefineSortableKey sortableKey = new SelfDefineSortableKey(typeFlag,outputKey);
+            outputKey.set(keyBuffer.array(), offset, keyBuffer.position() - offset);
+            SelfDefineSortableKey sortableKey = new SelfDefineSortableKey(typeFlag, outputKey);
             keyList.add(sortableKey);
         }
         return keyList;
     }
 
-    private String printKey(SelfDefineSortableKey key){
+    private String printKey(SelfDefineSortableKey key) {
         byte[] data = key.getText().getBytes();
-        byte[] fieldValue = Bytes.copy(data,1,data.length-1);
-        System.out.println("type flag:"+key.getTypeId()+" fieldValue:"+new String(fieldValue));
+        byte[] fieldValue = Bytes.copy(data, 1, data.length - 1);
+        System.out.println("type flag:" + key.getTypeId() + " fieldValue:" + new String(fieldValue));
         return new String(fieldValue);
     }
 
-    private String getFieldValue(SelfDefineSortableKey key){
+    private String getFieldValue(SelfDefineSortableKey key) {
         byte[] data = key.getText().getBytes();
-        byte[] fieldValue = Bytes.copy(data,1,data.length-1);
+        byte[] fieldValue = Bytes.copy(data, 1, data.length - 1);
         return new String(fieldValue);
     }
 
-    private<T> boolean isIncreasedOrder(List<T> list, Comparator<T> comp){
+    private <T> boolean isIncreasedOrder(List<T> list, Comparator<T> comp) {
         int flag;
         T previous = null;
-        for(T t : list){
-            if(previous == null) previous = t;
-            else{
-                flag = comp.compare(previous,t);
-                if(flag > 0) return false;
+        for (T t : list) {
+            if (previous == null) previous = t;
+            else {
+                flag = comp.compare(previous, t);
+                if (flag > 0) return false;
                 previous = t;
             }
         }

http://git-wip-us.apache.org/repos/asf/kylin/blob/734a4f98/examples/test_case_data/sandbox/kylin.properties
----------------------------------------------------------------------
diff --git a/examples/test_case_data/sandbox/kylin.properties b/examples/test_case_data/sandbox/kylin.properties
index 0988536..93b86c9 100644
--- a/examples/test_case_data/sandbox/kylin.properties
+++ b/examples/test_case_data/sandbox/kylin.properties
@@ -63,7 +63,7 @@ kylin.job.retry=0
 # you will have to specify kylin.job.remote.cli.hostname, kylin.job.remote.cli.username and kylin.job.remote.cli.password
 # It should not be set to "true" unless you're NOT running Kylin.sh on a hadoop client machine
 # (Thus kylin instance has to ssh to another real hadoop client machine to execute hbase,hive,hadoop commands)
-kylin.job.run.as.remote.cmd=false
+kylin.job.run.as.remote.cmd=true
 
 # Only necessary when kylin.job.run.as.remote.cmd=true
 kylin.job.remote.cli.hostname=sandbox
@@ -160,4 +160,7 @@ kylin.web.contact_mail=
 kylin.query.metrics.percentiles.intervals=60, 360, 3600
 
 # Env DEV|QA|PROD
-deploy.env=DEV
\ No newline at end of file
+deploy.env=DEV
+
+# max size for one trie in TrieDictionaryForest (default 500MB)
+kylin.dictionary.forest.trie.size.max_mb=500
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/kylin/blob/734a4f98/kylin-it/src/test/java/org/apache/kylin/query/KylinTestBase.java
----------------------------------------------------------------------
diff --git a/kylin-it/src/test/java/org/apache/kylin/query/KylinTestBase.java b/kylin-it/src/test/java/org/apache/kylin/query/KylinTestBase.java
index 52461c4..e1303e4 100644
--- a/kylin-it/src/test/java/org/apache/kylin/query/KylinTestBase.java
+++ b/kylin-it/src/test/java/org/apache/kylin/query/KylinTestBase.java
@@ -491,7 +491,10 @@ public class KylinTestBase {
             ITable h2Table = executeQuery(h2Conn, queryName, sql, needSort);
 
             try {
+                // log row counts before the comparison
                 // compare the result
+                System.out.println("h2 Table rows count: " + h2Table.getRowCount());
+                System.out.println("kylin Table rows count: " + kylinTable.getRowCount());
                 Assertion.assertEquals(h2Table, kylinTable);
             } catch (Throwable t) {
                 printInfo("execAndCompQuery failed on: " + sqlFile.getAbsolutePath());


[2/3] kylin git commit: KYLIN-1851 Change TrieDictionary to TrieDictionaryForest to reduce the peak memory usage

Posted by li...@apache.org.
KYLIN-1851 Change TrieDictionary to TrieDictionaryForest to reduce the peak memory usage

Signed-off-by: Li Yang <li...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/734a4f98
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/734a4f98
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/734a4f98

Branch: refs/heads/master
Commit: 734a4f98b912a6f45957c3435b4f5be0cf54f4e8
Parents: 9410b01
Author: xiefan46 <95...@qq.com>
Authored: Mon Nov 7 14:37:22 2016 +0800
Committer: Li Yang <li...@apache.org>
Committed: Wed Nov 16 18:03:59 2016 +0800

----------------------------------------------------------------------
 build/conf/kylin.properties                     |   4 +
 .../apache/kylin/common/KylinConfigBase.java    |  36 +-
 .../org/apache/kylin/cube/model/CubeDesc.java   |  35 +-
 .../org/apache/kylin/cube/CubeDescTest.java     |  28 +-
 .../apache/kylin/dict/DictionaryGenerator.java  |  18 +-
 .../apache/kylin/dict/DictionaryManager.java    |   3 +-
 .../kylin/dict/NumberDictionaryForest.java      |   6 +
 .../dict/NumberDictionaryForestBuilder.java     |  12 +
 .../org/apache/kylin/dict/TrieDictionary.java   |   7 +-
 .../apache/kylin/dict/TrieDictionaryForest.java |  95 ++++-
 .../kylin/dict/TrieDictionaryForestBuilder.java |  76 +++-
 .../apache/kylin/dict/lookup/SnapshotTable.java |   2 +
 .../apache/kylin/dict/NumberDictionaryTest.java |   6 +-
 .../kylin/dict/TrieDictionaryForestTest.java    | 373 +++++++++++++++++--
 .../kylin/dict/lookup/LookupTableTest.java      |   9 +
 .../engine/mr/DFSFileTableSortedReader.java     | 249 +++++++++++++
 .../kylin/engine/mr/JobBuilderSupport.java      |   4 +-
 .../fdc2/FactDistinctColumnsCombiner2.java      |   5 +-
 .../mr/steps/fdc2/FactDistinctColumnsJob2.java  |   3 +-
 .../fdc2/FactDistinctColumnsMapperBase2.java    |   1 +
 .../steps/fdc2/FactDistinctColumnsReducer2.java | 254 +++++++++++++
 .../mr/steps/fdc2/SelfDefineSortableKey.java    |   3 +-
 .../engine/mr/steps/MergeCuboidMapperTest.java  |   4 +-
 .../mr/steps/NumberDictionaryForestTest.java    |  86 ++---
 .../test_case_data/sandbox/kylin.properties     |   7 +-
 .../org/apache/kylin/query/KylinTestBase.java   |   3 +
 26 files changed, 1174 insertions(+), 155 deletions(-)
----------------------------------------------------------------------
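The heart of the change is in DictionaryGenerator.java (diff further below): instead of building one TrieDictionary that holds the whole column in a single trie, the dictionary is now built as a TrieDictionaryForest whose individual tries are capped at a configurable size, so the peak heap needed during the build stays bounded. A rough sketch using only the class and method names visible in the diffs (not a drop-in snippet; the forest builder expects its input already sorted, which the reworked fdc2 MR step arranges via SelfDefineSortableKey):

    StringBytesConverter converter = new StringBytesConverter();
    // cap per trie, from kylin.dictionary.forest.trie.size.max_mb (default 500)
    int maxTrieSizeInMB = TrieDictionaryForestBuilder.getMaxTrieSizeInMB();
    TrieDictionaryForestBuilder<String> builder =
            new TrieDictionaryForestBuilder<String>(converter, 0 /* baseId */, maxTrieSizeInMB);
    for (String v : new String[] { "a", "b", "c" })          // values fed in sorted order
        builder.addValue(converter.convertToBytes(v));
    TrieDictionaryForest<String> dict = builder.build();
    // same Dictionary<T> contract as TrieDictionary: getIdFromValue(), getValueFromId(), dump()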


http://git-wip-us.apache.org/repos/asf/kylin/blob/734a4f98/build/conf/kylin.properties
----------------------------------------------------------------------
diff --git a/build/conf/kylin.properties b/build/conf/kylin.properties
index e935ebf..715b7a6 100644
--- a/build/conf/kylin.properties
+++ b/build/conf/kylin.properties
@@ -131,6 +131,10 @@ kylin.dictionary.max.cardinality=5000000
 
 kylin.table.snapshot.max_mb=300
 
+#max size for one trie in TrieDictionaryForest (default 500MB)
+
+
+
 ### QUERY ###
 
 kylin.query.scan.threshold=10000000

http://git-wip-us.apache.org/repos/asf/kylin/blob/734a4f98/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 6d9eef4..300f727 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -18,6 +18,13 @@
 
 package org.apache.kylin.common;
 
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+import org.apache.commons.lang.StringUtils;
+import org.apache.kylin.common.util.CliCommandExecutor;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import java.io.File;
 import java.io.IOException;
 import java.io.Serializable;
@@ -28,14 +35,6 @@ import java.util.SortedSet;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import org.apache.commons.lang.StringUtils;
-import org.apache.kylin.common.util.CliCommandExecutor;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
-
 /**
  * An abstract class to encapsulate access to a set of 'properties'.
  * Subclass can override methods in this class to extend the content of the 'properties',
@@ -177,19 +176,25 @@ abstract public class KylinConfigBase implements Serializable {
         setProperty("kylin.storage.url", storageUrl);
     }
 
-    /** was for route to hive, not used any more */
+    /**
+     * was for route to hive, not used any more
+     */
     @Deprecated
     public String getHiveUrl() {
         return getOptional("hive.url", "");
     }
 
-    /** was for route to hive, not used any more */
+    /**
+     * was for route to hive, not used any more
+     */
     @Deprecated
     public String getHiveUser() {
         return getOptional("hive.user", "");
     }
 
-    /** was for route to hive, not used any more */
+    /**
+     * was for route to hive, not used any more
+     */
     @Deprecated
     public String getHivePassword() {
         return getOptional("hive.password", "");
@@ -205,7 +210,7 @@ abstract public class KylinConfigBase implements Serializable {
 
     public String[] getRealizationProviders() {
         return getOptionalStringArray("kylin.realization.providers", //
-                new String[] { "org.apache.kylin.cube.CubeManager", "org.apache.kylin.storage.hybrid.HybridManager" });
+                new String[]{"org.apache.kylin.cube.CubeManager", "org.apache.kylin.storage.hybrid.HybridManager"});
     }
 
     public CliCommandExecutor getCliCommandExecutor() throws IOException {
@@ -464,6 +469,10 @@ abstract public class KylinConfigBase implements Serializable {
         return Integer.parseInt(getOptional("kylin.table.snapshot.max_mb", "300"));
     }
 
+    public int getTrieDictionaryForestMaxTrieSizeMB() {
+        return Integer.parseInt(getOptional("kylin.dictionary.forest.trie.size.max_mb", "500"));
+    }
+
     public int getHBaseRegionCountMin() {
         return Integer.parseInt(getOptional("kylin.hbase.region.count.min", "1"));
     }
@@ -582,7 +591,7 @@ abstract public class KylinConfigBase implements Serializable {
     }
 
     public int[] getQueryMetricsPercentilesIntervals() {
-        String[] dft = { "60", "300", "3600" };
+        String[] dft = {"60", "300", "3600"};
         return getOptionalIntArray("kylin.query.metrics.percentiles.intervals", dft);
     }
 
@@ -600,6 +609,7 @@ abstract public class KylinConfigBase implements Serializable {
 
     /**
      * HBase region cut size, in GB
+     *
      * @return
      */
     public float getKylinHBaseRegionCut() {

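The new kylin.dictionary.forest.trie.size.max_mb setting surfaces only through the getter added above. A hedged caller-side sketch (KylinConfig.getInstanceFromEnv() is assumed as the entry point; the DictionaryGenerator diff below actually obtains the value via TrieDictionaryForestBuilder.getMaxTrieSizeInMB()):

    int maxTrieMB = KylinConfig.getInstanceFromEnv().getTrieDictionaryForestMaxTrieSizeMB();  // 500 unless overridden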
http://git-wip-us.apache.org/repos/asf/kylin/blob/734a4f98/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
index 3160085..c9ebff8 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
@@ -196,7 +196,7 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware {
     public Set<TblColRef> listAllColumns() {
         return allColumns;
     }
-    
+
     public Set<ColumnDesc> listAllColumnDescs() {
         return allColumnDescs;
     }
@@ -209,7 +209,7 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware {
     }
 
     /**
-     * @return dimension columns excluding derived 
+     * @return dimension columns excluding derived
      */
     public List<TblColRef> listDimensionColumnsExcludingDerived(boolean alsoExcludeExtendedCol) {
         List<TblColRef> result = new ArrayList<TblColRef>();
@@ -473,8 +473,9 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware {
     /**
      * this method is to prevent malicious metadata change by checking the saved signature
      * with the calculated signature.
-     * 
+     * <p>
      * if you're comparing two cube descs, prefer to use consistentWith()
+     *
      * @return
      */
     public boolean checkSignature() {
@@ -558,7 +559,7 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware {
         checkState(rowkey.getRowKeyColumns().length == dimCols.size(), "RowKey columns count (%d) doesn't match dimensions columns count (%d)", rowkey.getRowKeyColumns().length, dimCols.size());
 
         initDictionaryDesc();
-        
+
         for (TblColRef col : allColumns) {
             allColumnDescs.add(col.getColumnDesc());
         }
@@ -609,7 +610,7 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware {
                 }
                 Collections.sort(notIncluded);
                 logger.error("Aggregation group " + index + " Include dimensions not containing all the used dimensions");
-                throw new IllegalStateException("Aggregation group " + index + " 'includes' dimensions not include all the dimensions:" +  notIncluded.toString());
+                throw new IllegalStateException("Aggregation group " + index + " 'includes' dimensions not include all the dimensions:" + notIncluded.toString());
             }
 
             Set<String> normalDims = new TreeSet<>(String.CASE_INSENSITIVE_ORDER);
@@ -754,7 +755,7 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware {
                     int find = ArrayUtils.indexOf(dimColArray, fk[i]);
                     if (find >= 0) {
                         TblColRef derivedCol = initDimensionColRef(pk[i]);
-                        initDerivedMap(new TblColRef[] { dimColArray[find] }, DeriveType.PK_FK, dim, new TblColRef[] { derivedCol }, null);
+                        initDerivedMap(new TblColRef[]{dimColArray[find]}, DeriveType.PK_FK, dim, new TblColRef[]{derivedCol}, null);
                     }
                 }
             }
@@ -775,7 +776,7 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware {
                 extra[i] = "";
             }
         }
-        return new String[][] { cols, extra };
+        return new String[][]{cols, extra};
     }
 
     private void initDerivedMap(TblColRef[] hostCols, DeriveType type, DimensionDesc dimension, TblColRef[] derivedCols, String[] extra) {
@@ -994,7 +995,9 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware {
         this.partitionOffsetStart = partitionOffsetStart;
     }
 
-    /** Get columns that have dictionary */
+    /**
+     * Get columns that have dictionary
+     */
     public Set<TblColRef> getAllColumnsHaveDictionary() {
         Set<TblColRef> result = Sets.newLinkedHashSet();
 
@@ -1023,7 +1026,9 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware {
         return result;
     }
 
-    /** Get columns that need dictionary built on it. Note a column could reuse dictionary of another column. */
+    /**
+     * Get columns that need dictionary built on it. Note a column could reuse dictionary of another column.
+     */
     public Set<TblColRef> getAllColumnsNeedDictionaryBuilt() {
         Set<TblColRef> result = getAllColumnsHaveDictionary();
 
@@ -1040,7 +1045,9 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware {
         return result;
     }
 
-    /** A column may reuse dictionary of another column, find the dict column, return same col if there's no reuse column*/
+    /**
+     * A column may reuse dictionary of another column, find the dict column, return same col if there's no reuse column
+     */
     public TblColRef getDictionaryReuseColumn(TblColRef col) {
         if (dictionaries == null) {
             return col;
@@ -1053,7 +1060,9 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware {
         return col;
     }
 
-    /** Get a column which can be used in distributing the source table */
+    /**
+     * Get a column which can be used in distributing the source table
+     */
     public TblColRef getDistributedByColumn() {
         Set<TblColRef> shardBy = getShardByColumns();
         if (shardBy != null && shardBy.size() > 0) {
@@ -1107,9 +1116,9 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware {
     }
 
 
-    private Collection ensureOrder(Collection c){
+    private Collection ensureOrder(Collection c) {
         TreeSet set = new TreeSet();
-        for(Object o : c)
+        for (Object o : c)
             set.add(o.toString());
         //System.out.println("set:"+set);
         return set;

http://git-wip-us.apache.org/repos/asf/kylin/blob/734a4f98/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java
----------------------------------------------------------------------
diff --git a/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java b/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java
index 9ad6427..3326b24 100644
--- a/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java
+++ b/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java
@@ -89,7 +89,7 @@ public class CubeDescTest extends LocalFileMetadataTestCase {
     @Test
     public void testBadInit3() throws Exception {
         thrown.expect(IllegalStateException.class);
-        thrown.expectMessage("Aggregation group 0 'includes' dimensions not include all the dimensions:" + sortStrs(new String[] { "SELLER_ID", "META_CATEG_NAME", "LSTG_FORMAT_NAME", "LSTG_SITE_ID", "SLR_SEGMENT_CD" }));
+        thrown.expectMessage("Aggregation group 0 'includes' dimensions not include all the dimensions:" + sortStrs(new String[]{"SELLER_ID", "META_CATEG_NAME", "LSTG_FORMAT_NAME", "LSTG_SITE_ID", "SLR_SEGMENT_CD"}));
         CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc");
         String[] temp = Arrays.asList(cubeDesc.getAggregationGroups().get(0).getIncludes()).subList(0, 3).toArray(new String[3]);
         cubeDesc.getAggregationGroups().get(0).setIncludes(temp);
@@ -114,7 +114,7 @@ public class CubeDescTest extends LocalFileMetadataTestCase {
     @Test
     public void testBadInit5() throws Exception {
         CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc");
-        cubeDesc.getAggregationGroups().get(0).getSelectRule().mandatory_dims = new String[] { "seller_id", "META_CATEG_NAME" };
+        cubeDesc.getAggregationGroups().get(0).getSelectRule().mandatory_dims = new String[]{"seller_id", "META_CATEG_NAME"};
 
         cubeDesc.init(getTestConfig());
     }
@@ -122,7 +122,7 @@ public class CubeDescTest extends LocalFileMetadataTestCase {
     @Test
     public void testBadInit6() throws Exception {
         CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc");
-        cubeDesc.getAggregationGroups().get(0).getSelectRule().mandatory_dims = new String[] { "seller_id", "lstg_format_name" };
+        cubeDesc.getAggregationGroups().get(0).getSelectRule().mandatory_dims = new String[]{"seller_id", "lstg_format_name"};
 
         cubeDesc.init(getTestConfig());
     }
@@ -133,43 +133,43 @@ public class CubeDescTest extends LocalFileMetadataTestCase {
         thrown.expectMessage("Aggregation group 0 require at least 2 dimensions in a joint");
 
         CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc");
-        cubeDesc.getAggregationGroups().get(0).getSelectRule().joint_dims = new String[][] { new String[] { "lstg_format_name" } };
+        cubeDesc.getAggregationGroups().get(0).getSelectRule().joint_dims = new String[][]{new String[]{"lstg_format_name"}};
 
         cubeDesc.init(getTestConfig());
     }
 
     @Test
     public void testBadInit8() throws Exception {
-        String[] strs = new String[] { "CATEG_LVL2_NAME", "META_CATEG_NAME" };
+        String[] strs = new String[]{"CATEG_LVL2_NAME", "META_CATEG_NAME"};
         thrown.expect(IllegalStateException.class);
         thrown.expectMessage("Aggregation group 0 hierarchy dimensions overlap with joint dimensions: " + sortStrs(strs));
 
         CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc");
-        cubeDesc.getAggregationGroups().get(0).getSelectRule().joint_dims = new String[][] { new String[] { "META_CATEG_NAME", "CATEG_LVL2_NAME" } };
+        cubeDesc.getAggregationGroups().get(0).getSelectRule().joint_dims = new String[][]{new String[]{"META_CATEG_NAME", "CATEG_LVL2_NAME"}};
 
         cubeDesc.init(getTestConfig());
     }
 
     @Test
     public void testBadInit9() throws Exception {
-        String[] strs = new String[] { "lstg_format_name", "META_CATEG_NAME" };
+        String[] strs = new String[]{"lstg_format_name", "META_CATEG_NAME"};
         thrown.expect(IllegalStateException.class);
         thrown.expectMessage("Aggregation group 0 hierarchy dimensions overlap with joint dimensions: " + sortStrs(strs));
         CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc");
-        cubeDesc.getAggregationGroups().get(0).getSelectRule().hierarchy_dims = new String[][] { new String[] { "META_CATEG_NAME", "CATEG_LVL2_NAME", "CATEG_LVL3_NAME" }, new String[] { "lstg_format_name", "lstg_site_id" } };
-        cubeDesc.getAggregationGroups().get(0).getSelectRule().joint_dims = new String[][] { new String[] { "META_CATEG_NAME", "lstg_format_name" } };
+        cubeDesc.getAggregationGroups().get(0).getSelectRule().hierarchy_dims = new String[][]{new String[]{"META_CATEG_NAME", "CATEG_LVL2_NAME", "CATEG_LVL3_NAME"}, new String[]{"lstg_format_name", "lstg_site_id"}};
+        cubeDesc.getAggregationGroups().get(0).getSelectRule().joint_dims = new String[][]{new String[]{"META_CATEG_NAME", "lstg_format_name"}};
 
         cubeDesc.init(getTestConfig());
     }
 
     @Test
     public void testBadInit10() throws Exception {
-        String[] strs = new String[] { "lstg_format_name", "lstg_site_id" };
+        String[] strs = new String[]{"lstg_format_name", "lstg_site_id"};
         thrown.expect(IllegalStateException.class);
         thrown.expectMessage("Aggregation group 0 a dimension exist in more than one joint: " + sortStrs(strs));
 
         CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc");
-        cubeDesc.getAggregationGroups().get(0).getSelectRule().joint_dims = new String[][] { new String[] { "lstg_format_name", "lstg_site_id", "slr_segment_cd" }, new String[] { "lstg_format_name", "lstg_site_id", "leaf_categ_id" } };
+        cubeDesc.getAggregationGroups().get(0).getSelectRule().joint_dims = new String[][]{new String[]{"lstg_format_name", "lstg_site_id", "slr_segment_cd"}, new String[]{"lstg_format_name", "lstg_site_id", "leaf_categ_id"}};
 
         cubeDesc.init(getTestConfig());
     }
@@ -180,19 +180,19 @@ public class CubeDescTest extends LocalFileMetadataTestCase {
         thrown.expectMessage("Aggregation group 0 require at least 2 dimensions in a hierarchy.");
 
         CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc");
-        cubeDesc.getAggregationGroups().get(0).getSelectRule().hierarchy_dims = new String[][] { new String[] { "META_CATEG_NAME" } };
+        cubeDesc.getAggregationGroups().get(0).getSelectRule().hierarchy_dims = new String[][]{new String[]{"META_CATEG_NAME"}};
 
         cubeDesc.init(getTestConfig());
     }
 
     @Test
     public void testBadInit12() throws Exception {
-        String[] strs = new String[] { "CATEG_LVL2_NAME", "META_CATEG_NAME" };
+        String[] strs = new String[]{"CATEG_LVL2_NAME", "META_CATEG_NAME"};
         thrown.expect(IllegalStateException.class);
         thrown.expectMessage("Aggregation group 0 a dimension exist in more than one hierarchy: " + sortStrs(strs));
 
         CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc");
-        cubeDesc.getAggregationGroups().get(0).getSelectRule().hierarchy_dims = new String[][] { new String[] { "META_CATEG_NAME", "CATEG_LVL2_NAME", "CATEG_LVL3_NAME" }, new String[] { "META_CATEG_NAME", "CATEG_LVL2_NAME" } };
+        cubeDesc.getAggregationGroups().get(0).getSelectRule().hierarchy_dims = new String[][]{new String[]{"META_CATEG_NAME", "CATEG_LVL2_NAME", "CATEG_LVL3_NAME"}, new String[]{"META_CATEG_NAME", "CATEG_LVL2_NAME"}};
 
         cubeDesc.init(getTestConfig());
     }

http://git-wip-us.apache.org/repos/asf/kylin/blob/734a4f98/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
index 0adf40e..8695338 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
@@ -36,12 +36,12 @@ import com.google.common.base.Preconditions;
 /**
  * @author yangli9
  */
-@SuppressWarnings({ "rawtypes", "unchecked" })
+@SuppressWarnings({"rawtypes", "unchecked"})
 public class DictionaryGenerator {
 
     private static final Logger logger = LoggerFactory.getLogger(DictionaryGenerator.class);
 
-    private static final String[] DATE_PATTERNS = new String[] { "yyyy-MM-dd", "yyyyMMdd" };
+    private static final String[] DATE_PATTERNS = new String[]{"yyyy-MM-dd", "yyyyMMdd"};
 
     public static Dictionary<String> buildDictionary(DataType dataType, IDictionaryValueEnumerator valueEnumerator) throws IOException {
         Preconditions.checkNotNull(dataType, "dataType cannot be null");
@@ -137,7 +137,9 @@ public class DictionaryGenerator {
     private static class StringDictBuilder implements IDictionaryBuilder {
         @Override
         public Dictionary<String> build(DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList<String> returnSamples) throws IOException {
-            TrieDictionaryBuilder builder = new TrieDictionaryBuilder(new StringBytesConverter());
+            int maxTrieSizeInMB = TrieDictionaryForestBuilder.getMaxTrieSizeInMB();
+            //TrieDictionaryBuilder builder = new TrieDictionaryBuilder(new StringBytesConverter());
+            TrieDictionaryForestBuilder builder = new TrieDictionaryForestBuilder(new StringBytesConverter(), baseId, maxTrieSizeInMB);
             byte[] value;
             while (valueEnumerator.moveNext()) {
                 value = valueEnumerator.current();
@@ -148,14 +150,16 @@ public class DictionaryGenerator {
                 if (returnSamples.size() < nSamples && returnSamples.contains(v) == false)
                     returnSamples.add(v);
             }
-            return builder.build(baseId);
+            return builder.build();
+            //return builder.build(baseId);
         }
     }
 
     private static class NumberDictBuilder implements IDictionaryBuilder {
         @Override
         public Dictionary<String> build(DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList<String> returnSamples) throws IOException {
-            NumberDictionaryBuilder builder = new NumberDictionaryBuilder(new StringBytesConverter());
+            int maxTrieSizeInMB = TrieDictionaryForestBuilder.getMaxTrieSizeInMB();
+            NumberDictionaryForestBuilder builder = new NumberDictionaryForestBuilder(new StringBytesConverter(), baseId, maxTrieSizeInMB);
             byte[] value;
             while (valueEnumerator.moveNext()) {
                 value = valueEnumerator.current();
@@ -169,7 +173,9 @@ public class DictionaryGenerator {
                 if (returnSamples.size() < nSamples && returnSamples.contains(v) == false)
                     returnSamples.add(v);
             }
-            return builder.build(baseId);
+            return builder.build();
         }
     }
+
+
 }

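Callers of DictionaryGenerator are unaffected: buildDictionary() still returns a Dictionary<String>, it just assembles a forest internally now, which is also why the (TrieDictionary) casts could be dropped in MergeCuboidMapperTest earlier in this message. A usage sketch modeled on that test (assuming a string-typed column):

    List<byte[]> values = Lists.newArrayList();
    values.add(new byte[] { 102, 102, 102 });
    Dictionary<?> dict = DictionaryGenerator.buildDictionary(
            DataType.getType("string"), new IterableDictionaryValueEnumerator(values));
    dict.dump(System.out);   // works for TrieDictionary and TrieDictionaryForest alike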
http://git-wip-us.apache.org/repos/asf/kylin/blob/734a4f98/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
index c8a7a54..b8d039e 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
@@ -327,7 +327,6 @@ public class DictionaryManager {
             if (columnValueEnumerator != null)
                 columnValueEnumerator.close();
         }
-
         return trySaveNewDict(dictionary, dictInfo);
     }
 
@@ -419,7 +418,7 @@ public class DictionaryManager {
 
         logger.info("DictionaryManager(" + System.identityHashCode(this) + ") loading DictionaryInfo(loadDictObj:" + loadDictObj + ") at " + resourcePath);
         DictionaryInfo info = store.getResource(resourcePath, DictionaryInfo.class, loadDictObj ? DictionaryInfoSerializer.FULL_SERIALIZER : DictionaryInfoSerializer.INFO_SERIALIZER);
-
+        //info.dictionaryObject.dump(System.out);
         //        if (loadDictObj)
         //            logger.debug("Loaded dictionary at " + resourcePath);
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/734a4f98/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryForest.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryForest.java b/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryForest.java
index 8caa4b6..fdf1e68 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryForest.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryForest.java
@@ -275,4 +275,10 @@ public class NumberDictionaryForest<T> extends Dictionary<T> {
     public BytesConverter<T> getConverter() {
         return converter;
     }
+
+    public int getTreeSize(){
+        return this.dict.getTrees().size();
+    }
+
+
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/734a4f98/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryForestBuilder.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryForestBuilder.java b/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryForestBuilder.java
index 5444bb7..c997ce1 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryForestBuilder.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryForestBuilder.java
@@ -41,18 +41,30 @@ public class NumberDictionaryForestBuilder<T> {
         this.bytesConverter = bytesConverter;
     }
 
+    public NumberDictionaryForestBuilder(BytesConverter<T> bytesConverter, int baseId, int maxTrieSizeMB) {
+        this.trieBuilder = new TrieDictionaryForestBuilder<T>(bytesConverter, baseId, maxTrieSizeMB);
+        this.bytesConverter = bytesConverter;
+    }
+
     public void addValue(T value) {
         addValue(bytesConverter.convertToBytes(value));
     }
 
+
+
     public void addValue(byte[] value) {
         codec.encodeNumber(value, 0, value.length);
         byte[] copy = Bytes.copy(codec.buf, codec.bufOffset, codec.bufLen);
         this.trieBuilder.addValue(copy);
     }
 
+    //TODO:ensure ordered
     public NumberDictionaryForest<T> build() {
         TrieDictionaryForest<T> forest = trieBuilder.build();
         return new NumberDictionaryForest<T>(forest, bytesConverter);
     }
+
+    public void setMaxTrieSize(int size){
+        this.trieBuilder.setMaxTrieTreeSize(size);
+    }
 }

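A minimal usage sketch of the new three-argument constructor, mirroring NumberDictionaryForestTest earlier in this message (values are added in ascending numeric order, as the tests do):

    NumberDictionaryForestBuilder<String> builder = new NumberDictionaryForestBuilder<String>(
            new StringBytesConverter(), 0 /* baseId */, 500 /* maxTrieSizeMB */);
    for (String v : new String[] { "-1.0", "1.01", "2.0" })
        builder.addValue(v);
    NumberDictionaryForest<String> dict = builder.build();
    int id = dict.getIdFromValue("1.01");
    String back = dict.getValueFromId(id);   // "1.01" round-trips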
http://git-wip-us.apache.org/repos/asf/kylin/blob/734a4f98/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java
index aea9551..a5e3d36 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java
@@ -126,7 +126,7 @@ public class TrieDictionary<T> extends Dictionary<T> {
             else
                 throw new RuntimeException(e);
         }
-
+        //this.enableValueCache = false;
         if (enableValueCache) {
             valueToIdCache = new SoftReference<Map>(new ConcurrentHashMap());
             idToValueCache = new SoftReference<Object[]>(new Object[nValues]);
@@ -156,6 +156,7 @@ public class TrieDictionary<T> extends Dictionary<T> {
     @Override
     final protected int getIdFromValueImpl(T value, int roundingFlag) {
         if (enableValueCache && roundingFlag == 0) {
+            //System.out.println("use id cache");
             Map cache = valueToIdCache.get(); // SoftReference to skip cache gracefully when short of memory
             if (cache != null) {
                 Integer id = null;
@@ -170,6 +171,7 @@ public class TrieDictionary<T> extends Dictionary<T> {
                 return id;
             }
         }
+        //System.out.println("not use id cache");
         byte[] valueBytes = bytesConvert.convertToBytes(value);
         return getIdFromValueBytes(valueBytes, 0, valueBytes.length, roundingFlag);
     }
@@ -271,6 +273,7 @@ public class TrieDictionary<T> extends Dictionary<T> {
     @Override
     final protected T getValueFromIdImpl(int id) {
         if (enableValueCache) {
+            //System.out.println("use value cache");
             Object[] cache = idToValueCache.get(); // SoftReference to skip cache gracefully when short of memory
             if (cache != null) {
                 int seq = calcSeqNoFromId(id);
@@ -285,8 +288,10 @@ public class TrieDictionary<T> extends Dictionary<T> {
                 return result;
             }
         }
+        //System.out.println("not use value cache");
         byte[] value = new byte[getSizeOfValue()];
         int length = getValueBytesFromId(id, value, 0);
+        //System.out.println("get value by id:"+id+" value:"+bytesConvert.convertFromBytes(value, 0, length).toString());
         return bytesConvert.convertFromBytes(value, 0, length);
     }
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/734a4f98/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java
index e9ccc56..b0440db 100755
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java
@@ -19,6 +19,7 @@ package org.apache.kylin.dict;
 
 
 import org.apache.kylin.common.util.ByteArray;
+import org.apache.kylin.common.util.Bytes;
 import org.apache.kylin.common.util.BytesUtil;
 import org.apache.kylin.common.util.ClassUtil;
 import org.apache.kylin.common.util.Dictionary;
@@ -32,7 +33,6 @@ import java.io.DataOutputStream;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;
@@ -89,13 +89,13 @@ public class TrieDictionaryForest<T> extends Dictionary<T> {
 
     @Override
     public int getMinId() {
-        if (trees.isEmpty()) return -1;
+        if (trees.isEmpty()) return baseId;
         return trees.get(0).getMinId() + baseId;
     }
 
     @Override
     public int getMaxId() {
-        if (trees.isEmpty()) return -1;
+        if (trees.isEmpty()) return baseId - 1;
         int index = trees.size() - 1;
         int id = accuOffset.get(index) + trees.get(index).getMaxId() + baseId;
         return id;
@@ -127,43 +127,71 @@ public class TrieDictionaryForest<T> extends Dictionary<T> {
     }
 
 
-    //id = tree_inner_offset + accumulate_offset + baseId
     @Override
-    protected int getIdFromValueBytesImpl(byte[] value, int offset, int len, int roundingFlag)
+    protected int getIdFromValueBytesImpl(byte[] value, int offset, int len, int roundingFlag) throws IllegalArgumentException {
+
+        int result = _getIdFromValueBytesImpl(value, offset, len, roundingFlag);
+        //logger.info("{} => {}, rounding {}", bytesConvert.convertFromBytes(value, offset, len), result, roundingFlag);
+        return result;
+    }
+
+    //id = tree_inner_offset + accumulate_offset + baseId
+    protected int _getIdFromValueBytesImpl(byte[] value, int offset, int len, int roundingFlag)
             throws IllegalArgumentException {
 
         //long startTime = System.currentTimeMillis();
         ByteArray search = new ByteArray(value, offset, len);
         //copyTime.addAndGet(System.currentTimeMillis() - startTime);
         int index = findIndexByValue(search);
-        //int index = findIndexByValue(value);
-        //binarySearchTime.addAndGet(System.currentTimeMillis() - startTime);
         if (index < 0) {
-            //System.out.println("value divide:"+valueDivide.size()+" "+valueDivide);
-            throw new IllegalArgumentException("Tree Not Found. index < 0.Value:" + new String(Arrays.copyOfRange(value, offset, len)));
+            if (roundingFlag > 0) {
+                return getMinId(); // the searched value is smaller than the smallest value in the dictionary
+            } else {
+                throw new IllegalArgumentException("Value '" + Bytes.toString(value, offset, len) + "' (" + Bytes.toStringBinary(value, offset, len) + ") not exists!");
+            }
+        }
+        int id;
+        if (roundingFlag > 0) {
+            T curTreeMax = trees.get(index).getValueFromId(trees.get(index).getMaxId());
+            byte[] b1 = bytesConvert.convertToBytes(curTreeMax);
+            ByteArray ba1 = new ByteArray(b1, 0, b1.length);
+            //ByteArray ba2 = new ByteArray(value, 0, value.length);
+            if (search.compareTo(ba1) > 0)
+                index++;
+            if (index >= trees.size())
+                throw new IllegalArgumentException("Value '" + Bytes.toString(value, offset, len) + "' (" + Bytes.toStringBinary(value, offset, len) + ") not exists!");
         }
         TrieDictionary<T> tree = trees.get(index);
-        //getValueIndexTime.addAndGet(System.currentTimeMillis() - startTime);
-        //startTime = System.currentTimeMillis();
-        int id = tree.getIdFromValueBytes(value, offset, len, roundingFlag);
+        id = tree.getIdFromValueBytes(value, offset, len, roundingFlag);
         id = id + accuOffset.get(index);
         id += baseId;
-        //getValueTime.addAndGet(System.currentTimeMillis() - startTime);
+        if (id < 0) {
+            throw new IllegalArgumentException("Value '" + Bytes.toString(value, offset, len) + "' (" + Bytes.toStringBinary(value, offset, len) + ") not exists!");
+        }
+        //System.out.println("getIdFromValue  value:"+bytesConvert.convertFromBytes(value,offset,len)+" id:"+id);
         return id;
     }
 
     //id --> value
+    private boolean printstr = false;
+
     @Override
     protected T getValueFromIdImpl(int id) throws IllegalArgumentException {
         //System.out.println("here");
         byte[] data = getValueBytesFromIdImpl(id);
         if (data != null) {
+            if (!printstr) {
+                System.out.println("getValueFromIdImpl id:" + id + " value:" + bytesConvert.convertFromBytes(data, 0, data.length));
+                printstr = true;
+            }
             return bytesConvert.convertFromBytes(data, 0, data.length);
         } else {
             return null;
         }
     }
 
+    private boolean isPrintstr2 = false;
+
     @Override
     protected int getValueBytesFromIdImpl(int id, byte[] returnValue, int offset)
             throws IllegalArgumentException {
@@ -174,6 +202,10 @@ public class TrieDictionaryForest<T> extends Dictionary<T> {
         //getValueIndexTime2.addAndGet(System.currentTimeMillis() - startTime);
         //startTime = System.currentTimeMillis();
         int size = tree.getValueBytesFromIdImpl(treeInnerOffset, returnValue, offset);
+        if (!isPrintstr2) {
+            isPrintstr2 = true;
+            System.out.println("getValueBytesFromIdImpl id:" + id + " value:" + bytesConvert.convertFromBytes(returnValue, offset, size));
+        }
         //getValueTime2.addAndGet(System.currentTimeMillis() - startTime);
         return size;
     }
@@ -200,18 +232,37 @@ public class TrieDictionaryForest<T> extends Dictionary<T> {
 
     @Override
     public void dump(PrintStream out) {
+        out.println("TrieDictionaryForest");
+        out.println("baseId:" + baseId);
+        StringBuilder sb = new StringBuilder();
+        sb.append("value divide:");
+        for (ByteArray ba : valueDivide)
+            sb.append(bytesConvert.convertFromBytes(ba.array(), 0, ba.length()) + " ");
+        sb.append("\noffset divide:");
+        for (Integer offset : accuOffset)
+            sb.append(offset + " ");
+        out.println(sb.toString());
         for (int i = 0; i < trees.size(); i++) {
-            System.out.println("----tree " + i + "--------");
+            out.println("----tree " + i + "--------");
             trees.get(i).dump(out);
         }
     }
 
     @Override
     public void write(DataOutput out) throws IOException {
+        System.out.println("write dict");
         writeHead(out);
         writeBody(out);
     }
 
+    /*private int compare(T value1,T value2){
+        byte[] b1 = bytesConvert.convertToBytes(value1);
+        byte[] b2 = bytesConvert.convertToBytes(value2);
+        ByteArray ba1 = new ByteArray(b1,0,b1.length);
+        ByteArray ba2 = new ByteArray(b2,0,b2.length);
+        return ba1.compareTo(ba2);
+    }*/
+
     private void writeHead(DataOutput out) throws IOException {
         ByteArrayOutputStream byteBuf = new ByteArrayOutputStream();
         DataOutputStream headOut = new DataOutputStream(byteBuf);
@@ -248,6 +299,7 @@ public class TrieDictionaryForest<T> extends Dictionary<T> {
 
     @Override
     public void readFields(DataInput in) throws IOException {
+        System.out.println("read dict");
         try {
             int headSize = in.readInt();
             this.baseId = in.readInt();
@@ -285,6 +337,21 @@ public class TrieDictionaryForest<T> extends Dictionary<T> {
 
     }
 
+    /*@Override
+    public boolean equals(Object o) {
+        if ((o instanceof TrieDictionaryForest) == false) {
+            logger.info("Equals return false because it's not TrieDictionaryForest");
+            return false;
+        }
+        TrieDictionaryForest that = (TrieDictionaryForest) o;
+        if(this.trees.size() != that.getTrees().size())
+            return false;
+        for(int i=0;i<trees.size();i++){
+            if(!trees.get(i).equals(that.getTrees().get(i))) return false;
+        }
+        return true;
+    }*/
+
     @Override
     public boolean contains(Dictionary other) {
         if (other.getSize() > this.getSize()) {
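
In short, the roundingFlag contract implemented above is: 0 requires an exact match and throws IllegalArgumentException otherwise; a positive flag rounds up to the next existing value (or getMinId() when the query is below the smallest entry); a negative flag rounds down. Below is a small sketch mirroring roundingFlagTest later in this patch; the dictionary contents and example class are made up for illustration.

    // A sketch of the roundingFlag behaviour; dictionary contents and class name are made up.
    package org.apache.kylin.dict;

    public class RoundingFlagSketch {
        public static void main(String[] args) {
            // baseId 10; a 0 MB max trie size puts every value into its own small trie, as in the tests
            TrieDictionaryForestBuilder<String> b =
                    new TrieDictionaryForestBuilder<String>(new StringBytesConverter(), 10, 0);
            for (String s : new String[] { "b", "bdd", "ccc" }) {
                b.addValue(s);
            }
            TrieDictionaryForest<String> dict = b.build();
            System.out.println(dict.getIdFromValue("bd", 1));   // rounds up   -> id of "bdd"
            System.out.println(dict.getIdFromValue("bd", -1));  // rounds down -> id of "b"
            System.out.println(dict.getIdFromValue("a", 1));    // below the minimum -> getMinId()
            try {
                dict.getIdFromValue("bd", 0);                   // roundingFlag 0 requires an exact match
            } catch (IllegalArgumentException expected) {
                System.out.println("no exact match for 'bd'");
            }
        }
    }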

http://git-wip-us.apache.org/repos/asf/kylin/blob/734a4f98/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java
index 3c03c08..5e2c346 100755
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java
@@ -17,6 +17,7 @@
 */
 package org.apache.kylin.dict;
 
+import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.util.ByteArray;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -26,7 +27,9 @@ import java.util.ArrayList;
 
 public class TrieDictionaryForestBuilder<T> {
 
-    public static int MaxTrieTreeSize = 1024 * 1024;//1M
+    public static int DEFAULT_MAX_TRIE_TREE_SIZE_MB = 500;
+
+    //public static int MaxTrieTreeSize = 1024;//1k
 
     private BytesConverter<T> bytesConverter;
 
@@ -48,6 +51,10 @@ public class TrieDictionaryForestBuilder<T> {
 
     private int curOffset;
 
+    private int maxTrieTreeSize;
+
+    private boolean isOrdered = true;
+
 
     public TrieDictionaryForestBuilder(BytesConverter<T> bytesConverter) {
         this(bytesConverter, 0);
@@ -58,9 +65,23 @@ public class TrieDictionaryForestBuilder<T> {
         this.trieBuilder = new TrieDictionaryBuilder<T>(bytesConverter);
         this.baseId = baseId;
         curOffset = 0;
+        int maxTrieTreeSizeMB = getMaxTrieSizeInMB();
+        this.maxTrieTreeSize = maxTrieTreeSizeMB * 1024 * 1024;
+        logger.info("maxTrieSize is set to:" + maxTrieTreeSize + "B");
+        //System.out.println("max trie size:"+maxTrieTreeSize);
         //stringComparator = new ByteComparator<>(new StringBytesConverter());
     }
 
+    public TrieDictionaryForestBuilder(BytesConverter<T> bytesConverter, int baseId, int maxTrieTreeSizeMB) {
+        this.bytesConverter = bytesConverter;
+        this.trieBuilder = new TrieDictionaryBuilder<T>(bytesConverter);
+        this.baseId = baseId;
+        curOffset = 0;
+        this.maxTrieTreeSize = maxTrieTreeSizeMB * 1024 * 1024;
+        logger.info("maxTrieSize is set to:" + maxTrieTreeSize + "B");
+    }
+
+
     public void addValue(T value) {
         if (value == null) return;
         byte[] valueBytes = bytesConverter.convertToBytes(value);
@@ -76,20 +97,25 @@ public class TrieDictionaryForestBuilder<T> {
     public void addValue(ByteArray value) {
         //System.out.println("value length:"+value.length);
         if (value == null) return;
+        //logger.info("going to add value:" + new String(value.array()));
         if (previousValue == null) {
             previousValue = value;
         } else {
             int comp = previousValue.compareTo(value);
-            if (comp == 0) return; //duplicate value
-            if (comp > 0) {
-                //logger.info("values not in ascending order");
+            if (comp == 0) {
+                //logger.info("find duplicate value:" + new String(value.array()));
+                return; //duplicate value
+            }
+            if (comp > 0 && isOrdered) {
+                logger.info("values not in ascending order:" + new String(value.array()));
+                isOrdered = false;
                 //System.out.println(".");
             }
         }
         this.trieBuilder.addValue(value.array());
         previousValue = value;
         this.curTreeSize += value.length();
-        if (curTreeSize >= MaxTrieTreeSize) {
+        if (curTreeSize >= this.maxTrieTreeSize) {
             TrieDictionary<T> tree = trieBuilder.build(0);
             addTree(tree);
             reset();
@@ -104,9 +130,33 @@ public class TrieDictionaryForestBuilder<T> {
         }
         TrieDictionaryForest<T> forest = new TrieDictionaryForest<T>(this.trees,
                 this.valueDivide, this.accuOffset, this.bytesConverter, baseId);
+
+        //log
+        logger.info("tree num:" + forest.getTrees().size());
+        StringBuilder sb = new StringBuilder();
+        for (ByteArray ba : valueDivide) {
+            sb.append(new String(ba.array()) + " ");
+        }
+        logger.info("value divide:" + sb.toString());
+        /*
+        If input values are not in ascending order and the tree count is greater than 1, TrieDictionaryForest cannot work correctly.
+         */
+        if (forest.getTrees().size() > 1 && !isOrdered) {
+            throw new IllegalStateException("Invalid input data.Unordered data can not be split into multi trees");
+        }
+
         return forest;
     }
 
+    public int getMaxTrieTreeSize() {
+        return maxTrieTreeSize;
+    }
+
+    public void setMaxTrieTreeSize(int maxTrieTreeSize) {
+        this.maxTrieTreeSize = maxTrieTreeSize;
+        logger.info("maxTrieSize is set to:" + maxTrieTreeSize + "B");
+    }
+
     private void addTree(TrieDictionary<T> tree) {
         trees.add(tree);
         int minId = tree.getMinId();
@@ -122,4 +172,20 @@ public class TrieDictionaryForestBuilder<T> {
         trieBuilder = new TrieDictionaryBuilder<T>(bytesConverter);
     }
 
+    public static int getMaxTrieSizeInMB() {
+        KylinConfig config = null;
+        try {
+            config = KylinConfig.getInstanceFromEnv();
+        } catch (RuntimeException e) {
+            logger.info("can not get KylinConfig from env.Use default setting:" + DEFAULT_MAX_TRIE_TREE_SIZE_MB + "MB");
+        }
+        int maxTrieTreeSizeMB;
+        if (config != null) {
+            maxTrieTreeSizeMB = config.getTrieDictionaryForestMaxTrieSizeMB();
+        } else {
+            maxTrieTreeSizeMB = DEFAULT_MAX_TRIE_TREE_SIZE_MB;
+        }
+        return maxTrieTreeSizeMB;
+    }
+
 }
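
A rough sketch of how the builder above splits sorted input into several small tries once the configured byte budget is exceeded. The tiny size limit, the sample strings, and the class name are assumptions chosen only to force a split; they are not values used anywhere in the repository.

    // A rough sketch, with an artificially tiny size limit to force a split; not a recommended setting.
    package org.apache.kylin.dict;

    public class TrieDictionaryForestSplitSketch {
        public static void main(String[] args) {
            TrieDictionaryForestBuilder<String> b =
                    new TrieDictionaryForestBuilder<String>(new StringBytesConverter(), 0);
            b.setMaxTrieTreeSize(6);                 // flush the current trie once ~6 bytes of values are buffered
            for (String s : new String[] { "aaa", "bbb", "ccc", "ddd", "eee", "fff" }) {
                b.addValue(s);                       // input must already be in byte-ascending order
            }
            TrieDictionaryForest<String> forest = b.build();
            System.out.println("tries: " + forest.getTrees().size());
            System.out.println("id of ddd: " + forest.getIdFromValue("ddd", 0)); // ids stay globally consistent across tries
        }
    }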

http://git-wip-us.apache.org/repos/asf/kylin/blob/734a4f98/core-dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java b/core-dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java
index db1a170..34b326a 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java
@@ -51,6 +51,7 @@ import com.fasterxml.jackson.annotation.JsonProperty;
 @JsonAutoDetect(fieldVisibility = Visibility.NONE, getterVisibility = Visibility.NONE, isGetterVisibility = Visibility.NONE, setterVisibility = Visibility.NONE)
 public class SnapshotTable extends RootPersistentEntity implements ReadableTable {
 
+
     @JsonProperty("tableName")
     private String tableName;
     @JsonProperty("signature")
@@ -58,6 +59,7 @@ public class SnapshotTable extends RootPersistentEntity implements ReadableTable
     @JsonProperty("useDictionary")
     private boolean useDictionary;
 
+
     private ArrayList<int[]> rowIndices;
     private Dictionary<String> dict;
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/734a4f98/core-dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java
index d98b938..a9c4980 100644
--- a/core-dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java
+++ b/core-dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java
@@ -16,7 +16,7 @@
  * limitations under the License.
 */
 
-package org.apache.kylin.dict;
+/*package org.apache.kylin.dict;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.fail;
@@ -42,7 +42,7 @@ import com.google.common.collect.Sets;
 
 /**
  */
-public class NumberDictionaryTest extends LocalFileMetadataTestCase {
+/*public class NumberDictionaryTest extends LocalFileMetadataTestCase {
 
     NumberDictionary.NumberBytesCodec codec = new NumberDictionary.NumberBytesCodec(NumberDictionary.MAX_DIGITS_BEFORE_DECIMAL_POINT);
     Random rand = new Random();
@@ -207,4 +207,4 @@ public class NumberDictionaryTest extends LocalFileMetadataTestCase {
         return buf.toString();
     }
 
-}
+}*/

http://git-wip-us.apache.org/repos/asf/kylin/blob/734a4f98/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
index 81cba64..3def7e0 100755
--- a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
+++ b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
@@ -20,6 +20,7 @@
 package org.apache.kylin.dict;
 
 
+import org.apache.kylin.common.util.Array;
 import org.apache.kylin.common.util.MemoryBudgetController;
 import org.junit.Ignore;
 import org.junit.Test;
@@ -88,10 +89,27 @@ public class TrieDictionaryForestTest {
         System.out.println("test ok");
     }
 
-    public void duplicateDataTest() {
-        //todo
+    @Test
+    public void testNullValue(){
+        //encounter null value when building dictionary
+        ArrayList<String> strs = new ArrayList<String>();
+        strs.add(null);
+        strs.add("abc");
+        System.out.println(strs);
+        int maxTreeSize = 0;
+        TrieDictionaryForestBuilder<String> builder = newDictBuilder(strs, 0, maxTreeSize);
+        TrieDictionaryForest<String> dict = builder.build();
+        dict.dump(System.out);
+        //null value query
+        int id = dict.getIdFromValue(null,0);
+        System.out.println(id);
+        id = dict.getIdFromValue(null,1);
+        System.out.println(id);
+        id = dict.getIdFromValue(null,-1);
+        System.out.println(id);
     }
 
+
     @Test
     public void testBigDataSet() {
         //h=generate data
@@ -245,6 +263,274 @@ public class TrieDictionaryForestTest {
     }
 
 
+    @Test
+    public void roundingFlagTest(){
+        ArrayList<String> testData = new ArrayList<>();
+        testData.add("b");
+        testData.add("bdd");
+        testData.add("ccc");
+        int baseId = 10;
+        TrieDictionaryForestBuilder<String> b = TrieDictionaryForestTest.newDictBuilder(testData,baseId, 0);
+        TrieDictionaryForest<String> dict = b.build();
+
+        //left
+        String smallerStr = "a";
+        int id;
+        try{
+            id = dict.getIdFromValue(smallerStr,0);
+            fail("should throw IllegalArgumentException,but id is:"+id);
+        }catch (IllegalArgumentException e){
+            //correct
+        }
+        try{
+            id = dict.getIdFromValue(smallerStr,-1);
+            fail("should throw IllegalArgumentException,but id is:"+id);
+        }catch (IllegalArgumentException e){
+            //correct
+        }
+        id = dict.getIdFromValue(smallerStr,1);
+        assertEquals(baseId,id);
+
+        //middle
+        String middleStr = "bd";
+        try{
+            id = dict.getIdFromValue(middleStr,0);
+            fail("should throw IllegalArgumentException,but id is:"+id);
+        }catch (IllegalArgumentException e){
+            //correct
+        }
+        id = dict.getIdFromValue(middleStr,-1);
+        assertEquals(baseId,id);
+        id = dict.getIdFromValue(middleStr,1);
+        assertEquals(baseId+1,id);
+
+        //right
+        String rightStr = "e";
+        try{
+            id = dict.getIdFromValue(rightStr,0);
+            fail("should throw IllegalArgumentException,but id is:"+id);
+        }catch (IllegalArgumentException e){
+            //correct
+        }
+        id = dict.getIdFromValue(rightStr,-1);
+        assertEquals(baseId+2,id);
+        try{
+            id = dict.getIdFromValue(rightStr,1);
+            fail("should throw IllegalArgumentException,but id is:"+id);
+        }catch (IllegalArgumentException e){
+            //correct
+        }
+    }
+
+    @Test
+    public void stringDictRoundFlagTest(){
+        TreeSet<String> set = new TreeSet<>(new ByteComparator<>(new StringBytesConverter()));
+        Iterator<String> it = new RandomStrings(10*10000).iterator();
+        int size = 0;
+        while(it.hasNext()){
+            BytesConverter converter = new StringBytesConverter();
+            String str = it.next();
+            set.add(str);
+            size += converter.convertToBytes(str).length;
+        }
+        int treeNum = 5;
+        TrieDictionaryForestBuilder<String> builder = newDictBuilder(set.iterator(),0,size / treeNum);
+        TrieDictionaryForest<String> dict = builder.build();
+        //dict.dump(System.out);
+
+        //test roundingFlag > 0
+        Iterator<String> it2 = new RandomStrings(100*10000).iterator();
+        while(it2.hasNext()){
+            String query = it2.next();
+            //System.out.println("query:"+query);
+            try {
+                int id = dict.getIdFromValue(query, 1);
+                assertEquals(set.ceiling(query),dict.getValueFromId(id));
+            }catch(IllegalArgumentException e){
+                assertNull(set.ceiling(query));
+            }
+        }
+
+        //test roundingFlag < 0
+        Iterator<String> it3 = new RandomStrings(100*10000).iterator();
+        while(it3.hasNext()){
+            String query = it3.next();
+            try {
+                int id = dict.getIdFromValue(query, -1);
+                assertEquals(set.floor(query),dict.getValueFromId(id));
+            }catch(IllegalArgumentException e){
+                assertNull(set.floor(query));
+            }
+        }
+
+    }
+
+    @Test
+    public void longDictRoundingFlagTest(){
+        TreeSet<String> set = new TreeSet<>(new Comparator<String>() {
+            @Override
+            public int compare(String o1, String o2) {
+                try{
+                    Long l1 = Long.parseLong(o1);
+                    Long l2 = Long.parseLong(o2);
+                    return l1.compareTo(l2);
+                }catch(NumberFormatException e){
+                    e.printStackTrace();
+                    return 0;
+                }
+            }
+        });
+        int num = 10 * 10000;
+        int k = -48481;
+        int size = 0;
+        StringBytesConverter converter = new StringBytesConverter();
+        for(int i=0;i<num;i++)
+        {
+            String value = k+"";
+            set.add(value);
+            k += 1;
+            String basic = "-9999999999999952517";
+            size += converter.convertToBytes(basic).length;
+        }
+        System.out.println("tree num:"+size);
+        int treeNum = 5;
+        //TrieDictionaryForestBuilder<String> builder = newDictBuilder(set.iterator(),0,size / treeNum);
+        //TrieDictionaryForest<String> dict = builder.build();
+        NumberDictionaryForestBuilder<String> builder = new NumberDictionaryForestBuilder<String>(new StringBytesConverter(),0);
+        builder.setMaxTrieSize(size / treeNum);
+        Iterator<String> it = set.iterator();
+        while(it.hasNext())
+            builder.addValue(it.next());
+        NumberDictionaryForest<String> dict = builder.build();
+        System.out.println(dict.getTreeSize());
+
+        int testTimes = 100 * 10000;
+        Random rand = new Random(System.currentTimeMillis());
+        //test roundingFlag > 0
+        for(int i=0;i<testTimes;i++)
+        {
+            String query = rand.nextInt(2*num)+"";
+            try {
+                int id = dict.getIdFromValue(query, 1);
+                assertEquals(set.ceiling(query),dict.getValueFromId(id));
+            }catch(IllegalArgumentException e){
+                assertNull(set.ceiling(query));
+            }
+        }
+
+
+        //test roundingFlag < 0
+        for(int i=0;i<testTimes;i++)
+        {
+            String query = rand.nextInt(2*num)+"";
+            try {
+                int id = dict.getIdFromValue(query, -1);
+                assertEquals(set.floor(query),dict.getValueFromId(id));
+            }catch(IllegalArgumentException e){
+                assertNull(set.floor(query));
+            }
+        }
+    }
+
+    /*
+    cannot handle values in scientific notation, e.g. 1.7695564055819624E-4
+     */
+    @Ignore
+    @Test
+    public void doubleDictRoundingFlagTest(){
+        TreeSet<String> set = new TreeSet<>(new Comparator<String>() {
+            @Override
+            public int compare(String o1, String o2) {
+                try{
+                    Double d1 = Double.parseDouble(o1);
+                    Double d2 = Double.parseDouble(o2);
+                    return d1.compareTo(d2);
+                }catch(NumberFormatException e){
+                    e.printStackTrace();
+                    return 0;
+                }
+            }
+        });
+        int num = 1000000;
+        double k = -0.0;
+        int size = 0;
+        StringBytesConverter converter = new StringBytesConverter();
+        for(int i=0;i<num;i++)
+        {
+            String value = k+"";
+            set.add(value);
+            k += 1.55;
+            String basic = "-9999999999999952517";
+            size += converter.convertToBytes(basic).length;
+        }
+        int treeNum = 5;
+        //TrieDictionaryForestBuilder<String> builder = newDictBuilder(set.iterator(),0,size / treeNum);
+        //TrieDictionaryForest<String> dict = builder.build();
+        NumberDictionaryForestBuilder<String> builder = new NumberDictionaryForestBuilder<String>(new StringBytesConverter(),0);
+        builder.setMaxTrieSize(size / treeNum);
+        Iterator<String> it = set.iterator();
+        while(it.hasNext()){
+            String str = it.next();
+            if(str.contains("E")){
+                set.remove(str);
+            }
+            else{
+                builder.addValue(str);
+            }
+        }
+
+        NumberDictionaryForest<String> dict = builder.build();
+        System.out.println("tree size:"+dict.getTreeSize());
+        System.out.println("--------------dict-----------------");
+        dict.dump(System.out);
+        System.out.println("--------------set-------------------");
+        System.out.println(set);
+
+        //test special value
+        String query1 = "183.82499999999996";
+        int id1 = dict.getIdFromValue(query1,1);
+        String actualValue = dict.getValueFromId(id1);
+        //System.out.println("id:"+id1+"  value:"+actualValue);
+        //System.out.println(set.ceiling(query1));
+
+        //dict.dump(System.out);
+        int testTimes = 1000000;
+        double queryBasic = -145.355;
+        //test roundingFlag > 0
+        for(int i=0;i<testTimes;i++)
+        {
+            String query = queryBasic+"";
+            //System.out.println("query:"+query);
+            queryBasic += 1.51;
+            if(query.contains("E"))
+                continue;
+            try {
+                int id = dict.getIdFromValue(query, 1);
+                assertEquals(set.ceiling(query),dict.getValueFromId(id));
+            }catch(IllegalArgumentException e){
+                assertNull(set.ceiling(query));
+            }
+        }
+
+
+        //test roundingFlag < 0
+        queryBasic = -551.3588;
+        for(int i=0;i<testTimes;i++)
+        {
+            String query = queryBasic+"";
+            queryBasic += 1.0;
+            if(query.contains("E"))
+                continue;
+            try {
+                int id = dict.getIdFromValue(query, -1);
+                assertEquals(set.floor(query),dict.getValueFromId(id));
+            }catch(IllegalArgumentException e){
+                assertNull(set.floor(query));
+            }
+        }
+    }
+
+
     private static TrieDictionaryForest<String> testSerialize(TrieDictionaryForest<String> dict) {
         try {
             ByteArrayOutputStream bout = new ByteArrayOutputStream();
@@ -277,35 +563,54 @@ public class TrieDictionaryForestTest {
 
     }*/
 
-    //benchmark
-    @Deprecated
-    public void memoryUsageBenchmarkTest() throws Exception {
-        //create data
-        ArrayList<String> testData = getTestData((int) (Integer.MAX_VALUE * 0.8 / 640));
-        int testTimes = 1;
-        System.out.println("start memory:" + Runtime.getRuntime().maxMemory());
-        System.out.println("start memory:" + Runtime.getRuntime().totalMemory());
-        for (int i = 0; i < testTimes; i++) {
-            long start = MemoryBudgetController.gcAndGetSystemAvailMB();
-            TrieDictionaryBuilder<String> b = new TrieDictionaryBuilder<>(new StringBytesConverter());
-            for (String str : testData)
-                b.addValue(str);
-            long end = MemoryBudgetController.gcAndGetSystemAvailMB();
-            System.out.println("object trie memory usage:" + (end - start) + "MB");
-            System.out.println("start memory:" + Runtime.getRuntime().maxMemory());
-            System.out.println("start memory:" + Runtime.getRuntime().totalMemory());
-            /*System.out.println(b == null);
-            startMemUse = getSystemCurUsedMemory();
-            TrieDictionary<String> dict = b.build(0);
-            memUse = getSystemCurUsedMemory();
-            System.out.println("array trie memory usage:"+(memUse-startMemUse)/(1024*1024)+"MB");
-            System.out.println(b == null );
-            System.out.println(dict == null);*/
+    /*
+    add values to the dictionary until an OOM error occurs
+     */
+    @Ignore
+    @Test
+    public void memoryUsageBenchmarkOldDictTest() throws Exception {
+        System.out.println("max memory:"+Runtime.getRuntime().maxMemory());
+        System.gc();
+        Thread.currentThread().sleep(1000);
+        NumberDictionaryBuilder<String> b = new NumberDictionaryBuilder<>(new StringBytesConverter());
+        int k = 0;
+        while(true){
+            b.addValue(k+"");
+            if(k%100000 == 0)
+                System.out.println(k);
+            k++;
         }
+        //memory:1908932608  entries:17500000
+    }
 
-
+    @Ignore
+    @Test
+    public void memoryUsageBenchmarkNewDictForestTest() throws Exception {
+        System.out.println("max memory:"+Runtime.getRuntime().maxMemory());
+        System.gc();
+        Thread.currentThread().sleep(3000);
+        NumberDictionaryForestBuilder<String> b = new NumberDictionaryForestBuilder<>(new StringBytesConverter(),0,0);
+        int k = 0;
+        while(true){
+            b.addValue(k+"");
+            if(k%100000 == 0)
+                System.out.println(k);
+            k++;
+        }
+        /*
+        memory:1908932608(1800MB)
+        maxTrieSize:500M  entry:17500000
+        maxTrieSize:180M  entry:47100000
+        maxTrieSize:100M  entry:83800000
+        maxTrieSize:50M  entry:128400000
+        maxTrieSize:25M  entry:148100000
+        maxTrieSize:0M  entry: 5000000
+
+        5-8
+         */
     }
 
+
     @Deprecated
     private long getSystemCurUsedMemory() throws Exception {
         System.gc();
@@ -559,21 +864,29 @@ public class TrieDictionaryForestTest {
         return result;
     }
 
-    private static TrieDictionaryForestBuilder<String> newDictBuilder(Iterable<String> strs, int baseId) {
+    public static TrieDictionaryForestBuilder<String> newDictBuilder(Iterable<String> strs, int baseId) {
         TrieDictionaryForestBuilder<String> b = new TrieDictionaryForestBuilder<String>(new StringBytesConverter(), baseId);
         for (String s : strs)
             b.addValue(s);
         return b;
     }
 
-    private static TrieDictionaryForestBuilder<String> newDictBuilder(Iterable<String> strs, int baseId, int treeSize) {
+    public static TrieDictionaryForestBuilder<String> newDictBuilder(Iterable<String> strs, int baseId, int treeSize) {
         TrieDictionaryForestBuilder<String> b = new TrieDictionaryForestBuilder<String>(new StringBytesConverter(), baseId);
-        TrieDictionaryForestBuilder.MaxTrieTreeSize = treeSize;
+        b.setMaxTrieTreeSize(treeSize);
         for (String s : strs)
             b.addValue(s);
         return b;
     }
 
+    public static TrieDictionaryForestBuilder<String> newDictBuilder(Iterator<String> strs, int baseId, int treeSize) {
+        TrieDictionaryForestBuilder<String> b = new TrieDictionaryForestBuilder<String>(new StringBytesConverter(), baseId);
+        b.setMaxTrieTreeSize(treeSize);
+        while(strs.hasNext())
+            b.addValue(strs.next());
+        return b;
+    }
+
     private static class RandomStrings implements Iterable<String> {
         final private int size;
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/734a4f98/core-dictionary/src/test/java/org/apache/kylin/dict/lookup/LookupTableTest.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/lookup/LookupTableTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/lookup/LookupTableTest.java
index e4b32db..25d6ae2 100644
--- a/core-dictionary/src/test/java/org/apache/kylin/dict/lookup/LookupTableTest.java
+++ b/core-dictionary/src/test/java/org/apache/kylin/dict/lookup/LookupTableTest.java
@@ -25,8 +25,10 @@ import java.util.Set;
 
 import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.util.DateFormat;
+import org.apache.kylin.common.util.Dictionary;
 import org.apache.kylin.common.util.LocalFileMetadataTestCase;
 import org.apache.kylin.common.util.Pair;
+import org.apache.kylin.dict.TrieDictionaryForest;
 import org.apache.kylin.metadata.MetadataManager;
 import org.apache.kylin.metadata.model.TableDesc;
 import org.junit.After;
@@ -107,6 +109,13 @@ public class LookupTableTest extends LocalFileMetadataTestCase {
         }
     }
 
+    @Test
+    public void testGetClassName(){
+        String name = TrieDictionaryForest.class.getName();
+        System.out.println(name);
+
+    }
+
     private String millis(String dateStr) {
         return String.valueOf(DateFormat.stringToMillis(dateStr));
     }

http://git-wip-us.apache.org/repos/asf/kylin/blob/734a4f98/engine-mr/src/main/java/org/apache/kylin/engine/mr/DFSFileTableSortedReader.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/DFSFileTableSortedReader.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/DFSFileTableSortedReader.java
new file mode 100644
index 0000000..6af35d2
--- /dev/null
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/DFSFileTableSortedReader.java
@@ -0,0 +1,249 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang.StringEscapeUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.SequenceFile.Reader;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.kylin.common.util.Bytes;
+import org.apache.kylin.common.util.StringSplitter;
+import org.apache.kylin.source.ReadableTable.TableReader;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedReader;
+import java.io.Closeable;
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Only used for reading the output files of FactDistinctColumnsJob2.
+ */
+public class DFSFileTableSortedReader implements TableReader {
+
+    private static final Logger logger = LoggerFactory.getLogger(DFSFileTableSortedReader.class);
+    private static final char CSV_QUOTE = '"';
+    private static final String[] DETECT_DELIMS = new String[] { "\177", "|", "\t", "," };
+
+    private String filePath;
+    private String delim;
+    private List<RowReader> readerList;
+
+    private String curLine;
+    private String[] curColumns;
+    private int expectedColumnNumber = -1; // helps delimiter detection
+
+    public DFSFileTableSortedReader(String filePath, int expectedColumnNumber) throws IOException {
+        this(filePath, DFSFileTable.DELIM_AUTO, expectedColumnNumber);
+    }
+
+    public DFSFileTableSortedReader(String filePath, String delim, int expectedColumnNumber) throws IOException {
+        filePath = HadoopUtil.fixWindowsPath(filePath);
+        this.filePath = filePath;
+        this.delim = delim;
+        this.expectedColumnNumber = expectedColumnNumber;
+        this.readerList = new ArrayList<RowReader>();
+
+        FileSystem fs = HadoopUtil.getFileSystem(filePath);
+
+        ArrayList<FileStatus> allFiles = new ArrayList<>();
+        FileStatus status = fs.getFileStatus(new Path(filePath));
+        if (status.isFile()) {
+            allFiles.add(status);
+        } else {
+            FileStatus[] listStatus = fs.listStatus(new Path(filePath));
+            allFiles.addAll(Arrays.asList(listStatus));
+        }
+
+        try {
+            for (FileStatus f : allFiles) {
+                RowReader rowReader = new SeqRowReader(HadoopUtil.getCurrentConfiguration(), fs, f.getPath().toString());
+                this.readerList.add(rowReader);
+            }
+        } catch (IOException e) {
+            if (isExceptionSayingNotSeqFile(e) == false)
+                throw e;
+
+            this.readerList = new ArrayList<RowReader>();
+            for (FileStatus f : allFiles) {
+                RowReader rowReader = new CsvRowReader(fs, f.getPath().toString());
+                this.readerList.add(rowReader);
+            }
+        }
+    }
+
+    private boolean isExceptionSayingNotSeqFile(IOException e) {
+        if (e.getMessage() != null && e.getMessage().contains("not a SequenceFile"))
+            return true;
+
+        if (e instanceof EOFException) // in case the file is very very small
+            return true;
+
+        return false;
+    }
+
+    @Override
+    public boolean next() throws IOException {
+        int curReaderIndex = -1;
+        RowReader curReader;
+
+        while (++curReaderIndex < readerList.size()) {
+            curReader = readerList.get(curReaderIndex);
+            curLine = curReader.nextLine();
+            curColumns = null;
+
+            if (curLine != null) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    public String getLine() {
+        return curLine;
+    }
+
+    @Override
+    public String[] getRow() {
+        if (curColumns == null) {
+            if (DFSFileTable.DELIM_AUTO.equals(delim))
+                delim = autoDetectDelim(curLine);
+
+            if (delim == null)
+                curColumns = new String[] { curLine };
+            else
+                curColumns = split(curLine, delim);
+        }
+        return curColumns;
+    }
+
+    private String[] split(String line, String delim) {
+        // FIXME CSV lines should be parsed considering escapes
+        String[] str = StringSplitter.split(line, delim);
+
+        // un-escape CSV
+        if (DFSFileTable.DELIM_COMMA.equals(delim)) {
+            for (int i = 0; i < str.length; i++) {
+                str[i] = unescapeCsv(str[i]);
+            }
+        }
+
+        return str;
+    }
+
+    private String unescapeCsv(String str) {
+        if (str == null || str.length() < 2)
+            return str;
+
+        str = StringEscapeUtils.unescapeCsv(str);
+
+        // unescapeCsv may not remove the outer most quotes
+        if (str.charAt(0) == CSV_QUOTE && str.charAt(str.length() - 1) == CSV_QUOTE)
+            str = str.substring(1, str.length() - 1);
+
+        return str;
+    }
+
+    @Override
+    public void close() {
+        for (RowReader reader : readerList) {
+            IOUtils.closeQuietly(reader);
+        }
+    }
+
+    private String autoDetectDelim(String line) {
+        if (expectedColumnNumber > 0) {
+            for (String delim : DETECT_DELIMS) {
+                if (StringSplitter.split(line, delim).length == expectedColumnNumber) {
+                    logger.info("Auto detect delim to be '" + delim + "', split line to " + expectedColumnNumber + " columns -- " + line);
+                    return delim;
+                }
+            }
+        }
+
+        logger.info("Auto detect delim to be null, will take THE-WHOLE-LINE as a single value, for " + filePath);
+        return null;
+    }
+
+    // ============================================================================
+
+    private interface RowReader extends Closeable {
+        String nextLine() throws IOException; // return null on EOF
+    }
+
+    private class SeqRowReader implements RowReader {
+        Reader reader;
+        Writable key;
+        Text value;
+
+        SeqRowReader(Configuration hconf, FileSystem fs, String path) throws IOException {
+            reader = new Reader(hconf, Reader.file(new Path(path)));
+            key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), hconf);
+            value = new Text();
+        }
+
+        @Override
+        public String nextLine() throws IOException {
+            boolean hasNext = reader.next(key, value);
+            if (hasNext)
+                return Bytes.toString(value.getBytes(), 0, value.getLength());
+            else
+                return null;
+        }
+
+        @Override
+        public void close() throws IOException {
+            reader.close();
+        }
+    }
+
+    private class CsvRowReader implements RowReader {
+        BufferedReader reader;
+
+        CsvRowReader(FileSystem fs, String path) throws IOException {
+            FSDataInputStream in = fs.open(new Path(path));
+            reader = new BufferedReader(new InputStreamReader(in, "UTF-8"));
+        }
+
+        @Override
+        public String nextLine() throws IOException {
+            return reader.readLine();
+        }
+
+        @Override
+        public void close() throws IOException {
+            reader.close();
+        }
+
+    }
+
+}
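
A minimal usage sketch of the reader added above. The file path, the expected column count, and the example class name are placeholders, not values taken from the repository.

    // A usage sketch; the path, column count and class name are placeholders.
    package org.apache.kylin.engine.mr;

    import org.apache.kylin.source.ReadableTable.TableReader;

    public class DFSFileTableSortedReaderSketch {
        public static void main(String[] args) throws Exception {
            // placeholder path: a SequenceFile/CSV output directory written by FactDistinctColumnsJob2
            TableReader reader = new DFSFileTableSortedReader("/tmp/kylin/fact_distinct_columns/PART_DT", 1);
            try {
                while (reader.next()) {
                    String[] row = reader.getRow();  // one distinct column value per row for this output
                    System.out.println(row[0]);
                }
            } finally {
                reader.close();
            }
        }
    }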

http://git-wip-us.apache.org/repos/asf/kylin/blob/734a4f98/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java
index 47eb9c3..9bb867b 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java
@@ -27,10 +27,10 @@ import org.apache.kylin.engine.mr.common.HadoopShellExecutable;
 import org.apache.kylin.engine.mr.common.MapReduceExecutable;
 import org.apache.kylin.engine.mr.steps.CreateDictionaryJob;
 import org.apache.kylin.engine.mr.steps.CubingExecutableUtil;
-import org.apache.kylin.engine.mr.steps.FactDistinctColumnsJob;
 import org.apache.kylin.engine.mr.steps.MergeDictionaryStep;
 import org.apache.kylin.engine.mr.steps.UpdateCubeInfoAfterBuildStep;
 import org.apache.kylin.engine.mr.steps.UpdateCubeInfoAfterMergeStep;
+import org.apache.kylin.engine.mr.steps.fdc2.FactDistinctColumnsJob2;
 import org.apache.kylin.job.constant.ExecutableConstants;
 import org.apache.kylin.job.engine.JobEngineConfig;
 
@@ -63,7 +63,7 @@ public class JobBuilderSupport {
     private MapReduceExecutable createFactDistinctColumnsStep(String jobId, boolean withStats) {
         MapReduceExecutable result = new MapReduceExecutable();
         result.setName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS);
-        result.setMapReduceJobClass(FactDistinctColumnsJob.class);
+        result.setMapReduceJobClass(FactDistinctColumnsJob2.class);
         StringBuilder cmd = new StringBuilder();
         appendMapReduceParameters(cmd);
         appendExecCmdParameters(cmd, BatchConstants.ARG_CUBE_NAME, seg.getRealization().getName());

http://git-wip-us.apache.org/repos/asf/kylin/blob/734a4f98/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/fdc2/FactDistinctColumnsCombiner2.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/fdc2/FactDistinctColumnsCombiner2.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/fdc2/FactDistinctColumnsCombiner2.java
index 6652f4e..289edd0 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/fdc2/FactDistinctColumnsCombiner2.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/fdc2/FactDistinctColumnsCombiner2.java
@@ -26,7 +26,7 @@ import java.io.IOException;
 /**
  * @author yangli9
  */
-public class FactDistinctColumnsCombiner2 extends KylinReducer<SelfDefineSortableKey, Text, Text, Text> {
+public class FactDistinctColumnsCombiner2 extends KylinReducer<SelfDefineSortableKey, Text, SelfDefineSortableKey, Text> {
 
     @Override
     protected void setup(Context context) throws IOException {
@@ -36,9 +36,10 @@ public class FactDistinctColumnsCombiner2 extends KylinReducer<SelfDefineSortabl
     @Override
     public void doReduce(SelfDefineSortableKey key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
 
+
         // for hll, each key only has one output, no need to do local combine;
         // for normal col, values are empty text
-        context.write(key.getText(), values.iterator().next());
+        context.write(key, values.iterator().next());
     }
 
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/734a4f98/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/fdc2/FactDistinctColumnsJob2.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/fdc2/FactDistinctColumnsJob2.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/fdc2/FactDistinctColumnsJob2.java
index 4d26402..2e84f45 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/fdc2/FactDistinctColumnsJob2.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/fdc2/FactDistinctColumnsJob2.java
@@ -34,7 +34,6 @@ import org.apache.kylin.engine.mr.IMRInput.IMRTableInputFormat;
 import org.apache.kylin.engine.mr.MRUtil;
 import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
 import org.apache.kylin.engine.mr.common.BatchConstants;
-import org.apache.kylin.engine.mr.steps.FactDistinctColumnsReducer;
 import org.apache.kylin.metadata.model.TblColRef;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -127,7 +126,7 @@ public class FactDistinctColumnsJob2 extends AbstractHadoopJob {
     }
 
     private void setupReducer(Path output, int numberOfReducers) throws IOException {
-        job.setReducerClass(FactDistinctColumnsReducer.class);  //reducer do not need to change
+        job.setReducerClass(FactDistinctColumnsReducer2.class);
         job.setOutputFormatClass(SequenceFileOutputFormat.class);
         job.setOutputKeyClass(NullWritable.class);
         job.setOutputValueClass(Text.class);

http://git-wip-us.apache.org/repos/asf/kylin/blob/734a4f98/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/fdc2/FactDistinctColumnsMapperBase2.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/fdc2/FactDistinctColumnsMapperBase2.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/fdc2/FactDistinctColumnsMapperBase2.java
index 6238d22..037afeb 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/fdc2/FactDistinctColumnsMapperBase2.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/fdc2/FactDistinctColumnsMapperBase2.java
@@ -41,6 +41,7 @@ import java.util.List;
 
 /**
  */
+
 abstract public class FactDistinctColumnsMapperBase2<KEYIN, VALUEIN> extends KylinMapper<KEYIN, VALUEIN, SelfDefineSortableKey, Text> {
 
     protected String cubeName;


[3/3] kylin git commit: KYLIN-1851 code review format

Posted by li...@apache.org.
KYLIN-1851 code review format


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/350547e6
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/350547e6
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/350547e6

Branch: refs/heads/master
Commit: 350547e6ec6634008a4d07d771822f81acc2bcbe
Parents: 734a4f9
Author: Li Yang <li...@apache.org>
Authored: Wed Nov 16 18:46:49 2016 +0800
Committer: Li Yang <li...@apache.org>
Committed: Wed Nov 16 18:46:49 2016 +0800

----------------------------------------------------------------------
 build/conf/kylin.properties                     |   3 +-
 .../apache/kylin/common/KylinConfigBase.java    |   4 +-
 .../org/apache/kylin/cube/model/CubeDesc.java   |   4 +-
 .../org/apache/kylin/cube/CubeDescTest.java     |  28 +--
 .../apache/kylin/dict/DictionaryGenerator.java  |  10 +-
 .../apache/kylin/dict/DictionaryManager.java    |   3 -
 .../dict/NumberDictionaryForestBuilder.java     |   7 +-
 .../org/apache/kylin/dict/TrieDictionary.java   |   6 -
 .../apache/kylin/dict/TrieDictionaryForest.java |  72 +-----
 .../kylin/dict/TrieDictionaryForestBuilder.java |  10 +-
 .../apache/kylin/dict/NumberDictionaryTest.java |   8 +-
 .../engine/mr/DFSFileTableSortedReader.java     | 249 -------------------
 .../mr/steps/NumberDictionaryForestTest.java    |  25 +-
 .../test_case_data/sandbox/kylin.properties     |   9 +-
 .../org/apache/kylin/query/KylinTestBase.java   |   3 -
 15 files changed, 54 insertions(+), 387 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/build/conf/kylin.properties
----------------------------------------------------------------------
diff --git a/build/conf/kylin.properties b/build/conf/kylin.properties
index 715b7a6..3b50c12 100644
--- a/build/conf/kylin.properties
+++ b/build/conf/kylin.properties
@@ -118,6 +118,7 @@ kylin.job.mapreduce.mapper.input.rows=1000000
 
 kylin.job.step.timeout=7200
 
+
 ### CUBE ###
 
 # 'auto', 'inmem', 'layer' or 'random' for testing
@@ -131,8 +132,6 @@ kylin.dictionary.max.cardinality=5000000
 
 kylin.table.snapshot.max_mb=300
 
-#max size for one trie in TrieDictionaryForest (default 500MB)
-
 
 
 ### QUERY ###

http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 300f727..c7dd8a8 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -210,7 +210,7 @@ abstract public class KylinConfigBase implements Serializable {
 
     public String[] getRealizationProviders() {
         return getOptionalStringArray("kylin.realization.providers", //
-                new String[]{"org.apache.kylin.cube.CubeManager", "org.apache.kylin.storage.hybrid.HybridManager"});
+                new String[] {"org.apache.kylin.cube.CubeManager", "org.apache.kylin.storage.hybrid.HybridManager"});
     }
 
     public CliCommandExecutor getCliCommandExecutor() throws IOException {
@@ -591,7 +591,7 @@ abstract public class KylinConfigBase implements Serializable {
     }
 
     public int[] getQueryMetricsPercentilesIntervals() {
-        String[] dft = {"60", "300", "3600"};
+        String[] dft = { "60", "300", "3600" };
         return getOptionalIntArray("kylin.query.metrics.percentiles.intervals", dft);
     }
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
index c9ebff8..7dad87b 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
@@ -755,7 +755,7 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware {
                     int find = ArrayUtils.indexOf(dimColArray, fk[i]);
                     if (find >= 0) {
                         TblColRef derivedCol = initDimensionColRef(pk[i]);
-                        initDerivedMap(new TblColRef[]{dimColArray[find]}, DeriveType.PK_FK, dim, new TblColRef[]{derivedCol}, null);
+                        initDerivedMap(new TblColRef[] { dimColArray[find] }, DeriveType.PK_FK, dim, new TblColRef[] { derivedCol }, null);
                     }
                 }
             }
@@ -776,7 +776,7 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware {
                 extra[i] = "";
             }
         }
-        return new String[][]{cols, extra};
+        return new String[][] { cols, extra };
     }
 
     private void initDerivedMap(TblColRef[] hostCols, DeriveType type, DimensionDesc dimension, TblColRef[] derivedCols, String[] extra) {

http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java
----------------------------------------------------------------------
diff --git a/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java b/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java
index 3326b24..9ad6427 100644
--- a/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java
+++ b/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java
@@ -89,7 +89,7 @@ public class CubeDescTest extends LocalFileMetadataTestCase {
     @Test
     public void testBadInit3() throws Exception {
         thrown.expect(IllegalStateException.class);
-        thrown.expectMessage("Aggregation group 0 'includes' dimensions not include all the dimensions:" + sortStrs(new String[]{"SELLER_ID", "META_CATEG_NAME", "LSTG_FORMAT_NAME", "LSTG_SITE_ID", "SLR_SEGMENT_CD"}));
+        thrown.expectMessage("Aggregation group 0 'includes' dimensions not include all the dimensions:" + sortStrs(new String[] { "SELLER_ID", "META_CATEG_NAME", "LSTG_FORMAT_NAME", "LSTG_SITE_ID", "SLR_SEGMENT_CD" }));
         CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc");
         String[] temp = Arrays.asList(cubeDesc.getAggregationGroups().get(0).getIncludes()).subList(0, 3).toArray(new String[3]);
         cubeDesc.getAggregationGroups().get(0).setIncludes(temp);
@@ -114,7 +114,7 @@ public class CubeDescTest extends LocalFileMetadataTestCase {
     @Test
     public void testBadInit5() throws Exception {
         CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc");
-        cubeDesc.getAggregationGroups().get(0).getSelectRule().mandatory_dims = new String[]{"seller_id", "META_CATEG_NAME"};
+        cubeDesc.getAggregationGroups().get(0).getSelectRule().mandatory_dims = new String[] { "seller_id", "META_CATEG_NAME" };
 
         cubeDesc.init(getTestConfig());
     }
@@ -122,7 +122,7 @@ public class CubeDescTest extends LocalFileMetadataTestCase {
     @Test
     public void testBadInit6() throws Exception {
         CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc");
-        cubeDesc.getAggregationGroups().get(0).getSelectRule().mandatory_dims = new String[]{"seller_id", "lstg_format_name"};
+        cubeDesc.getAggregationGroups().get(0).getSelectRule().mandatory_dims = new String[] { "seller_id", "lstg_format_name" };
 
         cubeDesc.init(getTestConfig());
     }
@@ -133,43 +133,43 @@ public class CubeDescTest extends LocalFileMetadataTestCase {
         thrown.expectMessage("Aggregation group 0 require at least 2 dimensions in a joint");
 
         CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc");
-        cubeDesc.getAggregationGroups().get(0).getSelectRule().joint_dims = new String[][]{new String[]{"lstg_format_name"}};
+        cubeDesc.getAggregationGroups().get(0).getSelectRule().joint_dims = new String[][] { new String[] { "lstg_format_name" } };
 
         cubeDesc.init(getTestConfig());
     }
 
     @Test
     public void testBadInit8() throws Exception {
-        String[] strs = new String[]{"CATEG_LVL2_NAME", "META_CATEG_NAME"};
+        String[] strs = new String[] { "CATEG_LVL2_NAME", "META_CATEG_NAME" };
         thrown.expect(IllegalStateException.class);
         thrown.expectMessage("Aggregation group 0 hierarchy dimensions overlap with joint dimensions: " + sortStrs(strs));
 
         CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc");
-        cubeDesc.getAggregationGroups().get(0).getSelectRule().joint_dims = new String[][]{new String[]{"META_CATEG_NAME", "CATEG_LVL2_NAME"}};
+        cubeDesc.getAggregationGroups().get(0).getSelectRule().joint_dims = new String[][] { new String[] { "META_CATEG_NAME", "CATEG_LVL2_NAME" } };
 
         cubeDesc.init(getTestConfig());
     }
 
     @Test
     public void testBadInit9() throws Exception {
-        String[] strs = new String[]{"lstg_format_name", "META_CATEG_NAME"};
+        String[] strs = new String[] { "lstg_format_name", "META_CATEG_NAME" };
         thrown.expect(IllegalStateException.class);
         thrown.expectMessage("Aggregation group 0 hierarchy dimensions overlap with joint dimensions: " + sortStrs(strs));
         CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc");
-        cubeDesc.getAggregationGroups().get(0).getSelectRule().hierarchy_dims = new String[][]{new String[]{"META_CATEG_NAME", "CATEG_LVL2_NAME", "CATEG_LVL3_NAME"}, new String[]{"lstg_format_name", "lstg_site_id"}};
-        cubeDesc.getAggregationGroups().get(0).getSelectRule().joint_dims = new String[][]{new String[]{"META_CATEG_NAME", "lstg_format_name"}};
+        cubeDesc.getAggregationGroups().get(0).getSelectRule().hierarchy_dims = new String[][] { new String[] { "META_CATEG_NAME", "CATEG_LVL2_NAME", "CATEG_LVL3_NAME" }, new String[] { "lstg_format_name", "lstg_site_id" } };
+        cubeDesc.getAggregationGroups().get(0).getSelectRule().joint_dims = new String[][] { new String[] { "META_CATEG_NAME", "lstg_format_name" } };
 
         cubeDesc.init(getTestConfig());
     }
 
     @Test
     public void testBadInit10() throws Exception {
-        String[] strs = new String[]{"lstg_format_name", "lstg_site_id"};
+        String[] strs = new String[] { "lstg_format_name", "lstg_site_id" };
         thrown.expect(IllegalStateException.class);
         thrown.expectMessage("Aggregation group 0 a dimension exist in more than one joint: " + sortStrs(strs));
 
         CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc");
-        cubeDesc.getAggregationGroups().get(0).getSelectRule().joint_dims = new String[][]{new String[]{"lstg_format_name", "lstg_site_id", "slr_segment_cd"}, new String[]{"lstg_format_name", "lstg_site_id", "leaf_categ_id"}};
+        cubeDesc.getAggregationGroups().get(0).getSelectRule().joint_dims = new String[][] { new String[] { "lstg_format_name", "lstg_site_id", "slr_segment_cd" }, new String[] { "lstg_format_name", "lstg_site_id", "leaf_categ_id" } };
 
         cubeDesc.init(getTestConfig());
     }
@@ -180,19 +180,19 @@ public class CubeDescTest extends LocalFileMetadataTestCase {
         thrown.expectMessage("Aggregation group 0 require at least 2 dimensions in a hierarchy.");
 
         CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc");
-        cubeDesc.getAggregationGroups().get(0).getSelectRule().hierarchy_dims = new String[][]{new String[]{"META_CATEG_NAME"}};
+        cubeDesc.getAggregationGroups().get(0).getSelectRule().hierarchy_dims = new String[][] { new String[] { "META_CATEG_NAME" } };
 
         cubeDesc.init(getTestConfig());
     }
 
     @Test
     public void testBadInit12() throws Exception {
-        String[] strs = new String[]{"CATEG_LVL2_NAME", "META_CATEG_NAME"};
+        String[] strs = new String[] { "CATEG_LVL2_NAME", "META_CATEG_NAME" };
         thrown.expect(IllegalStateException.class);
         thrown.expectMessage("Aggregation group 0 a dimension exist in more than one hierarchy: " + sortStrs(strs));
 
         CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc");
-        cubeDesc.getAggregationGroups().get(0).getSelectRule().hierarchy_dims = new String[][]{new String[]{"META_CATEG_NAME", "CATEG_LVL2_NAME", "CATEG_LVL3_NAME"}, new String[]{"META_CATEG_NAME", "CATEG_LVL2_NAME"}};
+        cubeDesc.getAggregationGroups().get(0).getSelectRule().hierarchy_dims = new String[][] { new String[] { "META_CATEG_NAME", "CATEG_LVL2_NAME", "CATEG_LVL3_NAME" }, new String[] { "META_CATEG_NAME", "CATEG_LVL2_NAME" } };
 
         cubeDesc.init(getTestConfig());
     }
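
For contrast with the failure cases above, here is a hedged sketch of a select rule shaped to satisfy the constraints these tests assert: joints and hierarchies need at least two dimensions, hierarchy and joint dimensions must not overlap, and no dimension may appear in more than one joint or hierarchy. It reuses only field names and dimensions that appear in the tests; whether this exact combination passes init() against test_kylin_cube_with_slr_desc still depends on the rest of that cube's metadata, so treat it as illustrative only.

    // Illustrative only: a rule that respects the checks exercised above.
    CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc");
    cubeDesc.getAggregationGroups().get(0).getSelectRule().mandatory_dims = new String[] { "LSTG_FORMAT_NAME" };
    cubeDesc.getAggregationGroups().get(0).getSelectRule().hierarchy_dims = new String[][] { new String[] { "META_CATEG_NAME", "CATEG_LVL2_NAME", "CATEG_LVL3_NAME" } }; // one hierarchy, >= 2 dims
    cubeDesc.getAggregationGroups().get(0).getSelectRule().joint_dims = new String[][] { new String[] { "LSTG_SITE_ID", "SLR_SEGMENT_CD" } }; // one joint, >= 2 dims, no overlap with the hierarchy
    cubeDesc.init(getTestConfig()); // would throw IllegalStateException if one of the rules above were violated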

http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
index 8695338..8eafe5f 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
@@ -36,12 +36,12 @@ import com.google.common.base.Preconditions;
 /**
  * @author yangli9
  */
-@SuppressWarnings({"rawtypes", "unchecked"})
+@SuppressWarnings({ "rawtypes", "unchecked" })
 public class DictionaryGenerator {
 
     private static final Logger logger = LoggerFactory.getLogger(DictionaryGenerator.class);
 
-    private static final String[] DATE_PATTERNS = new String[]{"yyyy-MM-dd", "yyyyMMdd"};
+    private static final String[] DATE_PATTERNS = new String[] { "yyyy-MM-dd", "yyyyMMdd" };
 
     public static Dictionary<String> buildDictionary(DataType dataType, IDictionaryValueEnumerator valueEnumerator) throws IOException {
         Preconditions.checkNotNull(dataType, "dataType cannot be null");
@@ -138,7 +138,6 @@ public class DictionaryGenerator {
         @Override
         public Dictionary<String> build(DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList<String> returnSamples) throws IOException {
             int maxTrieSizeInMB = TrieDictionaryForestBuilder.getMaxTrieSizeInMB();
-            //TrieDictionaryBuilder builder = new TrieDictionaryBuilder(new StringBytesConverter());
             TrieDictionaryForestBuilder builder = new TrieDictionaryForestBuilder(new StringBytesConverter(), baseId, maxTrieSizeInMB);
             byte[] value;
             while (valueEnumerator.moveNext()) {
@@ -151,15 +150,13 @@ public class DictionaryGenerator {
                     returnSamples.add(v);
             }
             return builder.build();
-            //return builder.build(baseId);
         }
     }
 
     private static class NumberDictBuilder implements IDictionaryBuilder {
         @Override
         public Dictionary<String> build(DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList<String> returnSamples) throws IOException {
-            int maxTrieSizeInMB = TrieDictionaryForestBuilder.getMaxTrieSizeInMB();
-            NumberDictionaryForestBuilder builder = new NumberDictionaryForestBuilder(new StringBytesConverter(), baseId, maxTrieSizeInMB);
+            NumberDictionaryForestBuilder builder = new NumberDictionaryForestBuilder(new StringBytesConverter(), baseId);
             byte[] value;
             while (valueEnumerator.moveNext()) {
                 value = valueEnumerator.current();
@@ -177,5 +174,4 @@ public class DictionaryGenerator {
         }
     }
 
-
 }
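
As a usage illustration of the builder the string path now relies on, the sketch below builds a small forest-backed dictionary directly. Only the constructor, getMaxTrieSizeInMB() and build() calls are taken from the hunk above; the addValue(String) method name is an assumption (mirroring NumberDictionaryForestBuilder), and it is also assumed that values are fed in sorted order, since the forest is cut into consecutive sub-tries.

    // Hedged sketch: build a forest dictionary from a few pre-sorted string values.
    int maxTrieSizeInMB = TrieDictionaryForestBuilder.getMaxTrieSizeInMB();          // global default, as in StringTrieDictBuilder
    TrieDictionaryForestBuilder<String> builder =
            new TrieDictionaryForestBuilder<String>(new StringBytesConverter(), 0, maxTrieSizeInMB);
    for (String v : new String[] { "BEIJING", "SHANGHAI", "SHENZHEN" }) {            // assumed: input already sorted
        builder.addValue(v);                                                         // assumed method name
    }
    Dictionary<String> dict = builder.build();                                       // same build() call as in the hunk above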

http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
index b8d039e..2dd5085 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
@@ -418,9 +418,6 @@ public class DictionaryManager {
 
         logger.info("DictionaryManager(" + System.identityHashCode(this) + ") loading DictionaryInfo(loadDictObj:" + loadDictObj + ") at " + resourcePath);
         DictionaryInfo info = store.getResource(resourcePath, DictionaryInfo.class, loadDictObj ? DictionaryInfoSerializer.FULL_SERIALIZER : DictionaryInfoSerializer.INFO_SERIALIZER);
-        //info.dictionaryObject.dump(System.out);
-        //        if (loadDictObj)
-        //            logger.debug("Loaded dictionary at " + resourcePath);
 
         return info;
     }

http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryForestBuilder.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryForestBuilder.java b/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryForestBuilder.java
index c997ce1..519d4c3 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryForestBuilder.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryForestBuilder.java
@@ -29,8 +29,7 @@ public class NumberDictionaryForestBuilder<T> {
 
     private BytesConverter<T> bytesConverter;
 
-    private NumberDictionaryForest.NumberBytesCodec codec = new NumberDictionaryForest.NumberBytesCodec(
-            NumberDictionaryForest.MAX_DIGITS_BEFORE_DECIMAL_POINT);
+    private NumberDictionaryForest.NumberBytesCodec codec = new NumberDictionaryForest.NumberBytesCodec(NumberDictionaryForest.MAX_DIGITS_BEFORE_DECIMAL_POINT);
 
     public NumberDictionaryForestBuilder(BytesConverter<T> bytesConverter) {
         this(bytesConverter, 0);
@@ -50,8 +49,6 @@ public class NumberDictionaryForestBuilder<T> {
         addValue(bytesConverter.convertToBytes(value));
     }
 
-
-
     public void addValue(byte[] value) {
         codec.encodeNumber(value, 0, value.length);
         byte[] copy = Bytes.copy(codec.buf, codec.bufOffset, codec.bufLen);
@@ -64,7 +61,7 @@ public class NumberDictionaryForestBuilder<T> {
         return new NumberDictionaryForest<T>(forest, bytesConverter);
     }
 
-    public void setMaxTrieSize(int size){
+    public void setMaxTrieSize(int size) {
         this.trieBuilder.setMaxTrieTreeSize(size);
     }
 }
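
addValue(byte[]) above runs every literal through NumberBytesCodec before handing it to the underlying trie builder; the point of a fixed number of digits before the decimal point is presumably to give each number a fixed-width byte form so that plain byte comparison agrees with numeric order. The toy method below is not the codec's actual algorithm, only an illustration of that padding idea for non-negative integers (the real codec also handles signs and decimals).

    // Illustration only: left-pad non-negative integer literals to a fixed width so that
    // lexicographic byte order matches numeric order ("2" > "10" as plain strings, but
    // "00000002" < "00000010" once padded).
    static String padForByteOrder(String literal, int maxDigitsBeforeDecimalPoint) {
        StringBuilder sb = new StringBuilder();
        for (int i = literal.length(); i < maxDigitsBeforeDecimalPoint; i++) {
            sb.append('0');                              // pad up to the fixed width
        }
        return sb.append(literal).toString();
    }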

http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java
index a5e3d36..c099de0 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java
@@ -126,7 +126,6 @@ public class TrieDictionary<T> extends Dictionary<T> {
             else
                 throw new RuntimeException(e);
         }
-        //this.enableValueCache = false;
         if (enableValueCache) {
             valueToIdCache = new SoftReference<Map>(new ConcurrentHashMap());
             idToValueCache = new SoftReference<Object[]>(new Object[nValues]);
@@ -156,7 +155,6 @@ public class TrieDictionary<T> extends Dictionary<T> {
     @Override
     final protected int getIdFromValueImpl(T value, int roundingFlag) {
         if (enableValueCache && roundingFlag == 0) {
-            //System.out.println("use id cache");
             Map cache = valueToIdCache.get(); // SoftReference to skip cache gracefully when short of memory
             if (cache != null) {
                 Integer id = null;
@@ -171,7 +169,6 @@ public class TrieDictionary<T> extends Dictionary<T> {
                 return id;
             }
         }
-        //System.out.println("not use id cache");
         byte[] valueBytes = bytesConvert.convertToBytes(value);
         return getIdFromValueBytes(valueBytes, 0, valueBytes.length, roundingFlag);
     }
@@ -273,7 +270,6 @@ public class TrieDictionary<T> extends Dictionary<T> {
     @Override
     final protected T getValueFromIdImpl(int id) {
         if (enableValueCache) {
-            //System.out.println("use value cache");
             Object[] cache = idToValueCache.get(); // SoftReference to skip cache gracefully when short of memory
             if (cache != null) {
                 int seq = calcSeqNoFromId(id);
@@ -288,10 +284,8 @@ public class TrieDictionary<T> extends Dictionary<T> {
                 return result;
             }
         }
-        //System.out.println("not use value cache");
         byte[] value = new byte[getSizeOfValue()];
         int length = getValueBytesFromId(id, value, 0);
-        //System.out.println("get value by id:"+id+" value:"+bytesConvert.convertFromBytes(value, 0, length).toString());
         return bytesConvert.convertFromBytes(value, 0, length);
     }
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java
index b0440db..38cd0dc 100755
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java
@@ -18,14 +18,6 @@
 package org.apache.kylin.dict;
 
 
-import org.apache.kylin.common.util.ByteArray;
-import org.apache.kylin.common.util.Bytes;
-import org.apache.kylin.common.util.BytesUtil;
-import org.apache.kylin.common.util.ClassUtil;
-import org.apache.kylin.common.util.Dictionary;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
 import java.io.ByteArrayOutputStream;
 import java.io.DataInput;
 import java.io.DataOutput;
@@ -37,6 +29,12 @@ import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;
 
+import org.apache.kylin.common.util.ByteArray;
+import org.apache.kylin.common.util.Bytes;
+import org.apache.kylin.common.util.BytesUtil;
+import org.apache.kylin.common.util.ClassUtil;
+import org.apache.kylin.common.util.Dictionary;
+
 
 /**
  * use trie forest to optimize trie dictionary
@@ -46,8 +44,7 @@ import java.util.List;
  * Created by xiefan on 16-10-26.
  */
 public class TrieDictionaryForest<T> extends Dictionary<T> {
-
-    private static final Logger logger = LoggerFactory.getLogger(TrieDictionaryForest.class);
+    private static final long serialVersionUID = 1L;
 
     private ArrayList<TrieDictionary<T>> trees;
 
@@ -168,30 +165,19 @@ public class TrieDictionaryForest<T> extends Dictionary<T> {
         if (id < 0) {
             throw new IllegalArgumentException("Value '" + Bytes.toString(value, offset, len) + "' (" + Bytes.toStringBinary(value, offset, len) + ") not exists!");
         }
-        //System.out.println("getIdFromValue  value:"+bytesConvert.convertFromBytes(value,offset,len)+" id:"+id);
         return id;
     }
 
-    //id --> value
-    private boolean printstr = false;
-
     @Override
     protected T getValueFromIdImpl(int id) throws IllegalArgumentException {
-        //System.out.println("here");
         byte[] data = getValueBytesFromIdImpl(id);
         if (data != null) {
-            if (!printstr) {
-                System.out.println("getValueFromIdImpl id:" + id + " value:" + bytesConvert.convertFromBytes(data, 0, data.length));
-                printstr = true;
-            }
             return bytesConvert.convertFromBytes(data, 0, data.length);
         } else {
             return null;
         }
     }
 
-    private boolean isPrintstr2 = false;
-
     @Override
     protected int getValueBytesFromIdImpl(int id, byte[] returnValue, int offset)
             throws IllegalArgumentException {
@@ -199,14 +185,7 @@ public class TrieDictionaryForest<T> extends Dictionary<T> {
         int index = findIndexById(id);
         int treeInnerOffset = getTreeInnerOffset(id, index);
         TrieDictionary<T> tree = trees.get(index);
-        //getValueIndexTime2.addAndGet(System.currentTimeMillis() - startTime);
-        //startTime = System.currentTimeMillis();
         int size = tree.getValueBytesFromIdImpl(treeInnerOffset, returnValue, offset);
-        if (!isPrintstr2) {
-            isPrintstr2 = true;
-            System.out.println("getValueBytesFromIdImpl id:" + id + " value:" + bytesConvert.convertFromBytes(returnValue, offset, size));
-        }
-        //getValueTime2.addAndGet(System.currentTimeMillis() - startTime);
         return size;
     }
 
@@ -250,19 +229,10 @@ public class TrieDictionaryForest<T> extends Dictionary<T> {
 
     @Override
     public void write(DataOutput out) throws IOException {
-        System.out.println("write dict");
         writeHead(out);
         writeBody(out);
     }
 
-    /*private int compare(T value1,T value2){
-        byte[] b1 = bytesConvert.convertToBytes(value1);
-        byte[] b2 = bytesConvert.convertToBytes(value2);
-        ByteArray ba1 = new ByteArray(b1,0,b1.length);
-        ByteArray ba2 = new ByteArray(b2,0,b2.length);
-        return ba1.compareTo(ba2);
-    }*/
-
     private void writeHead(DataOutput out) throws IOException {
         ByteArrayOutputStream byteBuf = new ByteArrayOutputStream();
         DataOutputStream headOut = new DataOutputStream(byteBuf);
@@ -299,8 +269,8 @@ public class TrieDictionaryForest<T> extends Dictionary<T> {
 
     @Override
     public void readFields(DataInput in) throws IOException {
-        System.out.println("read dict");
         try {
+            @SuppressWarnings("unused")
             int headSize = in.readInt();
             this.baseId = in.readInt();
             String converterName = in.readUTF();
@@ -371,15 +341,6 @@ public class TrieDictionaryForest<T> extends Dictionary<T> {
         return Collections.unmodifiableList(this.trees);
     }
 
-    private boolean onlyOneTree() {
-        return trees.size() == 1;
-    }
-
-    private int findIndexByValue(T value) {
-        byte[] valueBytes = bytesConvert.convertToBytes(value);
-        return findIndexByValue(new ByteArray(valueBytes, 0, valueBytes.length));
-    }
-
     private int findIndexByValue(ByteArray value) {
         int index = lowerBound(value, new Comparator<ByteArray>() {
             @Override
@@ -421,29 +382,13 @@ public class TrieDictionaryForest<T> extends Dictionary<T> {
                 found = true;
         }
         if (found) {
-            //System.out.println("look for:"+lookfor+" index:"+mid);
             return mid;
         } else {
-            //System.out.println("look for:"+lookfor+" index:"+Math.max(left,right));
             return Math.min(left, right);  //value may be bigger than the right tree
         }
     }
 
     public static void main(String[] args) {
-        /*ArrayList<Integer> list = new ArrayList<>();
-        list.add(3);
-        list.add(10);
-        list.add(15);
-        Comparator<Integer> comp = new Comparator<Integer>() {
-            @Override
-            public int compare(Integer o1, Integer o2) {
-                return o1.compareTo(o2);
-            }
-        };
-        int[] nums = {-1,0,1,2,3,4,13,15,16};
-        for(int i : nums){
-            System.out.println("found value:"+i+" index:"+lowerBound(i,comp,list));
-        }*/
         ArrayList<String> list = new ArrayList<>();
         list.add("\u4e00");
         list.add("\u4e8c");
@@ -464,7 +409,6 @@ public class TrieDictionaryForest<T> extends Dictionary<T> {
                 }
             }, list));
         }
-        //System.out.println(BytesUtil.safeCompareBytes("\u4e8c".getBytes(),"\u4e09".getBytes()));
     }
 
     public BytesConverter<T> getBytesConvert() {
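
The routing from a value or id to the right sub-trie (findIndexByValue / findIndexById above) rests on a lower-bound binary search over the per-tree boundaries. The stand-alone sketch below re-implements that generic search for clarity; it is not the class's exact code (note the Math.min(left, right) fallback the class uses for values beyond the last tree), just the standard technique of returning the first element that compares >= the probe.

    // Generic lower-bound sketch (needs java.util.Comparator and java.util.List):
    // index of the first element in a sorted list comparing >= target, or list.size() if none does.
    static <T> int lowerBound(T target, Comparator<T> comparator, List<T> sorted) {
        int left = 0;
        int right = sorted.size();                       // search window is [left, right)
        while (left < right) {
            int mid = (left + right) >>> 1;
            if (comparator.compare(sorted.get(mid), target) < 0) {
                left = mid + 1;                          // everything up to mid is too small
            } else {
                right = mid;                             // mid may still be the answer
            }
        }
        return left;
    }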

http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java
index 5e2c346..1ceac27 100755
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java
@@ -61,15 +61,7 @@ public class TrieDictionaryForestBuilder<T> {
     }
 
     public TrieDictionaryForestBuilder(BytesConverter<T> bytesConverter, int baseId) {
-        this.bytesConverter = bytesConverter;
-        this.trieBuilder = new TrieDictionaryBuilder<T>(bytesConverter);
-        this.baseId = baseId;
-        curOffset = 0;
-        int maxTrieTreeSizeMB = getMaxTrieSizeInMB();
-        this.maxTrieTreeSize = maxTrieTreeSizeMB * 1024 * 1024;
-        logger.info("maxTrieSize is set to:" + maxTrieTreeSize + "B");
-        //System.out.println("max trie size:"+maxTrieTreeSize);
-        //stringComparator = new ByteComparator<>(new StringBytesConverter());
+        this(bytesConverter, baseId, getMaxTrieSizeInMB());
     }
 
     public TrieDictionaryForestBuilder(BytesConverter<T> bytesConverter, int baseId, int maxTrieTreeSizeMB) {

http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/core-dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java
index a9c4980..ea6358d 100644
--- a/core-dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java
+++ b/core-dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java
@@ -16,7 +16,7 @@
  * limitations under the License.
 */
 
-/*package org.apache.kylin.dict;
+package org.apache.kylin.dict;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.fail;
@@ -35,6 +35,7 @@ import org.apache.kylin.common.util.LocalFileMetadataTestCase;
 import org.apache.kylin.metadata.datatype.DataType;
 import org.junit.After;
 import org.junit.Before;
+import org.junit.Ignore;
 import org.junit.Test;
 
 import com.google.common.collect.Lists;
@@ -42,7 +43,7 @@ import com.google.common.collect.Sets;
 
 /**
  */
-/*public class NumberDictionaryTest extends LocalFileMetadataTestCase {
+public class NumberDictionaryTest extends LocalFileMetadataTestCase {
 
     NumberDictionary.NumberBytesCodec codec = new NumberDictionary.NumberBytesCodec(NumberDictionary.MAX_DIGITS_BEFORE_DECIMAL_POINT);
     Random rand = new Random();
@@ -70,6 +71,7 @@ import com.google.common.collect.Sets;
         assertEquals(1, maxId);
     }
 
+    @Ignore
     @SuppressWarnings("unchecked")
     @Test
     public void testEmptyInput() throws IOException {
@@ -207,4 +209,4 @@ import com.google.common.collect.Sets;
         return buf.toString();
     }
 
-}*/
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/engine-mr/src/main/java/org/apache/kylin/engine/mr/DFSFileTableSortedReader.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/DFSFileTableSortedReader.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/DFSFileTableSortedReader.java
deleted file mode 100644
index 6af35d2..0000000
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/DFSFileTableSortedReader.java
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-package org.apache.kylin.engine.mr;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang.StringEscapeUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.SequenceFile.Reader;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.util.ReflectionUtils;
-import org.apache.kylin.common.util.Bytes;
-import org.apache.kylin.common.util.StringSplitter;
-import org.apache.kylin.source.ReadableTable.TableReader;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.BufferedReader;
-import java.io.Closeable;
-import java.io.EOFException;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * only use for reading output file of FactDistinctColumnsJob2
- */
-public class DFSFileTableSortedReader implements TableReader {
-
-    private static final Logger logger = LoggerFactory.getLogger(DFSFileTableSortedReader.class);
-    private static final char CSV_QUOTE = '"';
-    private static final String[] DETECT_DELIMS = new String[] { "\177", "|", "\t", "," };
-
-    private String filePath;
-    private String delim;
-    private List<RowReader> readerList;
-
-    private String curLine;
-    private String[] curColumns;
-    private int expectedColumnNumber = -1; // helps delimiter detection
-
-    public DFSFileTableSortedReader(String filePath, int expectedColumnNumber) throws IOException {
-        this(filePath, DFSFileTable.DELIM_AUTO, expectedColumnNumber);
-    }
-
-    public DFSFileTableSortedReader(String filePath, String delim, int expectedColumnNumber) throws IOException {
-        filePath = HadoopUtil.fixWindowsPath(filePath);
-        this.filePath = filePath;
-        this.delim = delim;
-        this.expectedColumnNumber = expectedColumnNumber;
-        this.readerList = new ArrayList<RowReader>();
-
-        FileSystem fs = HadoopUtil.getFileSystem(filePath);
-
-        ArrayList<FileStatus> allFiles = new ArrayList<>();
-        FileStatus status = fs.getFileStatus(new Path(filePath));
-        if (status.isFile()) {
-            allFiles.add(status);
-        } else {
-            FileStatus[] listStatus = fs.listStatus(new Path(filePath));
-            allFiles.addAll(Arrays.asList(listStatus));
-        }
-
-        try {
-            for (FileStatus f : allFiles) {
-                RowReader rowReader = new SeqRowReader(HadoopUtil.getCurrentConfiguration(), fs, f.getPath().toString());
-                this.readerList.add(rowReader);
-            }
-        } catch (IOException e) {
-            if (isExceptionSayingNotSeqFile(e) == false)
-                throw e;
-
-            this.readerList = new ArrayList<RowReader>();
-            for (FileStatus f : allFiles) {
-                RowReader rowReader = new CsvRowReader(fs, f.getPath().toString());
-                this.readerList.add(rowReader);
-            }
-        }
-    }
-
-    private boolean isExceptionSayingNotSeqFile(IOException e) {
-        if (e.getMessage() != null && e.getMessage().contains("not a SequenceFile"))
-            return true;
-
-        if (e instanceof EOFException) // in case the file is very very small
-            return true;
-
-        return false;
-    }
-
-    @Override
-    public boolean next() throws IOException {
-        int curReaderIndex = -1;
-        RowReader curReader;
-
-        while (++curReaderIndex < readerList.size()) {
-            curReader = readerList.get(curReaderIndex);
-            curLine = curReader.nextLine();
-            curColumns = null;
-
-            if (curLine != null) {
-                return true;
-            }
-        }
-
-        return false;
-    }
-
-    public String getLine() {
-        return curLine;
-    }
-
-    @Override
-    public String[] getRow() {
-        if (curColumns == null) {
-            if (DFSFileTable.DELIM_AUTO.equals(delim))
-                delim = autoDetectDelim(curLine);
-
-            if (delim == null)
-                curColumns = new String[] { curLine };
-            else
-                curColumns = split(curLine, delim);
-        }
-        return curColumns;
-    }
-
-    private String[] split(String line, String delim) {
-        // FIXME CVS line should be parsed considering escapes
-        String[] str = StringSplitter.split(line, delim);
-
-        // un-escape CSV
-        if (DFSFileTable.DELIM_COMMA.equals(delim)) {
-            for (int i = 0; i < str.length; i++) {
-                str[i] = unescapeCsv(str[i]);
-            }
-        }
-
-        return str;
-    }
-
-    private String unescapeCsv(String str) {
-        if (str == null || str.length() < 2)
-            return str;
-
-        str = StringEscapeUtils.unescapeCsv(str);
-
-        // unescapeCsv may not remove the outer most quotes
-        if (str.charAt(0) == CSV_QUOTE && str.charAt(str.length() - 1) == CSV_QUOTE)
-            str = str.substring(1, str.length() - 1);
-
-        return str;
-    }
-
-    @Override
-    public void close() {
-        for (RowReader reader : readerList) {
-            IOUtils.closeQuietly(reader);
-        }
-    }
-
-    private String autoDetectDelim(String line) {
-        if (expectedColumnNumber > 0) {
-            for (String delim : DETECT_DELIMS) {
-                if (StringSplitter.split(line, delim).length == expectedColumnNumber) {
-                    logger.info("Auto detect delim to be '" + delim + "', split line to " + expectedColumnNumber + " columns -- " + line);
-                    return delim;
-                }
-            }
-        }
-
-        logger.info("Auto detect delim to be null, will take THE-WHOLE-LINE as a single value, for " + filePath);
-        return null;
-    }
-
-    // ============================================================================
-
-    private interface RowReader extends Closeable {
-        String nextLine() throws IOException; // return null on EOF
-    }
-
-    private class SeqRowReader implements RowReader {
-        Reader reader;
-        Writable key;
-        Text value;
-
-        SeqRowReader(Configuration hconf, FileSystem fs, String path) throws IOException {
-            reader = new Reader(hconf, Reader.file(new Path(path)));
-            key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), hconf);
-            value = new Text();
-        }
-
-        @Override
-        public String nextLine() throws IOException {
-            boolean hasNext = reader.next(key, value);
-            if (hasNext)
-                return Bytes.toString(value.getBytes(), 0, value.getLength());
-            else
-                return null;
-        }
-
-        @Override
-        public void close() throws IOException {
-            reader.close();
-        }
-    }
-
-    private class CsvRowReader implements RowReader {
-        BufferedReader reader;
-
-        CsvRowReader(FileSystem fs, String path) throws IOException {
-            FSDataInputStream in = fs.open(new Path(path));
-            reader = new BufferedReader(new InputStreamReader(in, "UTF-8"));
-        }
-
-        @Override
-        public String nextLine() throws IOException {
-            return reader.readLine();
-        }
-
-        @Override
-        public void close() throws IOException {
-            reader.close();
-        }
-
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java
index 554ee9c..66946b7 100644
--- a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java
+++ b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java
@@ -1,16 +1,7 @@
 package org.apache.kylin.engine.mr.steps;
 
-import org.apache.hadoop.io.Text;
-import org.apache.kylin.common.util.Bytes;
-import org.apache.kylin.dict.NumberDictionary;
-import org.apache.kylin.dict.NumberDictionaryBuilder;
-import org.apache.kylin.dict.NumberDictionaryForest;
-import org.apache.kylin.dict.NumberDictionaryForestBuilder;
-import org.apache.kylin.dict.StringBytesConverter;
-import org.apache.kylin.dict.TrieDictionaryForestBuilder;
-import org.apache.kylin.engine.mr.steps.fdc2.SelfDefineSortableKey;
-import org.apache.kylin.engine.mr.steps.fdc2.TypeFlag;
-import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
@@ -25,8 +16,16 @@ import java.util.List;
 import java.util.Random;
 import java.util.UUID;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import org.apache.hadoop.io.Text;
+import org.apache.kylin.common.util.Bytes;
+import org.apache.kylin.dict.NumberDictionary;
+import org.apache.kylin.dict.NumberDictionaryBuilder;
+import org.apache.kylin.dict.NumberDictionaryForest;
+import org.apache.kylin.dict.NumberDictionaryForestBuilder;
+import org.apache.kylin.dict.StringBytesConverter;
+import org.apache.kylin.engine.mr.steps.fdc2.SelfDefineSortableKey;
+import org.apache.kylin.engine.mr.steps.fdc2.TypeFlag;
+import org.junit.Test;
 
 /**
  * Created by xiefan on 16-11-2.

http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/examples/test_case_data/sandbox/kylin.properties
----------------------------------------------------------------------
diff --git a/examples/test_case_data/sandbox/kylin.properties b/examples/test_case_data/sandbox/kylin.properties
index 93b86c9..de1250f 100644
--- a/examples/test_case_data/sandbox/kylin.properties
+++ b/examples/test_case_data/sandbox/kylin.properties
@@ -63,7 +63,7 @@ kylin.job.retry=0
 # you will have to specify kylin.job.remote.cli.hostname, kylin.job.remote.cli.username and kylin.job.remote.cli.password
 # It should not be set to "true" unless you're NOT running Kylin.sh on a hadoop client machine
 # (Thus kylin instance has to ssh to another real hadoop client machine to execute hbase,hive,hadoop commands)
-kylin.job.run.as.remote.cmd=true
+kylin.job.run.as.remote.cmd=false
 
 # Only necessary when kylin.job.run.as.remote.cmd=true
 kylin.job.remote.cli.hostname=sandbox
@@ -112,10 +112,12 @@ kylin.job.uhc.reducer.count=1
 
 ### CUBE ###
 
+# dictionary forest cut
+kylin.dictionary.forest.trie.size.max_mb=500
+
 # 'auto', 'inmem', 'layer' or 'random' for testing
 kylin.cube.algorithm=random
 
-
 # Enable/disable ACL check for cube query
 kylin.query.security.enabled=true
 
@@ -161,6 +163,3 @@ kylin.query.metrics.percentiles.intervals=60, 360, 3600
 
 # Env DEV|QA|PROD
 deploy.env=DEV
-
-#default 500MB
-kylin.dictionary.forest.trie.size.max_mb=500
\ No newline at end of file
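
The relocated kylin.dictionary.forest.trie.size.max_mb entry presumably backs TrieDictionaryForestBuilder.getMaxTrieSizeInMB(), i.e. the default cut size that the two-argument constructor now delegates to. When a different threshold is wanted for a single dictionary, the three-argument constructor shown earlier can be used instead of changing the global property; a hedged sketch (the 100 MB figure is purely illustrative):

    // Hedged sketch: override the per-trie cut size (in MB) for one builder only,
    // instead of changing the global kylin.dictionary.forest.trie.size.max_mb default.
    int cutSizeMB = 100;                                                              // assumed value for illustration
    TrieDictionaryForestBuilder<String> builder =
            new TrieDictionaryForestBuilder<String>(new StringBytesConverter(), 0, cutSizeMB);
    // ... feed sorted values and call builder.build(), as in the DictionaryGenerator sketch above.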

http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/kylin-it/src/test/java/org/apache/kylin/query/KylinTestBase.java
----------------------------------------------------------------------
diff --git a/kylin-it/src/test/java/org/apache/kylin/query/KylinTestBase.java b/kylin-it/src/test/java/org/apache/kylin/query/KylinTestBase.java
index e1303e4..52461c4 100644
--- a/kylin-it/src/test/java/org/apache/kylin/query/KylinTestBase.java
+++ b/kylin-it/src/test/java/org/apache/kylin/query/KylinTestBase.java
@@ -491,10 +491,7 @@ public class KylinTestBase {
             ITable h2Table = executeQuery(h2Conn, queryName, sql, needSort);
 
             try {
-                //compare before junit
                 // compare the result
-                System.out.println("h2 Table rows count:"+h2Table.getRowCount());
-                System.out.println("kylin Table rows count:"+kylinTable.getRowCount());
                 Assertion.assertEquals(h2Table, kylinTable);
             } catch (Throwable t) {
                 printInfo("execAndCompQuery failed on: " + sqlFile.getAbsolutePath());