You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by ma...@apache.org on 2015/12/15 09:46:29 UTC

[6/6] kylin git commit: KYLIN-976 Support Custom Aggregation Types

KYLIN-976 Support Custom Aggregation Types


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/c721d679
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/c721d679
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/c721d679

Branch: refs/heads/1.x-staging
Commit: c721d679727b0345a7fcdd8335a088e7c336aacc
Parents: cdea731
Author: honma <ho...@ebay.com>
Authored: Fri Dec 11 16:08:59 2015 +0800
Committer: honma <ho...@ebay.com>
Committed: Tue Dec 15 16:50:58 2015 +0800

----------------------------------------------------------------------
 .../org/apache/kylin/common/KylinConfig.java    |  40 +-
 .../common/hll/HyperLogLogPlusCounter.java      |  87 +++-
 .../org/apache/kylin/common/util/BytesUtil.java |  16 +
 .../apache/kylin/common/util/Dictionary.java    | 199 +++++++++
 .../kylin/cube/CubeCapabilityChecker.java       | 147 ++++---
 .../org/apache/kylin/cube/CubeInstance.java     |  20 +-
 .../java/org/apache/kylin/cube/CubeManager.java |   6 +-
 .../java/org/apache/kylin/cube/CubeSegment.java |  14 +-
 .../cube/estimation/CubeSizeEstimationCLI.java  |   4 +-
 .../kylin/cube/kv/AbstractRowKeyEncoder.java    |   2 +-
 .../apache/kylin/cube/kv/RowKeyColumnIO.java    |   2 +-
 .../apache/kylin/cube/kv/RowKeyColumnOrder.java |   2 +-
 .../apache/kylin/cube/kv/RowValueDecoder.java   |  27 +-
 .../org/apache/kylin/cube/model/CubeDesc.java   |  61 ++-
 .../apache/kylin/cube/model/v1/CubeDesc.java    |   9 -
 .../model/validation/rule/FunctionRule.java     |  33 +-
 .../kylin/cube/DictionaryManagerTest.java       |   2 +-
 .../kylin/cube/kv/RowValueDecoderTest.java      |   6 +-
 .../metadata/measure/MeasureCodecTest.java      |   3 +-
 cube/src/test/resources/data/TEST1_desc.json    |   7 +-
 cube/src/test/resources/data/TEST2_desc.json    |   7 +-
 cube/src/test/resources/data/TEST3_desc.json    | 418 +++++++++++--------
 .../apache/kylin/dict/DateStrDictionary.java    |   1 +
 .../java/org/apache/kylin/dict/Dictionary.java  | 199 ---------
 .../apache/kylin/dict/DictionaryGenerator.java  |  91 ++--
 .../org/apache/kylin/dict/DictionaryInfo.java   |   1 +
 .../kylin/dict/DictionaryInfoSerializer.java    |   1 +
 .../apache/kylin/dict/DictionaryManager.java    |   1 +
 .../java/org/apache/kylin/dict/ISegment.java    |   3 +-
 .../dict/MultipleDictionaryValueEnumerator.java |   1 +
 .../org/apache/kylin/dict/TrieDictionary.java   |   1 +
 .../apache/kylin/dict/lookup/SnapshotTable.java |   2 +-
 .../test_kylin_cube_without_slr_desc.json       |   7 +-
 ...t_kylin_cube_without_slr_left_join_desc.json |   7 +-
 .../invertedindex/IICapabilityChecker.java      | 182 ++++++++
 .../apache/kylin/invertedindex/IIInstance.java  |  16 +-
 .../apache/kylin/invertedindex/IIManager.java   |   2 +-
 .../apache/kylin/invertedindex/IISegment.java   |   4 +-
 .../invertedindex/index/BitMapContainer.java    |   2 +-
 .../index/CompressedValueContainer.java         |   2 +-
 .../invertedindex/index/RawTableRecord.java     |   2 +-
 .../invertedindex/index/TableRecordInfo.java    |   2 +-
 .../index/TableRecordInfoDigest.java            |   2 +-
 .../kylin/invertedindex/model/IIDesc.java       |   4 +
 .../invertedindex/IIInstanceTest.java           |   2 +-
 .../invertedindex/InvertedIndexLocalTest.java   |   2 +-
 .../kylin/job/hadoop/cube/BaseCuboidMapper.java |  76 ++--
 .../kylin/job/hadoop/cube/CubeHFileMapper.java  |   4 +-
 .../apache/kylin/job/hadoop/cube/CuboidJob.java |   5 +-
 .../kylin/job/hadoop/cube/CuboidReducer.java    |   6 +-
 .../job/hadoop/cube/MergeCuboidMapper.java      |  57 ++-
 .../job/hadoop/cube/NewBaseCuboidMapper.java    | 346 ---------------
 .../kylin/job/dataGen/FactTableGenerator.java   |   2 +-
 .../job/hadoop/cube/BaseCuboidMapperTest.java   |   7 +-
 .../job/hadoop/cube/CubeHFileMapper2Test.java   |   2 +-
 .../kylin/job/hadoop/cube/CubeReducerTest.java  |   2 +-
 .../job/hadoop/cube/MergeCuboidMapperTest.java  |   2 +-
 .../job/hadoop/cube/NDCuboidMapperTest.java     |  17 +-
 .../apache/kylin/measure/MeasureAggregator.java |  78 ++++
 .../kylin/measure/MeasureAggregators.java       |  81 ++++
 .../org/apache/kylin/measure/MeasureCodec.java  |  79 ++++
 .../apache/kylin/measure/MeasureIngester.java   |  48 +++
 .../org/apache/kylin/measure/MeasureType.java   | 146 +++++++
 .../kylin/measure/MeasureTypeFactory.java       |  93 +++++
 .../kylin/measure/basic/BasicMeasureType.java   | 152 +++++++
 .../kylin/measure/basic/BigDecimalIngester.java |  41 ++
 .../measure/basic/BigDecimalMaxAggregator.java  |  56 +++
 .../measure/basic/BigDecimalMinAggregator.java  |  57 +++
 .../measure/basic/BigDecimalSumAggregator.java  |  53 +++
 .../kylin/measure/basic/DoubleIngester.java     |  46 ++
 .../measure/basic/DoubleMaxAggregator.java      |  55 +++
 .../measure/basic/DoubleMinAggregator.java      |  55 +++
 .../measure/basic/DoubleSumAggregator.java      |  52 +++
 .../kylin/measure/basic/LongIngester.java       |  46 ++
 .../kylin/measure/basic/LongMaxAggregator.java  |  55 +++
 .../kylin/measure/basic/LongMinAggregator.java  |  55 +++
 .../kylin/measure/basic/LongSumAggregator.java  |  52 +++
 .../kylin/measure/hllc/HLLCAggregator.java      |  64 +++
 .../kylin/measure/hllc/HLLCMeasureType.java     | 127 ++++++
 .../kylin/measure/hllc/HLLCSerializer.java      |  88 ++++
 .../measure/hllc/HLLDistinctCountAggFunc.java   | 152 +++++++
 .../metadata/datatype/BigDecimalSerializer.java | 105 +++++
 .../kylin/metadata/datatype/DataType.java       | 285 +++++++++++++
 .../metadata/datatype/DataTypeSerializer.java   |  90 ++++
 .../metadata/datatype/DateTimeSerializer.java   |  57 +++
 .../metadata/datatype/DoubleSerializer.java     |  75 ++++
 .../kylin/metadata/datatype/LongSerializer.java |  82 ++++
 .../metadata/datatype/StringSerializer.java     |  52 +++
 .../measure/BigDecimalMaxAggregator.java        |  53 ---
 .../measure/BigDecimalMinAggregator.java        |  54 ---
 .../metadata/measure/BigDecimalSerializer.java  |  62 ---
 .../measure/BigDecimalSumAggregator.java        |  50 ---
 .../metadata/measure/DoubleMaxAggregator.java   |  54 ---
 .../metadata/measure/DoubleMinAggregator.java   |  54 ---
 .../metadata/measure/DoubleSerializer.java      |  55 ---
 .../metadata/measure/DoubleSumAggregator.java   |  51 ---
 .../kylin/metadata/measure/HLLCAggregator.java  |  57 ---
 .../kylin/metadata/measure/HLLCSerializer.java  |  67 ---
 .../kylin/metadata/measure/LDCAggregator.java   |  64 ---
 .../metadata/measure/LongMaxAggregator.java     |  54 ---
 .../metadata/measure/LongMinAggregator.java     |  54 ---
 .../kylin/metadata/measure/LongSerializer.java  |  56 ---
 .../metadata/measure/LongSumAggregator.java     |  51 ---
 .../metadata/measure/MeasureAggregator.java     | 102 -----
 .../metadata/measure/MeasureAggregators.java    |  82 ----
 .../kylin/metadata/measure/MeasureCodec.java    |  83 ----
 .../metadata/measure/MeasureSerializer.java     |  68 ---
 .../measure/fixedlen/FixedHLLCodec.java         |   2 +-
 .../measure/fixedlen/FixedLenMeasureCodec.java  |   5 +-
 .../measure/fixedlen/FixedPointLongCodec.java   |   2 +-
 .../apache/kylin/metadata/model/ColumnDesc.java |   1 +
 .../apache/kylin/metadata/model/DataType.java   | 290 -------------
 .../kylin/metadata/model/FunctionDesc.java      |  53 ++-
 .../kylin/metadata/model/MeasureDesc.java       |   4 -
 .../kylin/metadata/model/ParameterDesc.java     |  59 ++-
 .../apache/kylin/metadata/model/TblColRef.java  |   1 +
 .../metadata/realization/CapabilityResult.java  |  63 +++
 .../metadata/realization/IRealization.java      |  15 +-
 .../metadata/realization/SQLDigestUtil.java     |   2 +-
 .../org/apache/kylin/metadata/tuple/Tuple.java  | 164 ++++++++
 .../apache/kylin/metadata/tuple/TupleInfo.java  | 113 +++++
 .../datatype/BigDecimalSerializerTest.java      |  69 +++
 .../query/enumerator/LookupTableEnumerator.java |   2 +-
 .../kylin/query/relnode/OLAPAggregateRel.java   |  14 +-
 .../apache/kylin/query/routing/Candidate.java   | 100 +++++
 .../apache/kylin/query/routing/QueryRouter.java |  63 ++-
 .../apache/kylin/query/routing/RoutingRule.java |  34 +-
 .../AdjustForWeeklyMatchedRealization.java      | 101 -----
 .../routing/RoutingRules/CubesSortRule.java     |  67 ---
 .../RoutingRules/RealizationPriorityRule.java   |  60 ---
 .../RoutingRules/RealizationSortRule.java       |  65 ---
 .../RemoveUncapableRealizationsRule.java        |  42 --
 .../SimpleQueryMoreColumnsCubeFirstRule.java    |  50 ---
 .../routing/rules/RealizationSortRule.java      |  34 ++
 .../rules/RemoveUncapableRealizationsRule.java  |  44 ++
 .../apache/kylin/query/schema/OLAPTable.java    |  12 +-
 .../query/sqlfunc/HLLDistinctCountAggFunc.java  | 152 -------
 .../apache/kylin/query/test/IIQueryTest.java    |  18 +-
 .../apache/kylin/query/test/KylinQueryTest.java |   2 +-
 .../apache/kylin/query/test/KylinTestBase.java  |   4 +-
 .../apache/kylin/rest/service/JobService.java   |  10 +-
 .../storage/filter/BitMapFilterEvaluator.java   |   2 +-
 .../kylin/storage/hbase/ColumnValueRange.java   |   2 +-
 .../storage/hbase/CubeSegmentTupleIterator.java | 211 +++++++---
 .../kylin/storage/hbase/CubeStorageEngine.java  |  24 +-
 .../kylin/storage/hbase/CubeTupleConverter.java | 139 ++++++
 .../hbase/SerializedHBaseTupleIterator.java     |   2 +-
 .../hbase/coprocessor/AggregationCache.java     |   7 +-
 .../hbase/coprocessor/CoprocessorFilter.java    |   2 +-
 .../CoprocessorTupleFilterTranslator.java       |   2 +-
 .../endpoint/EndpointAggregationCache.java      |   2 +-
 .../endpoint/EndpointAggregators.java           |  11 +-
 .../endpoint/EndpointTupleIterator.java         |   9 +-
 .../hbase/coprocessor/endpoint/IIEndpoint.java  |   2 +-
 .../observer/AggregationScanner.java            |   2 +-
 .../observer/ObserverAggregationCache.java      |   2 +-
 .../observer/ObserverAggregators.java           |  24 +-
 .../coprocessor/observer/ObserverEnabler.java   |   2 +-
 .../coprocessor/observer/ObserverTuple.java     |   2 +-
 .../kylin/storage/hybrid/HybridInstance.java    |  31 +-
 .../storage/hybrid/HybridStorageEngine.java     |   2 +-
 .../org/apache/kylin/storage/tuple/Tuple.java   | 252 -----------
 .../apache/kylin/storage/tuple/TupleInfo.java   | 120 ------
 .../filter/BitMapFilterEvaluatorTest.java       |   2 +-
 .../kylin/storage/filter/FilterBaseTest.java    |   4 +-
 .../storage/filter/FilterEvaluateTest.java      |   2 +-
 .../kylin/storage/filter/FilterPerfTest.java    |   4 +-
 .../storage/hbase/ColumnValueRangeTest.java     |   2 +-
 .../endpoint/EndpointAggregationTest.java       |   2 +-
 169 files changed, 4764 insertions(+), 3644 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/common/src/main/java/org/apache/kylin/common/KylinConfig.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/kylin/common/KylinConfig.java b/common/src/main/java/org/apache/kylin/common/KylinConfig.java
index 7816487..8a123dc 100644
--- a/common/src/main/java/org/apache/kylin/common/KylinConfig.java
+++ b/common/src/main/java/org/apache/kylin/common/KylinConfig.java
@@ -45,7 +45,7 @@ import com.google.common.collect.Sets;
  * @author yangli9
  */
 public class KylinConfig {
-    
+
     /*
      * NO NEED TO DEFINE PUBLIC CONSTANTS FOR KEY NAMES!
      * 
@@ -272,7 +272,7 @@ public class KylinConfig {
     // ============================================================================
 
     public boolean isHiveReroutingEnabled() {
-	return Boolean.parseBoolean(getOptional("kylin.route.hive.enabled", "false")); 
+        return Boolean.parseBoolean(getOptional("kylin.route.hive.enabled", "false"));
     }
 
     public String getHiveRerouteUrl() {
@@ -336,7 +336,7 @@ public class KylinConfig {
         }
         return getFileName(kylinHome + File.separator + "lib", JOB_JAR_NAME_PATTERN);
     }
-    
+
     public String getKylinJobMRLibDir() {
         return getOptional("kylin.job.mr.lib.dir", "");
     }
@@ -499,7 +499,7 @@ public class KylinConfig {
     public int getHBaseKeyValueSize() {
         return Integer.parseInt(this.getOptional("kylin.hbase.client.keyvalue.maxsize", "10485760"));
     }
-    
+
     public int getHBaseScanCacheRows() {
         return Integer.parseInt(this.getOptional("kylin.hbase.scan.cache_rows", "1024"));
     }
@@ -507,7 +507,11 @@ public class KylinConfig {
     public int getHBaseScanMaxResultSize() {
         return Integer.parseInt(this.getOptional("kylin.hbase.scan.max_result_size", "" + (5 * 1024 * 1024))); // 5 MB
     }
-    
+
+    /**
+     * Reads the boolean setting "kylin.query.ignore_unknown_function".
+     *
+     * @return the parsed value returned by getOptional(), which is given "false" as its default
+     */
+    public boolean isQueryIgnoreUnknownFunction() {
+        final String configured = this.getOptional("kylin.query.ignore_unknown_function", "false");
+        return Boolean.parseBoolean(configured);
+    }
+
     public String getHbaseDefaultCompressionCodec() {
         return getOptional(HTABLE_DEFAULT_COMPRESSION_CODEC, "");
 
@@ -704,26 +708,26 @@ public class KylinConfig {
     public int getHBaseRegionCountMax() {
         return Integer.parseInt(getOptional(HBASE_REGION_COUNT_MAX, "500"));
     }
-    
+
     public int getHBaseRegionCut(String capacity) {
         String cut;
         switch (capacity) {
-            case "SMALL":
-                cut = getProperty(HBASE_REGION_CUT_SMALL, "10");
-                break;
-            case "MEDIUM":
-                cut = getProperty(HBASE_REGION_CUT_MEDIUM, "20");
-                break;
-            case "LARGE":
-                cut = getProperty(HBASE_REGION_CUT_LARGE, "100");
-                break;
-            default:
-                throw new IllegalArgumentException("Capacity not recognized: " + capacity);
+        case "SMALL":
+            cut = getProperty(HBASE_REGION_CUT_SMALL, "10");
+            break;
+        case "MEDIUM":
+            cut = getProperty(HBASE_REGION_CUT_MEDIUM, "20");
+            break;
+        case "LARGE":
+            cut = getProperty(HBASE_REGION_CUT_LARGE, "100");
+            break;
+        default:
+            throw new IllegalArgumentException("Capacity not recognized: " + capacity);
         }
 
         return Integer.valueOf(cut);
     }
-    
+
     public String getProperty(String key, String defaultValue) {
         return kylinConfig.getString(key, defaultValue);
     }

http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/common/src/main/java/org/apache/kylin/common/hll/HyperLogLogPlusCounter.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/kylin/common/hll/HyperLogLogPlusCounter.java b/common/src/main/java/org/apache/kylin/common/hll/HyperLogLogPlusCounter.java
index 5ee323e..d37d6db 100644
--- a/common/src/main/java/org/apache/kylin/common/hll/HyperLogLogPlusCounter.java
+++ b/common/src/main/java/org/apache/kylin/common/hll/HyperLogLogPlusCounter.java
@@ -6,19 +6,20 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- *
+ * 
  *     http://www.apache.org/licenses/LICENSE-2.0
- *
+ * 
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
- */
+*/
 
 package org.apache.kylin.common.hll;
 
 import java.io.IOException;
+import java.io.Serializable;
 import java.nio.ByteBuffer;
 import java.nio.charset.Charset;
 import java.util.Arrays;
@@ -30,13 +31,14 @@ import com.google.common.hash.Hashing;
 
 /**
  * About compression, test on HLLC data shows
- * 
+ *
  * - LZF compression ratio is around 65%-80%, fast
  * - GZIP compression ratio is around 41%-46%, very slow
- * 
+ *
  * @author yangli9
  */
-public class HyperLogLogPlusCounter implements Comparable<HyperLogLogPlusCounter> {
+@SuppressWarnings("serial")
+public class HyperLogLogPlusCounter implements Serializable, Comparable<HyperLogLogPlusCounter> {
 
     private final int p;
     private final int m;
@@ -108,10 +110,6 @@ public class HyperLogLogPlusCounter implements Comparable<HyperLogLogPlusCounter
         return new HLLCSnapshot(this).getCountEstimate();
     }
 
-    public int getMemBytes() {
-        return 12 + m;
-    }
-
     public double getErrorRate() {
         return 1.04 / Math.sqrt(m);
     }
@@ -125,6 +123,11 @@ public class HyperLogLogPlusCounter implements Comparable<HyperLogLogPlusCounter
         return size;
     }
 
+    /** Renders this counter as the string form of its estimated count. */
+    @Override
+    public String toString() {
+        return String.valueOf(getCountEstimate());
+    }
+
     // ============================================================================
 
     // a memory efficient snapshot of HLL registers which can yield count
@@ -176,7 +179,7 @@ public class HyperLogLogPlusCounter implements Comparable<HyperLogLogPlusCounter
 
         // decide output scheme -- map (3*size bytes) or array (2^p bytes)
         byte scheme;
-        if ((indexLen + 1) * size < m)
+        if (5 + (indexLen + 1) * size < m) // 5 is max len of vint
             scheme = 0; // map
         else
             scheme = 1; // array
@@ -186,15 +189,14 @@ public class HyperLogLogPlusCounter implements Comparable<HyperLogLogPlusCounter
             BytesUtil.writeVInt(size, out);
             for (int i = 0; i < m; i++) {
                 if (registers[i] > 0) {
-                    BytesUtil.writeUnsigned(i, indexLen, out);
+                    writeUnsigned(i, indexLen, out);
                     out.put(registers[i]);
                 }
             }
-        } else { // array scheme
-            for (int i = 0; i < m; i++) {
-                out.put(registers[i]);
-            }
-        }
+        } else if (scheme == 1) { // array scheme
+            out.put(registers);
+        } else
+            throw new IllegalStateException();
     }
 
     public void readRegisters(ByteBuffer in) throws IOException {
@@ -207,12 +209,34 @@ public class HyperLogLogPlusCounter implements Comparable<HyperLogLogPlusCounter
                 throw new IllegalArgumentException("register size (" + size + ") cannot be larger than m (" + m + ")");
             int indexLen = getRegisterIndexSize();
             for (int i = 0; i < size; i++) {
-                int key = BytesUtil.readUnsigned(in, indexLen);
+                int key = readUnsigned(in, indexLen);
                 registers[key] = in.get();
             }
-        } else { // array scheme
+        } else if (scheme == 1) { // array scheme
             in.get(registers);
+        } else
+            throw new IllegalStateException();
+    }
+
+    /**
+     * Computes how many bytes the serialized registers starting at the buffer's
+     * current position occupy, without consuming them.
+     * <p>
+     * The layout inspected is: one scheme byte, followed either by a vint entry
+     * count plus (index bytes + 1 register byte) per entry (scheme 0, map), or by
+     * m register bytes (scheme 1, array).
+     *
+     * @param in buffer positioned at the scheme byte; its position is restored before returning
+     * @return total length in bytes, including the scheme byte and, for the map scheme, the vint
+     * @throws IllegalStateException if the scheme byte is neither 0 nor 1, consistent with readRegisters()
+     */
+    public int peekLength(ByteBuffer in) {
+        int mark = in.position();
+        try {
+            byte scheme = in.get();
+            if (scheme == 0) { // map scheme
+                int size = BytesUtil.readVInt(in);
+                int indexLen = getRegisterIndexSize();
+                return in.position() - mark + (indexLen + 1) * size;
+            } else if (scheme == 1) { // array scheme
+                return in.position() - mark + m;
+            } else {
+                throw new IllegalStateException("unknown HLL register scheme: " + scheme);
+            }
+        } finally {
+            // always rewind, even when the scheme byte is invalid or a read fails
+            in.position(mark);
+        }
+    }
+
+    /**
+     * @return one scheme byte plus m register bytes, i.e. the length of the array scheme
+     */
+    public int maxLength() {
+        return m + 1;
+    }
 
     public void writeRegistersArray(final ByteBuffer out) {
@@ -288,4 +312,29 @@ public class HyperLogLogPlusCounter implements Comparable<HyperLogLogPlusCounter
             System.out.println("HLLC" + p + ",\t" + size + " bytes,\t68% err<" + er + "%" + ",\t95% err<" + er2 + "%" + ",\t99.7% err<" + er3 + "%");
         }
     }
+
+    /**
+     * Writes the lowest <code>size</code> bytes of <code>num</code>, least significant byte first.
+     * @param num  value to write; it is shifted right by 8 bits after each byte is emitted
+     * @param size number of bytes to emit
+     * @param out  destination buffer, receives <code>size</code> bytes via put()
+     */
+    public static void writeUnsigned(int num, int size, ByteBuffer out) {
+        for (int i = 0; i < size; i++) {
+            out.put((byte) num);
+            num >>>= 8;
+        }
+    }
+
+    /**
+     * Reads <code>size</code> bytes and assembles them into an int, treating the first
+     * byte read as the least significant one (the byte order produced by writeUnsigned()).
+     *
+     * @param in   source buffer, <code>size</code> bytes are consumed via get()
+     * @param size number of bytes to consume
+     * @return the assembled value
+     */
+    public static int readUnsigned(ByteBuffer in, int size) {
+        int integer = 0;
+        int mask = 0xff;
+        int shift = 0;
+        for (int i = 0; i < size; i++) {
+            // the mask keeps only the 8 bits of this byte, dropping sign-extension bits
+            integer |= (in.get() << shift) & mask;
+            mask = mask << 8;
+            shift += 8;
+        }
+        return integer;
+    }
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/common/src/main/java/org/apache/kylin/common/util/BytesUtil.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/kylin/common/util/BytesUtil.java b/common/src/main/java/org/apache/kylin/common/util/BytesUtil.java
index 54e5871..03da261 100644
--- a/common/src/main/java/org/apache/kylin/common/util/BytesUtil.java
+++ b/common/src/main/java/org/apache/kylin/common/util/BytesUtil.java
@@ -230,6 +230,8 @@ public class BytesUtil {
         return bytes == null ? null : Bytes.toString(bytes);
     }
 
+    
+    
     public static void writeAsciiString(String str, ByteBuffer out) {
         if (str == null) {
             BytesUtil.writeVInt(-1, out);
@@ -320,6 +322,20 @@ public class BytesUtil {
         return array;
     }
 
+
+    public static int peekByteArrayLength(ByteBuffer in) {
+        int start = in.position();
+        int arrayLen = readVInt(in);
+        int sizeLen = in.position() - start;
+        in.position(start);
+
+        if (arrayLen < 0)
+            return sizeLen;
+        else
+            return sizeLen + arrayLen;
+    }
+
+    
     public static void writeBooleanArray(boolean[] array, ByteBuffer out) {
         if (array == null) {
             writeVInt(-1, out);

http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/common/src/main/java/org/apache/kylin/common/util/Dictionary.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/kylin/common/util/Dictionary.java b/common/src/main/java/org/apache/kylin/common/util/Dictionary.java
new file mode 100644
index 0000000..0168609
--- /dev/null
+++ b/common/src/main/java/org/apache/kylin/common/util/Dictionary.java
@@ -0,0 +1,199 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kylin.common.util;
+
+import java.io.PrintStream;
+import java.io.UnsupportedEncodingException;
+
+import org.apache.hadoop.io.Writable;
+import org.apache.kylin.common.util.BytesUtil;
+
+/**
+ * A bi-way dictionary that maps from dimension/column values to IDs and vice
+ * versa. By storing IDs instead of real values, the size of cube is
+ * significantly reduced.
+ * 
+ * - IDs are the smallest integers possible for the cardinality of a column, for
+ * the purpose of minimal storage space
+ * - IDs preserve ordering of values, such that range query can be applied to IDs directly
+ * 
+ * A dictionary once built, is immutable. This allows optimal memory footprint
+ * by e.g. flatten the Trie structure into a byte array, replacing node pointers
+ * with array offsets.
+ * 
+ * @author yangli9
+ */
+abstract public class Dictionary<T> implements Writable {
+
+    public static final byte NULL = (byte) 0xff;
+
+    // ID with all bits set to 1 is reserved for the NULL value; indexed by ID size in bytes (see nullId())
+    public static final int NULL_ID[] = new int[] { 0, 0xff, 0xffff, 0xffffff, 0xffffffff };
+
+    abstract public int getMinId();
+
+    abstract public int getMaxId();
+    
+    public int getSize() {
+        return getMaxId() - getMinId() + 1;
+    }
+
+    /**
+     * @return the size of an ID in bytes, determined by the cardinality of
+     *         column
+     */
+    abstract public int getSizeOfId();
+
+    /**
+     * @return the (maximum) size of value in bytes, determined by the longest
+     *         value of column
+     */
+    abstract public int getSizeOfValue();
+
+    /**
+     * Convenient form of <code>getIdFromValue(value, 0)</code>
+     */
+    final public int getIdFromValue(T value) throws IllegalArgumentException {
+        return getIdFromValue(value, 0);
+    }
+
+    /**
+     * Returns the ID integer of given value. In case of not found
+     * <p>
+     * - if roundingFlag=0, throw IllegalArgumentException; <br>
+     * - if roundingFlag<0, the closest smaller ID integer if exist; <br>
+     * - if roundingFlag>0, the closest bigger ID integer if exist. <br>
+     * <p>
+     * The implementation often has cache, thus faster than the byte[] version getIdFromValueBytes()
+     * 
+     * @throws IllegalArgumentException
+     *             if value is not found in dictionary and rounding is off or
+     *             failed
+     */
+    final public int getIdFromValue(T value, int roundingFlag) throws IllegalArgumentException {
+        if (isNullObjectForm(value))
+            return nullId();
+        else
+            return getIdFromValueImpl(value, roundingFlag);
+    }
+
+    protected boolean isNullObjectForm(T value) {
+        return value == null;
+    }
+
+    abstract protected int getIdFromValueImpl(T value, int roundingFlag);
+
+    /**
+     * @return the value corresponds to the given ID
+     * @throws IllegalArgumentException
+     *             if ID is not found in dictionary
+     */
+    final public T getValueFromId(int id) {
+        if (isNullId(id))
+            return null;
+        else
+            return getValueFromIdImpl(id);
+    }
+
+    abstract protected T getValueFromIdImpl(int id);
+
+    /**
+     * Convenient form of
+     * <code>getIdFromValueBytes(value, offset, len, 0)</code>
+     */
+    final public int getIdFromValueBytes(byte[] value, int offset, int len) {
+        return getIdFromValueBytes(value, offset, len, 0);
+    }
+
+    /**
+     * A lower level API, return ID integer from raw value bytes. In case of not found
+     * <p>
+     * - if roundingFlag=0, throw IllegalArgumentException; <br>
+     * - if roundingFlag<0, the closest smaller ID integer if exist; <br> 
+     * - if roundingFlag>0, the closest bigger ID integer if exist. <br>
+     * <p>
+     * Bypassing the cache layer, this could be significantly slower than getIdFromValue(T value).
+     * 
+     * @throws IllegalArgumentException
+     *             if value is not found in dictionary and rounding is off or failed
+     */
+    final public int getIdFromValueBytes(byte[] value, int offset, int len, int roundingFlag) {
+        if (isNullByteForm(value, offset, len))
+            return nullId();
+        else
+            return getIdFromValueBytesImpl(value, offset, len, roundingFlag);
+    }
+
+    protected boolean isNullByteForm(byte[] value, int offset, int len) {
+        return value == null;
+    }
+
+    abstract protected int getIdFromValueBytesImpl(byte[] value, int offset, int len, int roundingFlag);
+
+    /**
+     * A lower level API, get byte values from ID, return the number of bytes
+     * written. Bypassing the cache layer, this could be significantly slower
+     * than getValueFromId(int id).
+     *
+     * @return size of value bytes, 0 if empty string, -1 if null
+     *
+     * @throws IllegalArgumentException
+     *             if ID is not found in dictionary
+     */
+    final public int getValueBytesFromId(int id, byte[] returnValue, int offset) {
+        if (isNullId(id))
+            return -1;
+        else
+            return getValueBytesFromIdImpl(id, returnValue, offset);
+    }
+
+    abstract protected int getValueBytesFromIdImpl(int id, byte[] returnValue, int offset);
+
+    abstract public void dump(PrintStream out);
+
+    public int nullId() {
+        return NULL_ID[getSizeOfId()];
+    }
+
+    public boolean isNullId(int id) {
+        int nullId = NULL_ID[getSizeOfId()];
+        return (nullId & id) == nullId;
+    }
+
+    /** utility that converts a dictionary ID to string, preserving order */
+    public static String dictIdToString(byte[] idBytes, int offset, int length) {
+        try {
+            return new String(idBytes, offset, length, "ISO-8859-1");
+        } catch (UnsupportedEncodingException e) {
+            // never happen
+            return null;
+        }
+    }
+
+    /** the reverse of dictIdToString(), returns integer ID */
+    public static int stringToDictId(String str) {
+        try {
+            byte[] bytes = str.getBytes("ISO-8859-1");
+            return BytesUtil.readUnsigned(bytes, 0, bytes.length);
+        } catch (UnsupportedEncodingException e) {
+            // never happen
+            return 0;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/main/java/org/apache/kylin/cube/CubeCapabilityChecker.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/org/apache/kylin/cube/CubeCapabilityChecker.java b/cube/src/main/java/org/apache/kylin/cube/CubeCapabilityChecker.java
index e713774..5668030 100644
--- a/cube/src/main/java/org/apache/kylin/cube/CubeCapabilityChecker.java
+++ b/cube/src/main/java/org/apache/kylin/cube/CubeCapabilityChecker.java
@@ -6,84 +6,113 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- *
+ * 
  *     http://www.apache.org/licenses/LICENSE-2.0
- *
+ * 
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
- */
+*/
 
 package org.apache.kylin.cube;
 
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.HashSet;
+import java.util.Iterator;
 import java.util.List;
+import java.util.Set;
 
 import org.apache.kylin.cube.model.CubeDesc;
 import org.apache.kylin.cube.model.DimensionDesc;
+import org.apache.kylin.measure.MeasureType;
+import org.apache.kylin.measure.basic.BasicMeasureType;
 import org.apache.kylin.metadata.model.FunctionDesc;
 import org.apache.kylin.metadata.model.JoinDesc;
+import org.apache.kylin.metadata.model.MeasureDesc;
 import org.apache.kylin.metadata.model.TblColRef;
+import org.apache.kylin.metadata.realization.CapabilityResult;
+import org.apache.kylin.metadata.realization.CapabilityResult.CapabilityInfluence;
 import org.apache.kylin.metadata.realization.SQLDigest;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.google.common.collect.Sets;
+
 /**
- * Created by Hongbin Ma(Binmahone) on 1/8/15.
  */
 public class CubeCapabilityChecker {
     private static final Logger logger = LoggerFactory.getLogger(CubeCapabilityChecker.class);
 
-    public static boolean check(CubeInstance cube, SQLDigest digest, boolean allowWeekMatch) {
+    public static CapabilityResult check(CubeInstance cube, SQLDigest digest) {
+        CapabilityResult result = new CapabilityResult();
+        result.capable = false;
 
-        // retrieve members from olapContext
-        Collection<TblColRef> dimensionColumns = CubeDimensionDeriver.getDimensionColumns(digest.groupbyColumns, digest.filterColumns);
-        Collection<FunctionDesc> functions = digest.aggregations;
-        Collection<TblColRef> metricsColumns = digest.metricColumns;
-        Collection<JoinDesc> joins = digest.joinDescs;
+        // match joins
+        boolean isJoinMatch = isJoinMatch(digest.joinDescs, cube);
+        if (!isJoinMatch) {
+            logger.info("Exclude cube " + cube.getName() + " because unmatched joins");
+            return result;
+        }
 
-        // match dimensions & aggregations & joins
+        // dimensions & measures
+        Collection<TblColRef> dimensionColumns = getDimensionColumns(digest);
+        Collection<FunctionDesc> aggrFunctions = digest.aggregations;
+        Collection<TblColRef> unmatchedDimensions = unmatchedDimensions(dimensionColumns, cube);
+        Collection<FunctionDesc> unmatchedAggregations = unmatchedAggregations(aggrFunctions, cube);
 
-        boolean isOnline = cube.isReady();
+        // try custom measure types
+        if (!unmatchedDimensions.isEmpty() || !unmatchedAggregations.isEmpty()) {
+            tryCustomMeasureTypes(unmatchedDimensions, unmatchedAggregations, digest, cube, result);
+        }
 
-        boolean matchDimensions = isMatchedWithDimensions(dimensionColumns, cube);
-        boolean matchAggregation = isMatchedWithAggregations(functions, cube);
-        boolean matchJoin = isMatchedWithJoins(joins, cube);
+        // try dimension-as-measure
+        if (!unmatchedAggregations.isEmpty()) {
+            tryDimensionAsMeasures(unmatchedAggregations, digest, cube, result);
+        }
 
-        // Some cubes are not "perfectly" match, but still save them in case of usage
-        if (allowWeekMatch && isOnline && matchDimensions && !matchAggregation && matchJoin) {
-            // sometimes metrics are indeed dimensions
-            // e.g. select min(cal_dt) from ..., where cal_dt is actually a dimension
-            if (isWeaklyMatchedWithAggregations(functions, metricsColumns, cube)) {
-                logger.info("Weakly matched cube found " + cube.getName());
-                return true;
-            }
+        if (!unmatchedDimensions.isEmpty()) {
+            logger.info("Exclude cube " + cube.getName() + " because unmatched dimensions");
+            return result;
         }
 
-        if (!isOnline || !matchDimensions || !matchAggregation || !matchJoin) {
-            logger.info("Exclude cube " + cube.getName() + " because " + " isOnlne=" + isOnline + ",matchDimensions=" + matchDimensions + ",matchAggregation=" + matchAggregation + ",matchJoin=" + matchJoin);
-            return false;
+        if (!unmatchedAggregations.isEmpty()) {
+            logger.info("Exclude cube " + cube.getName() + " because unmatched aggregations");
+            return result;
         }
 
-        return true;
+        // cost will be minded by caller
+        result.capable = true;
+        return result;
     }
 
-    private static boolean isMatchedWithDimensions(Collection<TblColRef> dimensionColumns, CubeInstance cube) {
+    /** merges the group-by columns and the filter columns of the digest into one duplicate-free collection */
+    private static Collection<TblColRef> getDimensionColumns(SQLDigest sqlDigest) {
+        Collection<TblColRef> merged = new HashSet<TblColRef>();
+        merged.addAll(sqlDigest.groupbyColumns);
+        merged.addAll(sqlDigest.filterColumns);
+        return merged;
+    }
+
+    private static Set<TblColRef> unmatchedDimensions(Collection<TblColRef> dimensionColumns, CubeInstance cube) {
+        HashSet<TblColRef> result = Sets.newHashSet(dimensionColumns);
         CubeDesc cubeDesc = cube.getDescriptor();
-        boolean matchAgg = cubeDesc.listDimensionColumnsIncludingDerived().containsAll(dimensionColumns);
-        return matchAgg;
+        result.removeAll(cubeDesc.listDimensionColumnsIncludingDerived());
+        return result;
     }
 
-    private static boolean isMatchedWithAggregations(Collection<FunctionDesc> aggregations, CubeInstance cube) {
+    private static Set<FunctionDesc> unmatchedAggregations(Collection<FunctionDesc> aggregations, CubeInstance cube) {
+        HashSet<FunctionDesc> result = Sets.newHashSet(aggregations);
         CubeDesc cubeDesc = cube.getDescriptor();
-        boolean matchAgg = cubeDesc.listAllFunctions().containsAll(aggregations);
-        return matchAgg;
+        result.removeAll(cubeDesc.listAllFunctions());
+        return result;
     }
 
-    private static boolean isMatchedWithJoins(Collection<JoinDesc> joins, CubeInstance cube) {
+    private static boolean isJoinMatch(Collection<JoinDesc> joins, CubeInstance cube) {
         CubeDesc cubeDesc = cube.getDescriptor();
 
         List<JoinDesc> cubeJoins = new ArrayList<JoinDesc>(cubeDesc.getDimensions().size());
@@ -118,30 +147,50 @@ public class CubeCapabilityChecker {
         return true;
     }
 
-    private static boolean isWeaklyMatchedWithAggregations(Collection<FunctionDesc> aggregations, Collection<TblColRef> metricColumns, CubeInstance cube) {
+    private static void tryDimensionAsMeasures(Collection<FunctionDesc> unmatchedAggregations, SQLDigest digest, CubeInstance cube, CapabilityResult result) {
         CubeDesc cubeDesc = cube.getDescriptor();
         Collection<FunctionDesc> cubeFuncs = cubeDesc.listAllFunctions();
 
-        boolean matched = true;
-        for (FunctionDesc functionDesc : aggregations) {
-            if (cubeFuncs.contains(functionDesc))
+        Iterator<FunctionDesc> it = unmatchedAggregations.iterator();
+        while (it.hasNext()) {
+            FunctionDesc functionDesc = it.next();
+
+            if (cubeFuncs.contains(functionDesc)) {
+                it.remove();
                 continue;
+            }
 
-            // only inverted-index cube does not have count, and let calcite handle in this case
-            if (functionDesc.isCount())
+            // let calcite handle count
+            if (functionDesc.isCount()) {
+                it.remove();
                 continue;
+            }
 
-            if (functionDesc.isCountDistinct()) // calcite can not handle distinct count
-                matched = false;
+            // calcite can do aggregation from columns on-the-fly
+            List<TblColRef> neededCols = functionDesc.getParameter().getColRefs();
+            if (neededCols.size() > 0 && cubeDesc.listDimensionColumnsIncludingDerived().containsAll(neededCols)) {
+                result.influences.add(new CapabilityResult.DimensionAsMeasure(functionDesc));
+                it.remove();
+                continue;
+            }
+        }
+    }
 
-            TblColRef col = null;
-            if (functionDesc.getParameter().getColRefs().size() > 0)
-                col = functionDesc.getParameter().getColRefs().get(0);
+    // custom measure types can cover unmatched dimensions or measures
+    private static void tryCustomMeasureTypes(Collection<TblColRef> unmatchedDimensions, Collection<FunctionDesc> unmatchedAggregations, SQLDigest digest, CubeInstance cube, CapabilityResult result) {
+        CubeDesc cubeDesc = cube.getDescriptor();
+        for (MeasureDesc measure : cubeDesc.getMeasures()) {
+            if (unmatchedDimensions.isEmpty() && unmatchedAggregations.isEmpty())
+                break;
 
-            if (col == null || !cubeDesc.listDimensionColumnsIncludingDerived().contains(col)) {
-                matched = false;
-            }
+            MeasureType<?> measureType = measure.getFunction().getMeasureType();
+            if (measureType instanceof BasicMeasureType)
+                continue;
+
+            CapabilityInfluence inf = measureType.influenceCapabilityCheck(unmatchedDimensions, unmatchedAggregations, digest, measure);
+            if (inf != null)
+                result.influences.add(inf);
         }
-        return matched;
     }
+
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/main/java/org/apache/kylin/cube/CubeInstance.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/org/apache/kylin/cube/CubeInstance.java b/cube/src/main/java/org/apache/kylin/cube/CubeInstance.java
index 81a64e8..7773351 100644
--- a/cube/src/main/java/org/apache/kylin/cube/CubeInstance.java
+++ b/cube/src/main/java/org/apache/kylin/cube/CubeInstance.java
@@ -28,6 +28,7 @@ import org.apache.kylin.cube.model.CubeDesc;
 import org.apache.kylin.metadata.model.MeasureDesc;
 import org.apache.kylin.metadata.model.SegmentStatusEnum;
 import org.apache.kylin.metadata.model.TblColRef;
+import org.apache.kylin.metadata.realization.CapabilityResult;
 import org.apache.kylin.metadata.realization.IRealization;
 import org.apache.kylin.metadata.realization.RealizationStatusEnum;
 import org.apache.kylin.metadata.realization.RealizationType;
@@ -321,12 +322,20 @@ public class CubeInstance extends RootPersistentEntity implements IRealization {
     }
 
     @Override
-    public boolean isCapable(SQLDigest digest) {
-        return CubeCapabilityChecker.check(this, digest, true);
+    public CapabilityResult isCapable(SQLDigest digest) {
+        CapabilityResult result = CubeCapabilityChecker.check(this, digest);
+        if (result.capable) {
+            result.cost = getCost(digest);
+            for (CapabilityResult.CapabilityInfluence i : result.influences) {
+                result.cost *= (i.suggestCostMultiplier() == 0) ? 1.0 : i.suggestCostMultiplier();
+            }
+        } else {
+            result.cost = -1;
+        }
+        return result;
     }
 
-    @Override
-    public int getCost(SQLDigest digest) {
+    private int getCost(SQLDigest digest) {
         return cost;
     }
 
@@ -353,9 +362,6 @@ public class CubeInstance extends RootPersistentEntity implements IRealization {
         if (!this.getDescriptor().getModel().getPartitionDesc().isPartitioned())
             return false;
 
-        if (this.getDescriptor().hasHolisticCountDistinctMeasures())
-            return false;
-
         return this.getDescriptor().getAutoMergeTimeRanges() != null && this.getDescriptor().getAutoMergeTimeRanges().length > 0;
     }
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/main/java/org/apache/kylin/cube/CubeManager.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/org/apache/kylin/cube/CubeManager.java b/cube/src/main/java/org/apache/kylin/cube/CubeManager.java
index b3eeeef..d3bbf59 100644
--- a/cube/src/main/java/org/apache/kylin/cube/CubeManager.java
+++ b/cube/src/main/java/org/apache/kylin/cube/CubeManager.java
@@ -36,11 +36,11 @@ import org.apache.kylin.common.persistence.ResourceStore;
 import org.apache.kylin.common.persistence.Serializer;
 import org.apache.kylin.common.restclient.Broadcaster;
 import org.apache.kylin.common.restclient.CaseInsensitiveStringCache;
+import org.apache.kylin.common.util.Dictionary;
 import org.apache.kylin.common.util.Pair;
 import org.apache.kylin.cube.cuboid.Cuboid;
 import org.apache.kylin.cube.model.CubeDesc;
 import org.apache.kylin.cube.model.DimensionDesc;
-import org.apache.kylin.dict.Dictionary;
 import org.apache.kylin.dict.DictionaryInfo;
 import org.apache.kylin.dict.DictionaryManager;
 import org.apache.kylin.dict.lookup.HiveTable;
@@ -176,7 +176,7 @@ public class CubeManager implements IRealizationProvider {
     /**
      * return null if no dictionary for given column
      */
-    public Dictionary<?> getDictionary(CubeSegment cubeSeg, TblColRef col) {
+    public Dictionary<String> getDictionary(CubeSegment cubeSeg, TblColRef col) {
         DictionaryInfo info = null;
         try {
             DictionaryManager dictMgr = getDictionaryManager();
@@ -193,7 +193,7 @@ public class CubeManager implements IRealizationProvider {
             throw new IllegalStateException("Failed to get dictionary for cube segment" + cubeSeg + ", col" + col, e);
         }
 
-        return info.getDictionaryObject();
+        return (Dictionary<String>) info.getDictionaryObject();
     }
 
     public SnapshotTable buildSnapshotTable(CubeSegment cubeSeg, String lookupTable) throws IOException {

http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/main/java/org/apache/kylin/cube/CubeSegment.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/org/apache/kylin/cube/CubeSegment.java b/cube/src/main/java/org/apache/kylin/cube/CubeSegment.java
index 5db3251..12f0bf7 100644
--- a/cube/src/main/java/org/apache/kylin/cube/CubeSegment.java
+++ b/cube/src/main/java/org/apache/kylin/cube/CubeSegment.java
@@ -24,8 +24,9 @@ import java.util.Map;
 import java.util.TimeZone;
 import java.util.concurrent.ConcurrentHashMap;
 
+import com.google.common.collect.Maps;
 import org.apache.kylin.cube.model.CubeDesc;
-import org.apache.kylin.dict.Dictionary;
+import org.apache.kylin.common.util.Dictionary;
 import org.apache.kylin.dict.ISegment;
 import org.apache.kylin.metadata.model.SegmentStatusEnum;
 import org.apache.kylin.metadata.model.TblColRef;
@@ -261,10 +262,19 @@ public class CubeSegment implements Comparable<CubeSegment>, ISegment {
     }
 
     @Override
-    public Dictionary<?> getDictionary(TblColRef col) {
+    public Dictionary<String> getDictionary(TblColRef col) {
         return CubeManager.getInstance(this.getCubeInstance().getConfig()).getDictionary(this, col);
     }
 
+    /**
+     * Builds a map from every column listed by
+     * CubeDesc.getAllColumnsNeedDictionary() to the result of getDictionary()
+     * for that column.
+     *
+     * @return a new map, one entry per distinct column
+     */
+    public Map<TblColRef, Dictionary<String>> buildDictionaryMap() {
+        Map<TblColRef, Dictionary<String>> result = Maps.newHashMap();
+        for (TblColRef col : getCubeDesc().getAllColumnsNeedDictionary()) {
+            // getDictionary() already returns Dictionary<String>, no cast required
+            result.put(col, getDictionary(col));
+        }
+        return result;
+    }
+
+
     public void validate() {
         if (cubeInstance.getDescriptor().getModel().getPartitionDesc().isPartitioned() && dateRangeStart >= dateRangeEnd)
             throw new IllegalStateException("dateRangeStart(" + dateRangeStart + ") must be greater than dateRangeEnd(" + dateRangeEnd + ") in segment " + this);

http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/main/java/org/apache/kylin/cube/estimation/CubeSizeEstimationCLI.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/org/apache/kylin/cube/estimation/CubeSizeEstimationCLI.java b/cube/src/main/java/org/apache/kylin/cube/estimation/CubeSizeEstimationCLI.java
index 1172d66..188b157 100644
--- a/cube/src/main/java/org/apache/kylin/cube/estimation/CubeSizeEstimationCLI.java
+++ b/cube/src/main/java/org/apache/kylin/cube/estimation/CubeSizeEstimationCLI.java
@@ -34,7 +34,7 @@ import org.apache.kylin.cube.model.DimensionDesc;
 import org.apache.kylin.cube.model.HierarchyDesc;
 import org.apache.kylin.cube.model.RowKeyColDesc;
 import org.apache.kylin.cube.model.RowKeyDesc;
-import org.apache.kylin.metadata.model.DataType;
+import org.apache.kylin.metadata.datatype.DataType;
 import org.apache.kylin.metadata.model.MeasureDesc;
 
 /**
@@ -140,7 +140,7 @@ public class CubeSizeEstimationCLI {
         int space = 0;
         for (MeasureDesc measureDesc : cubeDesc.getMeasures()) {
             DataType returnType = measureDesc.getFunction().getReturnDataType();
-            space += returnType.getSpaceEstimate();
+            space += returnType.getStorageBytesEstimate();
         }
         return space;
     }

http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/main/java/org/apache/kylin/cube/kv/AbstractRowKeyEncoder.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/org/apache/kylin/cube/kv/AbstractRowKeyEncoder.java b/cube/src/main/java/org/apache/kylin/cube/kv/AbstractRowKeyEncoder.java
index c923b33..532950b 100644
--- a/cube/src/main/java/org/apache/kylin/cube/kv/AbstractRowKeyEncoder.java
+++ b/cube/src/main/java/org/apache/kylin/cube/kv/AbstractRowKeyEncoder.java
@@ -22,7 +22,7 @@ import java.util.Map;
 
 import org.apache.kylin.cube.CubeSegment;
 import org.apache.kylin.cube.cuboid.Cuboid;
-import org.apache.kylin.dict.Dictionary;
+import org.apache.kylin.common.util.Dictionary;
 import org.apache.kylin.metadata.model.TblColRef;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java b/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java
index 1a84abb..00ecd46 100644
--- a/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java
+++ b/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java
@@ -22,7 +22,7 @@ import java.util.Arrays;
 
 import org.apache.kylin.common.util.Bytes;
 import org.apache.kylin.common.util.BytesUtil;
-import org.apache.kylin.dict.Dictionary;
+import org.apache.kylin.common.util.Dictionary;
 import org.apache.kylin.dict.ISegment;
 import org.apache.kylin.metadata.model.TblColRef;
 import org.slf4j.Logger;

http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnOrder.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnOrder.java b/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnOrder.java
index 7a64132..64bc813 100644
--- a/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnOrder.java
+++ b/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnOrder.java
@@ -21,7 +21,7 @@ package org.apache.kylin.cube.kv;
 import java.util.Collection;
 import java.util.Comparator;
 
-import org.apache.kylin.metadata.model.DataType;
+import org.apache.kylin.metadata.datatype.DataType;
 
 /**
  * @author yangli9

http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/main/java/org/apache/kylin/cube/kv/RowValueDecoder.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/org/apache/kylin/cube/kv/RowValueDecoder.java b/cube/src/main/java/org/apache/kylin/cube/kv/RowValueDecoder.java
index f90a88d..5fe4e2e 100644
--- a/cube/src/main/java/org/apache/kylin/cube/kv/RowValueDecoder.java
+++ b/cube/src/main/java/org/apache/kylin/cube/kv/RowValueDecoder.java
@@ -25,11 +25,9 @@ import java.util.Collection;
 import java.util.List;
 
 import org.apache.hadoop.io.DoubleWritable;
-import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.kylin.cube.model.HBaseColumnDesc;
-import org.apache.kylin.metadata.measure.MeasureCodec;
+import org.apache.kylin.measure.MeasureCodec;
 import org.apache.kylin.metadata.model.FunctionDesc;
 import org.apache.kylin.metadata.model.MeasureDesc;
 
@@ -80,14 +78,19 @@ public class RowValueDecoder implements Cloneable {
         for (int i = 0; i < mapredObjs.length; i++) {
             Object o = mapredObjs[i];
 
+            //            if (o instanceof LongWritable)
+            //                o = ((LongWritable) o).get();
+            //            else if (o instanceof IntWritable)
+            //                o = ((IntWritable) o).get();
+            //            else if (o instanceof DoubleWritable)
+            //                o = ((DoubleWritable) o).get();
+            //            else if (o instanceof FloatWritable)
+            //                o = ((FloatWritable) o).get();
+
             if (o instanceof LongWritable)
                 o = ((LongWritable) o).get();
-            else if (o instanceof IntWritable)
-                o = ((IntWritable) o).get();
             else if (o instanceof DoubleWritable)
                 o = ((DoubleWritable) o).get();
-            else if (o instanceof FloatWritable)
-                o = ((FloatWritable) o).get();
 
             results[i] = o;
         }
@@ -117,22 +120,20 @@ public class RowValueDecoder implements Cloneable {
         return measures;
     }
 
-    public boolean hasMemHungryCountDistinct() {
+    public boolean hasMemHungryMeasures() {
         for (int i = projectionIndex.nextSetBit(0); i >= 0; i = projectionIndex.nextSetBit(i + 1)) {
             FunctionDesc func = measures[i].getFunction();
-            if (func.isCountDistinct() && !func.isHolisticCountDistinct()) {
+            if (func.getMeasureType().isMemoryHungry())
                 return true;
-            }
         }
         return false;
     }
 
-    public static boolean hasMemHungryCountDistinct(Collection<RowValueDecoder> rowValueDecoders) {
+    public static boolean hasMemHungryMeasures(Collection<RowValueDecoder> rowValueDecoders) {
         for (RowValueDecoder decoder : rowValueDecoders) {
-            if (decoder.hasMemHungryCountDistinct())
+            if (decoder.hasMemHungryMeasures())
                 return true;
         }
         return false;
     }
-
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java b/cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
index 8fd2c4e..3a9df54 100644
--- a/cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
+++ b/cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
@@ -34,7 +34,6 @@ import java.util.Map.Entry;
 import java.util.Set;
 
 import org.apache.commons.lang.ArrayUtils;
-import org.apache.commons.lang.StringUtils;
 import org.apache.commons.net.util.Base64;
 import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.persistence.ResourceStore;
@@ -42,6 +41,7 @@ import org.apache.kylin.common.persistence.RootPersistentEntity;
 import org.apache.kylin.common.util.Array;
 import org.apache.kylin.common.util.CaseInsensitiveStringMap;
 import org.apache.kylin.common.util.JsonUtil;
+import org.apache.kylin.measure.MeasureType;
 import org.apache.kylin.metadata.MetadataConstants;
 import org.apache.kylin.metadata.MetadataManager;
 import org.apache.kylin.metadata.model.ColumnDesc;
@@ -562,7 +562,7 @@ public class CubeDesc extends RootPersistentEntity {
     private void initDerivedMap(TblColRef[] hostCols, DeriveType type, DimensionDesc dimension, TblColRef[] derivedCols, String[] extra) {
         if (hostCols.length == 0 || derivedCols.length == 0)
             throw new IllegalStateException("host/derived columns must not be empty");
-        
+
         // Although FK derives PK automatically, user unaware of this can declare PK as derived dimension explicitly.
         // In that case, derivedCols[] will contain a FK which is transformed from the PK by initDimensionColRef().
         // Must drop FK from derivedCols[] before continue.
@@ -641,25 +641,22 @@ public class CubeDesc extends RootPersistentEntity {
             f.setExpression(f.getExpression().toUpperCase());
             f.initReturnDataType();
 
-            ParameterDesc p = f.getParameter();
-            p.normalizeColumnValue();
+            for (ParameterDesc p = f.getParameter(); p != null; p = p.getNextParameter()) {
+                p.setValue(p.getValue().toUpperCase());
+            }
 
-            if (p.isColumnType()) {
-                ArrayList<TblColRef> colRefs = Lists.newArrayList();
-                for (String cName : p.getValue().split("\\s*,\\s*")) {
-                    ColumnDesc sourceColumn = factTable.findColumnByName(cName);
+            ArrayList<TblColRef> colRefs = Lists.newArrayList();
+            for (ParameterDesc p = f.getParameter(); p != null; p = p.getNextParameter()) {
+                if (p.isColumnType()) {
+                    ColumnDesc sourceColumn = factTable.findColumnByName(p.getValue());
                     TblColRef colRef = new TblColRef(sourceColumn);
                     colRefs.add(colRef);
                     allColumns.add(colRef);
                 }
-                if (colRefs.isEmpty() == false)
-                    p.setColRefs(colRefs);
             }
 
-            // verify holistic count distinct as a dependent measure
-            if (m.isHolisticCountDistinct() && StringUtils.isBlank(m.getDependentMeasureRef())) {
-                throw new IllegalStateException(m + " is a holistic count distinct but it has no DependentMeasureRef defined!");
-            }
+            f.getParameter().setColRefs(colRefs);
+
         }
     }
 
@@ -730,15 +727,6 @@ public class CubeDesc extends RootPersistentEntity {
         }
     }
 
-    public boolean hasHolisticCountDistinctMeasures() {
-        for (MeasureDesc measure : measures) {
-            if (measure.getFunction().isHolisticCountDistinct()) {
-                return true;
-            }
-        }
-        return false;
-    }
-
     /**
      * Add error info and thrown exception out
      * 
@@ -794,4 +782,31 @@ public class CubeDesc extends RootPersistentEntity {
         this.autoMergeTimeRanges = autoMergeTimeRanges;
     }
 
+    /**
+     * Lists the row key columns that the row key reports as dictionary-encoded,
+     * followed by the columns each measure's type asks a dictionary for.
+     */
+    public List<TblColRef> getAllColumnsNeedDictionary() {
+        List<TblColRef> dictColumns = Lists.newArrayList();
+
+        // row key columns, kept only when the row key says they use a dictionary
+        for (RowKeyColDesc keyCol : rowkey.getRowKeyColumns()) {
+            TblColRef keyColRef = keyCol.getColRef();
+            if (!rowkey.isUseDictionary(keyColRef))
+                continue;
+            dictColumns.add(keyColRef);
+        }
+
+        // columns requested by the measure type of every measure
+        for (MeasureDesc measure : measures) {
+            MeasureType<?> type = measure.getFunction().getMeasureType();
+            dictColumns.addAll(type.getColumnsNeedDictionary(measure.getFunction()));
+        }
+
+        return dictColumns;
+    }
+
+    public boolean hasMemoryHungryMeasures() {
+        for (MeasureDesc measure : measures) {
+            if (measure.getFunction().getMeasureType().isMemoryHungry()) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/main/java/org/apache/kylin/cube/model/v1/CubeDesc.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/org/apache/kylin/cube/model/v1/CubeDesc.java b/cube/src/main/java/org/apache/kylin/cube/model/v1/CubeDesc.java
index b11056a..41d75c2 100644
--- a/cube/src/main/java/org/apache/kylin/cube/model/v1/CubeDesc.java
+++ b/cube/src/main/java/org/apache/kylin/cube/model/v1/CubeDesc.java
@@ -718,7 +718,6 @@ public class CubeDesc extends RootPersistentEntity {
             f.initReturnDataType();
 
             ParameterDesc p = f.getParameter();
-            p.normalizeColumnValue();
 
             if (p.isColumnType()) {
                 ArrayList<TblColRef> colRefs = Lists.newArrayList();
@@ -801,14 +800,6 @@ public class CubeDesc extends RootPersistentEntity {
         }
     }
 
-    public boolean hasHolisticCountDistinctMeasures() {
-        for (MeasureDesc measure : measures) {
-            if (measure.getFunction().isHolisticCountDistinct()) {
-                return true;
-            }
-        }
-        return false;
-    }
 
     /**
      * Add error info and thrown exception out

http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/main/java/org/apache/kylin/cube/model/validation/rule/FunctionRule.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/org/apache/kylin/cube/model/validation/rule/FunctionRule.java b/cube/src/main/java/org/apache/kylin/cube/model/validation/rule/FunctionRule.java
index a271ab5..d7d9f13 100644
--- a/cube/src/main/java/org/apache/kylin/cube/model/validation/rule/FunctionRule.java
+++ b/cube/src/main/java/org/apache/kylin/cube/model/validation/rule/FunctionRule.java
@@ -25,14 +25,12 @@ import java.util.List;
 import java.util.Set;
 
 import org.apache.commons.lang.StringUtils;
-import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.cube.model.CubeDesc;
 import org.apache.kylin.cube.model.validation.IValidatorRule;
 import org.apache.kylin.cube.model.validation.ResultLevel;
 import org.apache.kylin.cube.model.validation.ValidateContext;
 import org.apache.kylin.metadata.MetadataManager;
 import org.apache.kylin.metadata.model.ColumnDesc;
-import org.apache.kylin.metadata.model.DataType;
 import org.apache.kylin.metadata.model.FunctionDesc;
 import org.apache.kylin.metadata.model.MeasureDesc;
 import org.apache.kylin.metadata.model.ParameterDesc;
@@ -94,7 +92,11 @@ public class FunctionRule implements IValidatorRule<CubeDesc> {
             } else if (StringUtils.equals(FunctionDesc.PARAMTER_TYPE_CONSTANT, type)) {
                 validateCostantParameter(context, cube, value);
             }
-            validateReturnType(context, cube, func);
+            try {
+                func.getMeasureType().validate(func);
+            } catch (IllegalArgumentException ex) {
+                context.addResult(ResultLevel.ERROR, ex.getMessage());
+            }
 
             if (func.isCount())
                 countFuncs.add(func);
@@ -105,31 +107,6 @@ public class FunctionRule implements IValidatorRule<CubeDesc> {
         }
     }
 
-    private void validateReturnType(ValidateContext context, CubeDesc cube, FunctionDesc funcDesc) {
-
-        String func = funcDesc.getExpression();
-        DataType rtype = funcDesc.getReturnDataType();
-
-        if (funcDesc.isCount()) {
-            if (rtype.isIntegerFamily() == false) {
-                context.addResult(ResultLevel.ERROR, "Return type for function " + func + " must be one of " + DataType.INTEGER_FAMILY);
-            }
-        } else if (funcDesc.isCountDistinct()) {
-            if (rtype.isHLLC() == false && funcDesc.isHolisticCountDistinct() == false) {
-                context.addResult(ResultLevel.ERROR, "Return type for function " + func + " must be hllc(10), hllc(12) etc.");
-            }
-        } else if (funcDesc.isMax() || funcDesc.isMin() || funcDesc.isSum()) {
-            if (rtype.isNumberFamily() == false) {
-                context.addResult(ResultLevel.ERROR, "Return type for function " + func + " must be one of " + DataType.NUMBER_FAMILY);
-            }
-        } else {
-            if (StringUtils.equalsIgnoreCase(KylinConfig.getInstanceFromEnv().getProperty(KEY_IGNORE_UNKNOWN_FUNC, "false"), "false")) {
-                context.addResult(ResultLevel.ERROR, "Unrecognized function: [" + func + "]");
-            }
-        }
-
-    }
-
     /**
      * @param context
      * @param cube

http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java
----------------------------------------------------------------------
diff --git a/cube/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java b/cube/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java
index a076938..f093032 100644
--- a/cube/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java
+++ b/cube/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java
@@ -26,7 +26,7 @@ import java.util.HashSet;
 import org.apache.kylin.common.util.JsonUtil;
 import org.apache.kylin.common.util.LocalFileMetadataTestCase;
 import org.apache.kylin.cube.model.CubeDesc;
-import org.apache.kylin.dict.Dictionary;
+import org.apache.kylin.common.util.Dictionary;
 import org.apache.kylin.dict.DictionaryInfo;
 import org.apache.kylin.dict.DictionaryManager;
 import org.apache.kylin.metadata.model.TblColRef;

http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/test/java/org/apache/kylin/cube/kv/RowValueDecoderTest.java
----------------------------------------------------------------------
diff --git a/cube/src/test/java/org/apache/kylin/cube/kv/RowValueDecoderTest.java b/cube/src/test/java/org/apache/kylin/cube/kv/RowValueDecoderTest.java
index b4aa238..a00afef 100644
--- a/cube/src/test/java/org/apache/kylin/cube/kv/RowValueDecoderTest.java
+++ b/cube/src/test/java/org/apache/kylin/cube/kv/RowValueDecoderTest.java
@@ -30,8 +30,8 @@ import org.apache.kylin.common.util.LocalFileMetadataTestCase;
 import org.apache.kylin.cube.CubeManager;
 import org.apache.kylin.cube.model.CubeDesc;
 import org.apache.kylin.cube.model.HBaseColumnDesc;
+import org.apache.kylin.measure.MeasureCodec;
 import org.apache.kylin.metadata.MetadataManager;
-import org.apache.kylin.metadata.measure.MeasureCodec;
 import org.apache.kylin.metadata.model.FunctionDesc;
 import org.apache.kylin.metadata.model.MeasureDesc;
 import org.junit.After;
@@ -63,7 +63,7 @@ public class RowValueDecoderTest extends LocalFileMetadataTestCase {
         LongWritable count = new LongWritable(2);
         LongWritable item_count = new LongWritable(99999);
         ByteBuffer buf = ByteBuffer.allocate(RowConstants.ROWVALUE_BUFFER_SIZE);
-        codec.encode(new Object[] { sum, min, max, count,item_count }, buf);
+        codec.encode(new Object[] { sum, min, max, count, item_count }, buf);
 
         buf.flip();
         byte[] valueBytes = new byte[buf.limit()];
@@ -81,7 +81,7 @@ public class RowValueDecoderTest extends LocalFileMetadataTestCase {
         Object[] measureValues = rowValueDecoder.getValues();
 
         assertEquals("[PRICE, MIN_PRICE_, MAX_PRICE_, COUNT__, ITEM_COUNT]", measureNames.toString());
-        assertEquals("[333.1234567, 333.1111111, 333.1999999, 2, 99999]", Arrays.toString(measureValues));
+        assertEquals("[333.1235, 333.1111, 333.2000, 2, 99999]", Arrays.toString(measureValues));
     }
 
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/test/java/org/apache/kylin/metadata/measure/MeasureCodecTest.java
----------------------------------------------------------------------
diff --git a/cube/src/test/java/org/apache/kylin/metadata/measure/MeasureCodecTest.java b/cube/src/test/java/org/apache/kylin/metadata/measure/MeasureCodecTest.java
index 3ed6094..0870914 100644
--- a/cube/src/test/java/org/apache/kylin/metadata/measure/MeasureCodecTest.java
+++ b/cube/src/test/java/org/apache/kylin/metadata/measure/MeasureCodecTest.java
@@ -28,6 +28,7 @@ import org.apache.hadoop.io.DoubleWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.kylin.common.hll.HyperLogLogPlusCounter;
 import org.apache.kylin.cube.kv.RowConstants;
+import org.apache.kylin.measure.MeasureCodec;
 import org.apache.kylin.metadata.model.FunctionDesc;
 import org.apache.kylin.metadata.model.MeasureDesc;
 import org.junit.Test;
@@ -45,7 +46,7 @@ public class MeasureCodecTest {
 
         DoubleWritable d = new DoubleWritable(1.0);
         LongWritable l = new LongWritable(2);
-        BigDecimal b = new BigDecimal("333.1234567");
+        BigDecimal b = new BigDecimal("333.1234");
         HyperLogLogPlusCounter hllc = new HyperLogLogPlusCounter(16);
         hllc.add("1234567");
         hllc.add("abcdefg");

http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/test/resources/data/TEST1_desc.json
----------------------------------------------------------------------
diff --git a/cube/src/test/resources/data/TEST1_desc.json b/cube/src/test/resources/data/TEST1_desc.json
index 082a3cf..0373496 100644
--- a/cube/src/test/resources/data/TEST1_desc.json
+++ b/cube/src/test/resources/data/TEST1_desc.json
@@ -127,7 +127,12 @@
       "expression" : "COUNT_DISTINCT",
       "parameter" : {
         "type" : "column",
-        "value" : "LSTG_FORMAT_NAME,SELLER_ID"
+        "value" : "LSTG_FORMAT_NAME",
+        "next_parameter" : {
+          "type" : "column",
+          "value" : "SELLER_ID",
+          "next_parameter" : null
+        }
       },
       "returntype" : "hllc10"
     }

http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/test/resources/data/TEST2_desc.json
----------------------------------------------------------------------
diff --git a/cube/src/test/resources/data/TEST2_desc.json b/cube/src/test/resources/data/TEST2_desc.json
index 90bd806..de6bfe0 100644
--- a/cube/src/test/resources/data/TEST2_desc.json
+++ b/cube/src/test/resources/data/TEST2_desc.json
@@ -112,7 +112,12 @@
       "expression" : "COUNT_DISTINCT",
       "parameter" : {
         "type" : "column",
-        "value" : "LSTG_FORMAT_NAME,SELLER_ID"
+        "value" : "LSTG_FORMAT_NAME",
+        "next_parameter" : {
+          "type" : "column",
+          "value" : "SELLER_ID",
+          "next_parameter" : null
+        }
       },
       "returntype" : "hllc10"
     }

http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/test/resources/data/TEST3_desc.json
----------------------------------------------------------------------
diff --git a/cube/src/test/resources/data/TEST3_desc.json b/cube/src/test/resources/data/TEST3_desc.json
index 4b0836b..54f0cf9 100644
--- a/cube/src/test/resources/data/TEST3_desc.json
+++ b/cube/src/test/resources/data/TEST3_desc.json
@@ -1,182 +1,268 @@
 {
-  "uuid" : "9c53506d-d7b9-4ad4-b3b1-53ea42673c4b",
-  "last_modified" : 1401429176099,
-  "name" : "TEST1_desc",
+  "uuid": "9c53506d-d7b9-4ad4-b3b1-53ea42673c4b",
+  "last_modified": 1401429176099,
+  "name": "TEST1_desc",
   "model_name": "TEST1_model_desc",
-  "dimensions" : [ {
-    "id" : 1,
-    "name" : "CAL_DT",
-    "join" : {
-      "type" : "inner",
-      "primary_key" : [ "CAL_DT" ],
-      "foreign_key" : [ "CAL_DT" ]
+  "dimensions": [
+    {
+      "id": 1,
+      "name": "CAL_DT",
+      "join": {
+        "type": "inner",
+        "primary_key": [
+          "CAL_DT"
+        ],
+        "foreign_key": [
+          "CAL_DT"
+        ]
+      },
+      "hierarchy": null,
+      "table": "TEST_CAL_DT",
+      "column": [
+        "TEST_CAL_DT.CAL_DT"
+      ],
+      "datatype": "date",
+      "derived": [
+        "WEEK_BEG_DT"
+      ]
     },
-    "hierarchy" : null,
-    "table" : "TEST_CAL_DT",
-    "column" : ["TEST_CAL_DT.CAL_DT"],
-    "datatype" : "date",
-    "derived" : [ "WEEK_BEG_DT" ]
-  }, {
-    "id" : 2,
-    "name" : "CATEGORY",
-    "join" : {
-      "type" : "inner",
-      "primary_key" : [ "LEAF_CATEG_ID", "SITE_ID" ],
-      "foreign_key" : [ "LEAF_CATEG_ID", "LSTG_SITE_ID" ]
+    {
+      "id": 2,
+      "name": "CATEGORY",
+      "join": {
+        "type": "inner",
+        "primary_key": [
+          "LEAF_CATEG_ID",
+          "SITE_ID"
+        ],
+        "foreign_key": [
+          "LEAF_CATEG_ID",
+          "LSTG_SITE_ID"
+        ]
+      },
+      "hierarchy": true,
+      "table": "TEST_CATEGORY_GROUPINGS",
+      "column": [
+        "TEST_CATEGORY_GROUPINGS.META_CATEG_NAME",
+        "TEST_CATEGORY_GROUPINGS.CATEG_LVL2_NAME",
+        "TEST_CATEGORY_GROUPINGS.CATEG_LVL3_NAME"
+      ],
+      "derived": null
     },
-    "hierarchy" : true,
-    "table" : "TEST_CATEGORY_GROUPINGS",
-    "column" : ["TEST_CATEGORY_GROUPINGS.META_CATEG_NAME", "TEST_CATEGORY_GROUPINGS.CATEG_LVL2_NAME", "TEST_CATEGORY_GROUPINGS.CATEG_LVL3_NAME"],
-    "derived" : null
-  }, {
-    "id" : 3,
-    "name" : "LSTG_FORMAT_NAME",
-    "join" : null,
-    "hierarchy" : null,
-    "table" : "TEST_KYLIN_FACT",
-    "column" : ["TEST_KYLIN_FACT.LSTG_FORMAT_NAME"],
-    "datatype" : "string",
-    "derived" : null
-  }, {
-    "id" : 4,
-    "name" : "SITE_ID",
-    "join" : {
-      "type" : "inner",
-      "primary_key" : [ "SITE_ID" ],
-      "foreign_key" : [ "LSTG_SITE_ID" ]
+    {
+      "id": 3,
+      "name": "LSTG_FORMAT_NAME",
+      "join": null,
+      "hierarchy": null,
+      "table": "TEST_KYLIN_FACT",
+      "column": [
+        "TEST_KYLIN_FACT.LSTG_FORMAT_NAME"
+      ],
+      "datatype": "string",
+      "derived": null
     },
-    "hierarchy" : null,
-    "table" : "TEST_SITES",
-    "column" : ["TEST_SITES.SITE_ID"],
-    "datatype" : "string",
-    "derived" : [ "SITE_NAME", "CRE_USER" ]
-  }, {
-    "id" : 5,
-    "name" : "SELLER_TYPE_CD",
-    "join" : {
-      "type" : "inner",
-      "primary_key" : [ "SELLER_TYPE_CD" ],
-      "foreign_key" : [ "SLR_SEGMENT_CD" ]
+    {
+      "id": 4,
+      "name": "SITE_ID",
+      "join": {
+        "type": "inner",
+        "primary_key": [
+          "SITE_ID"
+        ],
+        "foreign_key": [
+          "LSTG_SITE_ID"
+        ]
+      },
+      "hierarchy": null,
+      "table": "TEST_SITES",
+      "column": [
+        "TEST_SITES.SITE_ID"
+      ],
+      "datatype": "string",
+      "derived": [
+        "SITE_NAME",
+        "CRE_USER"
+      ]
     },
-    "hierarchy" : null,
-    "table" : "TEST_SELLER_TYPE_DIM",
-    "column" : ["TEST_SELLER_TYPE_DIM.SELLER_TYPE_CD"],
-    "datatype" : "string",
-    "derived" : [ "SELLER_TYPE_DESC" ]
-  } ],
-  "measures" : [ {
-    "id" : 1,
-    "name" : "GMV_SUM",
-    "function" : {
-      "expression" : "SUM",
-      "parameter" : {
-        "type" : "column",
-        "value" : "PRICE"
+    {
+      "id": 5,
+      "name": "SELLER_TYPE_CD",
+      "join": {
+        "type": "inner",
+        "primary_key": [
+          "SELLER_TYPE_CD"
+        ],
+        "foreign_key": [
+          "SLR_SEGMENT_CD"
+        ]
       },
-      "returntype" : "decimal"
+      "hierarchy": null,
+      "table": "TEST_SELLER_TYPE_DIM",
+      "column": [
+        "TEST_SELLER_TYPE_DIM.SELLER_TYPE_CD"
+      ],
+      "datatype": "string",
+      "derived": [
+        "SELLER_TYPE_DESC"
+      ]
     }
-  }, {
-    "id" : 2,
-    "name" : "GMV_MIN",
-    "function" : {
-      "expression" : "MIN",
-      "parameter" : {
-        "type" : "column",
-        "value" : "PRICE"
-      },
-      "returntype" : "decimal"
+  ],
+  "measures": [
+    {
+      "id": 1,
+      "name": "GMV_SUM",
+      "function": {
+        "expression": "SUM",
+        "parameter": {
+          "type": "column",
+          "value": "PRICE"
+        },
+        "returntype": "decimal"
+      }
+    },
+    {
+      "id": 2,
+      "name": "GMV_MIN",
+      "function": {
+        "expression": "MIN",
+        "parameter": {
+          "type": "column",
+          "value": "PRICE"
+        },
+        "returntype": "decimal"
+      }
+    },
+    {
+      "id": 3,
+      "name": "GMV_MAX",
+      "function": {
+        "expression": "MAX",
+        "parameter": {
+          "type": "column",
+          "value": "PRICE"
+        },
+        "returntype": "decimal"
+      }
+    },
+    {
+      "id": 4,
+      "name": "TRANS_CNT",
+      "function": {
+        "expression": "COUNT",
+        "parameter": {
+          "type": "constant",
+          "value": "1"
+        },
+        "returntype": "long"
+      }
+    },
+    {
+      "id": 5,
+      "name": "SELLER_CNT",
+      "function": {
+        "expression": "COUNT_DISTINCT",
+        "parameter": {
+          "type": "column",
+          "value": "SELLER_ID"
+        },
+        "returntype": "hllc10"
+      }
+    },
+    {
+      "id": 6,
+      "name": "SELLER_FORMAT_CNT",
+      "function": {
+        "expression": "COUNT_DISTINCT",
+        "parameter": {
+          "type": "column",
+          "value": "LSTG_FORMAT_NAME",
+          "next_parameter": {
+            "type": "column",
+            "value": "SELLER_ID",
+            "next_parameter": null
+          }
+        },
+        "returntype": "hllc10"
+      }
     }
-  }, {
-    "id" : 3,
-    "name" : "GMV_MAX",
-    "function" : {
-      "expression" : "MAX",
-      "parameter" : {
-        "type" : "column",
-        "value" : "PRICE"
+  ],
+  "rowkey": {
+    "rowkey_columns": [
+      {
+        "column": "CAL_DT",
+        "length": 10,
+        "dictionary": "date(yyyy-mm-dd)",
+        "mandatory": false
       },
-      "returntype" : "decimal"
-    }
-  }, {
-    "id" : 4,
-    "name" : "TRANS_CNT",
-    "function" : {
-      "expression" : "COUNT",
-      "parameter" : {
-        "type" : "constant",
-        "value" : "1"
+      {
+        "column": "META_CATEG_NAME",
+        "length": 0,
+        "dictionary": "string",
+        "mandatory": false
       },
-      "returntype" : "long"
-    }
-  }, {
-    "id" : 5,
-    "name" : "SELLER_CNT",
-    "function" : {
-      "expression" : "COUNT_DISTINCT",
-      "parameter" : {
-        "type" : "column",
-        "value" : "SELLER_ID"
+      {
+        "column": "CATEG_LVL2_NAME",
+        "length": 0,
+        "dictionary": "string",
+        "mandatory": false
       },
-      "returntype" : "hllc10"
-    }
-  }, {
-    "id" : 6,
-    "name" : "SELLER_FORMAT_CNT",
-    "function" : {
-      "expression" : "COUNT_DISTINCT",
-      "parameter" : {
-        "type" : "column",
-        "value" : "LSTG_FORMAT_NAME,SELLER_ID"
+      {
+        "column": "CATEG_LVL3_NAME",
+        "length": 0,
+        "dictionary": "string",
+        "mandatory": false
       },
-      "returntype" : "hllc10"
-    }
-  } ],
-  "rowkey" : {
-    "rowkey_columns" : [ {
-      "column" : "CAL_DT",
-      "length" : 10,
-      "dictionary" : "date(yyyy-mm-dd)",
-      "mandatory" : false
-    }, {
-      "column" : "META_CATEG_NAME",
-      "length" : 0,
-      "dictionary" : "string",
-      "mandatory" : false
-    }, {
-      "column" : "CATEG_LVL2_NAME",
-      "length" : 0,
-      "dictionary" : "string",
-      "mandatory" : false
-    }, {
-      "column" : "CATEG_LVL3_NAME",
-      "length" : 0,
-      "dictionary" : "string",
-      "mandatory" : false
-    }, {
-      "column" : "LSTG_FORMAT_NAME",
-      "length" : 12,
-      "dictionary" : null,
-      "mandatory" : false
-    }, {
-      "column" : "SITE_ID",
-      "length" : 0,
-      "dictionary" : "string",
-      "mandatory" : false
-    }, {
-      "column" : "SELLER_TYPE_CD",
-      "length" : 0,
-      "dictionary" : "string",
-      "mandatory" : false
-    } ],
-    "aggregation_groups" : [ [ "META_CATEG_NAME", "CATEG_LVL3_NAME", "CATEG_LVL2_NAME", "CAL_DT" ], [ "LSTG_FORMAT_NAME", "SITE_ID", "SELLER_TYPE_CD" ] ]
+      {
+        "column": "LSTG_FORMAT_NAME",
+        "length": 12,
+        "dictionary": null,
+        "mandatory": false
+      },
+      {
+        "column": "SITE_ID",
+        "length": 0,
+        "dictionary": "string",
+        "mandatory": false
+      },
+      {
+        "column": "SELLER_TYPE_CD",
+        "length": 0,
+        "dictionary": "string",
+        "mandatory": false
+      }
+    ],
+    "aggregation_groups": [
+      [
+        "META_CATEG_NAME",
+        "CATEG_LVL3_NAME",
+        "CATEG_LVL2_NAME",
+        "CAL_DT"
+      ],
+      [
+        "LSTG_FORMAT_NAME",
+        "SITE_ID",
+        "SELLER_TYPE_CD"
+      ]
+    ]
   },
-  "hbase_mapping" : {
-    "column_family" : [ {
-      "name" : "F1",
-      "columns" : [ {
-        "qualifier" : "M",
-        "measure_refs" : [ "GMV_SUM", "GMV_MIN", "GMV_MAX", "TRANS_CNT", "SELLER_CNT", "SELLER_FORMAT_CNT" ]
-      } ]
-    } ]
+  "hbase_mapping": {
+    "column_family": [
+      {
+        "name": "F1",
+        "columns": [
+          {
+            "qualifier": "M",
+            "measure_refs": [
+              "GMV_SUM",
+              "GMV_MIN",
+              "GMV_MAX",
+              "TRANS_CNT",
+              "SELLER_CNT",
+              "SELLER_FORMAT_CNT"
+            ]
+          }
+        ]
+      }
+    ]
   }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
----------------------------------------------------------------------
diff --git a/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java b/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
index 5702be9..14086c6 100644
--- a/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
+++ b/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
@@ -27,6 +27,7 @@ import java.util.Date;
 
 import org.apache.commons.lang.StringUtils;
 import org.apache.kylin.common.util.DateFormat;
+import org.apache.kylin.common.util.Dictionary;
 
 /**
  * A dictionary for date string (date only, no time).

http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
----------------------------------------------------------------------
diff --git a/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java b/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
deleted file mode 100644
index e99a553..0000000
--- a/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
+++ /dev/null
@@ -1,199 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.kylin.dict;
-
-import java.io.PrintStream;
-import java.io.UnsupportedEncodingException;
-
-import org.apache.hadoop.io.Writable;
-import org.apache.kylin.common.util.BytesUtil;
-
-/**
- * A bi-way dictionary that maps from dimension/column values to IDs and vice
- * versa. By storing IDs instead of real values, the size of cube is
- * significantly reduced.
- * 
- * - IDs are smallest integers possible for the cardinality of a column, for the
- * purpose of minimal storage space - IDs preserve ordering of values, such that
- * range query can be applied to IDs directly
- * 
- * A dictionary once built, is immutable. This allows optimal memory footprint
- * by e.g. flatten the Trie structure into a byte array, replacing node pointers
- * with array offsets.
- * 
- * @author yangli9
- */
-abstract public class Dictionary<T> implements Writable {
-
-    public static final byte NULL = (byte) 0xff;
-
-    // ID with all bit-1 (0xff e.g.) reserved for NULL value
-    public static final int NULL_ID[] = new int[] { 0, 0xff, 0xffff, 0xffffff, 0xffffff };
-
-    abstract public int getMinId();
-
-    abstract public int getMaxId();
-    
-    public int getSize() {
-        return getMaxId() - getMinId() + 1;
-    }
-
-    /**
-     * @return the size of an ID in bytes, determined by the cardinality of
-     *         column
-     */
-    abstract public int getSizeOfId();
-
-    /**
-     * @return the (maximum) size of value in bytes, determined by the longest
-     *         value of column
-     */
-    abstract public int getSizeOfValue();
-
-    /**
-     * Convenient form of <code>getIdFromValue(value, 0)</code>
-     */
-    final public int getIdFromValue(T value) throws IllegalArgumentException {
-        return getIdFromValue(value, 0);
-    }
-
-    /**
-     * Returns the ID integer of given value. In case of not found
-     * <p>
-     * - if roundingFlag=0, throw IllegalArgumentException; <br>
-     * - if roundingFlag<0, the closest smaller ID integer if exist; <br>
-     * - if roundingFlag>0, the closest bigger ID integer if exist. <br>
-     * <p>
-     * The implementation often has cache, thus faster than the byte[] version getIdFromValueBytes()
-     * 
-     * @throws IllegalArgumentException
-     *             if value is not found in dictionary and rounding is off or
-     *             failed
-     */
-    final public int getIdFromValue(T value, int roundingFlag) throws IllegalArgumentException {
-        if (isNullObjectForm(value))
-            return nullId();
-        else
-            return getIdFromValueImpl(value, roundingFlag);
-    }
-
-    protected boolean isNullObjectForm(T value) {
-        return value == null;
-    }
-
-    abstract protected int getIdFromValueImpl(T value, int roundingFlag);
-
-    /**
-     * @return the value corresponds to the given ID
-     * @throws IllegalArgumentException
-     *             if ID is not found in dictionary
-     */
-    final public T getValueFromId(int id) {
-        if (isNullId(id))
-            return null;
-        else
-            return getValueFromIdImpl(id);
-    }
-
-    abstract protected T getValueFromIdImpl(int id);
-
-    /**
-     * Convenient form of
-     * <code>getIdFromValueBytes(value, offset, len, 0)</code>
-     */
-    final public int getIdFromValueBytes(byte[] value, int offset, int len) {
-        return getIdFromValueBytes(value, offset, len, 0);
-    }
-
-    /**
-     * A lower level API, return ID integer from raw value bytes. In case of not found
-     * <p>
-     * - if roundingFlag=0, throw IllegalArgumentException; <br>
-     * - if roundingFlag<0, the closest smaller ID integer if exist; <br> 
-     * - if roundingFlag>0, the closest bigger ID integer if exist. <br>
-     * <p>
-     * Bypassing the cache layer, this could be significantly slower than getIdFromValue(T value).
-     * 
-     * @throws IllegalArgumentException
-     *             if value is not found in dictionary and rounding is off or failed
-     */
-    final public int getIdFromValueBytes(byte[] value, int offset, int len, int roundingFlag) {
-        if (isNullByteForm(value, offset, len))
-            return nullId();
-        else
-            return getIdFromValueBytesImpl(value, offset, len, roundingFlag);
-    }
-
-    protected boolean isNullByteForm(byte[] value, int offset, int len) {
-        return value == null;
-    }
-
-    abstract protected int getIdFromValueBytesImpl(byte[] value, int offset, int len, int roundingFlag);
-
-    /**
-     * A lower level API, get byte values from ID, return the number of bytes
-     * written. Bypassing the cache layer, this could be significantly slower
-     * than getIdFromValue(T value).
-     *
-     * @return size of value bytes, 0 if empty string, -1 if null
-     *
-     * @throws IllegalArgumentException
-     *             if ID is not found in dictionary
-     */
-    final public int getValueBytesFromId(int id, byte[] returnValue, int offset) {
-        if (isNullId(id))
-            return -1;
-        else
-            return getValueBytesFromIdImpl(id, returnValue, offset);
-    }
-
-    abstract protected int getValueBytesFromIdImpl(int id, byte[] returnValue, int offset);
-
-    abstract public void dump(PrintStream out);
-
-    public int nullId() {
-        return NULL_ID[getSizeOfId()];
-    }
-
-    public boolean isNullId(int id) {
-        int nullId = NULL_ID[getSizeOfId()];
-        return (nullId & id) == nullId;
-    }
-
-    /** utility that converts a dictionary ID to string, preserving order */
-    public static String dictIdToString(byte[] idBytes, int offset, int length) {
-        try {
-            return new String(idBytes, offset, length, "ISO-8859-1");
-        } catch (UnsupportedEncodingException e) {
-            // never happen
-            return null;
-        }
-    }
-
-    /** the reverse of dictIdToString(), returns integer ID */
-    public static int stringToDictId(String str) {
-        try {
-            byte[] bytes = str.getBytes("ISO-8859-1");
-            return BytesUtil.readUnsigned(bytes, 0, bytes.length);
-        } catch (UnsupportedEncodingException e) {
-            // never happen
-            return 0;
-        }
-    }
-}