You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by ma...@apache.org on 2015/12/15 09:46:29 UTC
[6/6] kylin git commit: KYLIN-976 Support Custom Aggregation Types
KYLIN-976 Support Custom Aggregation Types
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/c721d679
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/c721d679
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/c721d679
Branch: refs/heads/1.x-staging
Commit: c721d679727b0345a7fcdd8335a088e7c336aacc
Parents: cdea731
Author: honma <ho...@ebay.com>
Authored: Fri Dec 11 16:08:59 2015 +0800
Committer: honma <ho...@ebay.com>
Committed: Tue Dec 15 16:50:58 2015 +0800
----------------------------------------------------------------------
.../org/apache/kylin/common/KylinConfig.java | 40 +-
.../common/hll/HyperLogLogPlusCounter.java | 87 +++-
.../org/apache/kylin/common/util/BytesUtil.java | 16 +
.../apache/kylin/common/util/Dictionary.java | 199 +++++++++
.../kylin/cube/CubeCapabilityChecker.java | 147 ++++---
.../org/apache/kylin/cube/CubeInstance.java | 20 +-
.../java/org/apache/kylin/cube/CubeManager.java | 6 +-
.../java/org/apache/kylin/cube/CubeSegment.java | 14 +-
.../cube/estimation/CubeSizeEstimationCLI.java | 4 +-
.../kylin/cube/kv/AbstractRowKeyEncoder.java | 2 +-
.../apache/kylin/cube/kv/RowKeyColumnIO.java | 2 +-
.../apache/kylin/cube/kv/RowKeyColumnOrder.java | 2 +-
.../apache/kylin/cube/kv/RowValueDecoder.java | 27 +-
.../org/apache/kylin/cube/model/CubeDesc.java | 61 ++-
.../apache/kylin/cube/model/v1/CubeDesc.java | 9 -
.../model/validation/rule/FunctionRule.java | 33 +-
.../kylin/cube/DictionaryManagerTest.java | 2 +-
.../kylin/cube/kv/RowValueDecoderTest.java | 6 +-
.../metadata/measure/MeasureCodecTest.java | 3 +-
cube/src/test/resources/data/TEST1_desc.json | 7 +-
cube/src/test/resources/data/TEST2_desc.json | 7 +-
cube/src/test/resources/data/TEST3_desc.json | 418 +++++++++++--------
.../apache/kylin/dict/DateStrDictionary.java | 1 +
.../java/org/apache/kylin/dict/Dictionary.java | 199 ---------
.../apache/kylin/dict/DictionaryGenerator.java | 91 ++--
.../org/apache/kylin/dict/DictionaryInfo.java | 1 +
.../kylin/dict/DictionaryInfoSerializer.java | 1 +
.../apache/kylin/dict/DictionaryManager.java | 1 +
.../java/org/apache/kylin/dict/ISegment.java | 3 +-
.../dict/MultipleDictionaryValueEnumerator.java | 1 +
.../org/apache/kylin/dict/TrieDictionary.java | 1 +
.../apache/kylin/dict/lookup/SnapshotTable.java | 2 +-
.../test_kylin_cube_without_slr_desc.json | 7 +-
...t_kylin_cube_without_slr_left_join_desc.json | 7 +-
.../invertedindex/IICapabilityChecker.java | 182 ++++++++
.../apache/kylin/invertedindex/IIInstance.java | 16 +-
.../apache/kylin/invertedindex/IIManager.java | 2 +-
.../apache/kylin/invertedindex/IISegment.java | 4 +-
.../invertedindex/index/BitMapContainer.java | 2 +-
.../index/CompressedValueContainer.java | 2 +-
.../invertedindex/index/RawTableRecord.java | 2 +-
.../invertedindex/index/TableRecordInfo.java | 2 +-
.../index/TableRecordInfoDigest.java | 2 +-
.../kylin/invertedindex/model/IIDesc.java | 4 +
.../invertedindex/IIInstanceTest.java | 2 +-
.../invertedindex/InvertedIndexLocalTest.java | 2 +-
.../kylin/job/hadoop/cube/BaseCuboidMapper.java | 76 ++--
.../kylin/job/hadoop/cube/CubeHFileMapper.java | 4 +-
.../apache/kylin/job/hadoop/cube/CuboidJob.java | 5 +-
.../kylin/job/hadoop/cube/CuboidReducer.java | 6 +-
.../job/hadoop/cube/MergeCuboidMapper.java | 57 ++-
.../job/hadoop/cube/NewBaseCuboidMapper.java | 346 ---------------
.../kylin/job/dataGen/FactTableGenerator.java | 2 +-
.../job/hadoop/cube/BaseCuboidMapperTest.java | 7 +-
.../job/hadoop/cube/CubeHFileMapper2Test.java | 2 +-
.../kylin/job/hadoop/cube/CubeReducerTest.java | 2 +-
.../job/hadoop/cube/MergeCuboidMapperTest.java | 2 +-
.../job/hadoop/cube/NDCuboidMapperTest.java | 17 +-
.../apache/kylin/measure/MeasureAggregator.java | 78 ++++
.../kylin/measure/MeasureAggregators.java | 81 ++++
.../org/apache/kylin/measure/MeasureCodec.java | 79 ++++
.../apache/kylin/measure/MeasureIngester.java | 48 +++
.../org/apache/kylin/measure/MeasureType.java | 146 +++++++
.../kylin/measure/MeasureTypeFactory.java | 93 +++++
.../kylin/measure/basic/BasicMeasureType.java | 152 +++++++
.../kylin/measure/basic/BigDecimalIngester.java | 41 ++
.../measure/basic/BigDecimalMaxAggregator.java | 56 +++
.../measure/basic/BigDecimalMinAggregator.java | 57 +++
.../measure/basic/BigDecimalSumAggregator.java | 53 +++
.../kylin/measure/basic/DoubleIngester.java | 46 ++
.../measure/basic/DoubleMaxAggregator.java | 55 +++
.../measure/basic/DoubleMinAggregator.java | 55 +++
.../measure/basic/DoubleSumAggregator.java | 52 +++
.../kylin/measure/basic/LongIngester.java | 46 ++
.../kylin/measure/basic/LongMaxAggregator.java | 55 +++
.../kylin/measure/basic/LongMinAggregator.java | 55 +++
.../kylin/measure/basic/LongSumAggregator.java | 52 +++
.../kylin/measure/hllc/HLLCAggregator.java | 64 +++
.../kylin/measure/hllc/HLLCMeasureType.java | 127 ++++++
.../kylin/measure/hllc/HLLCSerializer.java | 88 ++++
.../measure/hllc/HLLDistinctCountAggFunc.java | 152 +++++++
.../metadata/datatype/BigDecimalSerializer.java | 105 +++++
.../kylin/metadata/datatype/DataType.java | 285 +++++++++++++
.../metadata/datatype/DataTypeSerializer.java | 90 ++++
.../metadata/datatype/DateTimeSerializer.java | 57 +++
.../metadata/datatype/DoubleSerializer.java | 75 ++++
.../kylin/metadata/datatype/LongSerializer.java | 82 ++++
.../metadata/datatype/StringSerializer.java | 52 +++
.../measure/BigDecimalMaxAggregator.java | 53 ---
.../measure/BigDecimalMinAggregator.java | 54 ---
.../metadata/measure/BigDecimalSerializer.java | 62 ---
.../measure/BigDecimalSumAggregator.java | 50 ---
.../metadata/measure/DoubleMaxAggregator.java | 54 ---
.../metadata/measure/DoubleMinAggregator.java | 54 ---
.../metadata/measure/DoubleSerializer.java | 55 ---
.../metadata/measure/DoubleSumAggregator.java | 51 ---
.../kylin/metadata/measure/HLLCAggregator.java | 57 ---
.../kylin/metadata/measure/HLLCSerializer.java | 67 ---
.../kylin/metadata/measure/LDCAggregator.java | 64 ---
.../metadata/measure/LongMaxAggregator.java | 54 ---
.../metadata/measure/LongMinAggregator.java | 54 ---
.../kylin/metadata/measure/LongSerializer.java | 56 ---
.../metadata/measure/LongSumAggregator.java | 51 ---
.../metadata/measure/MeasureAggregator.java | 102 -----
.../metadata/measure/MeasureAggregators.java | 82 ----
.../kylin/metadata/measure/MeasureCodec.java | 83 ----
.../metadata/measure/MeasureSerializer.java | 68 ---
.../measure/fixedlen/FixedHLLCodec.java | 2 +-
.../measure/fixedlen/FixedLenMeasureCodec.java | 5 +-
.../measure/fixedlen/FixedPointLongCodec.java | 2 +-
.../apache/kylin/metadata/model/ColumnDesc.java | 1 +
.../apache/kylin/metadata/model/DataType.java | 290 -------------
.../kylin/metadata/model/FunctionDesc.java | 53 ++-
.../kylin/metadata/model/MeasureDesc.java | 4 -
.../kylin/metadata/model/ParameterDesc.java | 59 ++-
.../apache/kylin/metadata/model/TblColRef.java | 1 +
.../metadata/realization/CapabilityResult.java | 63 +++
.../metadata/realization/IRealization.java | 15 +-
.../metadata/realization/SQLDigestUtil.java | 2 +-
.../org/apache/kylin/metadata/tuple/Tuple.java | 164 ++++++++
.../apache/kylin/metadata/tuple/TupleInfo.java | 113 +++++
.../datatype/BigDecimalSerializerTest.java | 69 +++
.../query/enumerator/LookupTableEnumerator.java | 2 +-
.../kylin/query/relnode/OLAPAggregateRel.java | 14 +-
.../apache/kylin/query/routing/Candidate.java | 100 +++++
.../apache/kylin/query/routing/QueryRouter.java | 63 ++-
.../apache/kylin/query/routing/RoutingRule.java | 34 +-
.../AdjustForWeeklyMatchedRealization.java | 101 -----
.../routing/RoutingRules/CubesSortRule.java | 67 ---
.../RoutingRules/RealizationPriorityRule.java | 60 ---
.../RoutingRules/RealizationSortRule.java | 65 ---
.../RemoveUncapableRealizationsRule.java | 42 --
.../SimpleQueryMoreColumnsCubeFirstRule.java | 50 ---
.../routing/rules/RealizationSortRule.java | 34 ++
.../rules/RemoveUncapableRealizationsRule.java | 44 ++
.../apache/kylin/query/schema/OLAPTable.java | 12 +-
.../query/sqlfunc/HLLDistinctCountAggFunc.java | 152 -------
.../apache/kylin/query/test/IIQueryTest.java | 18 +-
.../apache/kylin/query/test/KylinQueryTest.java | 2 +-
.../apache/kylin/query/test/KylinTestBase.java | 4 +-
.../apache/kylin/rest/service/JobService.java | 10 +-
.../storage/filter/BitMapFilterEvaluator.java | 2 +-
.../kylin/storage/hbase/ColumnValueRange.java | 2 +-
.../storage/hbase/CubeSegmentTupleIterator.java | 211 +++++++---
.../kylin/storage/hbase/CubeStorageEngine.java | 24 +-
.../kylin/storage/hbase/CubeTupleConverter.java | 139 ++++++
.../hbase/SerializedHBaseTupleIterator.java | 2 +-
.../hbase/coprocessor/AggregationCache.java | 7 +-
.../hbase/coprocessor/CoprocessorFilter.java | 2 +-
.../CoprocessorTupleFilterTranslator.java | 2 +-
.../endpoint/EndpointAggregationCache.java | 2 +-
.../endpoint/EndpointAggregators.java | 11 +-
.../endpoint/EndpointTupleIterator.java | 9 +-
.../hbase/coprocessor/endpoint/IIEndpoint.java | 2 +-
.../observer/AggregationScanner.java | 2 +-
.../observer/ObserverAggregationCache.java | 2 +-
.../observer/ObserverAggregators.java | 24 +-
.../coprocessor/observer/ObserverEnabler.java | 2 +-
.../coprocessor/observer/ObserverTuple.java | 2 +-
.../kylin/storage/hybrid/HybridInstance.java | 31 +-
.../storage/hybrid/HybridStorageEngine.java | 2 +-
.../org/apache/kylin/storage/tuple/Tuple.java | 252 -----------
.../apache/kylin/storage/tuple/TupleInfo.java | 120 ------
.../filter/BitMapFilterEvaluatorTest.java | 2 +-
.../kylin/storage/filter/FilterBaseTest.java | 4 +-
.../storage/filter/FilterEvaluateTest.java | 2 +-
.../kylin/storage/filter/FilterPerfTest.java | 4 +-
.../storage/hbase/ColumnValueRangeTest.java | 2 +-
.../endpoint/EndpointAggregationTest.java | 2 +-
169 files changed, 4764 insertions(+), 3644 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/common/src/main/java/org/apache/kylin/common/KylinConfig.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/kylin/common/KylinConfig.java b/common/src/main/java/org/apache/kylin/common/KylinConfig.java
index 7816487..8a123dc 100644
--- a/common/src/main/java/org/apache/kylin/common/KylinConfig.java
+++ b/common/src/main/java/org/apache/kylin/common/KylinConfig.java
@@ -45,7 +45,7 @@ import com.google.common.collect.Sets;
* @author yangli9
*/
public class KylinConfig {
-
+
/*
* NO NEED TO DEFINE PUBLIC CONSTANTS FOR KEY NAMES!
*
@@ -272,7 +272,7 @@ public class KylinConfig {
// ============================================================================
public boolean isHiveReroutingEnabled() {
- return Boolean.parseBoolean(getOptional("kylin.route.hive.enabled", "false"));
+ return Boolean.parseBoolean(getOptional("kylin.route.hive.enabled", "false"));
}
public String getHiveRerouteUrl() {
@@ -336,7 +336,7 @@ public class KylinConfig {
}
return getFileName(kylinHome + File.separator + "lib", JOB_JAR_NAME_PATTERN);
}
-
+
public String getKylinJobMRLibDir() {
return getOptional("kylin.job.mr.lib.dir", "");
}
@@ -499,7 +499,7 @@ public class KylinConfig {
public int getHBaseKeyValueSize() {
return Integer.parseInt(this.getOptional("kylin.hbase.client.keyvalue.maxsize", "10485760"));
}
-
+
public int getHBaseScanCacheRows() {
return Integer.parseInt(this.getOptional("kylin.hbase.scan.cache_rows", "1024"));
}
@@ -507,7 +507,11 @@ public class KylinConfig {
public int getHBaseScanMaxResultSize() {
return Integer.parseInt(this.getOptional("kylin.hbase.scan.max_result_size", "" + (5 * 1024 * 1024))); // 5 MB
}
-
+
+ public boolean isQueryIgnoreUnknownFunction() {
+ return Boolean.parseBoolean(this.getOptional("kylin.query.ignore_unknown_function", "false"));
+ }
+
public String getHbaseDefaultCompressionCodec() {
return getOptional(HTABLE_DEFAULT_COMPRESSION_CODEC, "");
@@ -704,26 +708,26 @@ public class KylinConfig {
public int getHBaseRegionCountMax() {
return Integer.parseInt(getOptional(HBASE_REGION_COUNT_MAX, "500"));
}
-
+
public int getHBaseRegionCut(String capacity) {
String cut;
switch (capacity) {
- case "SMALL":
- cut = getProperty(HBASE_REGION_CUT_SMALL, "10");
- break;
- case "MEDIUM":
- cut = getProperty(HBASE_REGION_CUT_MEDIUM, "20");
- break;
- case "LARGE":
- cut = getProperty(HBASE_REGION_CUT_LARGE, "100");
- break;
- default:
- throw new IllegalArgumentException("Capacity not recognized: " + capacity);
+ case "SMALL":
+ cut = getProperty(HBASE_REGION_CUT_SMALL, "10");
+ break;
+ case "MEDIUM":
+ cut = getProperty(HBASE_REGION_CUT_MEDIUM, "20");
+ break;
+ case "LARGE":
+ cut = getProperty(HBASE_REGION_CUT_LARGE, "100");
+ break;
+ default:
+ throw new IllegalArgumentException("Capacity not recognized: " + capacity);
}
return Integer.valueOf(cut);
}
-
+
public String getProperty(String key, String defaultValue) {
return kylinConfig.getString(key, defaultValue);
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/common/src/main/java/org/apache/kylin/common/hll/HyperLogLogPlusCounter.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/kylin/common/hll/HyperLogLogPlusCounter.java b/common/src/main/java/org/apache/kylin/common/hll/HyperLogLogPlusCounter.java
index 5ee323e..d37d6db 100644
--- a/common/src/main/java/org/apache/kylin/common/hll/HyperLogLogPlusCounter.java
+++ b/common/src/main/java/org/apache/kylin/common/hll/HyperLogLogPlusCounter.java
@@ -6,19 +6,20 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- */
+*/
package org.apache.kylin.common.hll;
import java.io.IOException;
+import java.io.Serializable;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.Arrays;
@@ -30,13 +31,14 @@ import com.google.common.hash.Hashing;
/**
* About compression, test on HLLC data shows
- *
+ *
* - LZF compression ratio is around 65%-80%, fast
* - GZIP compression ratio is around 41%-46%, very slow
- *
+ *
* @author yangli9
*/
-public class HyperLogLogPlusCounter implements Comparable<HyperLogLogPlusCounter> {
+@SuppressWarnings("serial")
+public class HyperLogLogPlusCounter implements Serializable, Comparable<HyperLogLogPlusCounter> {
private final int p;
private final int m;
@@ -108,10 +110,6 @@ public class HyperLogLogPlusCounter implements Comparable<HyperLogLogPlusCounter
return new HLLCSnapshot(this).getCountEstimate();
}
- public int getMemBytes() {
- return 12 + m;
- }
-
public double getErrorRate() {
return 1.04 / Math.sqrt(m);
}
@@ -125,6 +123,11 @@ public class HyperLogLogPlusCounter implements Comparable<HyperLogLogPlusCounter
return size;
}
+ @Override
+ public String toString() {
+ return "" + getCountEstimate();
+ }
+
// ============================================================================
// a memory efficient snapshot of HLL registers which can yield count
@@ -176,7 +179,7 @@ public class HyperLogLogPlusCounter implements Comparable<HyperLogLogPlusCounter
// decide output scheme -- map (3*size bytes) or array (2^p bytes)
byte scheme;
- if ((indexLen + 1) * size < m)
+ if (5 + (indexLen + 1) * size < m) // 5 is max len of vint
scheme = 0; // map
else
scheme = 1; // array
@@ -186,15 +189,14 @@ public class HyperLogLogPlusCounter implements Comparable<HyperLogLogPlusCounter
BytesUtil.writeVInt(size, out);
for (int i = 0; i < m; i++) {
if (registers[i] > 0) {
- BytesUtil.writeUnsigned(i, indexLen, out);
+ writeUnsigned(i, indexLen, out);
out.put(registers[i]);
}
}
- } else { // array scheme
- for (int i = 0; i < m; i++) {
- out.put(registers[i]);
- }
- }
+ } else if (scheme == 1) { // array scheme
+ out.put(registers);
+ } else
+ throw new IllegalStateException();
}
public void readRegisters(ByteBuffer in) throws IOException {
@@ -207,12 +209,34 @@ public class HyperLogLogPlusCounter implements Comparable<HyperLogLogPlusCounter
throw new IllegalArgumentException("register size (" + size + ") cannot be larger than m (" + m + ")");
int indexLen = getRegisterIndexSize();
for (int i = 0; i < size; i++) {
- int key = BytesUtil.readUnsigned(in, indexLen);
+ int key = readUnsigned(in, indexLen);
registers[key] = in.get();
}
- } else { // array scheme
+ } else if (scheme == 1) { // array scheme
in.get(registers);
+ } else
+ throw new IllegalStateException();
+ }
+
+ public int peekLength(ByteBuffer in) {
+ int mark = in.position();
+ int len;
+
+ byte scheme = in.get();
+ if (scheme == 0) { // map scheme
+ int size = BytesUtil.readVInt(in);
+ int indexLen = getRegisterIndexSize();
+ len = in.position() - mark + (indexLen + 1) * size;
+ } else {
+ len = in.position() - mark + m;
}
+
+ in.position(mark);
+ return len;
+ }
+
+ public int maxLength() {
+ return 1 + m;
}
public void writeRegistersArray(final ByteBuffer out) {
@@ -288,4 +312,29 @@ public class HyperLogLogPlusCounter implements Comparable<HyperLogLogPlusCounter
System.out.println("HLLC" + p + ",\t" + size + " bytes,\t68% err<" + er + "%" + ",\t95% err<" + er2 + "%" + ",\t99.7% err<" + er3 + "%");
}
}
+
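+ /**
+ * Writes the lowest <code>size</code> bytes of <code>num</code> to <code>out</code>, least significant byte first.
+ * @param num the integer to write
+ * @param size the number of bytes to write
+ * @param out the buffer to write into
+ */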
+ public static void writeUnsigned(int num, int size, ByteBuffer out) {
+ for (int i = 0; i < size; i++) {
+ out.put((byte) num);
+ num >>>= 8;
+ }
+ }
+
+ public static int readUnsigned(ByteBuffer in, int size) {
+ int integer = 0;
+ int mask = 0xff;
+ int shift = 0;
+ for (int i = 0; i < size; i++) {
+ integer |= (in.get() << shift) & mask;
+ mask = mask << 8;
+ shift += 8;
+ }
+ return integer;
+ }
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/common/src/main/java/org/apache/kylin/common/util/BytesUtil.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/kylin/common/util/BytesUtil.java b/common/src/main/java/org/apache/kylin/common/util/BytesUtil.java
index 54e5871..03da261 100644
--- a/common/src/main/java/org/apache/kylin/common/util/BytesUtil.java
+++ b/common/src/main/java/org/apache/kylin/common/util/BytesUtil.java
@@ -230,6 +230,8 @@ public class BytesUtil {
return bytes == null ? null : Bytes.toString(bytes);
}
+
+
public static void writeAsciiString(String str, ByteBuffer out) {
if (str == null) {
BytesUtil.writeVInt(-1, out);
@@ -320,6 +322,20 @@ public class BytesUtil {
return array;
}
+
+ public static int peekByteArrayLength(ByteBuffer in) {
+ int start = in.position();
+ int arrayLen = readVInt(in);
+ int sizeLen = in.position() - start;
+ in.position(start);
+
+ if (arrayLen < 0)
+ return sizeLen;
+ else
+ return sizeLen + arrayLen;
+ }
+
+
public static void writeBooleanArray(boolean[] array, ByteBuffer out) {
if (array == null) {
writeVInt(-1, out);
http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/common/src/main/java/org/apache/kylin/common/util/Dictionary.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/kylin/common/util/Dictionary.java b/common/src/main/java/org/apache/kylin/common/util/Dictionary.java
new file mode 100644
index 0000000..0168609
--- /dev/null
+++ b/common/src/main/java/org/apache/kylin/common/util/Dictionary.java
@@ -0,0 +1,199 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kylin.common.util;
+
+import java.io.PrintStream;
+import java.io.UnsupportedEncodingException;
+
+import org.apache.hadoop.io.Writable;
+import org.apache.kylin.common.util.BytesUtil;
+
+/**
+ * A bidirectional dictionary that maps dimension/column values to IDs and vice
+ * versa. By storing IDs instead of real values, the size of a cube is
+ * significantly reduced.
+ *
+ * - IDs are the smallest integers possible for the cardinality of a column, to
+ * minimize storage space <br>
+ * - IDs preserve the ordering of values, so range queries can be applied to IDs directly
+ *
+ * Once built, a dictionary is immutable. This allows an optimal memory footprint,
+ * e.g. by flattening the Trie structure into a byte array and replacing node pointers
+ * with array offsets.
+ *
+ * @author yangli9
+ */
+abstract public class Dictionary<T> implements Writable {
+
+ public static final byte NULL = (byte) 0xff;
+
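+ // Indexed by ID size in bytes: the ID with all bits set to 1 (e.g. 0xff for a 1-byte ID) is reserved for the NULL value. NOTE(review): the 4-byte entry repeats 0xffffff -- confirm 0xffffffff was not intended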
+ public static final int NULL_ID[] = new int[] { 0, 0xff, 0xffff, 0xffffff, 0xffffff };
+
+ abstract public int getMinId();
+
+ abstract public int getMaxId();
+
+ public int getSize() {
+ return getMaxId() - getMinId() + 1;
+ }
+
+ /**
+ * @return the size of an ID in bytes, determined by the cardinality of
+ * column
+ */
+ abstract public int getSizeOfId();
+
+ /**
+ * @return the (maximum) size of value in bytes, determined by the longest
+ * value of column
+ */
+ abstract public int getSizeOfValue();
+
+ /**
+ * Convenient form of <code>getIdFromValue(value, 0)</code>
+ */
+ final public int getIdFromValue(T value) throws IllegalArgumentException {
+ return getIdFromValue(value, 0);
+ }
+
+ /**
+ * Returns the integer ID of the given value. If the value is not found:
+ * <p>
+ * - if roundingFlag=0, throws IllegalArgumentException; <br>
+ * - if roundingFlag&lt;0, returns the closest smaller ID, if one exists; <br>
+ * - if roundingFlag&gt;0, returns the closest bigger ID, if one exists. <br>
+ * <p>
+ * The implementation often has cache, thus faster than the byte[] version getIdFromValueBytes()
+ *
+ * @throws IllegalArgumentException
+ * if value is not found in dictionary and rounding is off or
+ * failed
+ */
+ final public int getIdFromValue(T value, int roundingFlag) throws IllegalArgumentException {
+ if (isNullObjectForm(value))
+ return nullId();
+ else
+ return getIdFromValueImpl(value, roundingFlag);
+ }
+
+ protected boolean isNullObjectForm(T value) {
+ return value == null;
+ }
+
+ abstract protected int getIdFromValueImpl(T value, int roundingFlag);
+
+ /**
+ * @return the value corresponding to the given ID, or null if the ID is the null ID
+ * @throws IllegalArgumentException
+ * if ID is not found in dictionary
+ */
+ final public T getValueFromId(int id) {
+ if (isNullId(id))
+ return null;
+ else
+ return getValueFromIdImpl(id);
+ }
+
+ abstract protected T getValueFromIdImpl(int id);
+
+ /**
+ * Convenient form of
+ * <code>getIdFromValueBytes(value, offset, len, 0)</code>
+ */
+ final public int getIdFromValueBytes(byte[] value, int offset, int len) {
+ return getIdFromValueBytes(value, offset, len, 0);
+ }
+
+ /**
+ * A lower-level API that returns the integer ID from raw value bytes. If the value is not found:
+ * <p>
+ * - if roundingFlag=0, throws IllegalArgumentException; <br>
+ * - if roundingFlag&lt;0, returns the closest smaller ID, if one exists; <br>
+ * - if roundingFlag&gt;0, returns the closest bigger ID, if one exists. <br>
+ * <p>
+ * Bypassing the cache layer, this could be significantly slower than getIdFromValue(T value).
+ *
+ * @throws IllegalArgumentException
+ * if value is not found in dictionary and rounding is off or failed
+ */
+ final public int getIdFromValueBytes(byte[] value, int offset, int len, int roundingFlag) {
+ if (isNullByteForm(value, offset, len))
+ return nullId();
+ else
+ return getIdFromValueBytesImpl(value, offset, len, roundingFlag);
+ }
+
+ protected boolean isNullByteForm(byte[] value, int offset, int len) {
+ return value == null;
+ }
+
+ abstract protected int getIdFromValueBytesImpl(byte[] value, int offset, int len, int roundingFlag);
+
+ /**
+ * A lower-level API that writes the value bytes for the given ID into returnValue
+ * and returns the number of bytes written. Bypassing the cache layer, this could be
+ * significantly slower than getValueFromId(int id).
+ *
+ * @return size of value bytes, 0 if empty string, -1 if null
+ *
+ * @throws IllegalArgumentException
+ * if ID is not found in dictionary
+ */
+ final public int getValueBytesFromId(int id, byte[] returnValue, int offset) {
+ if (isNullId(id))
+ return -1;
+ else
+ return getValueBytesFromIdImpl(id, returnValue, offset);
+ }
+
+ abstract protected int getValueBytesFromIdImpl(int id, byte[] returnValue, int offset);
+
+ abstract public void dump(PrintStream out);
+
+ public int nullId() {
+ return NULL_ID[getSizeOfId()];
+ }
+
+ public boolean isNullId(int id) {
+ int nullId = NULL_ID[getSizeOfId()];
+ return (nullId & id) == nullId;
+ }
+
+ /** utility that converts a dictionary ID to string, preserving order */
+ public static String dictIdToString(byte[] idBytes, int offset, int length) {
+ try {
+ return new String(idBytes, offset, length, "ISO-8859-1");
+ } catch (UnsupportedEncodingException e) {
+ // never happen
+ return null;
+ }
+ }
+
+ /** the reverse of dictIdToString(), returns integer ID */
+ public static int stringToDictId(String str) {
+ try {
+ byte[] bytes = str.getBytes("ISO-8859-1");
+ return BytesUtil.readUnsigned(bytes, 0, bytes.length);
+ } catch (UnsupportedEncodingException e) {
+ // never happen
+ return 0;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/main/java/org/apache/kylin/cube/CubeCapabilityChecker.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/org/apache/kylin/cube/CubeCapabilityChecker.java b/cube/src/main/java/org/apache/kylin/cube/CubeCapabilityChecker.java
index e713774..5668030 100644
--- a/cube/src/main/java/org/apache/kylin/cube/CubeCapabilityChecker.java
+++ b/cube/src/main/java/org/apache/kylin/cube/CubeCapabilityChecker.java
@@ -6,84 +6,113 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- */
+*/
package org.apache.kylin.cube;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.HashSet;
+import java.util.Iterator;
import java.util.List;
+import java.util.Set;
import org.apache.kylin.cube.model.CubeDesc;
import org.apache.kylin.cube.model.DimensionDesc;
+import org.apache.kylin.measure.MeasureType;
+import org.apache.kylin.measure.basic.BasicMeasureType;
import org.apache.kylin.metadata.model.FunctionDesc;
import org.apache.kylin.metadata.model.JoinDesc;
+import org.apache.kylin.metadata.model.MeasureDesc;
import org.apache.kylin.metadata.model.TblColRef;
+import org.apache.kylin.metadata.realization.CapabilityResult;
+import org.apache.kylin.metadata.realization.CapabilityResult.CapabilityInfluence;
import org.apache.kylin.metadata.realization.SQLDigest;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import com.google.common.collect.Sets;
+
/**
- * Created by Hongbin Ma(Binmahone) on 1/8/15.
*/
public class CubeCapabilityChecker {
private static final Logger logger = LoggerFactory.getLogger(CubeCapabilityChecker.class);
- public static boolean check(CubeInstance cube, SQLDigest digest, boolean allowWeekMatch) {
+ public static CapabilityResult check(CubeInstance cube, SQLDigest digest) {
+ CapabilityResult result = new CapabilityResult();
+ result.capable = false;
- // retrieve members from olapContext
- Collection<TblColRef> dimensionColumns = CubeDimensionDeriver.getDimensionColumns(digest.groupbyColumns, digest.filterColumns);
- Collection<FunctionDesc> functions = digest.aggregations;
- Collection<TblColRef> metricsColumns = digest.metricColumns;
- Collection<JoinDesc> joins = digest.joinDescs;
+ // match joins
+ boolean isJoinMatch = isJoinMatch(digest.joinDescs, cube);
+ if (!isJoinMatch) {
+ logger.info("Exclude cube " + cube.getName() + " because unmatched joins");
+ return result;
+ }
- // match dimensions & aggregations & joins
+ // dimensions & measures
+ Collection<TblColRef> dimensionColumns = getDimensionColumns(digest);
+ Collection<FunctionDesc> aggrFunctions = digest.aggregations;
+ Collection<TblColRef> unmatchedDimensions = unmatchedDimensions(dimensionColumns, cube);
+ Collection<FunctionDesc> unmatchedAggregations = unmatchedAggregations(aggrFunctions, cube);
- boolean isOnline = cube.isReady();
+ // try custom measure types
+ if (!unmatchedDimensions.isEmpty() || !unmatchedAggregations.isEmpty()) {
+ tryCustomMeasureTypes(unmatchedDimensions, unmatchedAggregations, digest, cube, result);
+ }
- boolean matchDimensions = isMatchedWithDimensions(dimensionColumns, cube);
- boolean matchAggregation = isMatchedWithAggregations(functions, cube);
- boolean matchJoin = isMatchedWithJoins(joins, cube);
+ // try dimension-as-measure
+ if (!unmatchedAggregations.isEmpty()) {
+ tryDimensionAsMeasures(unmatchedAggregations, digest, cube, result);
+ }
- // Some cubes are not "perfectly" match, but still save them in case of usage
- if (allowWeekMatch && isOnline && matchDimensions && !matchAggregation && matchJoin) {
- // sometimes metrics are indeed dimensions
- // e.g. select min(cal_dt) from ..., where cal_dt is actually a dimension
- if (isWeaklyMatchedWithAggregations(functions, metricsColumns, cube)) {
- logger.info("Weakly matched cube found " + cube.getName());
- return true;
- }
+ if (!unmatchedDimensions.isEmpty()) {
+ logger.info("Exclude cube " + cube.getName() + " because unmatched dimensions");
+ return result;
}
- if (!isOnline || !matchDimensions || !matchAggregation || !matchJoin) {
- logger.info("Exclude cube " + cube.getName() + " because " + " isOnlne=" + isOnline + ",matchDimensions=" + matchDimensions + ",matchAggregation=" + matchAggregation + ",matchJoin=" + matchJoin);
- return false;
+ if (!unmatchedAggregations.isEmpty()) {
+ logger.info("Exclude cube " + cube.getName() + " because unmatched aggregations");
+ return result;
}
- return true;
+ // cost is left for the caller to take into account
+ result.capable = true;
+ return result;
}
- private static boolean isMatchedWithDimensions(Collection<TblColRef> dimensionColumns, CubeInstance cube) {
+ private static Collection<TblColRef> getDimensionColumns(SQLDigest sqlDigest) {
+ Collection<TblColRef> groupByColumns = sqlDigest.groupbyColumns;
+ Collection<TblColRef> filterColumns = sqlDigest.filterColumns;
+
+ Collection<TblColRef> dimensionColumns = new HashSet<TblColRef>();
+ dimensionColumns.addAll(groupByColumns);
+ dimensionColumns.addAll(filterColumns);
+ return dimensionColumns;
+ }
+
+ private static Set<TblColRef> unmatchedDimensions(Collection<TblColRef> dimensionColumns, CubeInstance cube) {
+ HashSet<TblColRef> result = Sets.newHashSet(dimensionColumns);
CubeDesc cubeDesc = cube.getDescriptor();
- boolean matchAgg = cubeDesc.listDimensionColumnsIncludingDerived().containsAll(dimensionColumns);
- return matchAgg;
+ result.removeAll(cubeDesc.listDimensionColumnsIncludingDerived());
+ return result;
}
- private static boolean isMatchedWithAggregations(Collection<FunctionDesc> aggregations, CubeInstance cube) {
+ private static Set<FunctionDesc> unmatchedAggregations(Collection<FunctionDesc> aggregations, CubeInstance cube) {
+ HashSet<FunctionDesc> result = Sets.newHashSet(aggregations);
CubeDesc cubeDesc = cube.getDescriptor();
- boolean matchAgg = cubeDesc.listAllFunctions().containsAll(aggregations);
- return matchAgg;
+ result.removeAll(cubeDesc.listAllFunctions());
+ return result;
}
- private static boolean isMatchedWithJoins(Collection<JoinDesc> joins, CubeInstance cube) {
+ private static boolean isJoinMatch(Collection<JoinDesc> joins, CubeInstance cube) {
CubeDesc cubeDesc = cube.getDescriptor();
List<JoinDesc> cubeJoins = new ArrayList<JoinDesc>(cubeDesc.getDimensions().size());
@@ -118,30 +147,50 @@ public class CubeCapabilityChecker {
return true;
}
- private static boolean isWeaklyMatchedWithAggregations(Collection<FunctionDesc> aggregations, Collection<TblColRef> metricColumns, CubeInstance cube) {
+ private static void tryDimensionAsMeasures(Collection<FunctionDesc> unmatchedAggregations, SQLDigest digest, CubeInstance cube, CapabilityResult result) {
CubeDesc cubeDesc = cube.getDescriptor();
Collection<FunctionDesc> cubeFuncs = cubeDesc.listAllFunctions();
- boolean matched = true;
- for (FunctionDesc functionDesc : aggregations) {
- if (cubeFuncs.contains(functionDesc))
+ Iterator<FunctionDesc> it = unmatchedAggregations.iterator();
+ while (it.hasNext()) {
+ FunctionDesc functionDesc = it.next();
+
+ if (cubeFuncs.contains(functionDesc)) {
+ it.remove();
continue;
+ }
- // only inverted-index cube does not have count, and let calcite handle in this case
- if (functionDesc.isCount())
+ // let calcite handle count
+ if (functionDesc.isCount()) {
+ it.remove();
continue;
+ }
- if (functionDesc.isCountDistinct()) // calcite can not handle distinct count
- matched = false;
+ // calcite can do aggregation from columns on-the-fly
+ List<TblColRef> neededCols = functionDesc.getParameter().getColRefs();
+ if (neededCols.size() > 0 && cubeDesc.listDimensionColumnsIncludingDerived().containsAll(neededCols)) {
+ result.influences.add(new CapabilityResult.DimensionAsMeasure(functionDesc));
+ it.remove();
+ continue;
+ }
+ }
+ }
- TblColRef col = null;
- if (functionDesc.getParameter().getColRefs().size() > 0)
- col = functionDesc.getParameter().getColRefs().get(0);
+ // custom measure types can cover unmatched dimensions or measures
+ private static void tryCustomMeasureTypes(Collection<TblColRef> unmatchedDimensions, Collection<FunctionDesc> unmatchedAggregations, SQLDigest digest, CubeInstance cube, CapabilityResult result) {
+ CubeDesc cubeDesc = cube.getDescriptor();
+ for (MeasureDesc measure : cubeDesc.getMeasures()) {
+ if (unmatchedDimensions.isEmpty() && unmatchedAggregations.isEmpty())
+ break;
- if (col == null || !cubeDesc.listDimensionColumnsIncludingDerived().contains(col)) {
- matched = false;
- }
+ MeasureType<?> measureType = measure.getFunction().getMeasureType();
+ if (measureType instanceof BasicMeasureType)
+ continue;
+
+ CapabilityInfluence inf = measureType.influenceCapabilityCheck(unmatchedDimensions, unmatchedAggregations, digest, measure);
+ if (inf != null)
+ result.influences.add(inf);
}
- return matched;
}
+
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/main/java/org/apache/kylin/cube/CubeInstance.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/org/apache/kylin/cube/CubeInstance.java b/cube/src/main/java/org/apache/kylin/cube/CubeInstance.java
index 81a64e8..7773351 100644
--- a/cube/src/main/java/org/apache/kylin/cube/CubeInstance.java
+++ b/cube/src/main/java/org/apache/kylin/cube/CubeInstance.java
@@ -28,6 +28,7 @@ import org.apache.kylin.cube.model.CubeDesc;
import org.apache.kylin.metadata.model.MeasureDesc;
import org.apache.kylin.metadata.model.SegmentStatusEnum;
import org.apache.kylin.metadata.model.TblColRef;
+import org.apache.kylin.metadata.realization.CapabilityResult;
import org.apache.kylin.metadata.realization.IRealization;
import org.apache.kylin.metadata.realization.RealizationStatusEnum;
import org.apache.kylin.metadata.realization.RealizationType;
@@ -321,12 +322,20 @@ public class CubeInstance extends RootPersistentEntity implements IRealization {
}
@Override
- public boolean isCapable(SQLDigest digest) {
- return CubeCapabilityChecker.check(this, digest, true);
+ public CapabilityResult isCapable(SQLDigest digest) {
+ CapabilityResult result = CubeCapabilityChecker.check(this, digest);
+ if (result.capable) {
+ result.cost = getCost(digest);
+ for (CapabilityResult.CapabilityInfluence i : result.influences) {
+ result.cost *= (i.suggestCostMultiplier() == 0) ? 1.0 : i.suggestCostMultiplier();
+ }
+ } else {
+ result.cost = -1;
+ }
+ return result;
}
- @Override
- public int getCost(SQLDigest digest) {
+ private int getCost(SQLDigest digest) {
return cost;
}
@@ -353,9 +362,6 @@ public class CubeInstance extends RootPersistentEntity implements IRealization {
if (!this.getDescriptor().getModel().getPartitionDesc().isPartitioned())
return false;
- if (this.getDescriptor().hasHolisticCountDistinctMeasures())
- return false;
-
return this.getDescriptor().getAutoMergeTimeRanges() != null && this.getDescriptor().getAutoMergeTimeRanges().length > 0;
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/main/java/org/apache/kylin/cube/CubeManager.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/org/apache/kylin/cube/CubeManager.java b/cube/src/main/java/org/apache/kylin/cube/CubeManager.java
index b3eeeef..d3bbf59 100644
--- a/cube/src/main/java/org/apache/kylin/cube/CubeManager.java
+++ b/cube/src/main/java/org/apache/kylin/cube/CubeManager.java
@@ -36,11 +36,11 @@ import org.apache.kylin.common.persistence.ResourceStore;
import org.apache.kylin.common.persistence.Serializer;
import org.apache.kylin.common.restclient.Broadcaster;
import org.apache.kylin.common.restclient.CaseInsensitiveStringCache;
+import org.apache.kylin.common.util.Dictionary;
import org.apache.kylin.common.util.Pair;
import org.apache.kylin.cube.cuboid.Cuboid;
import org.apache.kylin.cube.model.CubeDesc;
import org.apache.kylin.cube.model.DimensionDesc;
-import org.apache.kylin.dict.Dictionary;
import org.apache.kylin.dict.DictionaryInfo;
import org.apache.kylin.dict.DictionaryManager;
import org.apache.kylin.dict.lookup.HiveTable;
@@ -176,7 +176,7 @@ public class CubeManager implements IRealizationProvider {
/**
* return null if no dictionary for given column
*/
- public Dictionary<?> getDictionary(CubeSegment cubeSeg, TblColRef col) {
+ public Dictionary<String> getDictionary(CubeSegment cubeSeg, TblColRef col) {
DictionaryInfo info = null;
try {
DictionaryManager dictMgr = getDictionaryManager();
@@ -193,7 +193,7 @@ public class CubeManager implements IRealizationProvider {
throw new IllegalStateException("Failed to get dictionary for cube segment" + cubeSeg + ", col" + col, e);
}
- return info.getDictionaryObject();
+ return (Dictionary<String>) info.getDictionaryObject();
}
public SnapshotTable buildSnapshotTable(CubeSegment cubeSeg, String lookupTable) throws IOException {
http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/main/java/org/apache/kylin/cube/CubeSegment.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/org/apache/kylin/cube/CubeSegment.java b/cube/src/main/java/org/apache/kylin/cube/CubeSegment.java
index 5db3251..12f0bf7 100644
--- a/cube/src/main/java/org/apache/kylin/cube/CubeSegment.java
+++ b/cube/src/main/java/org/apache/kylin/cube/CubeSegment.java
@@ -24,8 +24,9 @@ import java.util.Map;
import java.util.TimeZone;
import java.util.concurrent.ConcurrentHashMap;
+import com.google.common.collect.Maps;
import org.apache.kylin.cube.model.CubeDesc;
-import org.apache.kylin.dict.Dictionary;
+import org.apache.kylin.common.util.Dictionary;
import org.apache.kylin.dict.ISegment;
import org.apache.kylin.metadata.model.SegmentStatusEnum;
import org.apache.kylin.metadata.model.TblColRef;
@@ -261,10 +262,19 @@ public class CubeSegment implements Comparable<CubeSegment>, ISegment {
}
@Override
- public Dictionary<?> getDictionary(TblColRef col) {
+ public Dictionary<String> getDictionary(TblColRef col) {
return CubeManager.getInstance(this.getCubeInstance().getConfig()).getDictionary(this, col);
}
+ public Map<TblColRef, Dictionary<String>> buildDictionaryMap() {
+ Map<TblColRef, Dictionary<String>> result = Maps.newHashMap();
+ for (TblColRef col : getCubeDesc().getAllColumnsNeedDictionary()) {
+ result.put(col, (Dictionary<String>) getDictionary(col));
+ }
+ return result;
+ }
+
+
public void validate() {
if (cubeInstance.getDescriptor().getModel().getPartitionDesc().isPartitioned() && dateRangeStart >= dateRangeEnd)
throw new IllegalStateException("dateRangeStart(" + dateRangeStart + ") must be greater than dateRangeEnd(" + dateRangeEnd + ") in segment " + this);
http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/main/java/org/apache/kylin/cube/estimation/CubeSizeEstimationCLI.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/org/apache/kylin/cube/estimation/CubeSizeEstimationCLI.java b/cube/src/main/java/org/apache/kylin/cube/estimation/CubeSizeEstimationCLI.java
index 1172d66..188b157 100644
--- a/cube/src/main/java/org/apache/kylin/cube/estimation/CubeSizeEstimationCLI.java
+++ b/cube/src/main/java/org/apache/kylin/cube/estimation/CubeSizeEstimationCLI.java
@@ -34,7 +34,7 @@ import org.apache.kylin.cube.model.DimensionDesc;
import org.apache.kylin.cube.model.HierarchyDesc;
import org.apache.kylin.cube.model.RowKeyColDesc;
import org.apache.kylin.cube.model.RowKeyDesc;
-import org.apache.kylin.metadata.model.DataType;
+import org.apache.kylin.metadata.datatype.DataType;
import org.apache.kylin.metadata.model.MeasureDesc;
/**
@@ -140,7 +140,7 @@ public class CubeSizeEstimationCLI {
int space = 0;
for (MeasureDesc measureDesc : cubeDesc.getMeasures()) {
DataType returnType = measureDesc.getFunction().getReturnDataType();
- space += returnType.getSpaceEstimate();
+ space += returnType.getStorageBytesEstimate();
}
return space;
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/main/java/org/apache/kylin/cube/kv/AbstractRowKeyEncoder.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/org/apache/kylin/cube/kv/AbstractRowKeyEncoder.java b/cube/src/main/java/org/apache/kylin/cube/kv/AbstractRowKeyEncoder.java
index c923b33..532950b 100644
--- a/cube/src/main/java/org/apache/kylin/cube/kv/AbstractRowKeyEncoder.java
+++ b/cube/src/main/java/org/apache/kylin/cube/kv/AbstractRowKeyEncoder.java
@@ -22,7 +22,7 @@ import java.util.Map;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.cube.cuboid.Cuboid;
-import org.apache.kylin.dict.Dictionary;
+import org.apache.kylin.common.util.Dictionary;
import org.apache.kylin.metadata.model.TblColRef;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java b/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java
index 1a84abb..00ecd46 100644
--- a/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java
+++ b/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java
@@ -22,7 +22,7 @@ import java.util.Arrays;
import org.apache.kylin.common.util.Bytes;
import org.apache.kylin.common.util.BytesUtil;
-import org.apache.kylin.dict.Dictionary;
+import org.apache.kylin.common.util.Dictionary;
import org.apache.kylin.dict.ISegment;
import org.apache.kylin.metadata.model.TblColRef;
import org.slf4j.Logger;
http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnOrder.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnOrder.java b/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnOrder.java
index 7a64132..64bc813 100644
--- a/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnOrder.java
+++ b/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnOrder.java
@@ -21,7 +21,7 @@ package org.apache.kylin.cube.kv;
import java.util.Collection;
import java.util.Comparator;
-import org.apache.kylin.metadata.model.DataType;
+import org.apache.kylin.metadata.datatype.DataType;
/**
* @author yangli9
http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/main/java/org/apache/kylin/cube/kv/RowValueDecoder.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/org/apache/kylin/cube/kv/RowValueDecoder.java b/cube/src/main/java/org/apache/kylin/cube/kv/RowValueDecoder.java
index f90a88d..5fe4e2e 100644
--- a/cube/src/main/java/org/apache/kylin/cube/kv/RowValueDecoder.java
+++ b/cube/src/main/java/org/apache/kylin/cube/kv/RowValueDecoder.java
@@ -25,11 +25,9 @@ import java.util.Collection;
import java.util.List;
import org.apache.hadoop.io.DoubleWritable;
-import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.kylin.cube.model.HBaseColumnDesc;
-import org.apache.kylin.metadata.measure.MeasureCodec;
+import org.apache.kylin.measure.MeasureCodec;
import org.apache.kylin.metadata.model.FunctionDesc;
import org.apache.kylin.metadata.model.MeasureDesc;
@@ -80,14 +78,19 @@ public class RowValueDecoder implements Cloneable {
for (int i = 0; i < mapredObjs.length; i++) {
Object o = mapredObjs[i];
+ // if (o instanceof LongWritable)
+ // o = ((LongWritable) o).get();
+ // else if (o instanceof IntWritable)
+ // o = ((IntWritable) o).get();
+ // else if (o instanceof DoubleWritable)
+ // o = ((DoubleWritable) o).get();
+ // else if (o instanceof FloatWritable)
+ // o = ((FloatWritable) o).get();
+
if (o instanceof LongWritable)
o = ((LongWritable) o).get();
- else if (o instanceof IntWritable)
- o = ((IntWritable) o).get();
else if (o instanceof DoubleWritable)
o = ((DoubleWritable) o).get();
- else if (o instanceof FloatWritable)
- o = ((FloatWritable) o).get();
results[i] = o;
}
@@ -117,22 +120,20 @@ public class RowValueDecoder implements Cloneable {
return measures;
}
- public boolean hasMemHungryCountDistinct() {
+ public boolean hasMemHungryMeasures() {
for (int i = projectionIndex.nextSetBit(0); i >= 0; i = projectionIndex.nextSetBit(i + 1)) {
FunctionDesc func = measures[i].getFunction();
- if (func.isCountDistinct() && !func.isHolisticCountDistinct()) {
+ if (func.getMeasureType().isMemoryHungry())
return true;
- }
}
return false;
}
- public static boolean hasMemHungryCountDistinct(Collection<RowValueDecoder> rowValueDecoders) {
+ public static boolean hasMemHungryMeasures(Collection<RowValueDecoder> rowValueDecoders) {
for (RowValueDecoder decoder : rowValueDecoders) {
- if (decoder.hasMemHungryCountDistinct())
+ if (decoder.hasMemHungryMeasures())
return true;
}
return false;
}
-
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java b/cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
index 8fd2c4e..3a9df54 100644
--- a/cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
+++ b/cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
@@ -34,7 +34,6 @@ import java.util.Map.Entry;
import java.util.Set;
import org.apache.commons.lang.ArrayUtils;
-import org.apache.commons.lang.StringUtils;
import org.apache.commons.net.util.Base64;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.persistence.ResourceStore;
@@ -42,6 +41,7 @@ import org.apache.kylin.common.persistence.RootPersistentEntity;
import org.apache.kylin.common.util.Array;
import org.apache.kylin.common.util.CaseInsensitiveStringMap;
import org.apache.kylin.common.util.JsonUtil;
+import org.apache.kylin.measure.MeasureType;
import org.apache.kylin.metadata.MetadataConstants;
import org.apache.kylin.metadata.MetadataManager;
import org.apache.kylin.metadata.model.ColumnDesc;
@@ -562,7 +562,7 @@ public class CubeDesc extends RootPersistentEntity {
private void initDerivedMap(TblColRef[] hostCols, DeriveType type, DimensionDesc dimension, TblColRef[] derivedCols, String[] extra) {
if (hostCols.length == 0 || derivedCols.length == 0)
throw new IllegalStateException("host/derived columns must not be empty");
-
+
// Although FK derives PK automatically, user unaware of this can declare PK as derived dimension explicitly.
// In that case, derivedCols[] will contain a FK which is transformed from the PK by initDimensionColRef().
// Must drop FK from derivedCols[] before continue.
@@ -641,25 +641,22 @@ public class CubeDesc extends RootPersistentEntity {
f.setExpression(f.getExpression().toUpperCase());
f.initReturnDataType();
- ParameterDesc p = f.getParameter();
- p.normalizeColumnValue();
+ for (ParameterDesc p = f.getParameter(); p != null; p = p.getNextParameter()) {
+ p.setValue(p.getValue().toUpperCase());
+ }
- if (p.isColumnType()) {
- ArrayList<TblColRef> colRefs = Lists.newArrayList();
- for (String cName : p.getValue().split("\\s*,\\s*")) {
- ColumnDesc sourceColumn = factTable.findColumnByName(cName);
+ ArrayList<TblColRef> colRefs = Lists.newArrayList();
+ for (ParameterDesc p = f.getParameter(); p != null; p = p.getNextParameter()) {
+ if (p.isColumnType()) {
+ ColumnDesc sourceColumn = factTable.findColumnByName(p.getValue());
TblColRef colRef = new TblColRef(sourceColumn);
colRefs.add(colRef);
allColumns.add(colRef);
}
- if (colRefs.isEmpty() == false)
- p.setColRefs(colRefs);
}
- // verify holistic count distinct as a dependent measure
- if (m.isHolisticCountDistinct() && StringUtils.isBlank(m.getDependentMeasureRef())) {
- throw new IllegalStateException(m + " is a holistic count distinct but it has no DependentMeasureRef defined!");
- }
+ f.getParameter().setColRefs(colRefs);
+
}
}
@@ -730,15 +727,6 @@ public class CubeDesc extends RootPersistentEntity {
}
}
- public boolean hasHolisticCountDistinctMeasures() {
- for (MeasureDesc measure : measures) {
- if (measure.getFunction().isHolisticCountDistinct()) {
- return true;
- }
- }
- return false;
- }
-
/**
* Add error info and thrown exception out
*
@@ -794,4 +782,31 @@ public class CubeDesc extends RootPersistentEntity {
this.autoMergeTimeRanges = autoMergeTimeRanges;
}
+ public List<TblColRef> getAllColumnsNeedDictionary() {
+ List<TblColRef> result = Lists.newArrayList();
+
+ for (RowKeyColDesc rowKeyColDesc : rowkey.getRowKeyColumns()) {
+ TblColRef colRef = rowKeyColDesc.getColRef();
+ if (rowkey.isUseDictionary(colRef)) {
+ result.add(colRef);
+ }
+ }
+
+ for (MeasureDesc measure : measures) {
+ MeasureType<?> aggrType = measure.getFunction().getMeasureType();
+ result.addAll(aggrType.getColumnsNeedDictionary(measure.getFunction()));
+ }
+ return result;
+ }
+
+ public boolean hasMemoryHungryMeasures() {
+ for (MeasureDesc measure : measures) {
+ if (measure.getFunction().getMeasureType().isMemoryHungry()) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/main/java/org/apache/kylin/cube/model/v1/CubeDesc.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/org/apache/kylin/cube/model/v1/CubeDesc.java b/cube/src/main/java/org/apache/kylin/cube/model/v1/CubeDesc.java
index b11056a..41d75c2 100644
--- a/cube/src/main/java/org/apache/kylin/cube/model/v1/CubeDesc.java
+++ b/cube/src/main/java/org/apache/kylin/cube/model/v1/CubeDesc.java
@@ -718,7 +718,6 @@ public class CubeDesc extends RootPersistentEntity {
f.initReturnDataType();
ParameterDesc p = f.getParameter();
- p.normalizeColumnValue();
if (p.isColumnType()) {
ArrayList<TblColRef> colRefs = Lists.newArrayList();
@@ -801,14 +800,6 @@ public class CubeDesc extends RootPersistentEntity {
}
}
- public boolean hasHolisticCountDistinctMeasures() {
- for (MeasureDesc measure : measures) {
- if (measure.getFunction().isHolisticCountDistinct()) {
- return true;
- }
- }
- return false;
- }
/**
* Add error info and thrown exception out
http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/main/java/org/apache/kylin/cube/model/validation/rule/FunctionRule.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/org/apache/kylin/cube/model/validation/rule/FunctionRule.java b/cube/src/main/java/org/apache/kylin/cube/model/validation/rule/FunctionRule.java
index a271ab5..d7d9f13 100644
--- a/cube/src/main/java/org/apache/kylin/cube/model/validation/rule/FunctionRule.java
+++ b/cube/src/main/java/org/apache/kylin/cube/model/validation/rule/FunctionRule.java
@@ -25,14 +25,12 @@ import java.util.List;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
-import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.cube.model.CubeDesc;
import org.apache.kylin.cube.model.validation.IValidatorRule;
import org.apache.kylin.cube.model.validation.ResultLevel;
import org.apache.kylin.cube.model.validation.ValidateContext;
import org.apache.kylin.metadata.MetadataManager;
import org.apache.kylin.metadata.model.ColumnDesc;
-import org.apache.kylin.metadata.model.DataType;
import org.apache.kylin.metadata.model.FunctionDesc;
import org.apache.kylin.metadata.model.MeasureDesc;
import org.apache.kylin.metadata.model.ParameterDesc;
@@ -94,7 +92,11 @@ public class FunctionRule implements IValidatorRule<CubeDesc> {
} else if (StringUtils.equals(FunctionDesc.PARAMTER_TYPE_CONSTANT, type)) {
validateCostantParameter(context, cube, value);
}
- validateReturnType(context, cube, func);
+ try {
+ func.getMeasureType().validate(func);
+ } catch (IllegalArgumentException ex) {
+ context.addResult(ResultLevel.ERROR, ex.getMessage());
+ }
if (func.isCount())
countFuncs.add(func);
@@ -105,31 +107,6 @@ public class FunctionRule implements IValidatorRule<CubeDesc> {
}
}
- private void validateReturnType(ValidateContext context, CubeDesc cube, FunctionDesc funcDesc) {
-
- String func = funcDesc.getExpression();
- DataType rtype = funcDesc.getReturnDataType();
-
- if (funcDesc.isCount()) {
- if (rtype.isIntegerFamily() == false) {
- context.addResult(ResultLevel.ERROR, "Return type for function " + func + " must be one of " + DataType.INTEGER_FAMILY);
- }
- } else if (funcDesc.isCountDistinct()) {
- if (rtype.isHLLC() == false && funcDesc.isHolisticCountDistinct() == false) {
- context.addResult(ResultLevel.ERROR, "Return type for function " + func + " must be hllc(10), hllc(12) etc.");
- }
- } else if (funcDesc.isMax() || funcDesc.isMin() || funcDesc.isSum()) {
- if (rtype.isNumberFamily() == false) {
- context.addResult(ResultLevel.ERROR, "Return type for function " + func + " must be one of " + DataType.NUMBER_FAMILY);
- }
- } else {
- if (StringUtils.equalsIgnoreCase(KylinConfig.getInstanceFromEnv().getProperty(KEY_IGNORE_UNKNOWN_FUNC, "false"), "false")) {
- context.addResult(ResultLevel.ERROR, "Unrecognized function: [" + func + "]");
- }
- }
-
- }
-
/**
* @param context
* @param cube
http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java
----------------------------------------------------------------------
diff --git a/cube/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java b/cube/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java
index a076938..f093032 100644
--- a/cube/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java
+++ b/cube/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java
@@ -26,7 +26,7 @@ import java.util.HashSet;
import org.apache.kylin.common.util.JsonUtil;
import org.apache.kylin.common.util.LocalFileMetadataTestCase;
import org.apache.kylin.cube.model.CubeDesc;
-import org.apache.kylin.dict.Dictionary;
+import org.apache.kylin.common.util.Dictionary;
import org.apache.kylin.dict.DictionaryInfo;
import org.apache.kylin.dict.DictionaryManager;
import org.apache.kylin.metadata.model.TblColRef;
http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/test/java/org/apache/kylin/cube/kv/RowValueDecoderTest.java
----------------------------------------------------------------------
diff --git a/cube/src/test/java/org/apache/kylin/cube/kv/RowValueDecoderTest.java b/cube/src/test/java/org/apache/kylin/cube/kv/RowValueDecoderTest.java
index b4aa238..a00afef 100644
--- a/cube/src/test/java/org/apache/kylin/cube/kv/RowValueDecoderTest.java
+++ b/cube/src/test/java/org/apache/kylin/cube/kv/RowValueDecoderTest.java
@@ -30,8 +30,8 @@ import org.apache.kylin.common.util.LocalFileMetadataTestCase;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.model.CubeDesc;
import org.apache.kylin.cube.model.HBaseColumnDesc;
+import org.apache.kylin.measure.MeasureCodec;
import org.apache.kylin.metadata.MetadataManager;
-import org.apache.kylin.metadata.measure.MeasureCodec;
import org.apache.kylin.metadata.model.FunctionDesc;
import org.apache.kylin.metadata.model.MeasureDesc;
import org.junit.After;
@@ -63,7 +63,7 @@ public class RowValueDecoderTest extends LocalFileMetadataTestCase {
LongWritable count = new LongWritable(2);
LongWritable item_count = new LongWritable(99999);
ByteBuffer buf = ByteBuffer.allocate(RowConstants.ROWVALUE_BUFFER_SIZE);
- codec.encode(new Object[] { sum, min, max, count,item_count }, buf);
+ codec.encode(new Object[] { sum, min, max, count, item_count }, buf);
buf.flip();
byte[] valueBytes = new byte[buf.limit()];
@@ -81,7 +81,7 @@ public class RowValueDecoderTest extends LocalFileMetadataTestCase {
Object[] measureValues = rowValueDecoder.getValues();
assertEquals("[PRICE, MIN_PRICE_, MAX_PRICE_, COUNT__, ITEM_COUNT]", measureNames.toString());
- assertEquals("[333.1234567, 333.1111111, 333.1999999, 2, 99999]", Arrays.toString(measureValues));
+ assertEquals("[333.1235, 333.1111, 333.2000, 2, 99999]", Arrays.toString(measureValues));
}
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/test/java/org/apache/kylin/metadata/measure/MeasureCodecTest.java
----------------------------------------------------------------------
diff --git a/cube/src/test/java/org/apache/kylin/metadata/measure/MeasureCodecTest.java b/cube/src/test/java/org/apache/kylin/metadata/measure/MeasureCodecTest.java
index 3ed6094..0870914 100644
--- a/cube/src/test/java/org/apache/kylin/metadata/measure/MeasureCodecTest.java
+++ b/cube/src/test/java/org/apache/kylin/metadata/measure/MeasureCodecTest.java
@@ -28,6 +28,7 @@ import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.kylin.common.hll.HyperLogLogPlusCounter;
import org.apache.kylin.cube.kv.RowConstants;
+import org.apache.kylin.measure.MeasureCodec;
import org.apache.kylin.metadata.model.FunctionDesc;
import org.apache.kylin.metadata.model.MeasureDesc;
import org.junit.Test;
@@ -45,7 +46,7 @@ public class MeasureCodecTest {
DoubleWritable d = new DoubleWritable(1.0);
LongWritable l = new LongWritable(2);
- BigDecimal b = new BigDecimal("333.1234567");
+ BigDecimal b = new BigDecimal("333.1234");
HyperLogLogPlusCounter hllc = new HyperLogLogPlusCounter(16);
hllc.add("1234567");
hllc.add("abcdefg");
http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/test/resources/data/TEST1_desc.json
----------------------------------------------------------------------
diff --git a/cube/src/test/resources/data/TEST1_desc.json b/cube/src/test/resources/data/TEST1_desc.json
index 082a3cf..0373496 100644
--- a/cube/src/test/resources/data/TEST1_desc.json
+++ b/cube/src/test/resources/data/TEST1_desc.json
@@ -127,7 +127,12 @@
"expression" : "COUNT_DISTINCT",
"parameter" : {
"type" : "column",
- "value" : "LSTG_FORMAT_NAME,SELLER_ID"
+ "value" : "LSTG_FORMAT_NAME",
+ "next_parameter" : {
+ "type" : "column",
+ "value" : "SELLER_ID",
+ "next_parameter" : null
+ }
},
"returntype" : "hllc10"
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/test/resources/data/TEST2_desc.json
----------------------------------------------------------------------
diff --git a/cube/src/test/resources/data/TEST2_desc.json b/cube/src/test/resources/data/TEST2_desc.json
index 90bd806..de6bfe0 100644
--- a/cube/src/test/resources/data/TEST2_desc.json
+++ b/cube/src/test/resources/data/TEST2_desc.json
@@ -112,7 +112,12 @@
"expression" : "COUNT_DISTINCT",
"parameter" : {
"type" : "column",
- "value" : "LSTG_FORMAT_NAME,SELLER_ID"
+ "value" : "LSTG_FORMAT_NAME",
+ "next_parameter" : {
+ "type" : "column",
+ "value" : "SELLER_ID",
+ "next_parameter" : null
+ }
},
"returntype" : "hllc10"
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/cube/src/test/resources/data/TEST3_desc.json
----------------------------------------------------------------------
diff --git a/cube/src/test/resources/data/TEST3_desc.json b/cube/src/test/resources/data/TEST3_desc.json
index 4b0836b..54f0cf9 100644
--- a/cube/src/test/resources/data/TEST3_desc.json
+++ b/cube/src/test/resources/data/TEST3_desc.json
@@ -1,182 +1,268 @@
{
- "uuid" : "9c53506d-d7b9-4ad4-b3b1-53ea42673c4b",
- "last_modified" : 1401429176099,
- "name" : "TEST1_desc",
+ "uuid": "9c53506d-d7b9-4ad4-b3b1-53ea42673c4b",
+ "last_modified": 1401429176099,
+ "name": "TEST1_desc",
"model_name": "TEST1_model_desc",
- "dimensions" : [ {
- "id" : 1,
- "name" : "CAL_DT",
- "join" : {
- "type" : "inner",
- "primary_key" : [ "CAL_DT" ],
- "foreign_key" : [ "CAL_DT" ]
+ "dimensions": [
+ {
+ "id": 1,
+ "name": "CAL_DT",
+ "join": {
+ "type": "inner",
+ "primary_key": [
+ "CAL_DT"
+ ],
+ "foreign_key": [
+ "CAL_DT"
+ ]
+ },
+ "hierarchy": null,
+ "table": "TEST_CAL_DT",
+ "column": [
+ "TEST_CAL_DT.CAL_DT"
+ ],
+ "datatype": "date",
+ "derived": [
+ "WEEK_BEG_DT"
+ ]
},
- "hierarchy" : null,
- "table" : "TEST_CAL_DT",
- "column" : ["TEST_CAL_DT.CAL_DT"],
- "datatype" : "date",
- "derived" : [ "WEEK_BEG_DT" ]
- }, {
- "id" : 2,
- "name" : "CATEGORY",
- "join" : {
- "type" : "inner",
- "primary_key" : [ "LEAF_CATEG_ID", "SITE_ID" ],
- "foreign_key" : [ "LEAF_CATEG_ID", "LSTG_SITE_ID" ]
+ {
+ "id": 2,
+ "name": "CATEGORY",
+ "join": {
+ "type": "inner",
+ "primary_key": [
+ "LEAF_CATEG_ID",
+ "SITE_ID"
+ ],
+ "foreign_key": [
+ "LEAF_CATEG_ID",
+ "LSTG_SITE_ID"
+ ]
+ },
+ "hierarchy": true,
+ "table": "TEST_CATEGORY_GROUPINGS",
+ "column": [
+ "TEST_CATEGORY_GROUPINGS.META_CATEG_NAME",
+ "TEST_CATEGORY_GROUPINGS.CATEG_LVL2_NAME",
+ "TEST_CATEGORY_GROUPINGS.CATEG_LVL3_NAME"
+ ],
+ "derived": null
},
- "hierarchy" : true,
- "table" : "TEST_CATEGORY_GROUPINGS",
- "column" : ["TEST_CATEGORY_GROUPINGS.META_CATEG_NAME", "TEST_CATEGORY_GROUPINGS.CATEG_LVL2_NAME", "TEST_CATEGORY_GROUPINGS.CATEG_LVL3_NAME"],
- "derived" : null
- }, {
- "id" : 3,
- "name" : "LSTG_FORMAT_NAME",
- "join" : null,
- "hierarchy" : null,
- "table" : "TEST_KYLIN_FACT",
- "column" : ["TEST_KYLIN_FACT.LSTG_FORMAT_NAME"],
- "datatype" : "string",
- "derived" : null
- }, {
- "id" : 4,
- "name" : "SITE_ID",
- "join" : {
- "type" : "inner",
- "primary_key" : [ "SITE_ID" ],
- "foreign_key" : [ "LSTG_SITE_ID" ]
+ {
+ "id": 3,
+ "name": "LSTG_FORMAT_NAME",
+ "join": null,
+ "hierarchy": null,
+ "table": "TEST_KYLIN_FACT",
+ "column": [
+ "TEST_KYLIN_FACT.LSTG_FORMAT_NAME"
+ ],
+ "datatype": "string",
+ "derived": null
},
- "hierarchy" : null,
- "table" : "TEST_SITES",
- "column" : ["TEST_SITES.SITE_ID"],
- "datatype" : "string",
- "derived" : [ "SITE_NAME", "CRE_USER" ]
- }, {
- "id" : 5,
- "name" : "SELLER_TYPE_CD",
- "join" : {
- "type" : "inner",
- "primary_key" : [ "SELLER_TYPE_CD" ],
- "foreign_key" : [ "SLR_SEGMENT_CD" ]
+ {
+ "id": 4,
+ "name": "SITE_ID",
+ "join": {
+ "type": "inner",
+ "primary_key": [
+ "SITE_ID"
+ ],
+ "foreign_key": [
+ "LSTG_SITE_ID"
+ ]
+ },
+ "hierarchy": null,
+ "table": "TEST_SITES",
+ "column": [
+ "TEST_SITES.SITE_ID"
+ ],
+ "datatype": "string",
+ "derived": [
+ "SITE_NAME",
+ "CRE_USER"
+ ]
},
- "hierarchy" : null,
- "table" : "TEST_SELLER_TYPE_DIM",
- "column" : ["TEST_SELLER_TYPE_DIM.SELLER_TYPE_CD"],
- "datatype" : "string",
- "derived" : [ "SELLER_TYPE_DESC" ]
- } ],
- "measures" : [ {
- "id" : 1,
- "name" : "GMV_SUM",
- "function" : {
- "expression" : "SUM",
- "parameter" : {
- "type" : "column",
- "value" : "PRICE"
+ {
+ "id": 5,
+ "name": "SELLER_TYPE_CD",
+ "join": {
+ "type": "inner",
+ "primary_key": [
+ "SELLER_TYPE_CD"
+ ],
+ "foreign_key": [
+ "SLR_SEGMENT_CD"
+ ]
},
- "returntype" : "decimal"
+ "hierarchy": null,
+ "table": "TEST_SELLER_TYPE_DIM",
+ "column": [
+ "TEST_SELLER_TYPE_DIM.SELLER_TYPE_CD"
+ ],
+ "datatype": "string",
+ "derived": [
+ "SELLER_TYPE_DESC"
+ ]
}
- }, {
- "id" : 2,
- "name" : "GMV_MIN",
- "function" : {
- "expression" : "MIN",
- "parameter" : {
- "type" : "column",
- "value" : "PRICE"
- },
- "returntype" : "decimal"
+ ],
+ "measures": [
+ {
+ "id": 1,
+ "name": "GMV_SUM",
+ "function": {
+ "expression": "SUM",
+ "parameter": {
+ "type": "column",
+ "value": "PRICE"
+ },
+ "returntype": "decimal"
+ }
+ },
+ {
+ "id": 2,
+ "name": "GMV_MIN",
+ "function": {
+ "expression": "MIN",
+ "parameter": {
+ "type": "column",
+ "value": "PRICE"
+ },
+ "returntype": "decimal"
+ }
+ },
+ {
+ "id": 3,
+ "name": "GMV_MAX",
+ "function": {
+ "expression": "MAX",
+ "parameter": {
+ "type": "column",
+ "value": "PRICE"
+ },
+ "returntype": "decimal"
+ }
+ },
+ {
+ "id": 4,
+ "name": "TRANS_CNT",
+ "function": {
+ "expression": "COUNT",
+ "parameter": {
+ "type": "constant",
+ "value": "1"
+ },
+ "returntype": "long"
+ }
+ },
+ {
+ "id": 5,
+ "name": "SELLER_CNT",
+ "function": {
+ "expression": "COUNT_DISTINCT",
+ "parameter": {
+ "type": "column",
+ "value": "SELLER_ID"
+ },
+ "returntype": "hllc10"
+ }
+ },
+ {
+ "id": 6,
+ "name": "SELLER_FORMAT_CNT",
+ "function": {
+ "expression": "COUNT_DISTINCT",
+ "parameter": {
+ "type": "column",
+ "value": "LSTG_FORMAT_NAME",
+ "next_parameter": {
+ "type": "column",
+ "value": "SELLER_ID",
+ "next_parameter": null
+ }
+ },
+ "returntype": "hllc10"
+ }
}
- }, {
- "id" : 3,
- "name" : "GMV_MAX",
- "function" : {
- "expression" : "MAX",
- "parameter" : {
- "type" : "column",
- "value" : "PRICE"
+ ],
+ "rowkey": {
+ "rowkey_columns": [
+ {
+ "column": "CAL_DT",
+ "length": 10,
+ "dictionary": "date(yyyy-mm-dd)",
+ "mandatory": false
},
- "returntype" : "decimal"
- }
- }, {
- "id" : 4,
- "name" : "TRANS_CNT",
- "function" : {
- "expression" : "COUNT",
- "parameter" : {
- "type" : "constant",
- "value" : "1"
+ {
+ "column": "META_CATEG_NAME",
+ "length": 0,
+ "dictionary": "string",
+ "mandatory": false
},
- "returntype" : "long"
- }
- }, {
- "id" : 5,
- "name" : "SELLER_CNT",
- "function" : {
- "expression" : "COUNT_DISTINCT",
- "parameter" : {
- "type" : "column",
- "value" : "SELLER_ID"
+ {
+ "column": "CATEG_LVL2_NAME",
+ "length": 0,
+ "dictionary": "string",
+ "mandatory": false
},
- "returntype" : "hllc10"
- }
- }, {
- "id" : 6,
- "name" : "SELLER_FORMAT_CNT",
- "function" : {
- "expression" : "COUNT_DISTINCT",
- "parameter" : {
- "type" : "column",
- "value" : "LSTG_FORMAT_NAME,SELLER_ID"
+ {
+ "column": "CATEG_LVL3_NAME",
+ "length": 0,
+ "dictionary": "string",
+ "mandatory": false
},
- "returntype" : "hllc10"
- }
- } ],
- "rowkey" : {
- "rowkey_columns" : [ {
- "column" : "CAL_DT",
- "length" : 10,
- "dictionary" : "date(yyyy-mm-dd)",
- "mandatory" : false
- }, {
- "column" : "META_CATEG_NAME",
- "length" : 0,
- "dictionary" : "string",
- "mandatory" : false
- }, {
- "column" : "CATEG_LVL2_NAME",
- "length" : 0,
- "dictionary" : "string",
- "mandatory" : false
- }, {
- "column" : "CATEG_LVL3_NAME",
- "length" : 0,
- "dictionary" : "string",
- "mandatory" : false
- }, {
- "column" : "LSTG_FORMAT_NAME",
- "length" : 12,
- "dictionary" : null,
- "mandatory" : false
- }, {
- "column" : "SITE_ID",
- "length" : 0,
- "dictionary" : "string",
- "mandatory" : false
- }, {
- "column" : "SELLER_TYPE_CD",
- "length" : 0,
- "dictionary" : "string",
- "mandatory" : false
- } ],
- "aggregation_groups" : [ [ "META_CATEG_NAME", "CATEG_LVL3_NAME", "CATEG_LVL2_NAME", "CAL_DT" ], [ "LSTG_FORMAT_NAME", "SITE_ID", "SELLER_TYPE_CD" ] ]
+ {
+ "column": "LSTG_FORMAT_NAME",
+ "length": 12,
+ "dictionary": null,
+ "mandatory": false
+ },
+ {
+ "column": "SITE_ID",
+ "length": 0,
+ "dictionary": "string",
+ "mandatory": false
+ },
+ {
+ "column": "SELLER_TYPE_CD",
+ "length": 0,
+ "dictionary": "string",
+ "mandatory": false
+ }
+ ],
+ "aggregation_groups": [
+ [
+ "META_CATEG_NAME",
+ "CATEG_LVL3_NAME",
+ "CATEG_LVL2_NAME",
+ "CAL_DT"
+ ],
+ [
+ "LSTG_FORMAT_NAME",
+ "SITE_ID",
+ "SELLER_TYPE_CD"
+ ]
+ ]
},
- "hbase_mapping" : {
- "column_family" : [ {
- "name" : "F1",
- "columns" : [ {
- "qualifier" : "M",
- "measure_refs" : [ "GMV_SUM", "GMV_MIN", "GMV_MAX", "TRANS_CNT", "SELLER_CNT", "SELLER_FORMAT_CNT" ]
- } ]
- } ]
+ "hbase_mapping": {
+ "column_family": [
+ {
+ "name": "F1",
+ "columns": [
+ {
+ "qualifier": "M",
+ "measure_refs": [
+ "GMV_SUM",
+ "GMV_MIN",
+ "GMV_MAX",
+ "TRANS_CNT",
+ "SELLER_CNT",
+ "SELLER_FORMAT_CNT"
+ ]
+ }
+ ]
+ }
+ ]
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
----------------------------------------------------------------------
diff --git a/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java b/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
index 5702be9..14086c6 100644
--- a/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
+++ b/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
@@ -27,6 +27,7 @@ import java.util.Date;
import org.apache.commons.lang.StringUtils;
import org.apache.kylin.common.util.DateFormat;
+import org.apache.kylin.common.util.Dictionary;
/**
* A dictionary for date string (date only, no time).
http://git-wip-us.apache.org/repos/asf/kylin/blob/c721d679/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
----------------------------------------------------------------------
diff --git a/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java b/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
deleted file mode 100644
index e99a553..0000000
--- a/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
+++ /dev/null
@@ -1,199 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.kylin.dict;
-
-import java.io.PrintStream;
-import java.io.UnsupportedEncodingException;
-
-import org.apache.hadoop.io.Writable;
-import org.apache.kylin.common.util.BytesUtil;
-
-/**
- * A bi-way dictionary that maps from dimension/column values to IDs and vice
- * versa. By storing IDs instead of real values, the size of cube is
- * significantly reduced.
- *
- * - IDs are smallest integers possible for the cardinality of a column, for the
- * purpose of minimal storage space - IDs preserve ordering of values, such that
- * range query can be applied to IDs directly
- *
- * A dictionary once built, is immutable. This allows optimal memory footprint
- * by e.g. flatten the Trie structure into a byte array, replacing node pointers
- * with array offsets.
- *
- * @author yangli9
- */
-abstract public class Dictionary<T> implements Writable {
-
- public static final byte NULL = (byte) 0xff;
-
- // ID with all bit-1 (0xff e.g.) reserved for NULL value
- public static final int NULL_ID[] = new int[] { 0, 0xff, 0xffff, 0xffffff, 0xffffff };
-
- abstract public int getMinId();
-
- abstract public int getMaxId();
-
- public int getSize() {
- return getMaxId() - getMinId() + 1;
- }
-
- /**
- * @return the size of an ID in bytes, determined by the cardinality of
- * column
- */
- abstract public int getSizeOfId();
-
- /**
- * @return the (maximum) size of value in bytes, determined by the longest
- * value of column
- */
- abstract public int getSizeOfValue();
-
- /**
- * Convenient form of <code>getIdFromValue(value, 0)</code>
- */
- final public int getIdFromValue(T value) throws IllegalArgumentException {
- return getIdFromValue(value, 0);
- }
-
- /**
- * Returns the ID integer of given value. In case of not found
- * <p>
- * - if roundingFlag=0, throw IllegalArgumentException; <br>
- * - if roundingFlag<0, the closest smaller ID integer if exist; <br>
- * - if roundingFlag>0, the closest bigger ID integer if exist. <br>
- * <p>
- * The implementation often has cache, thus faster than the byte[] version getIdFromValueBytes()
- *
- * @throws IllegalArgumentException
- * if value is not found in dictionary and rounding is off or
- * failed
- */
- final public int getIdFromValue(T value, int roundingFlag) throws IllegalArgumentException {
- if (isNullObjectForm(value))
- return nullId();
- else
- return getIdFromValueImpl(value, roundingFlag);
- }
-
- protected boolean isNullObjectForm(T value) {
- return value == null;
- }
-
- abstract protected int getIdFromValueImpl(T value, int roundingFlag);
-
- /**
- * @return the value corresponds to the given ID
- * @throws IllegalArgumentException
- * if ID is not found in dictionary
- */
- final public T getValueFromId(int id) {
- if (isNullId(id))
- return null;
- else
- return getValueFromIdImpl(id);
- }
-
- abstract protected T getValueFromIdImpl(int id);
-
- /**
- * Convenient form of
- * <code>getIdFromValueBytes(value, offset, len, 0)</code>
- */
- final public int getIdFromValueBytes(byte[] value, int offset, int len) {
- return getIdFromValueBytes(value, offset, len, 0);
- }
-
- /**
- * A lower level API, return ID integer from raw value bytes. In case of not found
- * <p>
- * - if roundingFlag=0, throw IllegalArgumentException; <br>
- * - if roundingFlag<0, the closest smaller ID integer if exist; <br>
- * - if roundingFlag>0, the closest bigger ID integer if exist. <br>
- * <p>
- * Bypassing the cache layer, this could be significantly slower than getIdFromValue(T value).
- *
- * @throws IllegalArgumentException
- * if value is not found in dictionary and rounding is off or failed
- */
- final public int getIdFromValueBytes(byte[] value, int offset, int len, int roundingFlag) {
- if (isNullByteForm(value, offset, len))
- return nullId();
- else
- return getIdFromValueBytesImpl(value, offset, len, roundingFlag);
- }
-
- protected boolean isNullByteForm(byte[] value, int offset, int len) {
- return value == null;
- }
-
- abstract protected int getIdFromValueBytesImpl(byte[] value, int offset, int len, int roundingFlag);
-
- /**
- * A lower level API, get byte values from ID, return the number of bytes
- * written. Bypassing the cache layer, this could be significantly slower
- * than getIdFromValue(T value).
- *
- * @return size of value bytes, 0 if empty string, -1 if null
- *
- * @throws IllegalArgumentException
- * if ID is not found in dictionary
- */
- final public int getValueBytesFromId(int id, byte[] returnValue, int offset) {
- if (isNullId(id))
- return -1;
- else
- return getValueBytesFromIdImpl(id, returnValue, offset);
- }
-
- abstract protected int getValueBytesFromIdImpl(int id, byte[] returnValue, int offset);
-
- abstract public void dump(PrintStream out);
-
- public int nullId() {
- return NULL_ID[getSizeOfId()];
- }
-
- public boolean isNullId(int id) {
- int nullId = NULL_ID[getSizeOfId()];
- return (nullId & id) == nullId;
- }
-
- /** utility that converts a dictionary ID to string, preserving order */
- public static String dictIdToString(byte[] idBytes, int offset, int length) {
- try {
- return new String(idBytes, offset, length, "ISO-8859-1");
- } catch (UnsupportedEncodingException e) {
- // never happen
- return null;
- }
- }
-
- /** the reverse of dictIdToString(), returns integer ID */
- public static int stringToDictId(String str) {
- try {
- byte[] bytes = str.getBytes("ISO-8859-1");
- return BytesUtil.readUnsigned(bytes, 0, bytes.length);
- } catch (UnsupportedEncodingException e) {
- // never happen
- return 0;
- }
- }
-}