You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2016/06/05 01:13:03 UTC
[6/7] kylin git commit: KYLIN-1379 More stable and functional precise count distinct implements after KYLIN-1186
KYLIN-1379 More stable and functional precise count distinct implements after KYLIN-1186
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/5198b877
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/5198b877
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/5198b877
Branch: refs/heads/KYLIN-1705-CI
Commit: 5198b877e7cbedd361756dd9cd5addbef46d1ed8
Parents: 59aaeb3
Author: sunyerui <su...@gmail.com>
Authored: Sat May 28 17:50:54 2016 +0800
Committer: Yang Li <li...@apache.org>
Committed: Sun Jun 5 08:25:49 2016 +0800
----------------------------------------------------------------------
.../validation/rule/DictionaryRuleTest.java | 8 +-
.../kylin/dict/AppendTrieDictionaryTest.java | 18 +
.../kylin/measure/bitmap/BitmapCounter.java | 5 +
.../kylin/measure/bitmap/BitmapMeasureType.java | 82 +-
...t_kylin_cube_without_slr_left_join_desc.json | 22 +-
.../flatten_data_for_without_slr_left_join.csv | 804 +++++++++----------
.../apache/kylin/query/ITKylinQueryTest.java | 2 +-
.../query/sql_distinct_precisely/query03.sql | 1 +
.../query/sql_distinct_precisely/query04.sql | 1 +
.../coprocessor/endpoint/CubeVisitService.java | 6 +-
10 files changed, 508 insertions(+), 441 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/5198b877/core-cube/src/test/java/org/apache/kylin/cube/model/validation/rule/DictionaryRuleTest.java
----------------------------------------------------------------------
diff --git a/core-cube/src/test/java/org/apache/kylin/cube/model/validation/rule/DictionaryRuleTest.java b/core-cube/src/test/java/org/apache/kylin/cube/model/validation/rule/DictionaryRuleTest.java
index ba58d40..99ca7cf 100644
--- a/core-cube/src/test/java/org/apache/kylin/cube/model/validation/rule/DictionaryRuleTest.java
+++ b/core-cube/src/test/java/org/apache/kylin/cube/model/validation/rule/DictionaryRuleTest.java
@@ -71,16 +71,16 @@ public class DictionaryRuleTest extends LocalFileMetadataTestCase {
@Test
public void testBadDesc() throws IOException {
- testBadDictionaryDesc("Column DEFAULT.TEST_KYLIN_FACT.SELLER_ID has inconsistent builders " +
+ testBadDictionaryDesc("Column EDW.TEST_SITES.SITE_NAME has inconsistent builders " +
"FakeBuilderClass and org.apache.kylin.dict.GlobalDictionaryBuilder",
- DictionaryDesc.create("SELLER_ID", null, "FakeBuilderClass"));
+ DictionaryDesc.create("SITE_NAME", null, "FakeBuilderClass"));
}
@Test
public void testBadDesc2() throws IOException {
- testBadDictionaryDesc("Column DEFAULT.TEST_KYLIN_FACT.SELLER_ID has inconsistent builders " +
+ testBadDictionaryDesc("Column EDW.TEST_SITES.SITE_NAME has inconsistent builders " +
"FakeBuilderClass and org.apache.kylin.dict.GlobalDictionaryBuilder",
- DictionaryDesc.create("lstg_site_id", "SELLER_ID", "FakeBuilderClass"));
+ DictionaryDesc.create("lstg_site_id", "SITE_NAME", "FakeBuilderClass"));
}
@Test
http://git-wip-us.apache.org/repos/asf/kylin/blob/5198b877/core-dictionary/src/test/java/org/apache/kylin/dict/AppendTrieDictionaryTest.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/AppendTrieDictionaryTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/AppendTrieDictionaryTest.java
index 10bbb77..e7603ec 100644
--- a/core-dictionary/src/test/java/org/apache/kylin/dict/AppendTrieDictionaryTest.java
+++ b/core-dictionary/src/test/java/org/apache/kylin/dict/AppendTrieDictionaryTest.java
@@ -1,3 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
package org.apache.kylin.dict;
import org.apache.hadoop.conf.Configuration;
http://git-wip-us.apache.org/repos/asf/kylin/blob/5198b877/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapCounter.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapCounter.java b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapCounter.java
index 6c90275..bbf812a 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapCounter.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapCounter.java
@@ -22,6 +22,7 @@ import org.roaringbitmap.buffer.MutableRoaringBitmap;
import java.io.*;
import java.nio.ByteBuffer;
+import java.util.Iterator;
/**
* Created by sunyerui on 15/12/1.
@@ -75,6 +76,10 @@ public class BitmapCounter implements Comparable<BitmapCounter> {
return this.bitmap.getSizeInBytes();
}
+ public Iterator<Integer> iterator() {
+ return bitmap.iterator();
+ }
+
public void writeRegisters(ByteBuffer out) throws IOException {
ByteArrayOutputStream bos = new ByteArrayOutputStream();
DataOutputStream dos = new DataOutputStream(bos);
http://git-wip-us.apache.org/repos/asf/kylin/blob/5198b877/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java
index 1b0edbf..3b0fe29 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java
@@ -18,6 +18,8 @@
package org.apache.kylin.measure.bitmap;
+import org.apache.kylin.common.util.ByteArray;
+import org.apache.kylin.common.util.BytesUtil;
import org.apache.kylin.common.util.Dictionary;
import org.apache.kylin.measure.MeasureAggregator;
import org.apache.kylin.measure.MeasureIngester;
@@ -30,6 +32,7 @@ import org.apache.kylin.metadata.model.MeasureDesc;
import org.apache.kylin.metadata.model.TblColRef;
import java.util.Collections;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
@@ -76,11 +79,6 @@ public class BitmapMeasureType extends MeasureType<BitmapCounter> {
if (DATATYPE_BITMAP.equals(functionDesc.getReturnDataType().getName()) == false)
throw new IllegalArgumentException("BitmapMeasureType datatype is not " + DATATYPE_BITMAP + " but " + functionDesc.getReturnDataType().getName());
-
- List<TblColRef> colRefs = functionDesc.getParameter().getColRefs();
- if (colRefs.size() != 1 && colRefs.size() != 2) {
- throw new IllegalArgumentException("Bitmap measure need 1 or 2 parameters, but has " + colRefs.size());
- }
}
@Override
@@ -95,25 +93,48 @@ public class BitmapMeasureType extends MeasureType<BitmapCounter> {
@Override
public BitmapCounter valueOf(String[] values, MeasureDesc measureDesc, Map<TblColRef, Dictionary<String>> dictionaryMap) {
- List<TblColRef> literalCols = measureDesc.getFunction().getParameter().getColRefs();
- TblColRef literalCol = null;
- if (literalCols.size() == 1) {
- literalCol = literalCols.get(0);
- } else if (literalCols.size() == 2) {
- literalCol = literalCols.get(1);
- } else {
- throw new IllegalArgumentException("Bitmap measure need 1 or 2 parameters");
- }
- Dictionary<String> dictionary = dictionaryMap.get(literalCol);
BitmapCounter bitmap = current;
bitmap.clear();
- // bitmap measure may have two values due to two parameters, only the first value should be ingested
- if (values != null && values.length > 0 && values[0] != null) {
- int id = dictionary.getIdFromValue(values[0]);
- bitmap.add(id);
+ if (needDictionaryColumn(measureDesc.getFunction())) {
+ TblColRef literalCol = measureDesc.getFunction().getParameter().getColRefs().get(0);
+ Dictionary<String> dictionary = dictionaryMap.get(literalCol);
+ if (values != null && values.length > 0 && values[0] != null) {
+ int id = dictionary.getIdFromValue(values[0]);
+ bitmap.add(id);
+ }
+ } else {
+ for (String value : values) {
+ bitmap.add(value);
+ }
}
return bitmap;
}
+
+ @Override
+ public BitmapCounter reEncodeDictionary(BitmapCounter value, MeasureDesc measureDesc, Map<TblColRef, Dictionary<String>> oldDicts, Map<TblColRef, Dictionary<String>> newDicts) {
+ if (!needDictionaryColumn(measureDesc.getFunction())) {
+ return value;
+ }
+ TblColRef colRef = measureDesc.getFunction().getParameter().getColRefs().get(0);
+ Dictionary<String> sourceDict = oldDicts.get(colRef);
+ Dictionary<String> mergedDict = newDicts.get(colRef);
+
+ BitmapCounter retValue = new BitmapCounter();
+ byte[] literal = new byte[sourceDict.getSizeOfValue()];
+ Iterator<Integer> iterator = value.iterator();
+ while (iterator.hasNext()) {
+ int id = iterator.next();
+ int newId;
+ int size = sourceDict.getValueBytesFromId(id, literal, 0);
+ if (size < 0) {
+ newId = mergedDict.nullId();
+ } else {
+ newId = mergedDict.getIdFromValueBytes(literal, 0, size);
+ }
+ retValue.add(newId);
+ }
+ return retValue;
+ }
};
}
@@ -122,21 +143,12 @@ public class BitmapMeasureType extends MeasureType<BitmapCounter> {
return new BitmapAggregator();
}
- /**
- * generate dict with first col by default, and with second col if specified
- *
- * Typical case: we have col uuid, and another col flag_uuid (if flag==1, uuid, null),
- * the metrics count(distinct uuid) and count(distinct flag_uuid) should both generate dict with uuid, instead of uuid and flag_uuid
- */
@Override
public List<TblColRef> getColumnsNeedDictionary(FunctionDesc functionDesc) {
- List<TblColRef> literalCols = functionDesc.getParameter().getColRefs();
- if (literalCols.size() == 1) {
- return Collections.singletonList(literalCols.get(0));
- } else if (literalCols.size() == 2) {
- return Collections.singletonList(literalCols.get(1));
+ if (needDictionaryColumn(functionDesc)) {
+ return Collections.singletonList(functionDesc.getParameter().getColRefs().get(0));
} else {
- throw new IllegalArgumentException("Bitmap measure need 1 or 2 parameters");
+ return Collections.emptyList();
}
}
@@ -150,4 +162,12 @@ public class BitmapMeasureType extends MeasureType<BitmapCounter> {
return BitmapDistinctCountAggFunc.class;
}
+ // To stay compatible with older versions, tinyint/smallint/int columns use their values directly, without a dictionary
+ private boolean needDictionaryColumn(FunctionDesc functionDesc) {
+ DataType dataType = functionDesc.getParameter().getColRefs().get(0).getType();
+ if (dataType.isIntegerFamily() && !dataType.isBigInt()) {
+ return false;
+ }
+ return true;
+ }
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/5198b877/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json
----------------------------------------------------------------------
diff --git a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json
index 3361a3d..a501bd1 100644
--- a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json
+++ b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json
@@ -112,7 +112,19 @@
"value" : "SELLER_ID",
"next_parameter" : null
},
- "returntype" : "hllc(10)"
+ "returntype" : "bitmap"
+ },
+ "dependent_measure_ref" : null
+ }, {
+ "name" : "SITE_NAME_BITMAP",
+ "function" : {
+ "expression" : "COUNT_DISTINCT",
+ "parameter" : {
+ "type" : "column",
+ "value" : "SITE_NAME",
+ "next_parameter" : null
+ },
+ "returntype" : "bitmap"
},
"dependent_measure_ref" : null
}, {
@@ -196,6 +208,12 @@
},
"dependent_measure_ref" : null
} ],
+ "dictionaries" : [
+ {
+ "column" : "SITE_NAME",
+ "builder": "org.apache.kylin.dict.GlobalDictionaryBuilder"
+ }
+ ],
"rowkey" : {
"rowkey_columns" : [ {
"column" : "cal_dt",
@@ -238,7 +256,7 @@
"name" : "f2",
"columns" : [ {
"qualifier" : "m",
- "measure_refs" : [ "seller_cnt_bitmap", "seller_format_cnt"]
+ "measure_refs" : [ "seller_cnt_bitmap", "site_name_bitmap", "seller_format_cnt"]
} ]
}, {
"name" : "f3",