You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by sh...@apache.org on 2018/06/04 01:15:49 UTC
[kylin] branch master updated: KYLIN-3161 Enforce global dictionary
for bitmap count distinct column(as data type is not int)
This is an automated email from the ASF dual-hosted git repository.
shaofengshi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/master by this push:
new e3dcafb KYLIN-3161 Enforce global dictionary for bitmap count distinct column(as data type is not int)
e3dcafb is described below
commit e3dcafb097d04f50f22994dc722d8bc0736dccbb
Author: Chao Long <wa...@qq.com>
AuthorDate: Wed May 30 14:45:23 2018 +0800
KYLIN-3161 Enforce global dictionary for bitmap count distinct column(as data type is not int)
---
.../cube/model/validation/rule/DictionaryRule.java | 34 +++++++++++++++++++---
.../model/validation/rule/DictionaryRuleTest.java | 16 ++++++++++
.../localmeta/cube_desc/ci_inner_join_cube.json | 2 +-
...test_kylin_cube_without_slr_left_join_desc.json | 13 ++++++++-
4 files changed, 59 insertions(+), 6 deletions(-)
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/validation/rule/DictionaryRule.java b/core-cube/src/main/java/org/apache/kylin/cube/model/validation/rule/DictionaryRule.java
index df1316d..8f73ffb 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/model/validation/rule/DictionaryRule.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/model/validation/rule/DictionaryRule.java
@@ -19,8 +19,10 @@
package org.apache.kylin.cube.model.validation.rule;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
+import java.util.Map;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
@@ -31,6 +33,8 @@ import org.apache.kylin.cube.model.validation.IValidatorRule;
import org.apache.kylin.cube.model.validation.ResultLevel;
import org.apache.kylin.cube.model.validation.ValidateContext;
import org.apache.kylin.dict.GlobalDictionaryBuilder;
+import org.apache.kylin.measure.bitmap.BitmapMeasureType;
+import org.apache.kylin.metadata.model.MeasureDesc;
import org.apache.kylin.metadata.model.TblColRef;
/**
@@ -48,6 +52,8 @@ public class DictionaryRule implements IValidatorRule<CubeDesc> {
static final String ERROR_REUSE_BUILDER_BOTH_EMPTY = "REUSE and BUILDER both empty on dictionary for column: ";
static final String ERROR_TRANSITIVE_REUSE = "Transitive REUSE is not allowed for dictionary: ";
static final String ERROR_GLOBAL_DICTIONNARY_ONLY_MEASURE = "If one column is used for both dimension and precisely count distinct measure, its dimension encoding should not be dict: ";
+ static final String ERROR_GLOBAL_DICTIONNARY_FOR_BITMAP_MEASURE = "For bitmap based count distinct column (as the data type is not int), a Global dictionary is required: ";
+ static final String ERROR_REUSE_GLOBAL_DICTIONNARY_FOR_BITMAP_MEASURE = "If one bitmap based count distinct column (as the data type is not int) REUSE another column, a Global dictionary is required: ";
@Override
public void validate(CubeDesc cubeDesc, ValidateContext context) {
@@ -60,8 +66,13 @@ public class DictionaryRule implements IValidatorRule<CubeDesc> {
}
Set<TblColRef> allDictCols = new HashSet<>();
- Set<TblColRef> baseCols = new HashSet<>(); // col with builder
+ Map<TblColRef, DictionaryDesc> baseCols = new HashMap<>(); // col with builder
List<DictionaryDesc> reuseDictionaries = new ArrayList<>();
+ Map<TblColRef, MeasureDesc> bitmapMeasures = new HashMap<>();
+ for (MeasureDesc measureDesc : cubeDesc.getMeasures()){
+ if (measureDesc.getFunction().getMeasureType() instanceof BitmapMeasureType)
+ bitmapMeasures.put(measureDesc.getFunction().getParameter().getColRef(), measureDesc);
+ }
// first pass
for (DictionaryDesc dictDesc : dictDescs) {
@@ -89,17 +100,32 @@ public class DictionaryRule implements IValidatorRule<CubeDesc> {
return;
}
+ if (StringUtils.isNotEmpty(builderClass) && !builderClass.equalsIgnoreCase(GlobalDictionaryBuilder.class.getName()) && bitmapMeasures.containsKey(dictCol) && !dictCol.getColumnDesc().getType().isIntegerFamily()){
+ context.addResult(ResultLevel.ERROR, ERROR_GLOBAL_DICTIONNARY_FOR_BITMAP_MEASURE + dictCol);
+ return;
+ }
+
if (reuseCol != null) {
reuseDictionaries.add(dictDesc);
} else {
- baseCols.add(dictCol);
+ baseCols.put(dictCol, dictDesc);
}
}
// second pass: check no transitive reuse
for (DictionaryDesc dictDesc : reuseDictionaries) {
- if (!baseCols.contains(dictDesc.getResuseColumnRef())) {
- context.addResult(ResultLevel.ERROR, ERROR_TRANSITIVE_REUSE + dictDesc.getColumnRef());
+ TblColRef dictCol = dictDesc.getColumnRef();
+
+ if (!baseCols.containsKey(dictDesc.getResuseColumnRef())) {
+ context.addResult(ResultLevel.ERROR, ERROR_TRANSITIVE_REUSE + dictCol);
+ return;
+ }
+
+ TblColRef reuseCol = dictDesc.getResuseColumnRef();
+ String reuseBuilderClass = baseCols.get(reuseCol).getBuilderClass();
+
+ if (bitmapMeasures.containsKey(dictCol) && !dictCol.getColumnDesc().getType().isIntegerFamily() && !reuseBuilderClass.equalsIgnoreCase(GlobalDictionaryBuilder.class.getName())){
+ context.addResult(ResultLevel.ERROR, ERROR_REUSE_GLOBAL_DICTIONNARY_FOR_BITMAP_MEASURE + dictCol);
return;
}
}
diff --git a/core-cube/src/test/java/org/apache/kylin/cube/model/validation/rule/DictionaryRuleTest.java b/core-cube/src/test/java/org/apache/kylin/cube/model/validation/rule/DictionaryRuleTest.java
index 0dd9b76..b4ecb01 100644
--- a/core-cube/src/test/java/org/apache/kylin/cube/model/validation/rule/DictionaryRuleTest.java
+++ b/core-cube/src/test/java/org/apache/kylin/cube/model/validation/rule/DictionaryRuleTest.java
@@ -19,9 +19,11 @@
package org.apache.kylin.cube.model.validation.rule;
import static org.apache.kylin.cube.model.validation.rule.DictionaryRule.ERROR_DUPLICATE_DICTIONARY_COLUMN;
+import static org.apache.kylin.cube.model.validation.rule.DictionaryRule.ERROR_GLOBAL_DICTIONNARY_FOR_BITMAP_MEASURE;
import static org.apache.kylin.cube.model.validation.rule.DictionaryRule.ERROR_GLOBAL_DICTIONNARY_ONLY_MEASURE;
import static org.apache.kylin.cube.model.validation.rule.DictionaryRule.ERROR_REUSE_BUILDER_BOTH_EMPTY;
import static org.apache.kylin.cube.model.validation.rule.DictionaryRule.ERROR_REUSE_BUILDER_BOTH_SET;
+import static org.apache.kylin.cube.model.validation.rule.DictionaryRule.ERROR_REUSE_GLOBAL_DICTIONNARY_FOR_BITMAP_MEASURE;
import static org.apache.kylin.cube.model.validation.rule.DictionaryRule.ERROR_TRANSITIVE_REUSE;
import static org.junit.Assert.assertTrue;
@@ -38,6 +40,7 @@ import org.apache.kylin.cube.model.CubeDesc;
import org.apache.kylin.cube.model.DictionaryDesc;
import org.apache.kylin.cube.model.validation.ValidateContext;
import org.apache.kylin.dict.GlobalDictionaryBuilder;
+import org.apache.kylin.dict.global.SegmentAppendTrieDictBuilder;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
@@ -102,6 +105,19 @@ public class DictionaryRuleTest extends LocalFileMetadataTestCase {
}
@Test
+ public void testBadDesc6() throws IOException {
+ testDictionaryDesc(ERROR_GLOBAL_DICTIONNARY_FOR_BITMAP_MEASURE,
+ DictionaryDesc.create("TEST_COUNT_DISTINCT_BITMAP", null, SegmentAppendTrieDictBuilder.class.getName()));
+ }
+
+ @Test
+ public void testBadDesc7() throws IOException {
+ testDictionaryDesc(ERROR_REUSE_GLOBAL_DICTIONNARY_FOR_BITMAP_MEASURE,
+ DictionaryDesc.create("SELLER_ID", null, SegmentAppendTrieDictBuilder.class.getName()),
+ DictionaryDesc.create("TEST_COUNT_DISTINCT_BITMAP", "SELLER_ID", null));
+ }
+
+ @Test
public void testGoodDesc2() throws IOException {
testDictionaryDesc(null, DictionaryDesc.create("SELLER_ID", null, GlobalDictionaryBuilder.class.getName()));
}
diff --git a/examples/test_case_data/localmeta/cube_desc/ci_inner_join_cube.json b/examples/test_case_data/localmeta/cube_desc/ci_inner_join_cube.json
index ec1100a..a1e3202 100644
--- a/examples/test_case_data/localmeta/cube_desc/ci_inner_join_cube.json
+++ b/examples/test_case_data/localmeta/cube_desc/ci_inner_join_cube.json
@@ -323,7 +323,7 @@
"dictionaries": [
{
"column": "TEST_KYLIN_FACT.TEST_COUNT_DISTINCT_BITMAP",
- "builder": "org.apache.kylin.dict.global.SegmentAppendTrieDictBuilder"
+ "builder": "org.apache.kylin.dict.GlobalDictionaryBuilder"
}
],
"rowkey": {
diff --git a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json
index 25b66f2..b035cae 100644
--- a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json
+++ b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json
@@ -127,6 +127,17 @@
"returntype" : "bitmap"
},
"dependent_measure_ref" : null
+ },{
+ "name": "TEST_COUNT_DISTINCT_BITMAP",
+ "function": {
+ "expression": "COUNT_DISTINCT",
+ "parameter": {
+ "type": "column",
+ "value": "TEST_COUNT_DISTINCT_BITMAP"
+ },
+ "returntype": "bitmap"
+ },
+ "dependent_measure_ref" : null
}, {
"name" : "SELLER_FORMAT_CNT",
"function" : {
@@ -255,7 +266,7 @@
"name" : "f2",
"columns" : [ {
"qualifier" : "m",
- "measure_refs" : [ "seller_cnt_bitmap", "user_count_bitmap", "seller_format_cnt"]
+ "measure_refs" : [ "seller_cnt_bitmap", "user_count_bitmap", "TEST_COUNT_DISTINCT_BITMAP", "seller_format_cnt"]
} ]
}, {
"name" : "f3",
--
To stop receiving notification emails like this one, please contact
shaofengshi@apache.org.