You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2016/04/20 10:48:55 UTC
kylin git commit: KYLIN-1016 Count distinct on any dimension should work even not a predefined measure
Repository: kylin
Updated Branches:
refs/heads/yang-m1 74ba02972 -> 074f8bcd5
KYLIN-1016 Count distinct on any dimension should work even not a predefined measure
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/074f8bcd
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/074f8bcd
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/074f8bcd
Branch: refs/heads/yang-m1
Commit: 074f8bcd583a180d1151d78a55730e5c74c31c3d
Parents: 74ba029
Author: lidongsjtu <li...@apache.org>
Authored: Wed Apr 20 16:48:16 2016 +0800
Committer: lidongsjtu <li...@apache.org>
Committed: Wed Apr 20 16:48:20 2016 +0800
----------------------------------------------------------------------
.../apache/kylin/common/KylinConfigBase.java | 11 +++++
.../org/apache/kylin/measure/MeasureType.java | 6 ++-
.../kylin/measure/MeasureTypeFactory.java | 10 +++++
.../kylin/metadata/model/FunctionDesc.java | 43 ++++++++++++++++++--
.../src/test/resources/query/debug/query78.sql | 22 ----------
.../kylin/query/relnode/OLAPAggregateRel.java | 26 ++++++------
.../org/apache/kylin/query/relnode/OLAPRel.java | 3 +-
7 files changed, 81 insertions(+), 40 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/074f8bcd/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 23f0fa7..011a76b 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -630,4 +630,15 @@ abstract public class KylinConfigBase implements Serializable {
return getOptional("kylin.init.tasks");
}
+ public String getMRBatchEngineV1Class() {
+ return getOptional("kylin.cube.mr.engine.v1.class", "org.apache.kylin.engine.mr.MRBatchCubingEngine");
+ }
+
+ public String getMRBatchEngineV2Class() {
+ return getOptional("kylin.cube.mr.engine.v2.class", "org.apache.kylin.engine.mr.MRBatchCubingEngine2");
+ }
+
+ public int getDimCountDistinctMaxCardinality() {
+ return Integer.parseInt(getOptional("kylin.query.dim.distinct.max", "5000000"));
+ }
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/074f8bcd/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java b/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java
index 98aa752..4dea46b 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java
@@ -106,7 +106,11 @@ abstract public class MeasureType<T> {
/** Whether or not Calcite rel-tree needs rewrite to do last around of aggregation */
abstract public boolean needRewrite();
-
+
+ public boolean needRewriteField() {
+ return true;
+ }
+
/** Returns a Calcite aggregation function implementation class */
abstract public Class<?> getRewriteCalciteAggrFunctionClass();
http://git-wip-us.apache.org/repos/asf/kylin/blob/074f8bcd/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java b/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java
index bd235d6..1d264e1 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java
@@ -23,6 +23,7 @@ import java.util.Map;
import org.apache.kylin.measure.basic.BasicMeasureType;
import org.apache.kylin.measure.bitmap.BitmapMeasureType;
+import org.apache.kylin.measure.dim.DimCountDistinctMeasureType;
import org.apache.kylin.measure.extendedcolumn.ExtendedColumnMeasureType;
import org.apache.kylin.measure.hllc.HLLCMeasureType;
import org.apache.kylin.measure.raw.RawMeasureType;
@@ -132,6 +133,15 @@ abstract public class MeasureTypeFactory<T> {
return create(funcName, DataType.getType(dataType));
}
+ public static MeasureType<?> createNoRewriteFieldsMeasureType(String funcName, DataType dataType) {
+ // currently only has DimCountDistinctAgg
+ if (funcName.equalsIgnoreCase("COUNT_DISTINCT")) {
+ return new DimCountDistinctMeasureType.DimCountDistinctMeasureTypeFactory().createMeasureType(funcName, dataType);
+ }
+
+ throw new UnsupportedOperationException("No measure type found.");
+ }
+
public static MeasureType<?> create(String funcName, DataType dataType) {
funcName = funcName.toUpperCase();
http://git-wip-us.apache.org/repos/asf/kylin/blob/074f8bcd/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java b/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java
index 9e3effb..42f7950 100644
--- a/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java
+++ b/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java
@@ -21,6 +21,7 @@ package org.apache.kylin.metadata.model;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
+import java.util.Set;
import org.apache.kylin.measure.MeasureType;
import org.apache.kylin.measure.MeasureTypeFactory;
@@ -31,6 +32,7 @@ import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
/**
*/
@@ -41,6 +43,16 @@ public class FunctionDesc {
public static final String FUNC_MIN = "MIN";
public static final String FUNC_MAX = "MAX";
public static final String FUNC_COUNT = "COUNT";
+ public static final String FUNC_COUNT_DISTINCT = "COUNT_DISTINCT";
+ public static final Set<String> BUILT_IN_AGGREGATIONS = Sets.newHashSet();
+
+ static {
+ BUILT_IN_AGGREGATIONS.add(FUNC_COUNT);
+ BUILT_IN_AGGREGATIONS.add(FUNC_MAX);
+ BUILT_IN_AGGREGATIONS.add(FUNC_MIN);
+ BUILT_IN_AGGREGATIONS.add(FUNC_SUM);
+ BUILT_IN_AGGREGATIONS.add(FUNC_COUNT_DISTINCT);
+ }
public static final String PARAMETER_TYPE_CONSTANT = "constant";
public static final String PARAMETER_TYPE_COLUMN = "column";
@@ -91,23 +103,41 @@ public class FunctionDesc {
throw new IllegalStateException("Column is not found in any table from the model: " + columnName);
}
+ private void reInitMeasureType() {
+ if (isDimensionAsMetric && isCountDistinct()) {
+ // create DimCountDis
+ measureType = MeasureTypeFactory.createNoRewriteFieldsMeasureType(getExpression(), getReturnDataType());
+ } else {
+ measureType = MeasureTypeFactory.create(getExpression(), getReturnDataType());
+ }
+ }
+
public MeasureType<?> getMeasureType() {
- if (isDimensionAsMetric)
+ if (isDimensionAsMetric && !isCountDistinct()) {
return null;
+ }
if (measureType == null) {
- measureType = MeasureTypeFactory.create(getExpression(), getReturnDataType());
+ reInitMeasureType();
}
+
return measureType;
}
public boolean needRewrite() {
- if (isDimensionAsMetric)
+ if (getMeasureType() == null)
return false;
return getMeasureType().needRewrite();
}
+ public boolean needRewriteField() {
+ if (!needRewrite())
+ return false;
+
+ return getMeasureType().needRewriteField();
+ }
+
public String getRewriteFieldName() {
if (isSum()) {
return getParameter().getValue();
@@ -153,6 +183,10 @@ public class FunctionDesc {
return FUNC_COUNT.equalsIgnoreCase(expression);
}
+ public boolean isCountDistinct() {
+ return FUNC_COUNT_DISTINCT.equalsIgnoreCase(expression);
+ }
+
/**
* Get Full Expression such as sum(amount), count(1), count(*)...
*/
@@ -172,6 +206,9 @@ public class FunctionDesc {
public void setDimensionAsMetric(boolean isDimensionAsMetric) {
this.isDimensionAsMetric = isDimensionAsMetric;
+ if (measureType != null) {
+ reInitMeasureType();
+ }
}
public String getExpression() {
http://git-wip-us.apache.org/repos/asf/kylin/blob/074f8bcd/kylin-it/src/test/resources/query/debug/query78.sql
----------------------------------------------------------------------
diff --git a/kylin-it/src/test/resources/query/debug/query78.sql b/kylin-it/src/test/resources/query/debug/query78.sql
deleted file mode 100644
index 299f1a4..0000000
--- a/kylin-it/src/test/resources/query/debug/query78.sql
+++ /dev/null
@@ -1,22 +0,0 @@
---
--- Licensed to the Apache Software Foundation (ASF) under one
--- or more contributor license agreements. See the NOTICE file
--- distributed with this work for additional information
--- regarding copyright ownership. The ASF licenses this file
--- to you under the Apache License, Version 2.0 (the
--- "License"); you may not use this file except in compliance
--- with the License. You may obtain a copy of the License at
---
--- http://www.apache.org/licenses/LICENSE-2.0
---
--- Unless required by applicable law or agreed to in writing, software
--- distributed under the License is distributed on an "AS IS" BASIS,
--- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
--- See the License for the specific language governing permissions and
--- limitations under the License.
---
-
-select count(*) as c,sum(PRICE) as GMV, LSTG_FORMAT_NAME as FORMAT_NAME
-from test_kylin_fact
-where (LSTG_FORMAT_NAME in ('ABIN')) or (LSTG_FORMAT_NAME>='FP-GTC' and LSTG_FORMAT_NAME<='Others')
-group by LSTG_FORMAT_NAME
http://git-wip-us.apache.org/repos/asf/kylin/blob/074f8bcd/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java
----------------------------------------------------------------------
diff --git a/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java b/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java
index 9414757..9a2fa5f 100644
--- a/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java
+++ b/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java
@@ -163,7 +163,7 @@ public class OLAPAggregateRel extends Aggregate implements OLAPRel {
for (int i = 0; i < this.aggregations.size(); i++) {
FunctionDesc aggFunc = this.aggregations.get(i);
TblColRef aggCol = null;
- if (aggFunc.needRewrite()) {
+ if (aggFunc.needRewriteField()) {
aggCol = buildRewriteColumn(aggFunc);
} else {
AggregateCall aggCall = this.rewriteAggCalls.get(i);
@@ -179,7 +179,7 @@ public class OLAPAggregateRel extends Aggregate implements OLAPRel {
private TblColRef buildRewriteColumn(FunctionDesc aggFunc) {
TblColRef colRef;
- if (aggFunc.needRewrite()) {
+ if (aggFunc.needRewriteField()) {
ColumnDesc column = new ColumnDesc();
column.setName(aggFunc.getRewriteFieldName());
TableDesc table = this.context.firstTableScan.getOlapTable().getSourceTable();
@@ -234,7 +234,7 @@ public class OLAPAggregateRel extends Aggregate implements OLAPRel {
translateAggregation();
buildRewriteFieldsAndMetricsColumns();
}
-
+
implementor.visitChild(this, getInput());
// only rewrite the innermost aggregation
@@ -280,18 +280,18 @@ public class OLAPAggregateRel extends Aggregate implements OLAPRel {
private void buildRewriteFieldsAndMetricsColumns() {
fillbackOptimizedColumn();
-
+
ColumnRowType inputColumnRowType = ((OLAPRel) getInput()).getColumnRowType();
RelDataTypeFactory typeFactory = getCluster().getTypeFactory();
for (int i = 0; i < this.aggregations.size(); i++) {
FunctionDesc aggFunc = this.aggregations.get(i);
-
+
if (aggFunc.isDimensionAsMetric()) {
this.context.groupByColumns.addAll(aggFunc.getParameter().getColRefs());
continue; // skip rewrite, let calcite handle
}
-
- if (aggFunc.needRewrite()) {
+
+ if (aggFunc.needRewriteField()) {
String rewriteFieldName = aggFunc.getRewriteFieldName();
RelDataType rewriteFieldType = OLAPTable.createSqlType(typeFactory, aggFunc.getRewriteFieldType(), true);
this.context.rewriteFields.put(rewriteFieldName, rewriteFieldType);
@@ -327,12 +327,14 @@ public class OLAPAggregateRel extends Aggregate implements OLAPRel {
}
private AggregateCall rewriteAggregateCall(AggregateCall aggCall, FunctionDesc func) {
-
// rebuild parameters
- List<Integer> newArgList = new ArrayList<Integer>(1);
- String fieldName = func.getRewriteFieldName();
- RelDataTypeField field = getInput().getRowType().getField(fieldName, true, false);
- newArgList.add(field.getIndex());
+ List<Integer> newArgList = Lists.newArrayListWithCapacity(1);
+ if (func.needRewriteField()) {
+ RelDataTypeField field = getInput().getRowType().getField(func.getRewriteFieldName(), true, false);
+ newArgList.add(field.getIndex());
+ } else {
+ newArgList = aggCall.getArgList();
+ }
// rebuild function
RelDataType fieldType = aggCall.getType();
http://git-wip-us.apache.org/repos/asf/kylin/blob/074f8bcd/query/src/main/java/org/apache/kylin/query/relnode/OLAPRel.java
----------------------------------------------------------------------
diff --git a/query/src/main/java/org/apache/kylin/query/relnode/OLAPRel.java b/query/src/main/java/org/apache/kylin/query/relnode/OLAPRel.java
index 59179df..f2b52a7 100644
--- a/query/src/main/java/org/apache/kylin/query/relnode/OLAPRel.java
+++ b/query/src/main/java/org/apache/kylin/query/relnode/OLAPRel.java
@@ -127,8 +127,7 @@ public interface OLAPRel extends RelNode {
public static boolean needRewrite(OLAPContext ctx) {
boolean hasFactTable = ctx.hasJoin || ctx.firstTableScan.getTableName().equals(ctx.realization.getFactTable());
- boolean hasRewriteFields = !ctx.rewriteFields.isEmpty();
- return hasRewriteFields && hasFactTable;
+ return hasFactTable;
}
}