You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2016/04/20 10:48:55 UTC

kylin git commit: KYLIN-1016 Count distinct on any dimension should work even if it is not a predefined measure

Repository: kylin
Updated Branches:
  refs/heads/yang-m1 74ba02972 -> 074f8bcd5


KYLIN-1016 Count distinct on any dimension should work even if it is not a predefined measure


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/074f8bcd
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/074f8bcd
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/074f8bcd

Branch: refs/heads/yang-m1
Commit: 074f8bcd583a180d1151d78a55730e5c74c31c3d
Parents: 74ba029
Author: lidongsjtu <li...@apache.org>
Authored: Wed Apr 20 16:48:16 2016 +0800
Committer: lidongsjtu <li...@apache.org>
Committed: Wed Apr 20 16:48:20 2016 +0800

----------------------------------------------------------------------
 .../apache/kylin/common/KylinConfigBase.java    | 11 +++++
 .../org/apache/kylin/measure/MeasureType.java   |  6 ++-
 .../kylin/measure/MeasureTypeFactory.java       | 10 +++++
 .../kylin/metadata/model/FunctionDesc.java      | 43 ++++++++++++++++++--
 .../src/test/resources/query/debug/query78.sql  | 22 ----------
 .../kylin/query/relnode/OLAPAggregateRel.java   | 26 ++++++------
 .../org/apache/kylin/query/relnode/OLAPRel.java |  3 +-
 7 files changed, 81 insertions(+), 40 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/074f8bcd/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 23f0fa7..011a76b 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -630,4 +630,15 @@ abstract public class KylinConfigBase implements Serializable {
         return getOptional("kylin.init.tasks");
     }
 
+    public String getMRBatchEngineV1Class() {
+        return getOptional("kylin.cube.mr.engine.v1.class", "org.apache.kylin.engine.mr.MRBatchCubingEngine");
+    }
+
+    public String getMRBatchEngineV2Class() {
+        return getOptional("kylin.cube.mr.engine.v2.class", "org.apache.kylin.engine.mr.MRBatchCubingEngine2");
+    }
+
+    public int getDimCountDistinctMaxCardinality() {
+        return Integer.parseInt(getOptional("kylin.query.dim.distinct.max", "5000000"));
+    }
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/074f8bcd/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java b/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java
index 98aa752..4dea46b 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java
@@ -106,7 +106,11 @@ abstract public class MeasureType<T> {
     
     /** Whether or not Calcite rel-tree needs rewrite to do last around of aggregation */
     abstract public boolean needRewrite();
-    
+
+    public boolean needRewriteField() {
+        return true;
+    }
+
     /** Returns a Calcite aggregation function implementation class */
     abstract public Class<?> getRewriteCalciteAggrFunctionClass();
     

http://git-wip-us.apache.org/repos/asf/kylin/blob/074f8bcd/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java b/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java
index bd235d6..1d264e1 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java
@@ -23,6 +23,7 @@ import java.util.Map;
 
 import org.apache.kylin.measure.basic.BasicMeasureType;
 import org.apache.kylin.measure.bitmap.BitmapMeasureType;
+import org.apache.kylin.measure.dim.DimCountDistinctMeasureType;
 import org.apache.kylin.measure.extendedcolumn.ExtendedColumnMeasureType;
 import org.apache.kylin.measure.hllc.HLLCMeasureType;
 import org.apache.kylin.measure.raw.RawMeasureType;
@@ -132,6 +133,15 @@ abstract public class MeasureTypeFactory<T> {
         return create(funcName, DataType.getType(dataType));
     }
 
+    public static MeasureType<?> createNoRewriteFieldsMeasureType(String funcName, DataType dataType) {
+        // currently the only such measure type is DimCountDistinctMeasureType (for COUNT_DISTINCT)
+        if (funcName.equalsIgnoreCase("COUNT_DISTINCT")) {
+            return new DimCountDistinctMeasureType.DimCountDistinctMeasureTypeFactory().createMeasureType(funcName, dataType);
+        }
+
+        throw new UnsupportedOperationException("No measure type found.");
+    }
+
     public static MeasureType<?> create(String funcName, DataType dataType) {
         funcName = funcName.toUpperCase();
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/074f8bcd/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java b/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java
index 9e3effb..42f7950 100644
--- a/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java
+++ b/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java
@@ -21,6 +21,7 @@ package org.apache.kylin.metadata.model;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
+import java.util.Set;
 
 import org.apache.kylin.measure.MeasureType;
 import org.apache.kylin.measure.MeasureTypeFactory;
@@ -31,6 +32,7 @@ import com.fasterxml.jackson.annotation.JsonAutoDetect;
 import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
 import com.fasterxml.jackson.annotation.JsonProperty;
 import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
 
 /**
  */
@@ -41,6 +43,16 @@ public class FunctionDesc {
     public static final String FUNC_MIN = "MIN";
     public static final String FUNC_MAX = "MAX";
     public static final String FUNC_COUNT = "COUNT";
+    public static final String FUNC_COUNT_DISTINCT = "COUNT_DISTINCT";
+    public static final Set<String> BUILT_IN_AGGREGATIONS = Sets.newHashSet();
+
+    static {
+        BUILT_IN_AGGREGATIONS.add(FUNC_COUNT);
+        BUILT_IN_AGGREGATIONS.add(FUNC_MAX);
+        BUILT_IN_AGGREGATIONS.add(FUNC_MIN);
+        BUILT_IN_AGGREGATIONS.add(FUNC_SUM);
+        BUILT_IN_AGGREGATIONS.add(FUNC_COUNT_DISTINCT);
+    }
 
     public static final String PARAMETER_TYPE_CONSTANT = "constant";
     public static final String PARAMETER_TYPE_COLUMN = "column";
@@ -91,23 +103,41 @@ public class FunctionDesc {
         throw new IllegalStateException("Column is not found in any table from the model: " + columnName);
     }
 
+    private void reInitMeasureType() {
+        if (isDimensionAsMetric && isCountDistinct()) {
+            // COUNT_DISTINCT on a dimension: use the measure type that needs no rewrite fields
+            measureType = MeasureTypeFactory.createNoRewriteFieldsMeasureType(getExpression(), getReturnDataType());
+        } else {
+            measureType = MeasureTypeFactory.create(getExpression(), getReturnDataType());
+        }
+    }
+
     public MeasureType<?> getMeasureType() {
-        if (isDimensionAsMetric)
+        if (isDimensionAsMetric && !isCountDistinct()) {
             return null;
+        }
 
         if (measureType == null) {
-            measureType = MeasureTypeFactory.create(getExpression(), getReturnDataType());
+            reInitMeasureType();
         }
+
         return measureType;
     }
 
     public boolean needRewrite() {
-        if (isDimensionAsMetric)
+        if (getMeasureType() == null)
             return false;
 
         return getMeasureType().needRewrite();
     }
 
+    public boolean needRewriteField() {
+        if (!needRewrite())
+            return false;
+
+        return getMeasureType().needRewriteField();
+    }
+
     public String getRewriteFieldName() {
         if (isSum()) {
             return getParameter().getValue();
@@ -153,6 +183,10 @@ public class FunctionDesc {
         return FUNC_COUNT.equalsIgnoreCase(expression);
     }
 
+    public boolean isCountDistinct() {
+        return FUNC_COUNT_DISTINCT.equalsIgnoreCase(expression);
+    }
+
     /**
      * Get Full Expression such as sum(amount), count(1), count(*)...
      */
@@ -172,6 +206,9 @@ public class FunctionDesc {
 
     public void setDimensionAsMetric(boolean isDimensionAsMetric) {
         this.isDimensionAsMetric = isDimensionAsMetric;
+        if (measureType != null) {
+            reInitMeasureType();
+        }
     }
 
     public String getExpression() {

http://git-wip-us.apache.org/repos/asf/kylin/blob/074f8bcd/kylin-it/src/test/resources/query/debug/query78.sql
----------------------------------------------------------------------
diff --git a/kylin-it/src/test/resources/query/debug/query78.sql b/kylin-it/src/test/resources/query/debug/query78.sql
deleted file mode 100644
index 299f1a4..0000000
--- a/kylin-it/src/test/resources/query/debug/query78.sql
+++ /dev/null
@@ -1,22 +0,0 @@
---
--- Licensed to the Apache Software Foundation (ASF) under one
--- or more contributor license agreements.  See the NOTICE file
--- distributed with this work for additional information
--- regarding copyright ownership.  The ASF licenses this file
--- to you under the Apache License, Version 2.0 (the
--- "License"); you may not use this file except in compliance
--- with the License.  You may obtain a copy of the License at
---
---     http://www.apache.org/licenses/LICENSE-2.0
---
--- Unless required by applicable law or agreed to in writing, software
--- distributed under the License is distributed on an "AS IS" BASIS,
--- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
--- See the License for the specific language governing permissions and
--- limitations under the License.
---
-
-select count(*) as c,sum(PRICE) as GMV, LSTG_FORMAT_NAME as FORMAT_NAME
-from test_kylin_fact
-where (LSTG_FORMAT_NAME in ('ABIN')) or  (LSTG_FORMAT_NAME>='FP-GTC' and LSTG_FORMAT_NAME<='Others')
-group by LSTG_FORMAT_NAME

http://git-wip-us.apache.org/repos/asf/kylin/blob/074f8bcd/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java
----------------------------------------------------------------------
diff --git a/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java b/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java
index 9414757..9a2fa5f 100644
--- a/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java
+++ b/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java
@@ -163,7 +163,7 @@ public class OLAPAggregateRel extends Aggregate implements OLAPRel {
         for (int i = 0; i < this.aggregations.size(); i++) {
             FunctionDesc aggFunc = this.aggregations.get(i);
             TblColRef aggCol = null;
-            if (aggFunc.needRewrite()) {
+            if (aggFunc.needRewriteField()) {
                 aggCol = buildRewriteColumn(aggFunc);
             } else {
                 AggregateCall aggCall = this.rewriteAggCalls.get(i);
@@ -179,7 +179,7 @@ public class OLAPAggregateRel extends Aggregate implements OLAPRel {
 
     private TblColRef buildRewriteColumn(FunctionDesc aggFunc) {
         TblColRef colRef;
-        if (aggFunc.needRewrite()) {
+        if (aggFunc.needRewriteField()) {
             ColumnDesc column = new ColumnDesc();
             column.setName(aggFunc.getRewriteFieldName());
             TableDesc table = this.context.firstTableScan.getOlapTable().getSourceTable();
@@ -234,7 +234,7 @@ public class OLAPAggregateRel extends Aggregate implements OLAPRel {
             translateAggregation();
             buildRewriteFieldsAndMetricsColumns();
         }
-        
+
         implementor.visitChild(this, getInput());
 
         // only rewrite the innermost aggregation
@@ -280,18 +280,18 @@ public class OLAPAggregateRel extends Aggregate implements OLAPRel {
 
     private void buildRewriteFieldsAndMetricsColumns() {
         fillbackOptimizedColumn();
-        
+
         ColumnRowType inputColumnRowType = ((OLAPRel) getInput()).getColumnRowType();
         RelDataTypeFactory typeFactory = getCluster().getTypeFactory();
         for (int i = 0; i < this.aggregations.size(); i++) {
             FunctionDesc aggFunc = this.aggregations.get(i);
-            
+
             if (aggFunc.isDimensionAsMetric()) {
                 this.context.groupByColumns.addAll(aggFunc.getParameter().getColRefs());
                 continue; // skip rewrite, let calcite handle
             }
-            
-            if (aggFunc.needRewrite()) {
+
+            if (aggFunc.needRewriteField()) {
                 String rewriteFieldName = aggFunc.getRewriteFieldName();
                 RelDataType rewriteFieldType = OLAPTable.createSqlType(typeFactory, aggFunc.getRewriteFieldType(), true);
                 this.context.rewriteFields.put(rewriteFieldName, rewriteFieldType);
@@ -327,12 +327,14 @@ public class OLAPAggregateRel extends Aggregate implements OLAPRel {
     }
 
     private AggregateCall rewriteAggregateCall(AggregateCall aggCall, FunctionDesc func) {
-
         // rebuild parameters
-        List<Integer> newArgList = new ArrayList<Integer>(1);
-        String fieldName = func.getRewriteFieldName();
-        RelDataTypeField field = getInput().getRowType().getField(fieldName, true, false);
-        newArgList.add(field.getIndex());
+        List<Integer> newArgList = Lists.newArrayListWithCapacity(1);
+        if (func.needRewriteField()) {
+            RelDataTypeField field = getInput().getRowType().getField(func.getRewriteFieldName(), true, false);
+            newArgList.add(field.getIndex());
+        } else {
+            newArgList = aggCall.getArgList();
+        }
 
         // rebuild function
         RelDataType fieldType = aggCall.getType();

http://git-wip-us.apache.org/repos/asf/kylin/blob/074f8bcd/query/src/main/java/org/apache/kylin/query/relnode/OLAPRel.java
----------------------------------------------------------------------
diff --git a/query/src/main/java/org/apache/kylin/query/relnode/OLAPRel.java b/query/src/main/java/org/apache/kylin/query/relnode/OLAPRel.java
index 59179df..f2b52a7 100644
--- a/query/src/main/java/org/apache/kylin/query/relnode/OLAPRel.java
+++ b/query/src/main/java/org/apache/kylin/query/relnode/OLAPRel.java
@@ -127,8 +127,7 @@ public interface OLAPRel extends RelNode {
 
         public static boolean needRewrite(OLAPContext ctx) {
             boolean hasFactTable = ctx.hasJoin || ctx.firstTableScan.getTableName().equals(ctx.realization.getFactTable());
-            boolean hasRewriteFields = !ctx.rewriteFields.isEmpty();
-            return hasRewriteFields && hasFactTable;
+            return hasFactTable;
         }
     }