You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by ni...@apache.org on 2019/08/09 15:01:13 UTC

[kylin] branch master updated: KYLIN-4117 Auto adjust data type of RelNode for intersect_count

This is an automated email from the ASF dual-hosted git repository.

nic pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kylin.git


The following commit(s) were added to refs/heads/master by this push:
     new 6668b0d  KYLIN-4117 Auto adjust data type of RelNode for intersect_count
6668b0d is described below

commit 6668b0d095dc0f6753e41d8dbe352c0547717c3a
Author: XiaoxiangYu <hi...@126.com>
AuthorDate: Thu Aug 8 14:18:26 2019 +0800

    KYLIN-4117 Auto adjust data type of RelNode for intersect_count
    
    In an intersect_count query, for example `intersect_count(OPS_USER_ID, part_dt, array[date '2012-01-01',date '2012-01-02'])`, the user currently has to specify the data type of each entry in the array. If the *date* keyword is missing, the returned value is 0, which misleads the user. So I think we should automatically adjust the data type of each entry from string/char to its real data type.
---
 .../query/sql_intersect_count/query01.sql          |  12 +--
 .../query/sql_intersect_count/query03.sql          |   4 +-
 .../apache/kylin/query/relnode/OLAPProjectRel.java | 117 ++++++++++++++++++++-
 3 files changed, 122 insertions(+), 11 deletions(-)

diff --git a/kylin-it/src/test/resources/query/sql_intersect_count/query01.sql b/kylin-it/src/test/resources/query/sql_intersect_count/query01.sql
index 7fd5ca2..78e7528 100644
--- a/kylin-it/src/test/resources/query/sql_intersect_count/query01.sql
+++ b/kylin-it/src/test/resources/query/sql_intersect_count/query01.sql
@@ -16,12 +16,12 @@
 -- limitations under the License.
 --
 select CAL_DT,
-intersect_count(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array[date'2012-01-01']) as first_day,
-intersect_count(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array[date'2012-01-02']) as second_day,
-intersect_count(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array[date'2012-01-03']) as third_day,
-intersect_count(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array[date'2012-01-01',date'2012-01-02']) as retention_oneday,
-intersect_count(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array[date'2012-01-01',date'2012-01-02',date'2012-01-03']) as retention_twoday
+intersect_count(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array['2012-01-01']) as first_day,
+intersect_count(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array['2012-01-02']) as second_day,
+intersect_count(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array['2012-01-03']) as third_day,
+intersect_count(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array['2012-01-01','2012-01-02']) as retention_oneday,
+intersect_count(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array['2012-01-01','2012-01-02','2012-01-03']) as retention_twoday
 from test_kylin_fact
-where CAL_DT in (date'2012-01-01',date'2012-01-02',date'2012-01-03')
+where CAL_DT in ('2012-01-01','2012-01-02','2012-01-03')
 group by CAL_DT
 
diff --git a/kylin-it/src/test/resources/query/sql_intersect_count/query03.sql b/kylin-it/src/test/resources/query/sql_intersect_count/query03.sql
index 3cd10b1..1c512bb 100644
--- a/kylin-it/src/test/resources/query/sql_intersect_count/query03.sql
+++ b/kylin-it/src/test/resources/query/sql_intersect_count/query03.sql
@@ -16,8 +16,8 @@
 -- limitations under the License.
 --
 select LEAF_CATEG_ID,
-intersect_count(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array[date'2012-01-01']) as first_day
+intersect_count(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array['2012-01-01']) as first_day
 from test_kylin_fact
-where CAL_DT in (date'2012-01-01',date'2012-01-02',date'2012-01-03')
+where CAL_DT in ('2012-01-01','2012-01-02','2012-01-03')
 group by LEAF_CATEG_ID
 
diff --git a/query/src/main/java/org/apache/kylin/query/relnode/OLAPProjectRel.java b/query/src/main/java/org/apache/kylin/query/relnode/OLAPProjectRel.java
index de4b438..3163376 100644
--- a/query/src/main/java/org/apache/kylin/query/relnode/OLAPProjectRel.java
+++ b/query/src/main/java/org/apache/kylin/query/relnode/OLAPProjectRel.java
@@ -18,6 +18,8 @@
 
 package org.apache.kylin.query.relnode;
 
+import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -33,20 +35,29 @@ import org.apache.calcite.plan.RelTrait;
 import org.apache.calcite.plan.RelTraitSet;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.RelWriter;
+import org.apache.calcite.rel.core.AggregateCall;
 import org.apache.calcite.rel.core.Project;
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
 import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.rel.type.RelDataTypeFactory.FieldInfoBuilder;
 import org.apache.calcite.rel.type.RelDataTypeField;
 import org.apache.calcite.rel.type.RelDataTypeFieldImpl;
+import org.apache.calcite.rex.RexCall;
 import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
 import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.rex.RexOver;
 import org.apache.calcite.rex.RexProgram;
+import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.sql.fun.SqlCaseOperator;
+import org.apache.calcite.sql.type.ArraySqlType;
+import org.apache.calcite.sql.type.BasicSqlType;
 import org.apache.calcite.sql.type.SqlTypeName;
 import org.apache.calcite.tools.RelUtils;
 import org.apache.kylin.common.util.Pair;
+import org.apache.kylin.cube.CubeInstance;
+import org.apache.kylin.cube.model.CubeDesc;
+import org.apache.kylin.measure.bitmap.BitmapMeasureType;
 import org.apache.kylin.metadata.datatype.DataType;
 import org.apache.kylin.metadata.expression.ColumnTupleExpression;
 import org.apache.kylin.metadata.expression.ExpressionColCollector;
@@ -54,8 +65,10 @@ import org.apache.kylin.metadata.expression.NoneTupleExpression;
 import org.apache.kylin.metadata.expression.NumberTupleExpression;
 import org.apache.kylin.metadata.expression.StringTupleExpression;
 import org.apache.kylin.metadata.expression.TupleExpression;
+import org.apache.kylin.metadata.model.MeasureDesc;
 import org.apache.kylin.metadata.model.TblColRef;
 import org.apache.kylin.metadata.model.TblColRef.InnerDataTypeEnum;
+import org.apache.kylin.metadata.realization.IRealization;
 import org.apache.kylin.query.relnode.visitor.TupleExpressionVisitor;
 import org.apache.kylin.query.schema.OLAPTable;
 
@@ -77,17 +90,27 @@ public class OLAPProjectRel extends Project implements OLAPRel {
     boolean isMerelyPermutation = false;//project additionally added by OLAPJoinPushThroughJoinRule
     private int caseCount = 0;
 
+    private final BasicSqlType dateType = new BasicSqlType(getCluster().getTypeFactory().getTypeSystem(), SqlTypeName.DATE);
+    private final BasicSqlType timestampType = new BasicSqlType(getCluster().getTypeFactory().getTypeSystem(), SqlTypeName.TIMESTAMP);
+    private final ArraySqlType dateArrayType = new ArraySqlType(dateType, true);
+    private final ArraySqlType timestampArrayType = new ArraySqlType(timestampType, true);
+
+    /**
+     * A flag indicating whether the query contains intersect_count
+     */
+    private boolean hasIntersect = false;
+
     public OLAPProjectRel(RelOptCluster cluster, RelTraitSet traitSet, RelNode child, List<RexNode> exps,
             RelDataType rowType) {
         super(cluster, traitSet, child, exps, rowType);
         Preconditions.checkArgument(getConvention() == OLAPRel.CONVENTION);
         Preconditions.checkArgument(child.getConvention() == OLAPRel.CONVENTION);
-        this.rewriteProjects = exps;
+        this.rewriteProjects = new ArrayList<>(exps); // make is modifiable
         this.hasJoin = false;
         this.afterJoin = false;
         this.rowType = getRowType();
         for (RexNode exp : exps) {
-                caseCount += RelUtils.countOperatorCall(SqlCaseOperator.INSTANCE, exp);
+            caseCount += RelUtils.countOperatorCall(SqlCaseOperator.INSTANCE, exp);
         }
     }
 
@@ -137,6 +160,17 @@ public class OLAPProjectRel extends Project implements OLAPRel {
         this.afterAggregate = context.afterAggregate;
 
         this.columnRowType = buildColumnRowType();
+        RelNode parentNode = implementor.getParentNode();
+        if (parentNode instanceof OLAPAggregateRel) {
+            OLAPAggregateRel rel = (OLAPAggregateRel) parentNode;
+            for (AggregateCall call : rel.getRewriteAggCalls()) {
+                if (call.getAggregation().getName().equalsIgnoreCase(BitmapMeasureType.FUNC_INTERSECT_COUNT_DISTINCT)) {
+                    hasIntersect = true;
+                    logger.trace("Find intersect count in query.");
+                    break;
+                }
+            }
+        }
     }
 
     ColumnRowType buildColumnRowType() {
@@ -305,7 +339,7 @@ public class OLAPProjectRel extends Project implements OLAPRel {
 
         // rebuild columns
         this.columnRowType = this.buildColumnRowType();
-
+        rewriteProjectsForArrayDataType();
         this.rewriting = false;
     }
 
@@ -336,4 +370,81 @@ public class OLAPProjectRel extends Project implements OLAPRel {
         return super.explainTerms(pw).item("ctx",
                 context == null ? "" : String.valueOf(context.id) + "@" + context.realization);
     }
+
+    /**
+     * Change Array[String] to Array[Specific Type] for intersect_count
+     * https://github.com/apache/kylin/pull/785
+     */
+    private void rewriteProjectsForArrayDataType() {
+        if (hasIntersect) {
+            Set<TblColRef> tblColRefs = new HashSet<>(context.allColumns); // all column
+            IRealization realization = context.realization;
+            TblColRef groupBy = null;
+            DataType groupByType = null;
+            if (realization instanceof CubeInstance) {
+                CubeDesc cubeDesc = ((CubeInstance) realization).getDescriptor();
+                for (MeasureDesc measureDesc : cubeDesc.getMeasures()) {
+                    if (measureDesc.getFunction().getMeasureType() instanceof BitmapMeasureType) {
+                        TblColRef col1 = measureDesc.getFunction().getParameter().getColRef();
+                        tblColRefs.remove(col1); // Remove all column included in COUNT_DISTINCT
+                        logger.trace("Remove {}", col1);
+                    }
+                }
+                // After remove all columns included in COUNT_DISTINCT, last one should be a group by column
+                if (tblColRefs.size() == 1) {
+                    for (TblColRef colRef : tblColRefs) {
+                        groupBy = colRef;
+                        groupByType = groupBy.getType();
+                        logger.trace("Group By Column in intersect_count should be {}.", groupBy);
+                    }
+                    // only auto change to date/timestamp type from string type
+                    if (groupByType != null && groupByType.isDateTimeFamily()) {
+                        for (int i = 0; i < this.rewriteProjects.size(); i++) {
+                            RexNode rex = this.rewriteProjects.get(i);
+                            if (groupByType.isTimestamp()) {
+                                rewriteProjectForIntersect(rex, SqlTypeName.TIMESTAMP, timestampType,
+                                        timestampArrayType, i);
+                            } else if (groupByType.isDate()) {
+                                rewriteProjectForIntersect(rex, SqlTypeName.DATE, dateType, dateArrayType, i);
+                            }
+                        }
+                    }
+                } else {
+                    logger.trace("After remove, {}.", tblColRefs.size());
+                }
+            }
+        }
+    }
+
+    private void rewriteProjectForIntersect(RexNode rex, SqlTypeName sqlTypeName, BasicSqlType eleSqlType,
+            ArraySqlType arraySqlType, int idx) {
+        if (rex.isA(SqlKind.ARRAY_VALUE_CONSTRUCTOR)) { // somethings like ['2012-01-01', '2012-01-02', '2012-01-03']
+            List<RexNode> nodeList = ((RexCall) rex).getOperands();
+            RexLiteral newNode = null;
+            boolean needChange = true;
+            List<RexNode> newerList = new ArrayList<>();
+            if (!nodeList.isEmpty()) {
+                for (RexNode node : nodeList) {
+                    if (node instanceof RexLiteral) {
+                        RexLiteral literal = (RexLiteral) node;
+                        if (literal.getTypeName() == sqlTypeName) {
+                            needChange = false;
+                            break;
+                        } else {
+                            newNode = RexLiteral.fromJdbcString(eleSqlType, sqlTypeName,
+                                    literal.getValue2().toString());
+                        }
+                    }
+                    if (newNode != null) {
+                        newerList.add(newNode);
+                    }
+                    newNode = null;
+                }
+                if (needChange) {
+                    rewriteProjects.set(idx, ((RexCall) rex).clone(arraySqlType, newerList));
+                    logger.debug("Rewrite project REL {} for intersect count.", rewriteProjects.get(idx));
+                }
+            }
+        }
+    }
 }