You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by sh...@apache.org on 2019/03/27 03:56:14 UTC

[kylin] branch 2.6.x updated: KYLIN-2620 Make the condition stricter to answer query with topN

This is an automated email from the ASF dual-hosted git repository.

shaofengshi pushed a commit to branch 2.6.x
in repository https://gitbox.apache.org/repos/asf/kylin.git


The following commit(s) were added to refs/heads/2.6.x by this push:
     new d5c37dc  KYLIN-2620 Make the condition stricter to answer query with topN
d5c37dc is described below

commit d5c37dcaeaca6d08f2098a645d78b2c800482db4
Author: chao long <wa...@qq.com>
AuthorDate: Fri Mar 1 19:13:30 2019 +0800

    KYLIN-2620 Make the condition stricter to answer query with topN
---
 .../java/org/apache/kylin/cube/CubeInstance.java   |  3 +-
 .../apache/kylin/measure/topn/TopNMeasureType.java | 48 +++++++++++++++++++++-
 .../kylin/metadata/realization/SQLDigest.java      |  4 +-
 .../apache/kylin/storage/hbase/ITStorageTest.java  |  2 +-
 .../apache/kylin/query/relnode/OLAPContext.java    |  3 +-
 .../apache/kylin/query/relnode/OLAPLimitRel.java   |  1 +
 .../apache/kylin/query/relnode/OLAPSortRel.java    | 19 +++++----
 7 files changed, 66 insertions(+), 14 deletions(-)

diff --git a/core-cube/src/main/java/org/apache/kylin/cube/CubeInstance.java b/core-cube/src/main/java/org/apache/kylin/cube/CubeInstance.java
index 15c4812..7cbfa59 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/CubeInstance.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/CubeInstance.java
@@ -541,7 +541,8 @@ public class CubeInstance extends RootPersistentEntity implements IRealization,
         if (result.capable) {
             result.cost = getCost(digest);
             for (CapabilityInfluence i : result.influences) {
-                result.cost *= (i.suggestCostMultiplier() == 0) ? 1.0 : i.suggestCostMultiplier();
+                double suggestCost = i.suggestCostMultiplier();
+                result.cost *= (suggestCost == 0) ? 1.0 : suggestCost;
             }
         } else {
             result.cost = -1;
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNMeasureType.java b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNMeasureType.java
index d53a70a..194ba36 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNMeasureType.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNMeasureType.java
@@ -23,6 +23,7 @@ import java.util.Collection;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.stream.Collectors;
 
 import org.apache.commons.lang3.StringUtils;
 import org.apache.kylin.common.util.ByteArray;
@@ -51,6 +52,8 @@ import org.slf4j.LoggerFactory;
 
 import com.google.common.collect.Lists;
 
+import static org.apache.kylin.metadata.realization.SQLDigest.OrderEnum.DESCENDING;
+
 public class TopNMeasureType extends MeasureType<TopNCounter<ByteArray>> {
 
     private static final Logger logger = LoggerFactory.getLogger(TopNMeasureType.class);
@@ -63,6 +66,8 @@ public class TopNMeasureType extends MeasureType<TopNCounter<ByteArray>> {
     public static final String CONFIG_AGG = "topn.aggregation";
     public static final String CONFIG_ORDER = "topn.order";
 
+    private boolean cuboidCanAnswer;
+
     public static class Factory extends MeasureTypeFactory<TopNCounter<ByteArray>> {
 
         @Override
@@ -257,6 +262,8 @@ public class TopNMeasureType extends MeasureType<TopNCounter<ByteArray>> {
         // TopN measure can (and only can) provide one numeric measure and one literal dimension
         // e.g. select seller, sum(gmv) from ... group by seller order by 2 desc limit 100
 
+        cuboidCanAnswer = true; // true: some cuboid can answer the query; false: no cuboid can answer it
+
         List<TblColRef> literalCol = getTopNLiteralColumn(topN.getFunction());
         for (TblColRef colRef : literalCol) {
             if (digest.filterColumns.contains(colRef) == true) {
@@ -268,6 +275,12 @@ public class TopNMeasureType extends MeasureType<TopNCounter<ByteArray>> {
         if (digest.groupbyColumns.containsAll(literalCol) == false)
             return null;
 
+        List retainList = unmatchedDimensions.stream().filter(colRef -> literalCol.contains(colRef)).collect(Collectors.toList());
+
+        if (retainList.size() > 0){
+            cuboidCanAnswer = false;
+        }
+
         // check digest requires only one measure
         if (digest.aggregations.size() == 1) {
 
@@ -278,10 +291,17 @@ public class TopNMeasureType extends MeasureType<TopNCounter<ByteArray>> {
 
             unmatchedDimensions.removeAll(literalCol);
             unmatchedAggregations.remove(onlyFunction);
+
             return new CapabilityInfluence() {
                 @Override
                 public double suggestCostMultiplier() {
-                    return 0.3; // make sure TopN get ahead of other matched realizations
+                    if (totallyMatchTopN(digest)) {
+                        return 0.3; // make sure TopN get ahead of other matched realizations
+                    } else if (cuboidCanAnswer) {
+                        return 1.3; // fuzzy topN match, but a cuboid can answer the query
+                    } else {
+                        return 2;
+                    }
                 }
 
                 @Override
@@ -312,6 +332,25 @@ public class TopNMeasureType extends MeasureType<TopNCounter<ByteArray>> {
         return null;
     }
 
+    private boolean hasOneElement(List<? extends Object> list) { // true only for a non-null list holding exactly one element
+        return list != null && list.size() == 1;
+    }
+
+    private boolean totallyMatchTopN(SQLDigest digest) { // true when the digest has one qualifying sort column, a single DESCENDING order, and a limit
+        boolean sortColumnMatch = false;
+        if (hasOneElement(digest.sortColumns)) {
+            TblColRef sortColumn = digest.sortColumns.get(0);
+            if (!digest.groupbyColumns.contains(sortColumn)) {
+                // sort column is not a group-by column; presumably it is the single aggregation, expected at zero-based index 0 -- TODO confirm
+                sortColumnMatch = sortColumn.getColumnDesc().getZeroBasedIndex() == 0;
+            }
+        }
+
+        return sortColumnMatch
+                && hasOneElement(digest.sortOrders) && DESCENDING.equals(digest.sortOrders.get(0))
+                && digest.hasLimit;
+    }
+
     private boolean isTopNCompatibleSum(FunctionDesc topN, FunctionDesc sum) {
         if (sum == null)
             return false;
@@ -372,9 +411,16 @@ public class TopNMeasureType extends MeasureType<TopNCounter<ByteArray>> {
                     continue;
                 }
 
+                // topN does not totally match, but a cuboid can answer the query: do not use topN to adjust the digest
+                // topN totally matches, or topN fuzzily matches and no cuboid can answer: use topN to adjust the digest
+                if (!totallyMatchTopN(sqlDigest) && cuboidCanAnswer) {
+                    continue;
+                }
+
                 logger.info("Rewrite function " + origFunc + " to " + topnFunc);
             }
 
+
             sqlDigest.aggregations = Lists.newArrayList(topnFunc);
             sqlDigest.groupbyColumns.removeAll(topnLiteralCol);
             sqlDigest.metricColumns.addAll(topnLiteralCol);
diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/realization/SQLDigest.java b/core-metadata/src/main/java/org/apache/kylin/metadata/realization/SQLDigest.java
index fa7d1e5..78f0adc 100644
--- a/core-metadata/src/main/java/org/apache/kylin/metadata/realization/SQLDigest.java
+++ b/core-metadata/src/main/java/org/apache/kylin/metadata/realization/SQLDigest.java
@@ -81,6 +81,7 @@ public class SQLDigest {
     public List<OrderEnum> sortOrders;
     public boolean isRawQuery;
     public boolean limitPrecedesAggr;
+    public boolean hasLimit;
 
     public Set<MeasureDesc> involvedMeasure;
 
@@ -91,7 +92,7 @@ public class SQLDigest {
             List<DynamicFunctionDesc> dynAggregations, //
             Set<TblColRef> rtDimensionColumns, Set<TblColRef> rtMetricColumns, // dynamic col related columns
             Set<TblColRef> filterColumns, TupleFilter filter, TupleFilter havingFilter, // filter
-            List<TblColRef> sortColumns, List<OrderEnum> sortOrders, boolean limitPrecedesAggr, // sort & limit
+            List<TblColRef> sortColumns, List<OrderEnum> sortOrders, boolean limitPrecedesAggr, boolean hasLimit, // sort & limit
             Set<MeasureDesc> involvedMeasure
     ) {
         this.factTable = factTable;
@@ -121,6 +122,7 @@ public class SQLDigest {
         this.sortOrders = sortOrders;
         this.isRawQuery = isRawQuery();
         this.limitPrecedesAggr = limitPrecedesAggr;
+        this.hasLimit = hasLimit;
 
         this.involvedMeasure = involvedMeasure;
 
diff --git a/kylin-it/src/test/java/org/apache/kylin/storage/hbase/ITStorageTest.java b/kylin-it/src/test/java/org/apache/kylin/storage/hbase/ITStorageTest.java
index 3f8dccc..9022016 100644
--- a/kylin-it/src/test/java/org/apache/kylin/storage/hbase/ITStorageTest.java
+++ b/kylin-it/src/test/java/org/apache/kylin/storage/hbase/ITStorageTest.java
@@ -148,7 +148,7 @@ public class ITStorageTest extends HBaseMetadataTestCase {
                     /*runtimeDimensionColumns*/ Collections.<TblColRef> emptySet(), //
                     /*runtimeMetricColumns*/ Collections.<TblColRef> emptySet(), //
                     /*filter col*/ Collections.<TblColRef> emptySet(), filter, null, //
-                    /*sortCol*/ new ArrayList<TblColRef>(), new ArrayList<SQLDigest.OrderEnum>(), false, new HashSet<MeasureDesc>());
+                    /*sortCol*/ new ArrayList<TblColRef>(), new ArrayList<SQLDigest.OrderEnum>(), false, false, new HashSet<MeasureDesc>());
             iterator = storageEngine.search(context, sqlDigest, mockup.newTupleInfo(groups, aggregations));
             while (iterator.hasNext()) {
                 ITuple tuple = iterator.next();
diff --git a/query/src/main/java/org/apache/kylin/query/relnode/OLAPContext.java b/query/src/main/java/org/apache/kylin/query/relnode/OLAPContext.java
index 340967c..b8ddd02 100755
--- a/query/src/main/java/org/apache/kylin/query/relnode/OLAPContext.java
+++ b/query/src/main/java/org/apache/kylin/query/relnode/OLAPContext.java
@@ -132,6 +132,7 @@ public class OLAPContext {
     public boolean limitPrecedesAggr = false;
     public boolean afterJoin = false;
     public boolean hasJoin = false;
+    public boolean hasLimit = false;
     public boolean hasWindow = false;
     public boolean groupByExpression = false; // checkout if group by column has operator
     public boolean afterOuterAggregate = false;
@@ -197,7 +198,7 @@ public class OLAPContext {
                     metricsColumns, aggregations, aggrSqlCalls, dynFuncs, // aggregation
                     rtDimColumns, rtMetricColumns, // runtime related columns
                     filterColumns, filter, havingFilter, // filter
-                    sortColumns, sortOrders, limitPrecedesAggr, // sort & limit
+                    sortColumns, sortOrders, limitPrecedesAggr, hasLimit, // sort & limit
                     involvedMeasure);
         }
         return sqlDigest;
diff --git a/query/src/main/java/org/apache/kylin/query/relnode/OLAPLimitRel.java b/query/src/main/java/org/apache/kylin/query/relnode/OLAPLimitRel.java
index 1d0654c..8e04859 100755
--- a/query/src/main/java/org/apache/kylin/query/relnode/OLAPLimitRel.java
+++ b/query/src/main/java/org/apache/kylin/query/relnode/OLAPLimitRel.java
@@ -78,6 +78,7 @@ public class OLAPLimitRel extends SingleRel implements OLAPRel {
 
         this.columnRowType = buildColumnRowType();
         this.context = implementor.getContext();
+        this.context.hasLimit = true;
 
         // ignore limit after having clause
         // ignore limit after another limit, e.g. select A, count(*) from (select A,B from fact group by A,B limit 100) limit 10
diff --git a/query/src/main/java/org/apache/kylin/query/relnode/OLAPSortRel.java b/query/src/main/java/org/apache/kylin/query/relnode/OLAPSortRel.java
index 6432875..de05005 100644
--- a/query/src/main/java/org/apache/kylin/query/relnode/OLAPSortRel.java
+++ b/query/src/main/java/org/apache/kylin/query/relnode/OLAPSortRel.java
@@ -73,6 +73,16 @@ public class OLAPSortRel extends Sort implements OLAPRel {
 
         this.context = implementor.getContext();
         this.columnRowType = buildColumnRowType();
+
+        for (RelFieldCollation fieldCollation : this.collation.getFieldCollations()) {
+            int index = fieldCollation.getFieldIndex();
+            SQLDigest.OrderEnum order = getOrderEnum(fieldCollation.getDirection());
+            OLAPRel olapChild = (OLAPRel) this.getInput();
+            TblColRef orderCol = olapChild.getColumnRowType().getAllColumns().get(index);
+            this.context.addSort(orderCol, order);
+            this.context.storageContext.markSort();
+
+        }
     }
 
     ColumnRowType buildColumnRowType() {
@@ -90,15 +100,6 @@ public class OLAPSortRel extends Sort implements OLAPRel {
         if (this.context.realization == null)
             return;
 
-        for (RelFieldCollation fieldCollation : this.collation.getFieldCollations()) {
-            int index = fieldCollation.getFieldIndex();
-            SQLDigest.OrderEnum order = getOrderEnum(fieldCollation.getDirection());
-            OLAPRel olapChild = (OLAPRel) this.getInput();
-            TblColRef orderCol = olapChild.getColumnRowType().getAllColumns().get(index);
-            this.context.addSort(orderCol, order);
-            this.context.storageContext.markSort();
-        }
-
         this.rowType = this.deriveRowType();
         this.columnRowType = buildColumnRowType();
     }