You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by sh...@apache.org on 2019/03/27 03:56:14 UTC
[kylin] branch 2.6.x updated: KYLIN-2620 Make the condition stricter to answer query with topN
This is an automated email from the ASF dual-hosted git repository.
shaofengshi pushed a commit to branch 2.6.x
in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/2.6.x by this push:
new d5c37dc KYLIN-2620 Make the condition stricter to answer query with topN
d5c37dc is described below
commit d5c37dcaeaca6d08f2098a645d78b2c800482db4
Author: chao long <wa...@qq.com>
AuthorDate: Fri Mar 1 19:13:30 2019 +0800
KYLIN-2620 Make the condition stricter to answer query with topN
---
.../java/org/apache/kylin/cube/CubeInstance.java | 3 +-
.../apache/kylin/measure/topn/TopNMeasureType.java | 48 +++++++++++++++++++++-
.../kylin/metadata/realization/SQLDigest.java | 4 +-
.../apache/kylin/storage/hbase/ITStorageTest.java | 2 +-
.../apache/kylin/query/relnode/OLAPContext.java | 3 +-
.../apache/kylin/query/relnode/OLAPLimitRel.java | 1 +
.../apache/kylin/query/relnode/OLAPSortRel.java | 19 +++++----
7 files changed, 66 insertions(+), 14 deletions(-)
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/CubeInstance.java b/core-cube/src/main/java/org/apache/kylin/cube/CubeInstance.java
index 15c4812..7cbfa59 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/CubeInstance.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/CubeInstance.java
@@ -541,7 +541,8 @@ public class CubeInstance extends RootPersistentEntity implements IRealization,
if (result.capable) {
result.cost = getCost(digest);
for (CapabilityInfluence i : result.influences) {
- result.cost *= (i.suggestCostMultiplier() == 0) ? 1.0 : i.suggestCostMultiplier();
+ double suggestCost = i.suggestCostMultiplier();
+ result.cost *= (suggestCost == 0) ? 1.0 : suggestCost;
}
} else {
result.cost = -1;
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNMeasureType.java b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNMeasureType.java
index d53a70a..194ba36 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNMeasureType.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNMeasureType.java
@@ -23,6 +23,7 @@ import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.kylin.common.util.ByteArray;
@@ -51,6 +52,8 @@ import org.slf4j.LoggerFactory;
import com.google.common.collect.Lists;
+import static org.apache.kylin.metadata.realization.SQLDigest.OrderEnum.DESCENDING;
+
public class TopNMeasureType extends MeasureType<TopNCounter<ByteArray>> {
private static final Logger logger = LoggerFactory.getLogger(TopNMeasureType.class);
@@ -63,6 +66,8 @@ public class TopNMeasureType extends MeasureType<TopNCounter<ByteArray>> {
public static final String CONFIG_AGG = "topn.aggregation";
public static final String CONFIG_ORDER = "topn.order";
+ private boolean cuboidCanAnswer;
+
public static class Factory extends MeasureTypeFactory<TopNCounter<ByteArray>> {
@Override
@@ -257,6 +262,8 @@ public class TopNMeasureType extends MeasureType<TopNCounter<ByteArray>> {
// TopN measure can (and only can) provide one numeric measure and one literal dimension
// e.g. select seller, sum(gmv) from ... group by seller order by 2 desc limit 100
+ cuboidCanAnswer = true; // true: have cuboid can answer query, false: no cuboid can answer query
+
List<TblColRef> literalCol = getTopNLiteralColumn(topN.getFunction());
for (TblColRef colRef : literalCol) {
if (digest.filterColumns.contains(colRef) == true) {
@@ -268,6 +275,12 @@ public class TopNMeasureType extends MeasureType<TopNCounter<ByteArray>> {
if (digest.groupbyColumns.containsAll(literalCol) == false)
return null;
+ List retainList = unmatchedDimensions.stream().filter(colRef -> literalCol.contains(colRef)).collect(Collectors.toList());
+
+ if (retainList.size() > 0){
+ cuboidCanAnswer = false;
+ }
+
// check digest requires only one measure
if (digest.aggregations.size() == 1) {
@@ -278,10 +291,17 @@ public class TopNMeasureType extends MeasureType<TopNCounter<ByteArray>> {
unmatchedDimensions.removeAll(literalCol);
unmatchedAggregations.remove(onlyFunction);
+
return new CapabilityInfluence() {
@Override
public double suggestCostMultiplier() {
- return 0.3; // make sure TopN get ahead of other matched realizations
+ if (totallyMatchTopN(digest)) {
+ return 0.3; // make sure TopN get ahead of other matched realizations
+ } else if (cuboidCanAnswer) {
+ return 1.3; // fuzzy topN match, but have cuboid can answer query
+ } else {
+ return 2;
+ }
}
@Override
@@ -312,6 +332,25 @@ public class TopNMeasureType extends MeasureType<TopNCounter<ByteArray>> {
return null;
}
+ private boolean hasOneElement(List<? extends Object> list) {
+ return list != null && list.size() == 1;
+ }
+
+ private boolean totallyMatchTopN(SQLDigest digest) {
+ boolean sortColumnMatch = false;
+ if (hasOneElement(digest.sortColumns)) {
+ TblColRef sortColumn = digest.sortColumns.get(0);
+ if (!digest.groupbyColumns.contains(sortColumn)) {
+ // only have one aggregation
+ sortColumnMatch = sortColumn.getColumnDesc().getZeroBasedIndex() == 0;
+ }
+ }
+
+ return sortColumnMatch
+ && hasOneElement(digest.sortOrders) && DESCENDING.equals(digest.sortOrders.get(0))
+ && digest.hasLimit;
+ }
+
private boolean isTopNCompatibleSum(FunctionDesc topN, FunctionDesc sum) {
if (sum == null)
return false;
@@ -372,9 +411,16 @@ public class TopNMeasureType extends MeasureType<TopNCounter<ByteArray>> {
continue;
}
+ // topN not totally match, but have cuboid can answer, not use topN to adjust
+ // topN totally match or (topN fuzzy match, but no cuboid can answer), use topN to adjust
+ if (!totallyMatchTopN(sqlDigest) && cuboidCanAnswer) {
+ continue;
+ }
+
logger.info("Rewrite function " + origFunc + " to " + topnFunc);
}
+
sqlDigest.aggregations = Lists.newArrayList(topnFunc);
sqlDigest.groupbyColumns.removeAll(topnLiteralCol);
sqlDigest.metricColumns.addAll(topnLiteralCol);
diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/realization/SQLDigest.java b/core-metadata/src/main/java/org/apache/kylin/metadata/realization/SQLDigest.java
index fa7d1e5..78f0adc 100644
--- a/core-metadata/src/main/java/org/apache/kylin/metadata/realization/SQLDigest.java
+++ b/core-metadata/src/main/java/org/apache/kylin/metadata/realization/SQLDigest.java
@@ -81,6 +81,7 @@ public class SQLDigest {
public List<OrderEnum> sortOrders;
public boolean isRawQuery;
public boolean limitPrecedesAggr;
+ public boolean hasLimit;
public Set<MeasureDesc> involvedMeasure;
@@ -91,7 +92,7 @@ public class SQLDigest {
List<DynamicFunctionDesc> dynAggregations, //
Set<TblColRef> rtDimensionColumns, Set<TblColRef> rtMetricColumns, // dynamic col related columns
Set<TblColRef> filterColumns, TupleFilter filter, TupleFilter havingFilter, // filter
- List<TblColRef> sortColumns, List<OrderEnum> sortOrders, boolean limitPrecedesAggr, // sort & limit
+ List<TblColRef> sortColumns, List<OrderEnum> sortOrders, boolean limitPrecedesAggr, boolean hasLimit, // sort & limit
Set<MeasureDesc> involvedMeasure
) {
this.factTable = factTable;
@@ -121,6 +122,7 @@ public class SQLDigest {
this.sortOrders = sortOrders;
this.isRawQuery = isRawQuery();
this.limitPrecedesAggr = limitPrecedesAggr;
+ this.hasLimit = hasLimit;
this.involvedMeasure = involvedMeasure;
diff --git a/kylin-it/src/test/java/org/apache/kylin/storage/hbase/ITStorageTest.java b/kylin-it/src/test/java/org/apache/kylin/storage/hbase/ITStorageTest.java
index 3f8dccc..9022016 100644
--- a/kylin-it/src/test/java/org/apache/kylin/storage/hbase/ITStorageTest.java
+++ b/kylin-it/src/test/java/org/apache/kylin/storage/hbase/ITStorageTest.java
@@ -148,7 +148,7 @@ public class ITStorageTest extends HBaseMetadataTestCase {
/*runtimeDimensionColumns*/ Collections.<TblColRef> emptySet(), //
/*runtimeMetricColumns*/ Collections.<TblColRef> emptySet(), //
/*filter col*/ Collections.<TblColRef> emptySet(), filter, null, //
- /*sortCol*/ new ArrayList<TblColRef>(), new ArrayList<SQLDigest.OrderEnum>(), false, new HashSet<MeasureDesc>());
+ /*sortCol*/ new ArrayList<TblColRef>(), new ArrayList<SQLDigest.OrderEnum>(), false, false, new HashSet<MeasureDesc>());
iterator = storageEngine.search(context, sqlDigest, mockup.newTupleInfo(groups, aggregations));
while (iterator.hasNext()) {
ITuple tuple = iterator.next();
diff --git a/query/src/main/java/org/apache/kylin/query/relnode/OLAPContext.java b/query/src/main/java/org/apache/kylin/query/relnode/OLAPContext.java
index 340967c..b8ddd02 100755
--- a/query/src/main/java/org/apache/kylin/query/relnode/OLAPContext.java
+++ b/query/src/main/java/org/apache/kylin/query/relnode/OLAPContext.java
@@ -132,6 +132,7 @@ public class OLAPContext {
public boolean limitPrecedesAggr = false;
public boolean afterJoin = false;
public boolean hasJoin = false;
+ public boolean hasLimit = false;
public boolean hasWindow = false;
public boolean groupByExpression = false; // checkout if group by column has operator
public boolean afterOuterAggregate = false;
@@ -197,7 +198,7 @@ public class OLAPContext {
metricsColumns, aggregations, aggrSqlCalls, dynFuncs, // aggregation
rtDimColumns, rtMetricColumns, // runtime related columns
filterColumns, filter, havingFilter, // filter
- sortColumns, sortOrders, limitPrecedesAggr, // sort & limit
+ sortColumns, sortOrders, limitPrecedesAggr, hasLimit, // sort & limit
involvedMeasure);
}
return sqlDigest;
diff --git a/query/src/main/java/org/apache/kylin/query/relnode/OLAPLimitRel.java b/query/src/main/java/org/apache/kylin/query/relnode/OLAPLimitRel.java
index 1d0654c..8e04859 100755
--- a/query/src/main/java/org/apache/kylin/query/relnode/OLAPLimitRel.java
+++ b/query/src/main/java/org/apache/kylin/query/relnode/OLAPLimitRel.java
@@ -78,6 +78,7 @@ public class OLAPLimitRel extends SingleRel implements OLAPRel {
this.columnRowType = buildColumnRowType();
this.context = implementor.getContext();
+ this.context.hasLimit = true;
// ignore limit after having clause
// ignore limit after another limit, e.g. select A, count(*) from (select A,B from fact group by A,B limit 100) limit 10
diff --git a/query/src/main/java/org/apache/kylin/query/relnode/OLAPSortRel.java b/query/src/main/java/org/apache/kylin/query/relnode/OLAPSortRel.java
index 6432875..de05005 100644
--- a/query/src/main/java/org/apache/kylin/query/relnode/OLAPSortRel.java
+++ b/query/src/main/java/org/apache/kylin/query/relnode/OLAPSortRel.java
@@ -73,6 +73,16 @@ public class OLAPSortRel extends Sort implements OLAPRel {
this.context = implementor.getContext();
this.columnRowType = buildColumnRowType();
+
+ for (RelFieldCollation fieldCollation : this.collation.getFieldCollations()) {
+ int index = fieldCollation.getFieldIndex();
+ SQLDigest.OrderEnum order = getOrderEnum(fieldCollation.getDirection());
+ OLAPRel olapChild = (OLAPRel) this.getInput();
+ TblColRef orderCol = olapChild.getColumnRowType().getAllColumns().get(index);
+ this.context.addSort(orderCol, order);
+ this.context.storageContext.markSort();
+
+ }
}
ColumnRowType buildColumnRowType() {
@@ -90,15 +100,6 @@ public class OLAPSortRel extends Sort implements OLAPRel {
if (this.context.realization == null)
return;
- for (RelFieldCollation fieldCollation : this.collation.getFieldCollations()) {
- int index = fieldCollation.getFieldIndex();
- SQLDigest.OrderEnum order = getOrderEnum(fieldCollation.getDirection());
- OLAPRel olapChild = (OLAPRel) this.getInput();
- TblColRef orderCol = olapChild.getColumnRowType().getAllColumns().get(index);
- this.context.addSort(orderCol, order);
- this.context.storageContext.markSort();
- }
-
this.rowType = this.deriveRowType();
this.columnRowType = buildColumnRowType();
}