You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@kylin.apache.org by GitBox <gi...@apache.org> on 2018/12/11 05:50:26 UTC

[GitHub] shaofengshi closed pull request #380: KYLIN-3416 Group by with expression can not aggregate exactly

shaofengshi closed pull request #380: KYLIN-3416 Group by with expression can not aggregate exactly
URL: https://github.com/apache/kylin/pull/380
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/realization/SQLDigest.java b/core-metadata/src/main/java/org/apache/kylin/metadata/realization/SQLDigest.java
index 0b23e48eed..fa7d1e5afc 100644
--- a/core-metadata/src/main/java/org/apache/kylin/metadata/realization/SQLDigest.java
+++ b/core-metadata/src/main/java/org/apache/kylin/metadata/realization/SQLDigest.java
@@ -60,6 +60,7 @@ public SQLCall(String function, Iterable<Object> args) {
     public Set<TblColRef> subqueryJoinParticipants;
 
     public Map<TblColRef, TupleExpression> dynGroupbyColumns;
+    public boolean groupByExpression;
 
     // aggregation
     public Set<TblColRef> metricColumns;
@@ -85,7 +86,7 @@ public SQLCall(String function, Iterable<Object> args) {
 
     public SQLDigest(String factTable, Set<TblColRef> allColumns, List<JoinDesc> joinDescs, // model
             List<TblColRef> groupbyColumns, Set<TblColRef> subqueryJoinParticipants,
-            Map<TblColRef, TupleExpression> dynGroupByColumns, // group by
+            Map<TblColRef, TupleExpression> dynGroupByColumns, boolean groupByExpression, // group by
             Set<TblColRef> metricColumns, List<FunctionDesc> aggregations, List<SQLCall> aggrSqlCalls, // aggregation
             List<DynamicFunctionDesc> dynAggregations, //
             Set<TblColRef> rtDimensionColumns, Set<TblColRef> rtMetricColumns, // dynamic col related columns
@@ -101,6 +102,7 @@ public SQLDigest(String factTable, Set<TblColRef> allColumns, List<JoinDesc> joi
         this.subqueryJoinParticipants = subqueryJoinParticipants;
 
         this.dynGroupbyColumns = dynGroupByColumns;
+        this.groupByExpression = groupByExpression;
 
         this.metricColumns = metricColumns;
         this.aggregations = aggregations;
diff --git a/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java b/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java
index 68c4113793..8d82873e00 100644
--- a/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java
+++ b/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java
@@ -163,7 +163,7 @@ public GTCubeStorageQueryRequest getStorageQueryRequest(StorageContext context,
 
         // exactAggregation mean: needn't aggregation at storage and query engine both.
         boolean exactAggregation = isExactAggregation(context, cuboid, groups, otherDimsD, singleValuesD,
-                derivedPostAggregation, sqlDigest.aggregations, sqlDigest.aggrSqlCalls);
+                derivedPostAggregation, sqlDigest.aggregations, sqlDigest.aggrSqlCalls, sqlDigest.groupByExpression);
         context.setExactAggregation(exactAggregation);
 
         // replace derived columns in filter with host columns; columns on loosened condition must be added to group by
@@ -557,7 +557,7 @@ private TupleFilter checkHavingCanPushDown(TupleFilter havingFilter, Set<TblColR
 
     private boolean isExactAggregation(StorageContext context, Cuboid cuboid, Collection<TblColRef> groups,
             Set<TblColRef> othersD, Set<TblColRef> singleValuesD, Set<TblColRef> derivedPostAggregation,
-            Collection<FunctionDesc> functionDescs, List<SQLDigest.SQLCall> aggrSQLCalls) {
+            Collection<FunctionDesc> functionDescs, List<SQLDigest.SQLCall> aggrSQLCalls, boolean groupByExpression) {
         if (context.isNeedStorageAggregation()) {
             logger.info("exactAggregation is false because need storage aggregation");
             return false;
@@ -605,6 +605,12 @@ private boolean isExactAggregation(StorageContext context, Cuboid cuboid, Collec
             }
         }
 
+        // for group by expression like: group by seller_id/100. seller_id_1(200) get 2, seller_id_2(201) also get 2, so can't aggregate exactly
+        if (groupByExpression) {
+            logger.info("exactAggregation is false because group by expression");
+            return false;
+        }
+
         logger.info("exactAggregation is true, cuboid id is {0}", String.valueOf(cuboid.getId()));
         return true;
     }
diff --git a/kylin-it/src/test/java/org/apache/kylin/storage/hbase/ITStorageTest.java b/kylin-it/src/test/java/org/apache/kylin/storage/hbase/ITStorageTest.java
index 61aa5602e2..3f8dccc35a 100644
--- a/kylin-it/src/test/java/org/apache/kylin/storage/hbase/ITStorageTest.java
+++ b/kylin-it/src/test/java/org/apache/kylin/storage/hbase/ITStorageTest.java
@@ -142,6 +142,7 @@ private int search(List<TblColRef> groups, List<FunctionDesc> aggregations, Tupl
             SQLDigest sqlDigest = new SQLDigest("default.test_kylin_fact", /*allCol*/ Collections.<TblColRef> emptySet(), /*join*/ null, //
                     groups, /*subqueryJoinParticipants*/ Sets.<TblColRef> newHashSet(), //
                     /*dynamicGroupByColumns*/ Collections.<TblColRef, TupleExpression> emptyMap(), //
+                    /*groupByExpression*/ false, //
                     /*metricCol*/ Collections.<TblColRef> emptySet(), aggregations, /*aggrSqlCalls*/ Collections.<SQLCall> emptyList(), //
                     /*dynamicAggregations*/ Collections.<DynamicFunctionDesc> emptyList(), //
                     /*runtimeDimensionColumns*/ Collections.<TblColRef> emptySet(), //
diff --git a/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java b/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java
index 5ea043746a..fabd854e2b 100644
--- a/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java
+++ b/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java
@@ -85,6 +85,7 @@
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
 
+import static org.apache.kylin.metadata.expression.TupleExpression.ExpressionOperatorEnum.COLUMN;
 /**
  */
 public class OLAPAggregateRel extends Aggregate implements OLAPRel {
@@ -267,6 +268,12 @@ void buildGroups() {
         this.groups = Lists.newArrayList();
         for (int i = getGroupSet().nextSetBit(0); i >= 0; i = getGroupSet().nextSetBit(i + 1)) {
             TupleExpression tupleExpression = inputColumnRowType.getSourceColumnsByIndex(i);
+
+            // group by column with operator
+            if (this.context.groupByExpression == false && !(COLUMN.equals(tupleExpression.getOperator()) && tupleExpression.getChildren().isEmpty())) {
+                this.context.groupByExpression = true;
+            }
+
             TblColRef groupOutCol = inputColumnRowType.getColumnByIndex(i);
             if (tupleExpression instanceof ColumnTupleExpression) {
                 this.groups.add(((ColumnTupleExpression) tupleExpression).getColumn());
diff --git a/query/src/main/java/org/apache/kylin/query/relnode/OLAPContext.java b/query/src/main/java/org/apache/kylin/query/relnode/OLAPContext.java
index e530b725c2..c0d010b931 100644
--- a/query/src/main/java/org/apache/kylin/query/relnode/OLAPContext.java
+++ b/query/src/main/java/org/apache/kylin/query/relnode/OLAPContext.java
@@ -133,6 +133,7 @@ public OLAPContext(int seq) {
     public boolean afterJoin = false;
     public boolean hasJoin = false;
     public boolean hasWindow = false;
+    public boolean groupByExpression = false; // checkout if group by column has operator
 
     // cube metadata
     public IRealization realization;
@@ -190,7 +191,7 @@ public SQLDigest getSQLDigest() {
                 }
             }
             sqlDigest = new SQLDigest(firstTableScan.getTableName(), allColumns, joins, // model
-                    groupByColumns, subqueryJoinParticipants, dynGroupBy, // group by
+                    groupByColumns, subqueryJoinParticipants, dynGroupBy, groupByExpression, // group by
                     metricsColumns, aggregations, aggrSqlCalls, dynFuncs, // aggregation
                     rtDimColumns, rtMetricColumns, // runtime related columns
                     filterColumns, filter, havingFilter, // filter


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services