You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by da...@apache.org on 2019/10/14 02:01:50 UTC

[calcite] branch master updated: [CALCITE-3404] In AggregateExpandDistinctAggregatesRule, treat all the agg expressions as distinct if they have the same arguments and the non-distinct expressions distinct constraints can be ignored

This is an automated email from the ASF dual-hosted git repository.

danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/master by this push:
     new 8f5f251  [CALCITE-3404] In AggregateExpandDistinctAggregatesRule, treat all the agg expressions as distinct if they have the same arguments and the non-distinct expressions distinct constraints can be ignored
8f5f251 is described below

commit 8f5f251123ee9cc036cada902c4795db3869ffa0
Author: yuzhao.cyz <yu...@alibaba-inc.com>
AuthorDate: Sat Oct 12 11:51:58 2019 +0800

    [CALCITE-3404] In AggregateExpandDistinctAggregatesRule, treat all the agg expressions as distinct if they have the same arguments and the non-distinct expressions distinct constraints can be ignored
    
    In AggregateExpandDistinctAggregatesRule, if all of the agg expressions
    have the same arguments, and all the non-distinct agg expressions have
    a distinct optionality of Optionality.IGNORED, then all the agg
    expressions can be treated as distinct and the more efficient form of
    the plan can be used.
---
 .../AggregateExpandDistinctAggregatesRule.java     | 109 +++++++++++++--------
 .../org/apache/calcite/test/RelOptRulesTest.java   |  47 +++++++++
 .../org/apache/calcite/test/RelOptRulesTest.xml    |  80 +++++++++++++++
 3 files changed, 193 insertions(+), 43 deletions(-)

diff --git a/core/src/main/java/org/apache/calcite/rel/rules/AggregateExpandDistinctAggregatesRule.java b/core/src/main/java/org/apache/calcite/rel/rules/AggregateExpandDistinctAggregatesRule.java
index 1a75136..0cdd0dd 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/AggregateExpandDistinctAggregatesRule.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/AggregateExpandDistinctAggregatesRule.java
@@ -40,6 +40,7 @@ import org.apache.calcite.tools.RelBuilder;
 import org.apache.calcite.tools.RelBuilderFactory;
 import org.apache.calcite.util.ImmutableBitSet;
 import org.apache.calcite.util.ImmutableIntList;
+import org.apache.calcite.util.Optionality;
 import org.apache.calcite.util.Pair;
 import org.apache.calcite.util.Util;
 
@@ -49,6 +50,7 @@ import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.LinkedHashMap;
@@ -58,6 +60,8 @@ import java.util.Map;
 import java.util.Set;
 import java.util.SortedSet;
 import java.util.TreeSet;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 /**
  * Planner rule that expands distinct aggregates
@@ -125,44 +129,65 @@ public final class AggregateExpandDistinctAggregatesRule extends RelOptRule {
     }
 
     // Find all of the agg expressions. We use a LinkedHashSet to ensure determinism.
-    int nonDistinctAggCallCount = 0;  // find all aggregate calls without distinct
-    int filterCount = 0;
-    int unsupportedNonDistinctAggCallCount = 0;
-    final Set<Pair<List<Integer>, Integer>> argLists = new LinkedHashSet<>();
-    for (AggregateCall aggCall : aggregate.getAggCallList()) {
-      if (aggCall.filterArg >= 0) {
-        ++filterCount;
-      }
-      if (!aggCall.isDistinct()) {
-        ++nonDistinctAggCallCount;
-        final SqlKind aggCallKind = aggCall.getAggregation().getKind();
-        // We only support COUNT/SUM/MIN/MAX for the "single" count distinct optimization
-        switch (aggCallKind) {
-        case COUNT:
-        case SUM:
-        case SUM0:
-        case MIN:
-        case MAX:
-          break;
-        default:
-          ++unsupportedNonDistinctAggCallCount;
-        }
-      } else {
-        argLists.add(Pair.of(aggCall.getArgList(), aggCall.filterArg));
-      }
-    }
-
-    final int distinctAggCallCount =
-        aggregate.getAggCallList().size() - nonDistinctAggCallCount;
-    Preconditions.checkState(argLists.size() > 0, "containsDistinctCall lied");
+    final List<AggregateCall> aggCalls = aggregate.getAggCallList();
+    // Find all aggregate calls with distinct
+    final List<AggregateCall> distinctAggCalls = aggCalls.stream()
+        .filter(AggregateCall::isDistinct).collect(Collectors.toList());
+    // Find all aggregate calls without distinct
+    final List<AggregateCall> nonDistinctAggCalls = aggCalls.stream()
+        .filter(aggCall -> !aggCall.isDistinct()).collect(Collectors.toList());
+    final long filterCount = aggCalls.stream()
+        .filter(aggCall -> aggCall.filterArg >= 0).count();
+    final long unsupportedNonDistinctAggCallCount = nonDistinctAggCalls.stream()
+        .filter(aggCall -> {
+          final SqlKind aggCallKind = aggCall.getAggregation().getKind();
+          // We only support COUNT/SUM/MIN/MAX for the "single" count distinct optimization
+          switch (aggCallKind) {
+          case COUNT:
+          case SUM:
+          case SUM0:
+          case MIN:
+          case MAX:
+            return false;
+          default:
+            return true;
+          }
+        }).count();
+    // Argument list of distinct agg calls.
+    final Set<Pair<List<Integer>, Integer>> distinctCallArgLists = distinctAggCalls.stream()
+        .map(aggCall -> Pair.of(aggCall.getArgList(), aggCall.filterArg))
+        .collect(Collectors.toCollection(LinkedHashSet::new));
+
+    Preconditions.checkState(distinctCallArgLists.size() > 0,
+        "containsDistinctCall lied");
 
     // If all of the agg expressions are distinct and have the same
     // arguments then we can use a more efficient form.
-    if (nonDistinctAggCallCount == 0
-        && argLists.size() == 1
+
+    // MAX, MIN, BIT_AND and BIT_OR always ignore the distinct attribute,
+    // so when they are mixed in with other distinct agg calls
+    // we can still use this more efficient form.
+
+    // Treat the agg expressions with Optionality.IGNORED as distinct and
+    // recompute the non-distinct agg call count and the distinct agg
+    // call arguments.
+    final List<AggregateCall> nonDistinctAggCallsOfIgnoredOptionality =
+        nonDistinctAggCalls.stream().filter(aggCall ->
+            aggCall.getAggregation().getDistinctOptionality() == Optionality.IGNORED)
+            .collect(Collectors.toList());
+    // Unlike distinctCallArgLists, this set also contains args that come from
+    // agg calls which can ignore the distinct constraint.
+    final Set<Pair<List<Integer>, Integer>> distinctCallArgLists2 =
+        Stream.of(distinctAggCalls, nonDistinctAggCallsOfIgnoredOptionality)
+            .flatMap(Collection::stream)
+            .map(aggCall -> Pair.of(aggCall.getArgList(), aggCall.filterArg))
+            .collect(Collectors.toCollection(LinkedHashSet::new));
+
+    if ((nonDistinctAggCalls.size() - nonDistinctAggCallsOfIgnoredOptionality.size()) == 0
+        && distinctCallArgLists2.size() == 1
         && aggregate.getGroupType() == Group.SIMPLE) {
       final Pair<List<Integer>, Integer> pair =
-          Iterables.getOnlyElement(argLists);
+          Iterables.getOnlyElement(distinctCallArgLists2);
       final RelBuilder relBuilder = call.builder();
       convertMonopole(relBuilder, aggregate, pair.left, pair.right);
       call.transformTo(relBuilder.build());
@@ -176,12 +201,12 @@ public final class AggregateExpandDistinctAggregatesRule extends RelOptRule {
 
     // If only one distinct aggregate and one or more non-distinct aggregates,
     // we can generate multi-phase aggregates
-    if (distinctAggCallCount == 1 // one distinct aggregate
+    if (distinctAggCalls.size() == 1 // one distinct aggregate
         && filterCount == 0 // no filter
         && unsupportedNonDistinctAggCallCount == 0 // sum/min/max/count in non-distinct aggregate
-        && nonDistinctAggCallCount > 0) { // one or more non-distinct aggregates
+        && nonDistinctAggCalls.size() > 0) { // one or more non-distinct aggregates
       final RelBuilder relBuilder = call.builder();
-      convertSingletonDistinct(relBuilder, aggregate, argLists);
+      convertSingletonDistinct(relBuilder, aggregate, distinctCallArgLists);
       call.transformTo(relBuilder.build());
       return;
     }
@@ -229,7 +254,7 @@ public final class AggregateExpandDistinctAggregatesRule extends RelOptRule {
 
     // For each set of operands, find and rewrite all calls which have that
     // set of operands.
-    for (Pair<List<Integer>, Integer> argList : argLists) {
+    for (Pair<List<Integer>, Integer> argList : distinctCallArgLists) {
       doRewrite(relBuilder, aggregate, n++, argList.left, argList.right, refs);
     }
 
@@ -274,8 +299,7 @@ public final class AggregateExpandDistinctAggregatesRule extends RelOptRule {
 
     // Add the distinct aggregate column(s) to the group-by columns,
     // if not already a part of the group-by
-    final SortedSet<Integer> bottomGroups = new TreeSet<>();
-    bottomGroups.addAll(aggregate.getGroupSet().asList());
+    final SortedSet<Integer> bottomGroups = new TreeSet<>(aggregate.getGroupSet().asList());
     for (AggregateCall aggCall : originalAggCalls) {
       if (aggCall.isDistinct()) {
         bottomGroups.addAll(aggCall.getArgList());
@@ -736,7 +760,8 @@ public final class AggregateExpandDistinctAggregatesRule extends RelOptRule {
       // arguments. If we're rewriting aggregates whose args are {sal}, we will
       // rewrite COUNT(DISTINCT sal) and SUM(DISTINCT sal) but ignore
       // COUNT(DISTINCT gender) or SUM(sal).
-      if (!aggCall.isDistinct()) {
+      if (!aggCall.isDistinct()
+          && aggCall.getAggregation().getDistinctOptionality() != Optionality.IGNORED) {
         continue;
       }
       if (!aggCall.getArgList().equals(argList)) {
@@ -823,9 +848,7 @@ public final class AggregateExpandDistinctAggregatesRule extends RelOptRule {
         RexNode condition =
             rexBuilder.makeCall(SqlStdOperatorTable.CASE, filterRef,
                 argRef.left,
-                rexBuilder.ensureType(argRef.left.getType(),
-                    rexBuilder.makeNullLiteral(argRef.left.getType()),
-                    true));
+                rexBuilder.makeNullLiteral(argRef.left.getType()));
         sourceOf.put(arg, projects.size());
         projects.add(Pair.of(condition, "i$" + argRef.right));
         continue;
diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
index 4d7a4b9..5a852aa 100644
--- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
@@ -6497,6 +6497,53 @@ public class RelOptRulesTest extends RelOptTestBase {
     String planAfter = NL + RelOptUtil.toString(relAfter);
     getDiffRepos().assertEquals("planAfter", "${planAfter}", planAfter);
   }
+
+  /**
+   * Test case for
+   * <a href="https://issues.apache.org/jira/browse/CALCITE-3404">[CALCITE-3404]
+   * Treat agg expressions that can ignore distinct constraint as distinct
+   * in AggregateExpandDistinctAggregatesRule
+   * when all the other agg expressions are distinct and have same arguments</a>
+   */
+  @Test public void testMaxReuseDistinctAttrWithMixedOptionality() {
+    final String sql = "select sum(distinct deptno), count(distinct deptno), "
+        + "max(deptno) from emp";
+
+    HepProgram program = new HepProgramBuilder()
+        .addRuleInstance(AggregateExpandDistinctAggregatesRule.INSTANCE)
+        .build();
+    sql(sql).with(program).check();
+  }
+
+  @Test public void testMinReuseDistinctAttrWithMixedOptionality() {
+    final String sql = "select sum(distinct deptno), count(distinct deptno), "
+        + "min(deptno) from emp";
+
+    HepProgram program = new HepProgramBuilder()
+        .addRuleInstance(AggregateExpandDistinctAggregatesRule.INSTANCE)
+        .build();
+    sql(sql).with(program).check();
+  }
+
+  @Test public void testBitAndReuseDistinctAttrWithMixedOptionality() {
+    final String sql = "select sum(distinct deptno), count(distinct deptno), "
+        + "bit_and(deptno) from emp";
+
+    HepProgram program = new HepProgramBuilder()
+        .addRuleInstance(AggregateExpandDistinctAggregatesRule.INSTANCE)
+        .build();
+    sql(sql).with(program).check();
+  }
+
+  @Test public void testBitOrReuseDistinctAttrWithMixedOptionality() {
+    final String sql = "select sum(distinct deptno), count(distinct deptno), "
+        + "bit_or(deptno) from emp";
+
+    HepProgram program = new HepProgramBuilder()
+        .addRuleInstance(AggregateExpandDistinctAggregatesRule.INSTANCE)
+        .build();
+    sql(sql).with(program).check();
+  }
 }
 
 // End RelOptRulesTest.java
diff --git a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index 3ecd662..d4f5ef5 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -11833,4 +11833,84 @@ EnumerableProject(FNAME=[$1], LNAME=[$2])
 ]]>
         </Resource>
     </TestCase>
+    <TestCase name="testMaxReuseDistinctAttrWithMixedOptionality">
+        <Resource name="sql">
+            <![CDATA[select sum(distinct deptno), count(distinct deptno), max(deptno) from emp]]>
+        </Resource>
+        <Resource name="planBefore">
+            <![CDATA[
+LogicalAggregate(group=[{}], EXPR$0=[SUM(DISTINCT $0)], EXPR$1=[COUNT(DISTINCT $0)], EXPR$2=[MAX($0)])
+  LogicalProject(DEPTNO=[$7])
+    LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+        </Resource>
+        <Resource name="planAfter">
+            <![CDATA[
+LogicalAggregate(group=[{}], EXPR$0=[SUM($0)], EXPR$1=[COUNT($0)], EXPR$2=[MAX($0)])
+  LogicalAggregate(group=[{0}])
+    LogicalProject(DEPTNO=[$7])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+        </Resource>
+    </TestCase>
+    <TestCase name="testMinReuseDistinctAttrWithMixedOptionality">
+        <Resource name="sql">
+            <![CDATA[select sum(distinct deptno), count(distinct deptno), min(deptno) from emp]]>
+        </Resource>
+        <Resource name="planBefore">
+            <![CDATA[
+LogicalAggregate(group=[{}], EXPR$0=[SUM(DISTINCT $0)], EXPR$1=[COUNT(DISTINCT $0)], EXPR$2=[MIN($0)])
+  LogicalProject(DEPTNO=[$7])
+    LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+        </Resource>
+        <Resource name="planAfter">
+            <![CDATA[
+LogicalAggregate(group=[{}], EXPR$0=[SUM($0)], EXPR$1=[COUNT($0)], EXPR$2=[MIN($0)])
+  LogicalAggregate(group=[{0}])
+    LogicalProject(DEPTNO=[$7])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+        </Resource>
+    </TestCase>
+    <TestCase name="testBitAndReuseDistinctAttrWithMixedOptionality">
+        <Resource name="sql">
+            <![CDATA[select sum(distinct deptno), count(distinct deptno), bit_and(deptno) from emp]]>
+        </Resource>
+        <Resource name="planBefore">
+            <![CDATA[
+LogicalAggregate(group=[{}], EXPR$0=[SUM(DISTINCT $0)], EXPR$1=[COUNT(DISTINCT $0)], EXPR$2=[BIT_AND($0)])
+  LogicalProject(DEPTNO=[$7])
+    LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+        </Resource>
+        <Resource name="planAfter">
+            <![CDATA[
+LogicalAggregate(group=[{}], EXPR$0=[SUM($0)], EXPR$1=[COUNT($0)], EXPR$2=[BIT_AND($0)])
+  LogicalAggregate(group=[{0}])
+    LogicalProject(DEPTNO=[$7])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+        </Resource>
+    </TestCase>
+    <TestCase name="testBitOrReuseDistinctAttrWithMixedOptionality">
+        <Resource name="sql">
+            <![CDATA[select sum(distinct deptno), count(distinct deptno), bit_or(deptno) from emp]]>
+        </Resource>
+        <Resource name="planBefore">
+            <![CDATA[
+LogicalAggregate(group=[{}], EXPR$0=[SUM(DISTINCT $0)], EXPR$1=[COUNT(DISTINCT $0)], EXPR$2=[BIT_OR($0)])
+  LogicalProject(DEPTNO=[$7])
+    LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+        </Resource>
+        <Resource name="planAfter">
+            <![CDATA[
+LogicalAggregate(group=[{}], EXPR$0=[SUM($0)], EXPR$1=[COUNT($0)], EXPR$2=[BIT_OR($0)])
+  LogicalAggregate(group=[{0}])
+    LogicalProject(DEPTNO=[$7])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+        </Resource>
+    </TestCase>
 </Root>