You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by da...@apache.org on 2019/10/14 02:01:50 UTC
[calcite] branch master updated: [CALCITE-3404] In
AggregateExpandDistinctAggregatesRule,
treat all the agg expressions as distinct if they have the same arguments
and the non-distinct expressions distinct constraints can be ignored
This is an automated email from the ASF dual-hosted git repository.
danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/master by this push:
new 8f5f251 [CALCITE-3404] In AggregateExpandDistinctAggregatesRule, treat all the agg expressions as distinct if they have the same arguments and the non-distinct expressions distinct constraints can be ignored
8f5f251 is described below
commit 8f5f251123ee9cc036cada902c4795db3869ffa0
Author: yuzhao.cyz <yu...@alibaba-inc.com>
AuthorDate: Sat Oct 12 11:51:58 2019 +0800
[CALCITE-3404] In AggregateExpandDistinctAggregatesRule, treat all the agg expressions as distinct if they have the same arguments and the non-distinct expressions distinct constraints can be ignored
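In AggregateExpandDistinctAggregatesRule, if all of the agg expressions
have the same arguments, and every non-distinct agg expression has
Optionality.IGNORED (that is, DISTINCT has no effect on its result, as
with MIN, MAX, BIT_AND and BIT_OR), then all the agg expressions can be
treated as distinct and the more efficient plan for a single distinct
argument list can be used. For example, SUM(DISTINCT deptno),
COUNT(DISTINCT deptno), MAX(deptno) is rewritten as if MAX were distinct too.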
---
.../AggregateExpandDistinctAggregatesRule.java | 109 +++++++++++++--------
.../org/apache/calcite/test/RelOptRulesTest.java | 47 +++++++++
.../org/apache/calcite/test/RelOptRulesTest.xml | 80 +++++++++++++++
3 files changed, 193 insertions(+), 43 deletions(-)
diff --git a/core/src/main/java/org/apache/calcite/rel/rules/AggregateExpandDistinctAggregatesRule.java b/core/src/main/java/org/apache/calcite/rel/rules/AggregateExpandDistinctAggregatesRule.java
index 1a75136..0cdd0dd 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/AggregateExpandDistinctAggregatesRule.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/AggregateExpandDistinctAggregatesRule.java
@@ -40,6 +40,7 @@ import org.apache.calcite.tools.RelBuilder;
import org.apache.calcite.tools.RelBuilderFactory;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.ImmutableIntList;
+import org.apache.calcite.util.Optionality;
import org.apache.calcite.util.Pair;
import org.apache.calcite.util.Util;
@@ -49,6 +50,7 @@ import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
@@ -58,6 +60,8 @@ import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
/**
* Planner rule that expands distinct aggregates
@@ -125,44 +129,65 @@ public final class AggregateExpandDistinctAggregatesRule extends RelOptRule {
}
// Find all of the agg expressions. We use a LinkedHashSet to ensure determinism.
- int nonDistinctAggCallCount = 0; // find all aggregate calls without distinct
- int filterCount = 0;
- int unsupportedNonDistinctAggCallCount = 0;
- final Set<Pair<List<Integer>, Integer>> argLists = new LinkedHashSet<>();
- for (AggregateCall aggCall : aggregate.getAggCallList()) {
- if (aggCall.filterArg >= 0) {
- ++filterCount;
- }
- if (!aggCall.isDistinct()) {
- ++nonDistinctAggCallCount;
- final SqlKind aggCallKind = aggCall.getAggregation().getKind();
- // We only support COUNT/SUM/MIN/MAX for the "single" count distinct optimization
- switch (aggCallKind) {
- case COUNT:
- case SUM:
- case SUM0:
- case MIN:
- case MAX:
- break;
- default:
- ++unsupportedNonDistinctAggCallCount;
- }
- } else {
- argLists.add(Pair.of(aggCall.getArgList(), aggCall.filterArg));
- }
- }
-
- final int distinctAggCallCount =
- aggregate.getAggCallList().size() - nonDistinctAggCallCount;
- Preconditions.checkState(argLists.size() > 0, "containsDistinctCall lied");
+ final List<AggregateCall> aggCalls = aggregate.getAggCallList();
+ // Find all aggregate calls with distinct
+ final List<AggregateCall> distinctAggCalls = aggCalls.stream()
+ .filter(AggregateCall::isDistinct).collect(Collectors.toList());
+ // Find all aggregate calls without distinct
+ final List<AggregateCall> nonDistinctAggCalls = aggCalls.stream()
+ .filter(aggCall -> !aggCall.isDistinct()).collect(Collectors.toList());
+ final long filterCount = aggCalls.stream()
+ .filter(aggCall -> aggCall.filterArg >= 0).count();
+ final long unsupportedNonDistinctAggCallCount = nonDistinctAggCalls.stream()
+ .filter(aggCall -> {
+ final SqlKind aggCallKind = aggCall.getAggregation().getKind();
+ // We only support COUNT/SUM/MIN/MAX for the "single" count distinct optimization
+ switch (aggCallKind) {
+ case COUNT:
+ case SUM:
+ case SUM0:
+ case MIN:
+ case MAX:
+ return false;
+ default:
+ return true;
+ }
+ }).count();
+ // Argument list of distinct agg calls.
+ final Set<Pair<List<Integer>, Integer>> distinctCallArgLists = distinctAggCalls.stream()
+ .map(aggCall -> Pair.of(aggCall.getArgList(), aggCall.filterArg))
+ .collect(Collectors.toCollection(LinkedHashSet::new));
+
+ Preconditions.checkState(distinctCallArgLists.size() > 0,
+ "containsDistinctCall lied");
// If all of the agg expressions are distinct and have the same
// arguments then we can use a more efficient form.
- if (nonDistinctAggCallCount == 0
- && argLists.size() == 1
+
+ // MAX, MIN, BIT_AND and BIT_OR always ignore the DISTINCT attribute.
+ // When they are mixed in with other distinct agg calls,
+ // we can still use this optimization.
+
+ // Treat agg expressions with Optionality.IGNORED as distinct, and
+ // recompute the non-distinct agg call count and the distinct agg
+ // call arguments accordingly.
+ final List<AggregateCall> nonDistinctAggCallsOfIgnoredOptionality =
+ nonDistinctAggCalls.stream().filter(aggCall ->
+ aggCall.getAggregation().getDistinctOptionality() == Optionality.IGNORED)
+ .collect(Collectors.toList());
+ // Unlike distinctCallArgLists, this set also contains the args of
+ // agg calls that can ignore the distinct constraint.
+ final Set<Pair<List<Integer>, Integer>> distinctCallArgLists2 =
+ Stream.of(distinctAggCalls, nonDistinctAggCallsOfIgnoredOptionality)
+ .flatMap(Collection::stream)
+ .map(aggCall -> Pair.of(aggCall.getArgList(), aggCall.filterArg))
+ .collect(Collectors.toCollection(LinkedHashSet::new));
+
+ if ((nonDistinctAggCalls.size() - nonDistinctAggCallsOfIgnoredOptionality.size()) == 0
+ && distinctCallArgLists2.size() == 1
&& aggregate.getGroupType() == Group.SIMPLE) {
final Pair<List<Integer>, Integer> pair =
- Iterables.getOnlyElement(argLists);
+ Iterables.getOnlyElement(distinctCallArgLists2);
final RelBuilder relBuilder = call.builder();
convertMonopole(relBuilder, aggregate, pair.left, pair.right);
call.transformTo(relBuilder.build());
@@ -176,12 +201,12 @@ public final class AggregateExpandDistinctAggregatesRule extends RelOptRule {
// If only one distinct aggregate and one or more non-distinct aggregates,
// we can generate multi-phase aggregates
- if (distinctAggCallCount == 1 // one distinct aggregate
+ if (distinctAggCalls.size() == 1 // one distinct aggregate
&& filterCount == 0 // no filter
&& unsupportedNonDistinctAggCallCount == 0 // sum/min/max/count in non-distinct aggregate
- && nonDistinctAggCallCount > 0) { // one or more non-distinct aggregates
+ && nonDistinctAggCalls.size() > 0) { // one or more non-distinct aggregates
final RelBuilder relBuilder = call.builder();
- convertSingletonDistinct(relBuilder, aggregate, argLists);
+ convertSingletonDistinct(relBuilder, aggregate, distinctCallArgLists);
call.transformTo(relBuilder.build());
return;
}
@@ -229,7 +254,7 @@ public final class AggregateExpandDistinctAggregatesRule extends RelOptRule {
// For each set of operands, find and rewrite all calls which have that
// set of operands.
- for (Pair<List<Integer>, Integer> argList : argLists) {
+ for (Pair<List<Integer>, Integer> argList : distinctCallArgLists) {
doRewrite(relBuilder, aggregate, n++, argList.left, argList.right, refs);
}
@@ -274,8 +299,7 @@ public final class AggregateExpandDistinctAggregatesRule extends RelOptRule {
// Add the distinct aggregate column(s) to the group-by columns,
// if not already a part of the group-by
- final SortedSet<Integer> bottomGroups = new TreeSet<>();
- bottomGroups.addAll(aggregate.getGroupSet().asList());
+ final SortedSet<Integer> bottomGroups = new TreeSet<>(aggregate.getGroupSet().asList());
for (AggregateCall aggCall : originalAggCalls) {
if (aggCall.isDistinct()) {
bottomGroups.addAll(aggCall.getArgList());
@@ -736,7 +760,8 @@ public final class AggregateExpandDistinctAggregatesRule extends RelOptRule {
// arguments. If we're rewriting aggregates whose args are {sal}, we will
// rewrite COUNT(DISTINCT sal) and SUM(DISTINCT sal) but ignore
// COUNT(DISTINCT gender) or SUM(sal).
- if (!aggCall.isDistinct()) {
+ if (!aggCall.isDistinct()
+ && aggCall.getAggregation().getDistinctOptionality() != Optionality.IGNORED) {
continue;
}
if (!aggCall.getArgList().equals(argList)) {
@@ -823,9 +848,7 @@ public final class AggregateExpandDistinctAggregatesRule extends RelOptRule {
RexNode condition =
rexBuilder.makeCall(SqlStdOperatorTable.CASE, filterRef,
argRef.left,
- rexBuilder.ensureType(argRef.left.getType(),
- rexBuilder.makeNullLiteral(argRef.left.getType()),
- true));
+ rexBuilder.makeNullLiteral(argRef.left.getType()));
sourceOf.put(arg, projects.size());
projects.add(Pair.of(condition, "i$" + argRef.right));
continue;
diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
index 4d7a4b9..5a852aa 100644
--- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
@@ -6497,6 +6497,53 @@ public class RelOptRulesTest extends RelOptTestBase {
String planAfter = NL + RelOptUtil.toString(relAfter);
getDiffRepos().assertEquals("planAfter", "${planAfter}", planAfter);
}
+
+ /**
+ * Test case for
+ * <a href="https://issues.apache.org/jira/browse/CALCITE-3404">[CALCITE-3404]
+ * Treat agg expressions that can ignore distinct constraint as distinct
+ * in AggregateExpandDistinctAggregatesRule
+ * when all the other agg expressions are distinct and have same arguments</a>
+ */
+ @Test public void testMaxReuseDistinctAttrWithMixedOptionality() {
+ final String sql = "select sum(distinct deptno), count(distinct deptno), "
+ + "max(deptno) from emp";
+
+ HepProgram program = new HepProgramBuilder()
+ .addRuleInstance(AggregateExpandDistinctAggregatesRule.INSTANCE)
+ .build();
+ sql(sql).with(program).check();
+ }
+
+ @Test public void testMinReuseDistinctAttrWithMixedOptionality() {
+ final String sql = "select sum(distinct deptno), count(distinct deptno), "
+ + "min(deptno) from emp";
+
+ HepProgram program = new HepProgramBuilder()
+ .addRuleInstance(AggregateExpandDistinctAggregatesRule.INSTANCE)
+ .build();
+ sql(sql).with(program).check();
+ }
+
+ @Test public void testBitAndReuseDistinctAttrWithMixedOptionality() {
+ final String sql = "select sum(distinct deptno), count(distinct deptno), "
+ + "bit_and(deptno) from emp";
+
+ HepProgram program = new HepProgramBuilder()
+ .addRuleInstance(AggregateExpandDistinctAggregatesRule.INSTANCE)
+ .build();
+ sql(sql).with(program).check();
+ }
+
+ @Test public void testBitOrReuseDistinctAttrWithMixedOptionality() {
+ final String sql = "select sum(distinct deptno), count(distinct deptno), "
+ + "bit_or(deptno) from emp";
+
+ HepProgram program = new HepProgramBuilder()
+ .addRuleInstance(AggregateExpandDistinctAggregatesRule.INSTANCE)
+ .build();
+ sql(sql).with(program).check();
+ }
}
// End RelOptRulesTest.java
diff --git a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index 3ecd662..d4f5ef5 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -11833,4 +11833,84 @@ EnumerableProject(FNAME=[$1], LNAME=[$2])
]]>
</Resource>
</TestCase>
+ <TestCase name="testMaxReuseDistinctAttrWithMixedOptionality">
+ <Resource name="sql">
+ <![CDATA[select sum(distinct deptno), count(distinct deptno), max(deptno) from emp]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalAggregate(group=[{}], EXPR$0=[SUM(DISTINCT $0)], EXPR$1=[COUNT(DISTINCT $0)], EXPR$2=[MAX($0)])
+ LogicalProject(DEPTNO=[$7])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalAggregate(group=[{}], EXPR$0=[SUM($0)], EXPR$1=[COUNT($0)], EXPR$2=[MAX($0)])
+ LogicalAggregate(group=[{0}])
+ LogicalProject(DEPTNO=[$7])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testMinReuseDistinctAttrWithMixedOptionality">
+ <Resource name="sql">
+ <![CDATA[select sum(distinct deptno), count(distinct deptno), min(deptno) from emp]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalAggregate(group=[{}], EXPR$0=[SUM(DISTINCT $0)], EXPR$1=[COUNT(DISTINCT $0)], EXPR$2=[MIN($0)])
+ LogicalProject(DEPTNO=[$7])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalAggregate(group=[{}], EXPR$0=[SUM($0)], EXPR$1=[COUNT($0)], EXPR$2=[MIN($0)])
+ LogicalAggregate(group=[{0}])
+ LogicalProject(DEPTNO=[$7])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testBitAndReuseDistinctAttrWithMixedOptionality">
+ <Resource name="sql">
+ <![CDATA[select sum(distinct deptno), count(distinct deptno), bit_and(deptno) from emp]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalAggregate(group=[{}], EXPR$0=[SUM(DISTINCT $0)], EXPR$1=[COUNT(DISTINCT $0)], EXPR$2=[BIT_AND($0)])
+ LogicalProject(DEPTNO=[$7])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalAggregate(group=[{}], EXPR$0=[SUM($0)], EXPR$1=[COUNT($0)], EXPR$2=[BIT_AND($0)])
+ LogicalAggregate(group=[{0}])
+ LogicalProject(DEPTNO=[$7])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testBitOrReuseDistinctAttrWithMixedOptionality">
+ <Resource name="sql">
+ <![CDATA[select sum(distinct deptno), count(distinct deptno), bit_or(deptno) from emp]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalAggregate(group=[{}], EXPR$0=[SUM(DISTINCT $0)], EXPR$1=[COUNT(DISTINCT $0)], EXPR$2=[BIT_OR($0)])
+ LogicalProject(DEPTNO=[$7])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalAggregate(group=[{}], EXPR$0=[SUM($0)], EXPR$1=[COUNT($0)], EXPR$2=[BIT_OR($0)])
+ LogicalAggregate(group=[{0}])
+ LogicalProject(DEPTNO=[$7])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ </TestCase>
</Root>