You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by vg...@apache.org on 2019/03/26 17:04:20 UTC

[hive] branch master updated: HIVE-21395: Refactor HiveSemiJoinRule (Vineet Garg, reviewed by Jesus Camacho Rodriguez)

This is an automated email from the ASF dual-hosted git repository.

vgarg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new d07cf0d  HIVE-21395: Refactor HiveSemiJoinRule (Vineet Garg, reviewed by Jesus Camacho Rodriguez)
d07cf0d is described below

commit d07cf0d46d805738c034c24f7df978beff9f2b18
Author: Vineet Garg <vg...@apache.org>
AuthorDate: Tue Mar 26 10:03:32 2019 -0700

    HIVE-21395: Refactor HiveSemiJoinRule (Vineet Garg, reviewed by Jesus Camacho Rodriguez)
---
 .../optimizer/calcite/rules/HiveSemiJoinRule.java  | 76 ++++++++++++----------
 1 file changed, 41 insertions(+), 35 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java
index 07ce762..e63f163 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java
@@ -22,7 +22,6 @@ import org.apache.calcite.plan.RelOptRule;
 import org.apache.calcite.plan.RelOptRuleCall;
 import org.apache.calcite.plan.RelOptRuleOperand;
 import org.apache.calcite.plan.RelOptUtil;
-import org.apache.calcite.plan.hep.HepRelVertex;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.Aggregate;
 import org.apache.calcite.rel.core.AggregateCall;
@@ -67,32 +66,33 @@ public abstract class HiveSemiJoinRule extends RelOptRule {
   public static final HiveAggregateToSemiJoinRule INSTANCE_AGGREGATE =
       new HiveAggregateToSemiJoinRule(HiveRelFactories.HIVE_BUILDER);
 
-  private HiveSemiJoinRule(RelOptRuleOperand operand, RelBuilderFactory relBuilder) {
+  private HiveSemiJoinRule(final RelOptRuleOperand operand, final RelBuilderFactory relBuilder) {
     super(operand, relBuilder, null);
   }
 
-  private RelNode buildProject(final Aggregate aggregate, RexBuilder rexBuilder, RelBuilder relBuilder) {
-    assert(!aggregate.indicator && aggregate.getAggCallList().isEmpty());
+  private RelNode buildProject(final Aggregate aggregate, final RexBuilder rexBuilder,
+                               final RelBuilder relBuilder) {
+    assert (!aggregate.indicator && aggregate.getAggCallList().isEmpty());
     RelNode input = aggregate.getInput();
     List<Integer> groupingKeys = aggregate.getGroupSet().asList();
     List<RexNode> projects = new ArrayList<>();
-    for(Integer keys:groupingKeys) {
+    for (Integer keys:groupingKeys) {
       projects.add(rexBuilder.makeInputRef(input, keys.intValue()));
     }
     return relBuilder.push(aggregate.getInput()).project(projects).build();
   }
 
   private boolean needProject(final RelNode input, final RelNode aggregate) {
-    if((input instanceof HepRelVertex
-        && ((HepRelVertex)input).getCurrentRel() instanceof  Join)
+    if (input instanceof Join
         || input.getRowType().getFieldCount() != aggregate.getRowType().getFieldCount()) {
       return true;
     }
     return false;
   }
 
-  protected void perform(RelOptRuleCall call, ImmutableBitSet topRefs,
-                         RelNode topOperator, Join join, RelNode left, Aggregate aggregate) {
+  protected void perform(final RelOptRuleCall call, final ImmutableBitSet topRefs,
+                         final RelNode topOperator, final Join join, final RelNode left,
+                         final Aggregate aggregate, final RelNode aggregateInput) {
     LOG.debug("Matched HiveSemiJoinRule");
     final RelOptCluster cluster = join.getCluster();
     final RexBuilder rexBuilder = cluster.getRexBuilder();
@@ -109,7 +109,11 @@ public abstract class HiveSemiJoinRule extends RelOptRule {
       // By the way, neither a super-set nor a sub-set would work.
       return;
     }
-    if(join.getJoinType() == JoinRelType.LEFT) {
+    if (!joinInfo.isEqui()) {
+      return;
+    }
+
+    if (join.getJoinType() == JoinRelType.LEFT) {
       // since for LEFT join we are only interested in rows from LEFT we can get rid of right side
       call.transformTo(topOperator.copy(topOperator.getTraitSet(), ImmutableList.of(left)));
       return;
@@ -117,18 +121,15 @@ public abstract class HiveSemiJoinRule extends RelOptRule {
     if (join.getJoinType() != JoinRelType.INNER) {
       return;
     }
-    if (!joinInfo.isEqui()) {
-      return;
-    }
+
     LOG.debug("All conditions matched for HiveSemiJoinRule. Going to apply transformation.");
     final List<Integer> newRightKeyBuilder = Lists.newArrayList();
     final List<Integer> aggregateKeys = aggregate.getGroupSet().asList();
     for (int key : joinInfo.rightKeys) {
       newRightKeyBuilder.add(aggregateKeys.get(key));
     }
-    RelNode input = aggregate.getInput();
-    final RelNode newRight = needProject(input, aggregate) ?
-        buildProject(aggregate, rexBuilder, call.builder()) : input;
+    final RelNode newRight = needProject(aggregateInput, aggregate)
+        ? buildProject(aggregate, rexBuilder, call.builder()) : aggregateInput;
     final RexNode newCondition =
         RelOptUtil.createEquiJoinCondition(left, joinInfo.leftKeys, newRight,
                                            joinInfo.rightKeys, rexBuilder);
@@ -142,43 +143,45 @@ public abstract class HiveSemiJoinRule extends RelOptRule {
   public static class HiveProjectToSemiJoinRule extends HiveSemiJoinRule {
 
     /** Creates a HiveProjectToSemiJoinRule. */
-    public HiveProjectToSemiJoinRule(RelBuilderFactory relBuilder) {
+    public HiveProjectToSemiJoinRule(final RelBuilderFactory relBuilder) {
       super(
           operand(Project.class,
-                  some(operand(Join.class,
+                  operand(Join.class,
                                some(
                                    operand(RelNode.class, any()),
-                                   operand(Aggregate.class, any()))))),
+                                   operand(Aggregate.class,
+                                           operand(RelNode.class, any()))))),
           relBuilder);
     }
 
-    @Override public void onMatch(RelOptRuleCall call) {
+    @Override public void onMatch(final RelOptRuleCall call) {
       final Project project = call.rel(0);
       final Join join = call.rel(1);
       final RelNode left = call.rel(2);
       final Aggregate aggregate = call.rel(3);
       final ImmutableBitSet topRefs =
           RelOptUtil.InputFinder.bits(project.getChildExps(), null);
-      perform(call, topRefs, project, join, left, aggregate);
+      final RelNode aggregateInput = call.rel(4);
+      perform(call, topRefs, project, join, left, aggregate, aggregateInput);
     }
   }
 
   /** SemiJoinRule that matches a Project on top of a Join with an Aggregate
-   * as its right child. */
+   * as its left child. */
   public static class HiveProjectToSemiJoinRuleSwapInputs extends HiveSemiJoinRule {
 
     /** Creates a HiveProjectToSemiJoinRule. */
-    public HiveProjectToSemiJoinRuleSwapInputs(RelBuilderFactory relBuilder) {
+    public HiveProjectToSemiJoinRuleSwapInputs(final RelBuilderFactory relBuilder) {
       super(
           operand(Project.class,
-                  some(operand(Join.class,
+                  operand(Join.class,
                                some(
-                                   operand(Aggregate.class, any()),
-                                   operand(RelNode.class, any()))))),
+                                   operand(Aggregate.class, operand(RelNode.class, any())),
+                                   operand(RelNode.class, any())))),
           relBuilder);
     }
 
-    private Project swapInputs(Join join, Project topProject, RelBuilder builder) {
+    private Project swapInputs(final Join join, final Project topProject, final RelBuilder builder) {
       RexBuilder rexBuilder = join.getCluster().getRexBuilder();
 
       int rightInputSize = join.getRight().getRowType().getFieldCount();
@@ -188,7 +191,7 @@ public abstract class HiveSemiJoinRule extends RelOptRule {
       //swap the join inputs
       //adjust join condition
       int[] condAdjustments = new int[joinFields.size()];
-      for(int i=0; i<joinFields.size(); i++) {
+      for (int i=0; i<joinFields.size(); i++) {
         if(i < leftInputSize) {
           //left side refs need to be moved by right input size
           condAdjustments[i] = rightInputSize;
@@ -216,8 +219,9 @@ public abstract class HiveSemiJoinRule extends RelOptRule {
     @Override public void onMatch(RelOptRuleCall call) {
       final Project project = call.rel(0);
       final Join join = call.rel(1);
-      final RelNode right = call.rel(3);
       final Aggregate aggregate = call.rel(2);
+      final RelNode aggreagateInput = call.rel(3);
+      final RelNode right = call.rel(4);
 
       // make sure the following conditions are met
       //  Join is INNER
@@ -250,7 +254,7 @@ public abstract class HiveSemiJoinRule extends RelOptRule {
       final ImmutableBitSet swappedTopRefs =
           RelOptUtil.InputFinder.bits(swappedProject.getChildExps(), null);
 
-      perform(call, swappedTopRefs, swappedProject, (Join)swappedJoin, right, aggregate);
+      perform(call, swappedTopRefs, swappedProject, (Join)swappedJoin, right, aggregate, aggreagateInput);
     }
   }
 
@@ -259,21 +263,23 @@ public abstract class HiveSemiJoinRule extends RelOptRule {
   public static class HiveAggregateToSemiJoinRule extends HiveSemiJoinRule {
 
     /** Creates a HiveAggregateToSemiJoinRule. */
-    public HiveAggregateToSemiJoinRule(RelBuilderFactory relBuilder) {
+    public HiveAggregateToSemiJoinRule(final RelBuilderFactory relBuilder) {
       super(
           operand(Aggregate.class,
-                  some(operand(Join.class,
+                  operand(Join.class,
                                some(
                                    operand(RelNode.class, any()),
-                                   operand(Aggregate.class, any()))))),
+                                   operand(Aggregate.class, operand(RelNode.class, any()))))),
           relBuilder);
     }
 
-    @Override public void onMatch(RelOptRuleCall call) {
+    @Override public void onMatch(final RelOptRuleCall call) {
       final Aggregate topAggregate = call.rel(0);
       final Join join = call.rel(1);
       final RelNode left = call.rel(2);
       final Aggregate aggregate = call.rel(3);
+      final RelNode aggregateInput = call.rel(4);
+
       // Gather columns used by aggregate operator
       final ImmutableBitSet.Builder topRefs = ImmutableBitSet.builder();
       topRefs.addAll(topAggregate.getGroupSet());
@@ -283,7 +289,7 @@ public abstract class HiveSemiJoinRule extends RelOptRule {
           topRefs.set(aggCall.filterArg);
         }
       }
-      perform(call, topRefs.build(), topAggregate, join, left, aggregate);
+      perform(call, topRefs.build(), topAggregate, join, left, aggregate, aggregateInput);
     }
   }