You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2015/07/18 12:15:19 UTC
hive git commit: HIVE-11284: Fix cbo_rp_join0 failure on master
(Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 7b17df150 -> a5cc034bf
HIVE-11284: Fix cbo_rp_join0 failure on master (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a5cc034b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a5cc034b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a5cc034b
Branch: refs/heads/master
Commit: a5cc034bfa5e0ef7ce17f537abca3b35b23ecd16
Parents: 7b17df1
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Fri Jul 17 18:35:14 2015 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Sat Jul 18 11:14:13 2015 +0100
----------------------------------------------------------------------
.../rules/HiveJoinProjectTransposeRule.java | 223 +++++++++++++++++++
.../hadoop/hive/ql/parse/CalcitePlanner.java | 1 +
.../results/clientpositive/cbo_rp_join0.q.out | 4 +-
3 files changed, 226 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/a5cc034b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java
index 40bf043..fd8f5cb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java
@@ -17,10 +17,27 @@
*/
package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.plan.RelOptRuleOperand;
+import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.core.RelFactories.ProjectFactory;
import org.apache.calcite.rel.rules.JoinProjectTransposeRule;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexLocalRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexProgram;
+import org.apache.calcite.rex.RexProgramBuilder;
+import org.apache.calcite.util.Pair;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
@@ -50,11 +67,217 @@ public class HiveJoinProjectTransposeRule extends JoinProjectTransposeRule {
"JoinProjectTransposeRule(Other-Project)",
HiveProject.DEFAULT_PROJECT_FACTORY);
+ private final ProjectFactory projectFactory;
+
+ /**
+ * Creates a rule instance. All three arguments are forwarded unchanged to
+ * the JoinProjectTransposeRule constructor; the project factory is also
+ * stored in this class.
+ *
+ * @param operand operand passed to the parent rule
+ * @param description rule description passed to the parent rule
+ * @param pFactory factory passed to the parent rule and stored in
+ * {@code projectFactory}
+ */
private HiveJoinProjectTransposeRule(
RelOptRuleOperand operand,
String description, ProjectFactory pFactory) {
super(operand, description, pFactory);
+ // Keep our own reference: onMatch builds the pulled-up project through
+ // this factory.
+ this.projectFactory = pFactory;
+ }
+
+ /**
+ * Pulls the project(s) feeding a join above the join.
+ *
+ * A project is only pulled up from an input on which the join does not
+ * generate nulls; for the null-generating side of an outer join the project
+ * is left in place and treated as the join input. If no project can be
+ * pulled up, the call is left untransformed. Otherwise the join is copied
+ * on top of the projects' inputs with its condition rewritten in terms of
+ * those inputs, and a single project created through {@code projectFactory}
+ * is placed on top, reusing the field names of the original join.
+ *
+ * @param call rule call; rel(0) is the join and rel(1) is the left project
+ * when the matched operand has a left project
+ */
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ Join joinRel = call.rel(0);
+ JoinRelType joinType = joinRel.getJoinType();
+
+ Project leftProj;
+ Project rightProj;
+ RelNode leftJoinChild;
+ RelNode rightJoinChild;
+
+ // see if at least one input's projection doesn't generate nulls;
+ // a project on a null-generating side must stay below the join, because
+ // its expressions would otherwise be evaluated after null padding
+ if (hasLeftChild(call) && !joinType.generatesNullsOnLeft()) {
+ leftProj = call.rel(1);
+ leftJoinChild = getProjectChild(call, leftProj, true);
+ } else {
+ // either there is no left project or it cannot be pulled up; in both
+ // cases rel(1) itself remains the left join input
+ leftProj = null;
+ leftJoinChild = call.rel(1);
+ }
+ if (hasRightChild(call) && !joinType.generatesNullsOnRight()) {
+ rightProj = getRightChild(call);
+ rightJoinChild = getProjectChild(call, rightProj, false);
+ } else {
+ rightProj = null;
+ rightJoinChild = joinRel.getRight();
+ }
+ if ((leftProj == null) && (rightProj == null)) {
+ // nothing can be pulled up
+ return;
+ }
+
+ // Construct two RexPrograms and combine them. The bottom program
+ // is a join of the projection expressions from the left and/or
+ // right projects that feed into the join. The top program contains
+ // the join condition.
+
+ // Create a row type representing a concatenation of the inputs
+ // underneath the projects that feed into the join. This is the input
+ // into the bottom RexProgram. Note that the join type is an inner
+ // join because the inputs haven't actually been joined yet.
+ RelDataType joinChildrenRowType =
+ Join.deriveJoinRowType(
+ leftJoinChild.getRowType(),
+ rightJoinChild.getRowType(),
+ JoinRelType.INNER,
+ joinRel.getCluster().getTypeFactory(),
+ null,
+ Collections.<RelDataTypeField>emptyList());
+
+ // Create projection expressions, combining the projection expressions
+ // from the projects that feed into the join. For the RHS projection
+ // expressions, shift them to the right by the number of fields on
+ // the LHS. If the join input was not a projection, simply create
+ // references to the inputs.
+ int nProjExprs = joinRel.getRowType().getFieldCount();
+ List<Pair<RexNode, String>> projects =
+ new ArrayList<Pair<RexNode, String>>();
+ RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder();
+
+ createProjectExprs(
+ leftProj,
+ leftJoinChild,
+ 0,
+ rexBuilder,
+ joinChildrenRowType.getFieldList(),
+ projects);
+
+ List<RelDataTypeField> leftFields =
+ leftJoinChild.getRowType().getFieldList();
+ int nFieldsLeft = leftFields.size();
+ createProjectExprs(
+ rightProj,
+ rightJoinChild,
+ nFieldsLeft,
+ rexBuilder,
+ joinChildrenRowType.getFieldList(),
+ projects);
+
+ List<RelDataType> projTypes = new ArrayList<RelDataType>();
+ for (int i = 0; i < nProjExprs; i++) {
+ projTypes.add(projects.get(i).left.getType());
+ }
+ RelDataType projRowType =
+ rexBuilder.getTypeFactory().createStructType(
+ projTypes,
+ Pair.right(projects));
+
+ // create the RexPrograms and merge them
+ RexProgram bottomProgram =
+ RexProgram.create(
+ joinChildrenRowType,
+ Pair.left(projects),
+ null,
+ projRowType,
+ rexBuilder);
+ RexProgramBuilder topProgramBuilder =
+ new RexProgramBuilder(
+ projRowType,
+ rexBuilder);
+ topProgramBuilder.addIdentity();
+ topProgramBuilder.addCondition(joinRel.getCondition());
+ RexProgram topProgram = topProgramBuilder.getProgram();
+ RexProgram mergedProgram =
+ RexProgramBuilder.mergePrograms(
+ topProgram,
+ bottomProgram,
+ rexBuilder);
+
+ // expand out the join condition and construct a new LogicalJoin that
+ // directly references the join children without the intervening
+ // ProjectRels
+ RexNode newCondition =
+ mergedProgram.expandLocalRef(
+ mergedProgram.getCondition());
+ Join newJoinRel =
+ joinRel.copy(joinRel.getTraitSet(), newCondition,
+ leftJoinChild, rightJoinChild, joinRel.getJoinType(),
+ joinRel.isSemiJoinDone());
+
+ // expand out the new projection expressions; if the join is an
+ // outer join, modify the expressions to reference the join output
+ List<RexNode> newProjExprs = new ArrayList<RexNode>();
+ List<RexLocalRef> projList = mergedProgram.getProjectList();
+ List<RelDataTypeField> newJoinFields =
+ newJoinRel.getRowType().getFieldList();
+ int nJoinFields = newJoinFields.size();
+ // all adjustments stay at zero, so the converter does not move any
+ // reference; presumably it only re-derives the reference types from the
+ // new join's fields - TODO confirm against RelOptUtil.RexInputConverter
+ int[] adjustments = new int[nJoinFields];
+ for (int i = 0; i < nProjExprs; i++) {
+ RexNode newExpr = mergedProgram.expandLocalRef(projList.get(i));
+ if (joinType != JoinRelType.INNER) {
+ newExpr =
+ newExpr.accept(
+ new RelOptUtil.RexInputConverter(
+ rexBuilder,
+ joinChildrenRowType.getFieldList(),
+ newJoinFields,
+ adjustments));
+ }
+ newProjExprs.add(newExpr);
+ }
+
+ // finally, create the projection on top of the join, keeping the field
+ // names of the original join
+ RelNode newProjRel = projectFactory.createProject(newJoinRel, newProjExprs,
+ joinRel.getRowType().getFieldNames());
+
+ call.transformTo(newProjRel);
}
+ /**
+ * Appends to {@code projects} the (expression, name) pairs contributed by
+ * one side of the join.
+ *
+ * When the side has a project, its named expressions are appended, with
+ * their input references shifted by {@code adjustmentAmount} if that amount
+ * is non-zero. When the side has no project, one input reference per field
+ * of {@code joinChild} is appended, offset by {@code adjustmentAmount}.
+ *
+ * @param projRel the projection input into the join, or null if this side
+ * has no projection to pull up
+ * @param joinChild the child of the projection input (if there is a
+ * projection); otherwise, this is the join input
+ * @param adjustmentAmount the amount the expressions need to be shifted by
+ * @param rexBuilder rex builder
+ * @param joinChildrenFields concatenation of the fields from the left and
+ * right join inputs (once the projections have been
+ * removed)
+ * @param projects output list receiving the projection expressions & names
+ */
+ private void createProjectExprs(
+ Project projRel,
+ RelNode joinChild,
+ int adjustmentAmount,
+ RexBuilder rexBuilder,
+ List<RelDataTypeField> joinChildrenFields,
+ List<Pair<RexNode, String>> projects) {
+ final List<RelDataTypeField> inputFields =
+ joinChild.getRowType().getFieldList();
+
+ if (projRel == null) {
+ // no projection on this side; reference every input field directly
+ int ordinal = adjustmentAmount;
+ for (RelDataTypeField inputField : inputFields) {
+ RexNode inputRef =
+ rexBuilder.makeInputRef(inputField.getType(), ordinal);
+ projects.add(Pair.of(inputRef, inputField.getName()));
+ ordinal++;
+ }
+ return;
+ }
+
+ final List<Pair<RexNode, String>> namedExprs = projRel.getNamedProjects();
+ // every input field of this side moves by the same offset
+ final int[] shifts = new int[inputFields.size()];
+ java.util.Arrays.fill(shifts, adjustmentAmount);
+ final boolean shiftNeeded = adjustmentAmount != 0;
+
+ for (Pair<RexNode, String> namedExpr : namedExprs) {
+ RexNode expr = namedExpr.left;
+ if (shiftNeeded) {
+ // rewrite input references against the concatenated field list
+ RelOptUtil.RexInputConverter shifter =
+ new RelOptUtil.RexInputConverter(
+ rexBuilder,
+ inputFields,
+ joinChildrenFields,
+ shifts);
+ expr = expr.accept(shifter);
+ }
+ projects.add(Pair.of(expr, namedExpr.right));
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a5cc034b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 1ea236b..5b469e3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -876,6 +876,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
// 6.1. Merge join into multijoin operators (if possible)
calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, true, mdProvider.getMetadataProvider(),
HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.BOTH_PROJECT,
+ HiveJoinProjectTransposeRule.LEFT_PROJECT, HiveJoinProjectTransposeRule.RIGHT_PROJECT,
HiveJoinToMultiJoinRule.INSTANCE, HiveProjectMergeRule.INSTANCE);
// The previous rules can pull up projections through join operators,
// thus we run the field trimmer again to push them back down
http://git-wip-us.apache.org/repos/asf/hive/blob/a5cc034b/ql/src/test/results/clientpositive/cbo_rp_join0.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cbo_rp_join0.q.out b/ql/src/test/results/clientpositive/cbo_rp_join0.q.out
index 93fed08..a8bcc90 100644
--- a/ql/src/test/results/clientpositive/cbo_rp_join0.q.out
+++ b/ql/src/test/results/clientpositive/cbo_rp_join0.q.out
@@ -738,10 +738,10 @@ STAGE PLANS:
1 key (type: string)
2 key (type: string)
3 key (type: string)
- outputColumnNames: key, c_int, key0, c_int0, key1, c_int1
+ outputColumnNames: key, c_int, key0, c_int0, key1, c_int2
Statistics: Num rows: 1458 Data size: 389286 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: key (type: string), c_int (type: int), key0 (type: string), c_int0 (type: int), key1 (type: string), c_int1 (type: int)
+ expressions: key (type: string), c_int (type: int), key0 (type: string), c_int0 (type: int), key1 (type: string), c_int2 (type: int)
outputColumnNames: key, c_int, p, q, x, b
Statistics: Num rows: 1458 Data size: 389286 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator