You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@flink.apache.org by GitBox <gi...@apache.org> on 2019/11/18 02:41:05 UTC

[GitHub] [flink] TsReaper commented on a change in pull request #10174: [FLINK-14625][table-planner-blink] Add a rule to eliminate cross join as much as possible without statistics

TsReaper commented on a change in pull request #10174: [FLINK-14625][table-planner-blink] Add a rule to eliminate cross join as much as possible without statistics
URL: https://github.com/apache/flink/pull/10174#discussion_r347183638
 
 

 ##########
 File path: flink-table/flink-table-planner-blink/src/main/java/org/apache/flink/table/planner/plan/rules/logical/EliminateCrossJoinRule.java
 ##########
 @@ -0,0 +1,477 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.planner.plan.rules.logical;
+
+import org.apache.flink.util.Preconditions;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.rules.LoptMultiJoin;
+import org.apache.calcite.rel.rules.MultiJoin;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexShuttle;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.mapping.Mappings;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * This rule tries to eliminate cross joins by reordering joins.
+ * The new order of joins is determined with the following steps:
+ *
+ * <p>1. The inputs related with an equi-join filter (= or IS NOT DISTINCT FROM) will be joined first.
+ *       Inputs with smaller indices have higher priority.
+ *
+ * <p>2. The inputs related with other join filters will then be joined.
+ *
+ * <p>3. If not all inner join inputs are joined, they will be joined in input order.
+ *
+ * <p>4. Outer joins are added.
+ */
+public class EliminateCrossJoinRule extends RelOptRule {
+
+	public static final EliminateCrossJoinRule INSTANCE = new EliminateCrossJoinRule();
+
+	/**
+	 * Creates the rule. It matches any {@link MultiJoin} node regardless of its inputs.
+	 * The constructor is private; use {@link #INSTANCE}.
+	 */
+	private EliminateCrossJoinRule() {
+		super(
+			operand(MultiJoin.class, any()),
+			"EliminateCrossJoinRule");
+	}
+
+	/**
+	 * Rewrites the matched {@link MultiJoin} into a tree of joins built with the call's
+	 * {@link RelBuilder} and registers the result via {@link RelOptRuleCall#transformTo}.
+	 *
+	 * <p>A full outer join is emitted as-is (its two inputs are joined in their original order).
+	 * Otherwise the multi-join is reordered, the post-join filter (if any) is re-applied with its
+	 * input references remapped, and a final projection keeps the output of the join unchanged.
+	 *
+	 * @param call the rule call whose first operand is the {@link MultiJoin} to rewrite
+	 * @throws IllegalArgumentException if a full outer multi-join does not have exactly 2 inputs
+	 *         (presumably what {@code Preconditions.checkArgument} throws; confirm against Flink's
+	 *         {@code Preconditions})
+	 * @throws IllegalStateException if the multi-join contains more than one outer join, or its
+	 *         single outer join is not at the expected position (presumably what
+	 *         {@code Preconditions.checkState} throws; confirm against Flink's {@code Preconditions})
+	 */
+	@Override
+	public void onMatch(RelOptRuleCall call) {
+		MultiJoin join = call.rel(0);
+		RelBuilder relBuilder = call.builder();
+
+		if (join.isFullOuterJoin()) {
+			// full outer join, do not reorder joins
+			Preconditions.checkArgument(
+				join.getInputs().size() == 2,
+				"Full outer join must have exactly 2 inputs. This is a bug.");
+
+			relBuilder
+				.push(join.getInput(0))
+				.push(join.getInput(1))
+				.join(JoinRelType.FULL, join.getJoinFilter());
+			if (join.getPostJoinFilter() != null) {
+				relBuilder.filter(join.getPostJoinFilter());
+			}
+		} else {
+			int numInputs = join.getInputs().size();
+
+			// count the inputs whose join type is anything other than INNER
+			int outerJoinCount = 0;
+			for (int i = 0; i < numInputs; i++) {
+				if (join.getJoinTypes().get(i) != JoinRelType.INNER) {
+					outerJoinCount++;
+				}
+			}
+			Preconditions.checkState(
+				outerJoinCount <= 1,
+				"EliminateCrossJoinRule assumes that there is at most 1 outer join " +
+					"in a layer of multi-join, but " + outerJoinCount + " outer joins were found.");
+			if (outerJoinCount == 1) {
+				// the single outer join must be a RIGHT join on the first input
+				// or a LEFT join on the last input
+				Preconditions.checkState(
+					join.getJoinTypes().get(0) == JoinRelType.RIGHT ||
+						join.getJoinTypes().get(numInputs - 1) == JoinRelType.LEFT,
+					"EliminateCrossJoinRule assumes that " +
+						"the only left outer join input must locate at the end, or " +
+						"the only right outer join input must locate at the beginning");
+			}
+
+			LoptMultiJoin loptMultiJoin = new LoptMultiJoin(join);
+
+			// try to eliminate cross join
+			Vertex joinVertexTree = multiJoinToJoinVertexTree(loptMultiJoin);
+			Mappings.TargetMapping mapping = joinVertexTreeToJoinRelTree(joinVertexTree, loptMultiJoin, relBuilder);
+
+			// apply post-join filters, remapping their input references through the mapping
+			if (join.getPostJoinFilter() != null) {
+				RexBuilder rexBuilder = join.getCluster().getRexBuilder();
+				relBuilder.filter(mapFilter(join.getPostJoinFilter(), mapping, rexBuilder));
+			}
+
+			// use projections to keep the output of the join unchanged
+			List<RexNode> projects = generateProjection(join, mapping);
+			relBuilder.project(projects);
+		}
+
+		RelNode rel = relBuilder.build();
+		call.transformTo(rel);
+	}
+
+	/**
+	 * Builds a join vertex tree for the given multi-join by repeatedly picking the best
+	 * remaining join filter and inner-joining the inputs it references.
+	 *
+	 * <p>Filters are ranked by the comparator below: equi-filters come before non-equi-filters;
+	 * among filters of the same kind, the one whose input bit set has the smaller index at the
+	 * first differing position wins, and a bit set that is a strict prefix of the other wins.
+	 *
+	 * @param multiJoin the multi-join whose inputs and filters are to be arranged
+	 * @return the root of the resulting join vertex tree
+	 */
+	private Vertex multiJoinToJoinVertexTree(LoptMultiJoin multiJoin) {
+		JoinVertexTreeBuilder builder = new JoinVertexTreeBuilder(
+			multiJoin,
+			(left, right) -> {
+				boolean leftIsEqui = isEquiFilter(left.filter);
+				boolean rightIsEqui = isEquiFilter(right.filter);
+				if (leftIsEqui ^ rightIsEqui) {
+					// exactly one of the filters is an equi-filter
+					// equi-filter has higher priority
+					return leftIsEqui ? -1 : 1;
+				} else {
+					// both or none of the filters are equi-filters
+					// the one with the smallest input wins
+					int a = -1;
+					int b = -1;
+					// walk both bit sets in parallel until they differ or both are exhausted
+					do {
+						a = left.inputBitSet.nextSetBit(a + 1);
+						b = right.inputBitSet.nextSetBit(b + 1);
+					} while (a == b && a >= 0);
+
+					if (a >= 0 && b >= 0) {
+						return Integer.compare(a, b);
+					} else if (a < 0 && b < 0) {
+						return 0;
+					} else {
+						// exactly one bit set is exhausted: the exhausted (shorter) one wins.
+						// Return an explicit sign; returning `a` would yield 0 ("equal")
+						// when a == 0 and the right bit set is empty.
+						return a < 0 ? -1 : 1;
+					}
+				}
+			});
+
+		JoinFilter bestFilter;
+		while ((bestFilter = builder.getBestFilter()) != null) {
+			builder.innerJoin(bestFilter.inputBitSet);
+		}
+
+		return builder.toJoinVertexTree();
+	}
+
+	private boolean isEquiFilter(RexNode filter) {
+		return filter.isA(SqlKind.EQUALS) || filter.isA(SqlKind.IS_NOT_DISTINCT_FROM);
 
 Review comment:
   No, but after this check we will also verify that the left input and right input are both input refs. This will distinguish `a = b` from `a = 1`.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services