You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by za...@apache.org on 2023/04/26 09:24:10 UTC

[hive] branch master updated: HIVE-27278: Simplify correlated queries with empty inputs (Stamatis Zampetakis reviewed by Krisztian Kasa)

This is an automated email from the ASF dual-hosted git repository.

zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 69d824e186c HIVE-27278: Simplify correlated queries with empty inputs (Stamatis Zampetakis reviewed by Krisztian Kasa)
69d824e186c is described below

commit 69d824e186ca983b8bb1ff9501b069522391dc44
Author: Stamatis Zampetakis <za...@gmail.com>
AuthorDate: Thu Apr 20 15:56:33 2023 +0200

    HIVE-27278: Simplify correlated queries with empty inputs (Stamatis Zampetakis reviewed by Krisztian Kasa)
    
    Add new pruning rules in CBO to remove correlate when inputs are empty
    to avoid exceptions and failures at compile time.
    
    Before
    ```
    HiveProject(id=[$0])
      LogicalCorrelate(correlation=[$cor0], joinType=[semi], requiredColumns=[{}])
        HiveTableScan(table=[[default, t1]], table:alias=[t1])
        HiveValues(tuples=[[]])
    ```
    
    After
    ```
    HiveValues(tuples=[[]])
    ```
    
    Under normal circumstances correlate should never be present in the
    after decorrelation. However, HiveRelDecorrelator does not cope well
    with values so these new rules are a safety net that can prune trivial
    correlates later on.
    
    Closes #4253
---
 .../hadoop/hive/ql/optimizer/calcite/Bug.java      |   5 +
 .../calcite/rules/HiveRemoveEmptySingleRules.java  | 105 +++++++++++++++++----
 .../hadoop/hive/ql/parse/CalcitePlanner.java       |   4 +-
 .../clientpositive/empty_result_correlate.q        |   4 +
 .../llap/empty_result_correlate.q.out              |  29 ++++++
 5 files changed, 128 insertions(+), 19 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/Bug.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/Bug.java
index 91877060ad0..268521ecc22 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/Bug.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/Bug.java
@@ -83,4 +83,9 @@ public final class Bug {
    * Whether <a href="https://issues.apache.org/jira/browse/CALCITE-5337">CALCITE-5337</a> is fixed.
    */
   public static final boolean CALCITE_5337_FIXED = false;
+
+  /**
+   * Whether <a href="https://issues.apache.org/jira/browse/CALCITE-5669">CALCITE-5669</a> is fixed.
+   */
+  public static final boolean CALCITE_5669_FIXED = false;
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveEmptySingleRules.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveEmptySingleRules.java
index dddab8fa2d6..2cdc0d2ff65 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveEmptySingleRules.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveEmptySingleRules.java
@@ -25,11 +25,13 @@ import org.apache.calcite.plan.hep.HepRelVertex;
 import org.apache.calcite.plan.volcano.RelSubset;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.Correlate;
 import org.apache.calcite.rel.core.Join;
 import org.apache.calcite.rel.core.JoinRelType;
 import org.apache.calcite.rel.core.Union;
 import org.apache.calcite.rel.core.Values;
 import org.apache.calcite.rel.rules.PruneEmptyRules;
+import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.tools.RelBuilder;
 import org.apache.hadoop.hive.ql.optimizer.calcite.Bug;
@@ -105,21 +107,13 @@ public class HiveRemoveEmptySingleRules extends PruneEmptyRules {
           }
 
           final Join join = call.rel(0);
-          final Values empty = call.rel(1);
           final RelNode right = call.rel(2);
           final RelBuilder relBuilder = call.builder();
           if (join.getJoinType().generatesNullsOnLeft()) {
             // If "emp" is empty, "select * from emp right join dept" will have
             // the same number of rows as "dept", and null values for the
             // columns from "emp". The left side of the join can be removed.
-            final List<RexNode> nullLiterals =
-                    Collections.nCopies(empty.getRowType().getFieldCount(),
-                            relBuilder.literal(null));
-            call.transformTo(
-                    relBuilder.push(right)
-                            .project(concat(nullLiterals, relBuilder.fields()))
-                            .convert(join.getRowType(), true)
-                            .build());
+            call.transformTo(padWithNulls(relBuilder, right, join.getRowType(), true));
             return;
           }
           call.transformTo(relBuilder.push(join).empty().build());
@@ -165,20 +159,12 @@ public class HiveRemoveEmptySingleRules extends PruneEmptyRules {
 
           final Join join = call.rel(0);
           final RelNode left = call.rel(1);
-          final Values empty = call.rel(2);
           final RelBuilder relBuilder = call.builder();
           if (join.getJoinType().generatesNullsOnRight()) {
             // If "dept" is empty, "select * from emp left join dept" will have
             // the same number of rows as "emp", and null values for the
             // columns from "dept". The right side of the join can be removed.
-            final List<RexNode> nullLiterals =
-                    Collections.nCopies(empty.getRowType().getFieldCount(),
-                            relBuilder.literal(null));
-            call.transformTo(
-                    relBuilder.push(left)
-                            .project(concat(relBuilder.fields(), nullLiterals))
-                            .convert(join.getRowType(), true)
-                            .build());
+            call.transformTo(padWithNulls(relBuilder, left, join.getRowType(), false));
             return;
           }
           if (join.getJoinType() == JoinRelType.ANTI) {
@@ -192,6 +178,89 @@ public class HiveRemoveEmptySingleRules extends PruneEmptyRules {
     }
   }
 
+  private static RelNode padWithNulls(RelBuilder builder, RelNode input, RelDataType resultType,
+      boolean leftPadding) {
+    int padding = resultType.getFieldCount() - input.getRowType().getFieldCount();
+    List<RexNode> nullLiterals = Collections.nCopies(padding, builder.literal(null));
+    builder.push(input);
+    if (leftPadding) {
+      builder.project(concat(nullLiterals, builder.fields()));
+    } else {
+      builder.project(concat(builder.fields(), nullLiterals));
+    }
+    return builder.convert(resultType, true).build();
+  }
+
+  public static final RelOptRule CORRELATE_RIGHT_INSTANCE = RelRule.Config.EMPTY
+      .withOperandSupplier(b0 ->
+          b0.operand(Correlate.class).inputs(
+              b1 -> b1.operand(RelNode.class).anyInputs(),
+              b2 -> b2.operand(Values.class).predicate(Values::isEmpty).noInputs()))
+      .withDescription("PruneEmptyCorrelate(right)")
+      .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+      .as(CorrelateRightEmptyRuleConfig.class)
+      .toRule();
+  public static final RelOptRule CORRELATE_LEFT_INSTANCE = RelRule.Config.EMPTY
+      .withOperandSupplier(b0 ->
+          b0.operand(Correlate.class).inputs(
+              b1 -> b1.operand(Values.class).predicate(Values::isEmpty).noInputs(),
+              b2 -> b2.operand(RelNode.class).anyInputs()))
+      .withDescription("PruneEmptyCorrelate(left)")
+      .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+      .as(CorrelateLeftEmptyRuleConfig.class)
+      .toRule();
+
+  /** Configuration for rule that prunes a correlate if left input is empty. */
+  public interface CorrelateLeftEmptyRuleConfig extends PruneEmptyRule.Config {
+    @Override
+    default PruneEmptyRule toRule() {
+      return new PruneEmptyRule(this) {
+        @Override
+        public void onMatch(RelOptRuleCall call) {
+          if (Bug.CALCITE_5669_FIXED) {
+            throw new IllegalStateException("Class is redundant after fix is merged into Calcite");
+          }
+          final Correlate corr = call.rel(0);
+          call.transformTo(call.builder().push(corr).empty().build());
+        }
+      };
+    }
+  }
+
+  /** Configuration for rule that prunes a correlate if right input is empty. */
+  public interface CorrelateRightEmptyRuleConfig extends PruneEmptyRule.Config {
+    @Override
+    default PruneEmptyRule toRule() {
+      return new PruneEmptyRule(this) {
+        @Override
+        public void onMatch(RelOptRuleCall call) {
+          if (Bug.CALCITE_5669_FIXED) {
+            throw new IllegalStateException("Class is redundant after fix is merged into Calcite");
+          }
+          final Correlate corr = call.rel(0);
+          final RelNode left = call.rel(1);
+          final RelBuilder b = call.builder();
+          final RelNode newRel;
+          switch (corr.getJoinType()) {
+          case LEFT:
+            newRel = padWithNulls(b, left, corr.getRowType(), false);
+            break;
+          case INNER:
+          case SEMI:
+            newRel = b.push(corr).empty().build();
+            break;
+          case ANTI:
+            newRel = left;
+            break;
+          default:
+            throw new IllegalStateException("Correlate does not support " + corr.getJoinType());
+          }
+          call.transformTo(newRel);
+        }
+      };
+    }
+  }
+
   public static final RelOptRule SORT_INSTANCE =
           RelRule.Config.EMPTY
                   .withDescription("HivePruneEmptySort")
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index d8850ffbafb..4da0ff0869b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -1956,7 +1956,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
               HiveRemoveEmptySingleRules.SORT_INSTANCE,
               HiveRemoveEmptySingleRules.SORT_FETCH_ZERO_INSTANCE,
               HiveRemoveEmptySingleRules.AGGREGATE_INSTANCE,
-              HiveRemoveEmptySingleRules.UNION_INSTANCE);
+              HiveRemoveEmptySingleRules.UNION_INSTANCE,
+              HiveRemoveEmptySingleRules.CORRELATE_LEFT_INSTANCE,
+              HiveRemoveEmptySingleRules.CORRELATE_RIGHT_INSTANCE);
 
       // Trigger program
       perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
diff --git a/ql/src/test/queries/clientpositive/empty_result_correlate.q b/ql/src/test/queries/clientpositive/empty_result_correlate.q
new file mode 100644
index 00000000000..6bd4b4b041d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/empty_result_correlate.q
@@ -0,0 +1,4 @@
+create table t1 (id int, val varchar(10));
+create table t2 (id int, val varchar(10));
+
+EXPLAIN CBO SELECT id FROM t1 WHERE NULL IN (SELECT NULL FROM t2 where t1.id = t2.id);
diff --git a/ql/src/test/results/clientpositive/llap/empty_result_correlate.q.out b/ql/src/test/results/clientpositive/llap/empty_result_correlate.q.out
new file mode 100644
index 00000000000..440bc2bdb0b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/empty_result_correlate.q.out
@@ -0,0 +1,29 @@
+PREHOOK: query: create table t1 (id int, val varchar(10))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (id int, val varchar(10))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: create table t2 (id int, val varchar(10))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2
+POSTHOOK: query: create table t2 (id int, val varchar(10))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2
+PREHOOK: query: EXPLAIN CBO SELECT id FROM t1 WHERE NULL IN (SELECT NULL FROM t2 where t1.id = t2.id)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO SELECT id FROM t1 WHERE NULL IN (SELECT NULL FROM t2 where t1.id = t2.id)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+CBO PLAN:
+HiveValues(tuples=[[]])
+