You are viewing a plain text version of this content. The canonical link for it is here.
Posted to gitbox@hive.apache.org by GitBox <gi...@apache.org> on 2022/09/21 17:12:21 UTC
[GitHub] [hive] asolimando commented on a diff in pull request #3588: HIVE-26524: Use Calcite to remove sections of a query plan known never produces rows

asolimando commented on code in PR #3588:
URL: https://github.com/apache/hive/pull/3588#discussion_r976632375


##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java:
##########
@@ -308,17 +390,26 @@ private void convertOrderLimitToASTNode(HiveSortLimit hiveSortLimit) {
 
   private void convertSortToASTNode(HiveSortExchange hiveSortExchange) {
     List<RelFieldCollation> fieldCollations = hiveSortExchange.getCollation().getFieldCollations();
-    convertFieldCollationsToASTNode(hiveSortExchange, new Schema(hiveSortExchange), fieldCollations,
+    convertFieldCollationsAndSetOrderAST(hiveSortExchange, new Schema(hiveSortExchange), fieldCollations,
             null, HiveParser.TOK_SORTBY, "TOK_SORTBY");
   }
 
-  private void convertFieldCollationsToASTNode(
+  private void convertFieldCollationsAndSetOrderAST(
           RelNode node, Schema schema, List<RelFieldCollation> fieldCollations, Map<Integer, RexNode> obRefToCallMap,
           int astToken, String astText) {
+
     if (fieldCollations.isEmpty()) {
       return;
     }
 
+    hiveAST.order = convertFieldCollationsToASTNode(
+            node.getCluster().getRexBuilder(), schema, fieldCollations, obRefToCallMap, astToken, astText);
+  }
+
+  public static ASTNode convertFieldCollationsToASTNode(

Review Comment:
   Does it have to be `public`?



##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRewriteToDataSketchesRules.java:
##########
@@ -336,16 +336,13 @@ protected VBuilderPAP(Aggregate aggregate, Project project, String sketchClass)
 
       @Override
       boolean isApplicable(AggregateCall aggCall) {
-        if ((aggInput instanceof Project)
-            && !aggCall.isDistinct() && aggCall.getArgList().size() == 4
+        if ((aggInput != null)

Review Comment:
   We don't need the parentheses anymore:
   ```suggestion
           if (aggInput != null
   ```



##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveEmptySingleRules.java:
##########
@@ -0,0 +1,291 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.RelRule;
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rel.core.Union;
+import org.apache.calcite.rel.core.Values;
+import org.apache.calcite.rel.rules.PruneEmptyRules;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveValues;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * This class provides access to Calcite's {@link PruneEmptyRules}.
+ * The instances of the rules use {@link org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelBuilder}.
+ */
+public class HiveRemoveEmptySingleRules extends PruneEmptyRules {
+
+  public static final RelOptRule PROJECT_INSTANCE =
+          RelRule.Config.EMPTY
+                  .withDescription("HivePruneEmptyProject")
+                  .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class)
+                  .withOperandFor(Project.class, project -> true)
+                  .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+                  .toRule();
+
+  public static final RelOptRule FILTER_INSTANCE =
+          RelRule.Config.EMPTY
+                  .withDescription("HivePruneEmptyFilter")
+                  .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class)
+                  .withOperandFor(Filter.class, singleRel -> true)
+                  .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+                  .toRule();
+
+  public static final RelOptRule JOIN_LEFT_INSTANCE =
+          RelRule.Config.EMPTY
+                  .withOperandSupplier(b0 ->
+                          b0.operand(Join.class).inputs(
+                                  b1 -> b1.operand(Values.class)
+                                          .predicate(Values::isEmpty).noInputs(),
+                                  b2 -> b2.operand(RelNode.class).anyInputs()))
+                  .withDescription("HivePruneEmptyJoin(left)")
+                  .as(JoinLeftEmptyRuleConfig.class)
+                  .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+                  .toRule();
+
+  /**
+   * Improved version of Calcite's {@link PruneEmptyRules.JoinLeftEmptyRuleConfig}.
+   * In case of right outer join if the left branch is empty the join operator can be removed
+   * and take the right branch only.
+   *
+   * select * from (select * from emp where 1=0) right join dept
+   * to
+   * select null as emp.col0 ... null as emp.coln, dept.* from dept
+   */
+  public interface JoinLeftEmptyRuleConfig extends PruneEmptyRule.Config {
+    @Override default PruneEmptyRule toRule() {
+      return new PruneEmptyRule(this) {
+        @Override public void onMatch(RelOptRuleCall call) {
+          Join join = call.rel(0);
+          HiveValues empty = call.rel(1);
+          RelNode right = call.rel(2);
+          RexBuilder rexBuilder = call.builder().getRexBuilder();
+          if (join.getJoinType().generatesNullsOnLeft()) {
+            // "select * from emp right join dept" is not necessarily empty if
+            // emp is empty
+            List<RexNode> projects = new ArrayList<>(
+                    empty.getRowType().getFieldCount() + right.getRowType().getFieldCount());
+            List<String> columnNames = new ArrayList<>(
+                    empty.getRowType().getFieldCount() + right.getRowType().getFieldCount());
+            // left
+            addNullLiterals(rexBuilder, empty, projects, columnNames);
+            // right
+            copyProjects(rexBuilder, right.getRowType(),
+                    join.getRowType(), empty.getRowType().getFieldCount(), projects, columnNames);
+
+            RelNode project = call.builder().push(right).project(projects, columnNames).build();
+            call.transformTo(project);
+            return;
+          }
+          call.transformTo(call.builder().push(join).empty().build());
+        }
+      };
+    }
+  }
+
+  private static void addNullLiterals(
+          RexBuilder rexBuilder, HiveValues empty, List<RexNode> projectFields, List<String> newColumnNames) {
+    for (int i = 0; i < empty.getRowType().getFieldList().size(); ++i) {
+      RelDataTypeField relDataTypeField = empty.getRowType().getFieldList().get(i);
+      RexNode nullLiteral = rexBuilder.makeNullLiteral(relDataTypeField.getType());
+      projectFields.add(nullLiteral);
+      newColumnNames.add(empty.getRowType().getFieldList().get(i).getName());
+    }
+  }
+
+  public static void copyProjects(RexBuilder rexBuilder, RelDataType inRowType,

Review Comment:
   I guess this methods can be `private`. If not, please add some javadoc to it.



##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveEmptySingleRules.java:
##########
@@ -0,0 +1,291 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.RelRule;
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rel.core.Union;
+import org.apache.calcite.rel.core.Values;
+import org.apache.calcite.rel.rules.PruneEmptyRules;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveValues;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * This class provides access to Calcite's {@link PruneEmptyRules}.
+ * The instances of the rules use {@link org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelBuilder}.
+ */
+public class HiveRemoveEmptySingleRules extends PruneEmptyRules {
+
+  public static final RelOptRule PROJECT_INSTANCE =
+          RelRule.Config.EMPTY
+                  .withDescription("HivePruneEmptyProject")
+                  .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class)
+                  .withOperandFor(Project.class, project -> true)
+                  .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+                  .toRule();
+
+  public static final RelOptRule FILTER_INSTANCE =
+          RelRule.Config.EMPTY
+                  .withDescription("HivePruneEmptyFilter")
+                  .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class)
+                  .withOperandFor(Filter.class, singleRel -> true)
+                  .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+                  .toRule();
+
+  public static final RelOptRule JOIN_LEFT_INSTANCE =
+          RelRule.Config.EMPTY
+                  .withOperandSupplier(b0 ->
+                          b0.operand(Join.class).inputs(
+                                  b1 -> b1.operand(Values.class)
+                                          .predicate(Values::isEmpty).noInputs(),
+                                  b2 -> b2.operand(RelNode.class).anyInputs()))
+                  .withDescription("HivePruneEmptyJoin(left)")
+                  .as(JoinLeftEmptyRuleConfig.class)
+                  .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+                  .toRule();
+
+  /**
+   * Improved version of Calcite's {@link PruneEmptyRules.JoinLeftEmptyRuleConfig}.
+   * In case of right outer join if the left branch is empty the join operator can be removed
+   * and take the right branch only.
+   *
+   * select * from (select * from emp where 1=0) right join dept
+   * to
+   * select null as emp.col0 ... null as emp.coln, dept.* from dept
+   */
+  public interface JoinLeftEmptyRuleConfig extends PruneEmptyRule.Config {
+    @Override default PruneEmptyRule toRule() {
+      return new PruneEmptyRule(this) {
+        @Override public void onMatch(RelOptRuleCall call) {
+          Join join = call.rel(0);
+          HiveValues empty = call.rel(1);
+          RelNode right = call.rel(2);
+          RexBuilder rexBuilder = call.builder().getRexBuilder();
+          if (join.getJoinType().generatesNullsOnLeft()) {
+            // "select * from emp right join dept" is not necessarily empty if
+            // emp is empty
+            List<RexNode> projects = new ArrayList<>(
+                    empty.getRowType().getFieldCount() + right.getRowType().getFieldCount());
+            List<String> columnNames = new ArrayList<>(
+                    empty.getRowType().getFieldCount() + right.getRowType().getFieldCount());
+            // left
+            addNullLiterals(rexBuilder, empty, projects, columnNames);
+            // right
+            copyProjects(rexBuilder, right.getRowType(),
+                    join.getRowType(), empty.getRowType().getFieldCount(), projects, columnNames);
+
+            RelNode project = call.builder().push(right).project(projects, columnNames).build();
+            call.transformTo(project);
+            return;
+          }
+          call.transformTo(call.builder().push(join).empty().build());
+        }
+      };
+    }
+  }
+
+  private static void addNullLiterals(
+          RexBuilder rexBuilder, HiveValues empty, List<RexNode> projectFields, List<String> newColumnNames) {
+    for (int i = 0; i < empty.getRowType().getFieldList().size(); ++i) {
+      RelDataTypeField relDataTypeField = empty.getRowType().getFieldList().get(i);
+      RexNode nullLiteral = rexBuilder.makeNullLiteral(relDataTypeField.getType());
+      projectFields.add(nullLiteral);
+      newColumnNames.add(empty.getRowType().getFieldList().get(i).getName());
+    }
+  }
+
+  public static void copyProjects(RexBuilder rexBuilder, RelDataType inRowType,
+                                  RelDataType castRowType, int castRowTypeOffset,
+                                  List<RexNode> outProjects, List<String> outProjectNames) {
+
+    for (int i = 0; i < inRowType.getFieldCount(); ++i) {
+      RelDataTypeField relDataTypeField = inRowType.getFieldList().get(i);
+      RexInputRef inputRef = rexBuilder.makeInputRef(relDataTypeField.getType(), i);
+      RexNode cast = rexBuilder.makeCast(
+              castRowType.getFieldList().get(castRowTypeOffset + i).getType(), inputRef);
+      outProjects.add(cast);
+      outProjectNames.add(relDataTypeField.getName());
+    }
+  }
+
+  public static final RelOptRule JOIN_RIGHT_INSTANCE =
+          RelRule.Config.EMPTY
+                  .withOperandSupplier(b0 ->
+                          b0.operand(Join.class).inputs(
+                                  b1 -> b1.operand(RelNode.class).anyInputs(),
+                                  b2 -> b2.operand(Values.class).predicate(Values::isEmpty)
+                                          .noInputs()))
+                  .withDescription("HivePruneEmptyJoin(right)")
+                  .as(JoinRightEmptyRuleConfig.class)
+                  .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+                  .toRule();
+
+  /**
+   * Improved version of Calcite's {@link PruneEmptyRules.JoinRightEmptyRuleConfig}.
+   * In case of left outer join if the right branch is empty the join operator can be removed
+   * and take the left branch only.
+   *
+   * select * from emp right join (select * from dept where 1=0)
+   * to
+   * select emp.*, null as dept.col0 ... null as dept.coln from emp
+   */
+  public interface JoinRightEmptyRuleConfig extends PruneEmptyRule.Config {
+    @Override default PruneEmptyRule toRule() {
+      return new PruneEmptyRule(this) {
+        @Override public void onMatch(RelOptRuleCall call) {
+          Join join = call.rel(0);
+          RelNode left = call.rel(1);
+          HiveValues empty = call.rel(2);
+          RexBuilder rexBuilder = call.builder().getRexBuilder();
+          if (join.getJoinType().generatesNullsOnRight()) {
+            // "select * from emp left join dept" is not necessarily empty if
+            // dept is empty
+            List<RexNode> projects = new ArrayList<>(
+                    left.getRowType().getFieldCount() + empty.getRowType().getFieldCount());
+            List<String> columnNames = new ArrayList<>(
+                    left.getRowType().getFieldCount() + empty.getRowType().getFieldCount());
+            // left
+            copyProjects(rexBuilder, left.getRowType(), join.getRowType(), 0, projects, columnNames);
+            // right
+            addNullLiterals(rexBuilder, empty, projects, columnNames);
+
+            RelNode project = call.builder().push(left).project(projects, columnNames).build();
+            call.transformTo(project);
+            return;
+          }
+          if (join.getJoinType() == JoinRelType.ANTI) {
+            // In case of anti join: Join(X, Empty, ANTI) becomes X
+            call.transformTo(join.getLeft());
+            return;
+          }
+          call.transformTo(call.builder().push(join).empty().build());
+        }
+      };
+    }
+  }
+
+  public static final RelOptRule SORT_INSTANCE =
+          RelRule.Config.EMPTY
+                  .withDescription("HivePruneEmptySort")
+                  .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class)
+                  .withOperandFor(Sort.class, singleRel -> true)
+                  .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+                  .toRule();
+
+  public static final RelOptRule SORT_FETCH_ZERO_INSTANCE =
+          RelRule.Config.EMPTY
+                  .withOperandSupplier(b ->
+                          b.operand(Sort.class).anyInputs())
+                  .withDescription("HivePruneSortLimit0")
+                  .as(PruneEmptyRules.SortFetchZeroRuleConfig.class)
+                  .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+                  .toRule();
+
+  public static final RelOptRule AGGREGATE_INSTANCE =
+          RelRule.Config.EMPTY
+                  .withDescription("HivePruneEmptyAggregate")
+                  .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class)
+                  .withOperandFor(Aggregate.class, Aggregate::isNotGrandTotal)
+                  .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+                  .toRule();
+
+  public static final RelOptRule UNION_INSTANCE =
+          RelRule.Config.EMPTY
+                  .withOperandSupplier(b0 ->
+                          b0.operand(Union.class).unorderedInputs(b1 ->
+                                  b1.operand(Values.class)
+                                          .predicate(Values::isEmpty).noInputs()))
+                  .withDescription("HivePruneEmptyUnionBranch")
+                  .as(HiveUnionEmptyPruneRuleConfig.class)
+                  .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+                  .toRule();
+
+  /**
+   * Copy of {@link PruneEmptyRules.UnionEmptyPruneRuleConfig} but this version expects {@link Union}.
+   */
+  public interface HiveUnionEmptyPruneRuleConfig extends PruneEmptyRules.PruneEmptyRule.Config {
+    @Override default PruneEmptyRules.PruneEmptyRule toRule() {
+      return new PruneEmptyRules.PruneEmptyRule(this) {
+        @Override public void onMatch(RelOptRuleCall call) {
+          final Union union = call.rel(0);
+          final List<RelNode> inputs = union.getInputs();
+          assert inputs != null;
+          final RelBuilder builder = call.builder();
+          int nonEmptyInputs = 0;
+          for (RelNode input : inputs) {
+            if (!isEmpty(input)) {
+              builder.push(input);
+              nonEmptyInputs++;
+            }
+          }
+          assert nonEmptyInputs < inputs.size()
+                  : "planner promised us at least one Empty child: "
+                  + RelOptUtil.toString(union);
+          if (nonEmptyInputs == 0) {
+            builder.push(union).empty();
+          } else {
+            builder.union(union.all, nonEmptyInputs);
+            builder.convert(union.getRowType(), true);
+          }
+          call.transformTo(builder.build());
+        }
+      };
+    }
+  }
+
+  private static boolean isEmpty(RelNode node) {
+    if (node instanceof Values) {
+      return ((Values) node).getTuples().isEmpty();
+    }
+    if (node instanceof HepRelVertex) {
+      return isEmpty(((HepRelVertex) node).getCurrentRel());
+    }
+    // Note: relation input might be a RelSubset, so we just iterate over the relations
+    // in order to check if the subset is equivalent to an empty relation.
+    if (!(node instanceof RelSubset)) {
+      return false;
+    }
+    RelSubset subset = (RelSubset) node;
+    for (RelNode rel : subset.getRels()) {
+      if (isEmpty(rel)) {
+        return true;
+      }
+    }
+    return false;
+  }

Review Comment:
   Minor: What if we got rid of the negated condition in this way?
   
   ```suggestion
       if (node instanceof RelSubset) {
         RelSubset subset = (RelSubset) node;
         for (RelNode rel : subset.getRels()) {
           if (isEmpty(rel)) {
             return true;
           }
         }
       }
       return false;
     }
   ```



##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveValues.java:
##########
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import com.google.common.collect.ImmutableList;
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Values;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rex.RexLiteral;
+
+import java.util.List;
+
+/**
+ * Subclass of {@link org.apache.calcite.rel.core.Values}.
+ * Specialized to Hive engine.
+ */
+public class HiveValues extends Values {
+
+  public HiveValues(
+          RelOptCluster cluster,
+          RelDataType rowType,
+          ImmutableList<ImmutableList<RexLiteral>> tuples,
+          RelTraitSet traits) {

Review Comment:
   Minor: I don't think we align parameters vertically in Hive in general, we could also try to have them fit a single line like:
   
   ```suggestion
     public HiveValues(
         RelOptCluster cluster, RelDataType rowType, ImmutableList<ImmutableList<RexLiteral>> tuples, RelTraitSet traits) {



##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRewriteToDataSketchesRules.java:
##########
@@ -336,16 +336,13 @@ protected VBuilderPAP(Aggregate aggregate, Project project, String sketchClass)
 
       @Override
       boolean isApplicable(AggregateCall aggCall) {
-        if ((aggInput instanceof Project)
-            && !aggCall.isDistinct() && aggCall.getArgList().size() == 4
+        if ((aggInput != null)
+            && !aggCall.isDistinct() && aggCall.getArgList().size() == 1

Review Comment:
   I am having a hard time understanding this change, do you have a high-level explanation that might help? 
   
   Since others down the line might be puzzled, we could probably add few comments here.



##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java:
##########
@@ -717,9 +719,12 @@ public TrimResult trimFields(Aggregate aggregate, ImmutableBitSet fieldsUsed, Se
         final RexNode filterArg = aggCall.filterArg < 0 ? null
             : relBuilder.field(Mappings.apply(inputMapping, aggCall.filterArg));
         RelBuilder.AggCall newAggCall =
-            relBuilder.aggregateCall(aggCall.getAggregation(),
-                aggCall.isDistinct(), aggCall.isApproximate(),
-                filterArg, aggCall.name, args);
+                relBuilder.aggregateCall(aggCall.getAggregation(), args)
+                        .distinct(aggCall.isDistinct())
+                        .filter(filterArg)
+                        .approximate(aggCall.isApproximate())
+                        .sort(relBuilder.fields(aggCall.collation))
+                        .as(aggCall.name);

Review Comment:
   Can you give a high level overview of the change here?



##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveEmptySingleRules.java:
##########
@@ -0,0 +1,291 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.RelRule;
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rel.core.Union;
+import org.apache.calcite.rel.core.Values;
+import org.apache.calcite.rel.rules.PruneEmptyRules;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveValues;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * This class provides access to Calcite's {@link PruneEmptyRules}.
+ * The instances of the rules use {@link org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelBuilder}.
+ */
+public class HiveRemoveEmptySingleRules extends PruneEmptyRules {
+
+  public static final RelOptRule PROJECT_INSTANCE =
+          RelRule.Config.EMPTY
+                  .withDescription("HivePruneEmptyProject")
+                  .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class)
+                  .withOperandFor(Project.class, project -> true)
+                  .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+                  .toRule();
+
+  public static final RelOptRule FILTER_INSTANCE =
+          RelRule.Config.EMPTY
+                  .withDescription("HivePruneEmptyFilter")
+                  .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class)
+                  .withOperandFor(Filter.class, singleRel -> true)
+                  .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+                  .toRule();
+
+  public static final RelOptRule JOIN_LEFT_INSTANCE =
+          RelRule.Config.EMPTY
+                  .withOperandSupplier(b0 ->
+                          b0.operand(Join.class).inputs(
+                                  b1 -> b1.operand(Values.class)
+                                          .predicate(Values::isEmpty).noInputs(),
+                                  b2 -> b2.operand(RelNode.class).anyInputs()))
+                  .withDescription("HivePruneEmptyJoin(left)")
+                  .as(JoinLeftEmptyRuleConfig.class)
+                  .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+                  .toRule();
+
+  /**
+   * Improved version of Calcite's {@link PruneEmptyRules.JoinLeftEmptyRuleConfig}.
+   * In case of right outer join if the left branch is empty the join operator can be removed
+   * and take the right branch only.
+   *
+   * select * from (select * from emp where 1=0) right join dept
+   * to
+   * select null as emp.col0 ... null as emp.coln, dept.* from dept
+   */
+  public interface JoinLeftEmptyRuleConfig extends PruneEmptyRule.Config {
+    @Override default PruneEmptyRule toRule() {
+      return new PruneEmptyRule(this) {
+        @Override public void onMatch(RelOptRuleCall call) {
+          Join join = call.rel(0);
+          HiveValues empty = call.rel(1);
+          RelNode right = call.rel(2);
+          RexBuilder rexBuilder = call.builder().getRexBuilder();
+          if (join.getJoinType().generatesNullsOnLeft()) {
+            // "select * from emp right join dept" is not necessarily empty if
+            // emp is empty
+            List<RexNode> projects = new ArrayList<>(
+                    empty.getRowType().getFieldCount() + right.getRowType().getFieldCount());
+            List<String> columnNames = new ArrayList<>(
+                    empty.getRowType().getFieldCount() + right.getRowType().getFieldCount());
+            // left
+            addNullLiterals(rexBuilder, empty, projects, columnNames);
+            // right
+            copyProjects(rexBuilder, right.getRowType(),
+                    join.getRowType(), empty.getRowType().getFieldCount(), projects, columnNames);
+
+            RelNode project = call.builder().push(right).project(projects, columnNames).build();
+            call.transformTo(project);
+            return;
+          }
+          call.transformTo(call.builder().push(join).empty().build());
+        }
+      };
+    }
+  }
+
+  private static void addNullLiterals(
+          RexBuilder rexBuilder, HiveValues empty, List<RexNode> projectFields, List<String> newColumnNames) {
+    for (int i = 0; i < empty.getRowType().getFieldList().size(); ++i) {
+      RelDataTypeField relDataTypeField = empty.getRowType().getFieldList().get(i);
+      RexNode nullLiteral = rexBuilder.makeNullLiteral(relDataTypeField.getType());
+      projectFields.add(nullLiteral);
+      newColumnNames.add(empty.getRowType().getFieldList().get(i).getName());
+    }
+  }
+
+  public static void copyProjects(RexBuilder rexBuilder, RelDataType inRowType,
+                                  RelDataType castRowType, int castRowTypeOffset,
+                                  List<RexNode> outProjects, List<String> outProjectNames) {
+
+    for (int i = 0; i < inRowType.getFieldCount(); ++i) {
+      RelDataTypeField relDataTypeField = inRowType.getFieldList().get(i);
+      RexInputRef inputRef = rexBuilder.makeInputRef(relDataTypeField.getType(), i);
+      RexNode cast = rexBuilder.makeCast(
+              castRowType.getFieldList().get(castRowTypeOffset + i).getType(), inputRef);

Review Comment:
   Since those two methods are used for both left and right join rules, I'd rather place them before the two rules or after them, not in the middle.



##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java:
##########
@@ -121,7 +127,83 @@ public static ASTNode convert(final RelNode relNode, List<FieldSchema> resultSch
     return c.convert();
   }
 
+  //    TOK_QUERY
+  //      TOK_INSERT
+  //         TOK_DESTINATION
+  //            TOK_DIR
+  //               TOK_TMP_FILE
+  //         TOK_SELECT
+  //            TOK_SELEXPR
+  //               TOK_FUNCTION
+  //                  TOK_<type>
+  //                  TOK_NULL
+  //               alias0
+  //            ...
+  //            TOK_SELEXPR
+  //               TOK_FUNCTION
+  //                  TOK_<type>
+  //                  TOK_NULL
+  //               aliasn
+  //         TOK_LIMIT
+  //            0
+  //            0
+  private static ASTNode emptyPlan(RelDataType dataType) {

Review Comment:
   As discussed off-line, can we have a unit test covering the translation (especially the typed null bit)?
   
   I am positive that the method is doing what it should, I am more worried about future regressions if it gets touched.



##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/opconventer/HiveValuesVisitor.java:
##########
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer;
+
+import org.apache.calcite.rel.core.Values;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorFactory;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveValues;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.LimitDesc;
+import org.apache.hadoop.hive.ql.plan.SelectDesc;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+class HiveValuesVisitor extends HiveRelNodeVisitor<HiveValues> {
+  HiveValuesVisitor(HiveOpConverter hiveOpConverter) {
+    super(hiveOpConverter);
+  }
+
+  @Override
+  OpAttr visit(HiveValues valuesRel) throws SemanticException {
+
+    LOG.debug("Translating operator rel#{}:{} with row type: [{}]",
+            valuesRel.getId(), valuesRel.getRelTypeName(), valuesRel.getRowType());
+    LOG.debug("Operator rel#{}:{} has {} tuples.",
+            valuesRel.getId(), valuesRel.getRelTypeName(), valuesRel.tuples.size());
+
+    if (!Values.isEmpty(valuesRel)) {
+      LOG.error("Empty {} operator translation not supported yet in return path.",
+              valuesRel.getClass().getCanonicalName());
+      return null;
+    }
+
+    // 1. collect columns for project row schema
+    List<String> columnNames = new ArrayList<>();
+    List<ExprNodeDesc> exprNodeDescList = new ArrayList<>();
+    Map<String, ExprNodeDesc> colExprMap = new HashMap<>();
+
+    ArrayList<ColumnInfo> colInfoList = new ArrayList<>();

Review Comment:
   Nitpick, prefer the interface if  possible:
   ```suggestion
       List<ColumnInfo> colInfoList = new ArrayList<>();
   ```



##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveEmptySingleRules.java:
##########
@@ -0,0 +1,291 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.RelRule;
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rel.core.Union;
+import org.apache.calcite.rel.core.Values;
+import org.apache.calcite.rel.rules.PruneEmptyRules;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveValues;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * This class provides access to Calcite's {@link PruneEmptyRules}.
+ * The instances of the rules use {@link org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelBuilder}.
+ */
+public class HiveRemoveEmptySingleRules extends PruneEmptyRules {
+
+  public static final RelOptRule PROJECT_INSTANCE =
+          RelRule.Config.EMPTY
+                  .withDescription("HivePruneEmptyProject")
+                  .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class)
+                  .withOperandFor(Project.class, project -> true)
+                  .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+                  .toRule();
+
+  public static final RelOptRule FILTER_INSTANCE =
+          RelRule.Config.EMPTY
+                  .withDescription("HivePruneEmptyFilter")
+                  .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class)
+                  .withOperandFor(Filter.class, singleRel -> true)
+                  .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+                  .toRule();
+
+  public static final RelOptRule JOIN_LEFT_INSTANCE =
+          RelRule.Config.EMPTY
+                  .withOperandSupplier(b0 ->
+                          b0.operand(Join.class).inputs(
+                                  b1 -> b1.operand(Values.class)
+                                          .predicate(Values::isEmpty).noInputs(),
+                                  b2 -> b2.operand(RelNode.class).anyInputs()))
+                  .withDescription("HivePruneEmptyJoin(left)")
+                  .as(JoinLeftEmptyRuleConfig.class)
+                  .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+                  .toRule();
+
+  /**
+   * Improved version of Calcite's {@link PruneEmptyRules.JoinLeftEmptyRuleConfig}.
+   * In case of right outer join if the left branch is empty the join operator can be removed
+   * and take the right branch only.
+   *
+   * select * from (select * from emp where 1=0) right join dept
+   * to
+   * select null as emp.col0 ... null as emp.coln, dept.* from dept
+   */
+  public interface JoinLeftEmptyRuleConfig extends PruneEmptyRule.Config {
+    @Override default PruneEmptyRule toRule() {
+      return new PruneEmptyRule(this) {
+        @Override public void onMatch(RelOptRuleCall call) {
+          Join join = call.rel(0);
+          HiveValues empty = call.rel(1);
+          RelNode right = call.rel(2);
+          RexBuilder rexBuilder = call.builder().getRexBuilder();
+          if (join.getJoinType().generatesNullsOnLeft()) {
+            // "select * from emp right join dept" is not necessarily empty if

Review Comment:
   Can you update following Ruben's suggestion from the Calcite's PR here and for the corresponding "right" version of the rule?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscribe@hive.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: gitbox-unsubscribe@hive.apache.org
For additional commands, e-mail: gitbox-help@hive.apache.org