Posted to commits@hive.apache.org by vg...@apache.org on 2020/02/06 00:07:49 UTC

[hive] branch master updated: HIVE-22808: HiveRelFieldTrimmer does not handle HiveTableFunctionScan (Krisztian Kasa reviewed by Vineet Garg)

This is an automated email from the ASF dual-hosted git repository.

vgarg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new dabfa86  HIVE-22808: HiveRelFieldTrimmer does not handle HiveTableFunctionScan (Krisztian Kasa reviewed by Vineet Garg)
dabfa86 is described below

commit dabfa861564da447f00d897538b621d58ca5af11
Author: Krisztian Kasa <kk...@cloudera.com>
AuthorDate: Wed Feb 5 16:06:33 2020 -0800

    HIVE-22808: HiveRelFieldTrimmer does not handle HiveTableFunctionScan (Krisztian Kasa reviewed by Vineet Garg)
---
 .../calcite/rules/HiveRelFieldTrimmer.java         | 44 ++++++++++++++++++++++
 .../test/results/clientpositive/except_all.q.out   | 16 ++++----
 .../results/clientpositive/intersect_all_rj.q.out  |  8 ++--
 .../clientpositive/llap/intersect_all_rj.q.out     |  8 ++--
 4 files changed, 60 insertions(+), 16 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
index 9bf42ed..d218fac 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
@@ -36,6 +36,7 @@ import org.apache.calcite.rel.core.Aggregate;
 import org.apache.calcite.rel.core.AggregateCall;
 import org.apache.calcite.rel.core.CorrelationId;
 import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.TableFunctionScan;
 import org.apache.calcite.rel.core.TableScan;
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
 import org.apache.calcite.rel.type.RelDataType;
@@ -68,6 +69,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
 import org.apache.hadoop.hive.ql.parse.ColumnAccessInfo;
 import org.slf4j.Logger;
@@ -745,4 +747,46 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
   protected TrimResult result(RelNode r, final Mapping mapping) {
     return new TrimResult(r, mapping);
   }
+
+  /**
+   * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for {@link HiveTableFunctionScan}.
+   * Copied from {@link org.apache.calcite.sql2rel.RelFieldTrimmer#trimFields(
+   * org.apache.calcite.rel.logical.LogicalTableFunctionScan, ImmutableBitSet, Set)}
+   * with the type of <code>tabFun</code> changed to {@link HiveTableFunctionScan}.
+   * The proper fix would be to implement this in Calcite.
+   */
+  public TrimResult trimFields(
+          HiveTableFunctionScan tabFun,
+          ImmutableBitSet fieldsUsed,
+          Set<RelDataTypeField> extraFields) {
+    final RelDataType rowType = tabFun.getRowType();
+    final int fieldCount = rowType.getFieldCount();
+    final List<RelNode> newInputs = new ArrayList<>();
+
+    for (RelNode input : tabFun.getInputs()) {
+      final int inputFieldCount = input.getRowType().getFieldCount();
+      ImmutableBitSet inputFieldsUsed = ImmutableBitSet.range(inputFieldCount);
+
+      // Create input with trimmed columns.
+      final Set<RelDataTypeField> inputExtraFields =
+              Collections.emptySet();
+      TrimResult trimResult =
+              trimChildRestore(
+                      tabFun, input, inputFieldsUsed, inputExtraFields);
+      assert trimResult.right.isIdentity();
+      newInputs.add(trimResult.left);
+    }
+
+    TableFunctionScan newTabFun = tabFun;
+    if (!tabFun.getInputs().equals(newInputs)) {
+      newTabFun = tabFun.copy(tabFun.getTraitSet(), newInputs,
+              tabFun.getCall(), tabFun.getElementType(), tabFun.getRowType(),
+              tabFun.getColumnMappings());
+    }
+    assert newTabFun.getClass() == tabFun.getClass();
+
+    // Always project all fields.
+    Mapping mapping = Mappings.createIdentity(fieldCount);
+    return result(newTabFun, mapping);
+  }
 }
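
For context: Calcite's RelFieldTrimmer resolves trimFields calls reflectively, by the runtime class of the RelNode, so declaring the public overload above in the Hive subclass is enough for it to be invoked for HiveTableFunctionScan nodes. The stand-alone sketch below illustrates that dispatch-by-runtime-type pattern in plain Java; the class and method names are illustrative stand-ins only, not the actual Calcite API.

    import java.lang.reflect.Method;

    // Illustrative sketch of dispatch-by-runtime-type: the most specific
    // "trimFields" overload is chosen for the concrete class of the argument,
    // which is how a subclass overload such as trimFields(HiveTableFunctionScan, ...)
    // can be picked up without changing any caller.
    public final class DispatchSketch {

      public static class RelNodeStub { }                                // stand-in for RelNode
      public static class TableFunctionScanStub extends RelNodeStub { }  // stand-in for HiveTableFunctionScan

      public static class TrimmerStub {
        public String trimFields(RelNodeStub rel) {
          return "generic trim";
        }
        public String trimFields(TableFunctionScanStub scan) {
          return "table function scan trim";
        }
      }

      static Object dispatch(Object target, String methodName, Object arg) throws Exception {
        // Walk up the argument's class hierarchy until an overload matches.
        for (Class<?> c = arg.getClass(); c != null; c = c.getSuperclass()) {
          try {
            Method m = target.getClass().getMethod(methodName, c);
            return m.invoke(target, arg);
          } catch (NoSuchMethodException ignored) {
            // no overload for this exact type; try the superclass
          }
        }
        throw new NoSuchMethodException(methodName);
      }

      public static void main(String[] args) throws Exception {
        TrimmerStub trimmer = new TrimmerStub();
        // Prints "table function scan trim": the runtime type selects the overload.
        System.out.println(dispatch(trimmer, "trimFields", new TableFunctionScanStub()));
      }
    }

Because the dispatch keys on the runtime type, the new method only has to exist with the right signature; no registration or caller change is needed, which keeps the change local to HiveRelFieldTrimmer until the logic can move into Calcite proper.
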
diff --git a/ql/src/test/results/clientpositive/except_all.q.out b/ql/src/test/results/clientpositive/except_all.q.out
index 020cba4..5d1dc22 100644
--- a/ql/src/test/results/clientpositive/except_all.q.out
+++ b/ql/src/test/results/clientpositive/except_all.q.out
@@ -276,10 +276,10 @@ STAGE PLANS:
               Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE
               Select Operator
                 expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint)
-                outputColumnNames: _col0, _col1, _col3, _col4
+                outputColumnNames: _col0, _col1, _col2, _col3
                 Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
-                  aggregations: sum(_col4), sum(_col3)
+                  aggregations: sum(_col3), sum(_col2)
                   keys: _col0 (type: string), _col1 (type: string)
                   minReductionHashAggr: 0.99
                   mode: hash
@@ -297,10 +297,10 @@ STAGE PLANS:
               Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE
               Select Operator
                 expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint)
-                outputColumnNames: _col0, _col1, _col3, _col4
+                outputColumnNames: _col0, _col1, _col2, _col3
                 Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
-                  aggregations: sum(_col4), sum(_col3)
+                  aggregations: sum(_col3), sum(_col2)
                   keys: _col0 (type: string), _col1 (type: string)
                   minReductionHashAggr: 0.99
                   mode: hash
@@ -467,10 +467,10 @@ STAGE PLANS:
               Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE
               Select Operator
                 expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint)
-                outputColumnNames: _col0, _col1, _col3, _col4
+                outputColumnNames: _col0, _col1, _col2, _col3
                 Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
-                  aggregations: sum(_col4), sum(_col3)
+                  aggregations: sum(_col3), sum(_col2)
                   keys: _col0 (type: string), _col1 (type: string)
                   minReductionHashAggr: 0.99
                   mode: hash
@@ -488,10 +488,10 @@ STAGE PLANS:
               Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE
               Select Operator
                 expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint)
-                outputColumnNames: _col0, _col1, _col3, _col4
+                outputColumnNames: _col0, _col1, _col2, _col3
                 Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
-                  aggregations: sum(_col4), sum(_col3)
+                  aggregations: sum(_col3), sum(_col2)
                   keys: _col0 (type: string), _col1 (type: string)
                   minReductionHashAggr: 0.99
                   mode: hash
diff --git a/ql/src/test/results/clientpositive/intersect_all_rj.q.out b/ql/src/test/results/clientpositive/intersect_all_rj.q.out
index b8ff98a..427b841 100644
--- a/ql/src/test/results/clientpositive/intersect_all_rj.q.out
+++ b/ql/src/test/results/clientpositive/intersect_all_rj.q.out
@@ -180,12 +180,12 @@ HiveProject($f0=[$1])
                 HiveAggregate(group=[{0}], agg#0=[count()])
                   HiveProject($f0=[$0])
                     HiveAggregate(group=[{0}])
-                      HiveProject($f0=[CASE(IS NOT NULL($7), $7, if($5, $8, $6))])
-                        HiveJoin(condition=[>=($1, $13)], joinType=[inner], algorithm=[none], cost=[not available])
-                          HiveProject(int_col_10=[$0], bigint_col_3=[$1], BLOCK__OFFSET__INSIDE__FILE=[$2], INPUT__FILE__NAME=[$3], CAST=[CAST($4):RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid)])
+                      HiveProject($f0=[CASE(IS NOT NULL($3), $3, if($1, $4, $2))])
+                        HiveJoin(condition=[>=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available])
+                          HiveProject(bigint_col_3=[$1])
                             HiveFilter(condition=[IS NOT NULL($1)])
                               HiveTableScan(table=[[default, table_7]], table:alias=[a3])
-                          HiveProject(boolean_col_16=[$0], timestamp_col_5=[$1], timestamp_col_15=[$2], timestamp_col_30=[$3], int_col_18=[$4], BLOCK__OFFSET__INSIDE__FILE=[$5], INPUT__FILE__NAME=[$6], ROW__ID=[$7], CAST=[CAST($4):BIGINT])
+                          HiveProject(boolean_col_16=[$0], timestamp_col_5=[$1], timestamp_col_15=[$2], timestamp_col_30=[$3], CAST=[CAST($4):BIGINT])
                             HiveFilter(condition=[IS NOT NULL(CAST($4):BIGINT)])
                               HiveTableScan(table=[[default, table_10]], table:alias=[a4])
               HiveProject($f0=[$0], $f1=[$1])
diff --git a/ql/src/test/results/clientpositive/llap/intersect_all_rj.q.out b/ql/src/test/results/clientpositive/llap/intersect_all_rj.q.out
index cdfbc22..c47452f 100644
--- a/ql/src/test/results/clientpositive/llap/intersect_all_rj.q.out
+++ b/ql/src/test/results/clientpositive/llap/intersect_all_rj.q.out
@@ -180,12 +180,12 @@ HiveProject($f0=[$1])
                 HiveAggregate(group=[{0}], agg#0=[count()])
                   HiveProject($f0=[$0])
                     HiveAggregate(group=[{0}])
-                      HiveProject($f0=[CASE(IS NOT NULL($7), $7, if($5, $8, $6))])
-                        HiveJoin(condition=[>=($1, $13)], joinType=[inner], algorithm=[none], cost=[not available])
-                          HiveProject(int_col_10=[$0], bigint_col_3=[$1], BLOCK__OFFSET__INSIDE__FILE=[$2], INPUT__FILE__NAME=[$3], CAST=[CAST($4):RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid)])
+                      HiveProject($f0=[CASE(IS NOT NULL($3), $3, if($1, $4, $2))])
+                        HiveJoin(condition=[>=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available])
+                          HiveProject(bigint_col_3=[$1])
                             HiveFilter(condition=[IS NOT NULL($1)])
                               HiveTableScan(table=[[default, table_7]], table:alias=[a3])
-                          HiveProject(boolean_col_16=[$0], timestamp_col_5=[$1], timestamp_col_15=[$2], timestamp_col_30=[$3], int_col_18=[$4], BLOCK__OFFSET__INSIDE__FILE=[$5], INPUT__FILE__NAME=[$6], ROW__ID=[$7], CAST=[CAST($4):BIGINT])
+                          HiveProject(boolean_col_16=[$0], timestamp_col_5=[$1], timestamp_col_15=[$2], timestamp_col_30=[$3], CAST=[CAST($4):BIGINT])
                             HiveFilter(condition=[IS NOT NULL(CAST($4):BIGINT)])
                               HiveTableScan(table=[[default, table_10]], table:alias=[a4])
               HiveProject($f0=[$0], $f1=[$1])