Posted to commits@hive.apache.org by ku...@apache.org on 2021/07/20 20:04:23 UTC

[hive] 01/01: Revert "HIVE-25276: Enable automatic statistics generation for Iceberg tables (Peter Vary reviewed by Marton Bod and Adam Szita)"

This is an automated email from the ASF dual-hosted git repository.

kuczoram pushed a commit to branch revert-2419-HIVE-25276
in repository https://gitbox.apache.org/repos/asf/hive.git

commit b7c809d79146e4fa09a88f23be7b928bde48ea95
Author: kuczoram <ku...@gmail.com>
AuthorDate: Tue Jul 20 22:04:06 2021 +0200

    Revert "HIVE-25276: Enable automatic statistics generation for Iceberg tables (Peter Vary reviewed by Marton Bod and Adam Szita)"
    
    This reverts commit 76c49b9df957c8c05b81a4016282c03648b728b9.
---
 .../iceberg/mr/hive/HiveIcebergMetaHook.java       | 33 +++-----
 .../TestHiveIcebergStorageHandlerWithEngine.java   | 97 ----------------------
 .../org/apache/iceberg/mr/hive/TestHiveShell.java  |  9 +-
 .../results/positive/vectorized_iceberg_read.q.out | 10 +--
 .../hive/ql/ddl/table/create/CreateTableDesc.java  | 30 ++-----
 .../apache/hadoop/hive/ql/exec/tez/DagUtils.java   |  5 +-
 .../hadoop/hive/ql/parse/PartitionTransform.java   | 32 ++-----
 .../hive/ql/parse/PartitionTransformSpec.java      |  9 --
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java     |  3 +-
 .../hive/ql/exec/tez/TestTezOutputCommitter.java   |  1 -
 10 files changed, 39 insertions(+), 190 deletions(-)

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
index 4a4271b..648b688 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
@@ -39,14 +39,10 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.SerDeInfo;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
-import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
 import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
 import org.apache.hadoop.hive.ql.ddl.table.AlterTableType;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
-import org.apache.hadoop.hive.ql.parse.PartitionTransform;
-import org.apache.hadoop.hive.ql.parse.PartitionTransformSpec;
-import org.apache.hadoop.hive.ql.session.SessionStateUtil;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.iceberg.BaseMetastoreTableOperations;
 import org.apache.iceberg.BaseTable;
@@ -143,7 +139,7 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
     // Iceberg schema and specification generated by the code
 
     Schema schema = schema(catalogProperties, hmsTable);
-    PartitionSpec spec = spec(conf, schema, hmsTable);
+    PartitionSpec spec = spec(conf, schema, catalogProperties, hmsTable);
 
     // If there are partition keys specified remove them from the HMS table and add them to the column list
     if (hmsTable.isSetPartitionKeys()) {
@@ -241,21 +237,15 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
       preAlterTableProperties.tableLocation = sd.getLocation();
       preAlterTableProperties.format = sd.getInputFormat();
       preAlterTableProperties.schema = schema(catalogProperties, hmsTable);
+      preAlterTableProperties.spec = spec(conf, preAlterTableProperties.schema, catalogProperties, hmsTable);
       preAlterTableProperties.partitionKeys = hmsTable.getPartitionKeys();
 
       context.getProperties().put(HiveMetaHook.ALLOW_PARTITION_KEY_CHANGE, "true");
       // If there are partition keys specified remove them from the HMS table and add them to the column list
-      if (hmsTable.isSetPartitionKeys() && !hmsTable.getPartitionKeys().isEmpty()) {
-        List<PartitionTransformSpec> spec = PartitionTransform.getPartitionTransformSpec(hmsTable.getPartitionKeys());
-        if (!SessionStateUtil.addResource(conf, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC, spec)) {
-          throw new MetaException("Query state attached to Session state must be not null. " +
-              "Partition transform metadata cannot be saved.");
-        }
+      if (hmsTable.isSetPartitionKeys()) {
         hmsTable.getSd().getCols().addAll(hmsTable.getPartitionKeys());
         hmsTable.setPartitionKeysIsSet(false);
       }
-      preAlterTableProperties.spec = spec(conf, preAlterTableProperties.schema, hmsTable);
-
       sd.setInputFormat(HiveIcebergInputFormat.class.getCanonicalName());
       sd.setOutputFormat(HiveIcebergOutputFormat.class.getCanonicalName());
       sd.setSerdeInfo(new SerDeInfo("icebergSerde", HiveIcebergSerDe.class.getCanonicalName(),
@@ -297,6 +287,7 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
       HiveTableUtil.importFiles(preAlterTableProperties.tableLocation, preAlterTableProperties.format,
           partitionSpecProxy, preAlterTableProperties.partitionKeys, catalogProperties, conf);
     } else if (currentAlterTableOp != null) {
+      Map<String, String> contextProperties = context.getProperties();
       switch (currentAlterTableOp) {
         case REPLACE_COLUMNS:
         case RENAME_COLUMN:
@@ -307,7 +298,7 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
           break;
         case ADDPROPS:
         case DROPPROPS:
-          alterTableProperties(hmsTable, context.getProperties());
+          alterTableProperties(hmsTable, contextProperties);
           break;
         case SETPARTITIONSPEC:
           IcebergTableUtil.updateSpec(conf, icebergTable);
@@ -469,23 +460,25 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
     }
   }
 
-  private static PartitionSpec spec(Configuration configuration, Schema schema,
+  private static PartitionSpec spec(Configuration configuration, Schema schema, Properties properties,
       org.apache.hadoop.hive.metastore.api.Table hmsTable) {
 
-    Preconditions.checkArgument(!hmsTable.isSetPartitionKeys() || hmsTable.getPartitionKeys().isEmpty(),
-        "We can only handle non-partitioned Hive tables. The Iceberg schema should be in " +
-            InputFormatConfig.PARTITION_SPEC + " or already converted to a partition transform ");
-
     PartitionSpec spec = IcebergTableUtil.spec(configuration, schema);
     if (spec != null) {
-      Preconditions.checkArgument(hmsTable.getParameters().get(InputFormatConfig.PARTITION_SPEC) == null,
+      Preconditions.checkArgument(!hmsTable.isSetPartitionKeys() || hmsTable.getPartitionKeys().isEmpty(),
           "Provide only one of the following: Hive partition transform specification, or the " +
               InputFormatConfig.PARTITION_SPEC + " property");
       return spec;
     }
 
     if (hmsTable.getParameters().get(InputFormatConfig.PARTITION_SPEC) != null) {
+      Preconditions.checkArgument(!hmsTable.isSetPartitionKeys() || hmsTable.getPartitionKeys().isEmpty(),
+          "Provide only one of the following: Hive partition specification, or the " +
+              InputFormatConfig.PARTITION_SPEC + " property");
       return PartitionSpecParser.fromJson(schema, hmsTable.getParameters().get(InputFormatConfig.PARTITION_SPEC));
+    } else if (hmsTable.isSetPartitionKeys() && !hmsTable.getPartitionKeys().isEmpty()) {
+      // If the table is partitioned then generate the identity partition definitions for the Iceberg table
+      return HiveSchemaUtil.spec(schema, hmsTable.getPartitionKeys());
     } else {
       return PartitionSpec.unpartitioned();
     }
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerWithEngine.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerWithEngine.java
index 37ca700..8585fb6 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerWithEngine.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerWithEngine.java
@@ -1875,103 +1875,6 @@ public class TestHiveIcebergStorageHandlerWithEngine {
     HiveIcebergTestUtils.validateData(expectedResults, HiveIcebergTestUtils.valueForRow(schemaForResultSet, rows), 0);
   }
 
-  @Test
-  public void testStatWithInsert() {
-    TableIdentifier identifier = TableIdentifier.of("default", "customers");
-
-    shell.setHiveSessionValue(HiveConf.ConfVars.HIVESTATSAUTOGATHER.varname, true);
-    testTables.createTable(shell, identifier.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
-        PartitionSpec.unpartitioned(), fileFormat, ImmutableList.of());
-
-    if (testTableType != TestTables.TestTableType.HIVE_CATALOG) {
-      // If the location is set and we have to gather stats, then we have to update the table stats now
-      shell.executeStatement("ANALYZE TABLE " + identifier + " COMPUTE STATISTICS FOR COLUMNS");
-    }
-
-    String insert = testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, identifier, false);
-    shell.executeStatement(insert);
-
-    checkColStat(identifier.name(), "customer_id");
-  }
-
-  @Test
-  public void testStatWithInsertOverwrite() {
-    TableIdentifier identifier = TableIdentifier.of("default", "customers");
-
-    shell.setHiveSessionValue(HiveConf.ConfVars.HIVESTATSAUTOGATHER.varname, true);
-    testTables.createTable(shell, identifier.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
-        PartitionSpec.unpartitioned(), fileFormat, ImmutableList.of());
-
-    String insert = testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, identifier, true);
-    shell.executeStatement(insert);
-
-    checkColStat(identifier.name(), "customer_id");
-  }
-
-  @Test
-  public void testStatWithPartitionedInsert() {
-    TableIdentifier identifier = TableIdentifier.of("default", "customers");
-    PartitionSpec spec = PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
-        .identity("last_name").build();
-
-    shell.setHiveSessionValue(HiveConf.ConfVars.HIVESTATSAUTOGATHER.varname, true);
-    testTables.createTable(shell, identifier.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, spec,
-        fileFormat, ImmutableList.of());
-
-    if (testTableType != TestTables.TestTableType.HIVE_CATALOG) {
-      // If the location is set and we have to gather stats, then we have to update the table stats now
-      shell.executeStatement("ANALYZE TABLE " + identifier + " COMPUTE STATISTICS FOR COLUMNS");
-    }
-
-    String insert = testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, identifier, false);
-    shell.executeStatement(insert);
-
-    checkColStat("customers", "customer_id");
-    checkColStat("customers", "first_name");
-  }
-
-  @Test
-  public void testStatWithCTAS() {
-    Assume.assumeTrue(HiveIcebergSerDe.CTAS_EXCEPTION_MSG, testTableType == TestTables.TestTableType.HIVE_CATALOG);
-
-    shell.executeStatement("CREATE TABLE source (id bigint, name string) PARTITIONED BY (dept string) STORED AS ORC");
-    shell.executeStatement(testTables.getInsertQuery(
-        HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, TableIdentifier.of("default", "source"), false));
-
-    shell.setHiveSessionValue(HiveConf.ConfVars.HIVESTATSAUTOGATHER.varname, true);
-    shell.executeStatement(String.format(
-        "CREATE TABLE target STORED BY ICEBERG %s TBLPROPERTIES ('%s'='%s') AS SELECT * FROM source",
-        testTables.locationForCreateTableSQL(TableIdentifier.of("default", "target")),
-        TableProperties.DEFAULT_FILE_FORMAT, fileFormat));
-
-    checkColStat("target", "id");
-  }
-
-  @Test
-  public void testStatWithPartitionedCTAS() {
-    Assume.assumeTrue(HiveIcebergSerDe.CTAS_EXCEPTION_MSG, testTableType == TestTables.TestTableType.HIVE_CATALOG);
-
-    shell.executeStatement("CREATE TABLE source (id bigint, name string) PARTITIONED BY (dept string) STORED AS ORC");
-    shell.executeStatement(testTables.getInsertQuery(
-        HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, TableIdentifier.of("default", "source"), false));
-
-    shell.setHiveSessionValue(HiveConf.ConfVars.HIVESTATSAUTOGATHER.varname, true);
-    shell.executeStatement(String.format(
-        "CREATE TABLE target PARTITIONED BY (dept, name) " +
-        "STORED BY ICEBERG TBLPROPERTIES ('%s'='%s') AS SELECT * FROM source s",
-        TableProperties.DEFAULT_FILE_FORMAT, fileFormat));
-
-    checkColStat("target", "id");
-    checkColStat("target", "dept");
-  }
-
-  private void checkColStat(String tableName, String colName) {
-    List<Object[]> rows = shell.executeStatement("DESCRIBE " + tableName + " " + colName);
-
-    Assert.assertEquals(2, rows.size());
-    Assert.assertEquals(StatsSetupConst.COLUMN_STATS_ACCURATE, rows.get(1)[0]);
-  }
-
   private void testComplexTypeWrite(Schema schema, List<Record> records) throws IOException {
     String tableName = "complex_table";
     Table table = testTables.createTable(shell, "complex_table", schema, fileFormat, ImmutableList.of());
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java
index b3c9440..15b40cf 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java
@@ -36,7 +36,6 @@ import org.apache.hive.service.cli.SessionHandle;
 import org.apache.hive.service.cli.session.HiveSession;
 import org.apache.hive.service.server.HiveServer2;
 import org.apache.iceberg.hive.TestHiveMetastore;
-import org.apache.iceberg.relocated.com.google.common.base.Joiner;
 import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
 
 /**
@@ -158,13 +157,13 @@ public class TestHiveShell {
 
   /**
    * Used for debugging. Please do not remove even if unused in the codebase.
-   * @param statement The statement to execute
-   * @return The formatted statement output in a single String which is IDE friendly for viewing
+   * @param statement EXPLAIN statement
+   * @return EXPLAIN statement output in a single String which is IDE friendly for viewing
    */
-  public String executeAndStringify(String statement) {
+  public String executeExplain(String statement) {
     List<Object[]> objects = executeStatement(statement);
     return objects.stream()
-        .map(o -> Joiner.on("\t").useForNull("NULL").join(o))
+        .map(o -> (String) o[0])
         .collect(Collectors.joining("\n"));
   }
 
diff --git a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read.q.out b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read.q.out
index dfb7b2c..d22042b 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read.q.out
@@ -129,17 +129,17 @@ Stage-0
     Stage-1
       Reducer 2 vectorized
       File Output Operator [FS_11]
-        Select Operator [SEL_10] (rows=1 width=372)
+        Select Operator [SEL_10] (rows=1 width=564)
           Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
-          Group By Operator [GBY_9] (rows=1 width=372)
+          Group By Operator [GBY_9] (rows=1 width=564)
             Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8
           <-Map 1 [SIMPLE_EDGE] vectorized
             SHUFFLE [RS_8]
               PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-              Group By Operator [GBY_7] (rows=1 width=372)
+              Group By Operator [GBY_7] (rows=1 width=564)
                 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(t_float)"],keys:t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
-                TableScan [TS_0] (rows=1 width=372)
-                  default@tbl_ice_orc_all_types,tbl_ice_orc_all_types,Tbl:COMPLETE,Col:COMPLETE,Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
+                TableScan [TS_0] (rows=1 width=564)
+                  default@tbl_ice_orc_all_types,tbl_ice_orc_all_types,Tbl:COMPLETE,Col:NONE,Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
 
 PREHOOK: query: select max(t_float), t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal from tbl_ice_orc_all_types
         group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableDesc.java
index 595dbab..be9cffa 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableDesc.java
@@ -26,7 +26,6 @@ import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
-import java.util.Optional;
 
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.fs.Path;
@@ -45,7 +44,6 @@ import org.apache.hadoop.hive.metastore.api.SQLForeignKey;
 import org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint;
 import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey;
 import org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint;
-import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.exec.Utilities;
@@ -58,8 +56,6 @@ import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
 import org.apache.hadoop.hive.ql.parse.ParseUtils;
-import org.apache.hadoop.hive.ql.parse.PartitionTransform;
-import org.apache.hadoop.hive.ql.parse.PartitionTransformSpec;
 import org.apache.hadoop.hive.ql.parse.ReplicationSpec;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.Explain;
@@ -67,7 +63,6 @@ import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
 import org.apache.hadoop.hive.ql.plan.ValidationUtility;
 import org.apache.hadoop.hive.ql.plan.Explain.Level;
-import org.apache.hadoop.hive.ql.session.SessionStateUtil;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
@@ -745,6 +740,10 @@ public class CreateTableDesc implements DDLDesc, Serializable {
       tbl.getTTable().getParameters().putAll(getTblProps());
     }
 
+    if (getPartCols() != null) {
+      tbl.setPartCols(getPartCols());
+    }
+
     if (getNumBuckets() != -1) {
       tbl.setNumBuckets(getNumBuckets());
     }
@@ -805,26 +804,9 @@ public class CreateTableDesc implements DDLDesc, Serializable {
       }
     }
 
-    Optional<List<FieldSchema>> cols = Optional.ofNullable(getCols());
-    Optional<List<FieldSchema>> partCols = Optional.ofNullable(getPartCols());
-
-    if (storageHandler != null && storageHandler.alwaysUnpartitioned()) {
-      tbl.getSd().setCols(new ArrayList<>());
-      cols.ifPresent(c -> tbl.getSd().getCols().addAll(c));
-      if (partCols.isPresent() && !partCols.get().isEmpty()) {
-        // Add the partition columns to the normal columns and save the transform to the session state
-        tbl.getSd().getCols().addAll(partCols.get());
-        List<PartitionTransformSpec> spec = PartitionTransform.getPartitionTransformSpec(partCols.get());
-        if (!SessionStateUtil.addResource(conf, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC, spec)) {
-          throw new HiveException("Query state attached to Session state must be not null. " +
-                                      "Partition transform metadata cannot be saved.");
-        }
-      }
-    } else {
-      cols.ifPresent(c -> tbl.setFields(c));
-      partCols.ifPresent(c -> tbl.setPartCols(c));
+    if (getCols() != null) {
+      tbl.setFields(getCols());
     }
-
     if (getBucketCols() != null) {
       tbl.setBucketCols(getBucketCols());
     }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
index f6b0bba..6398cf9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
@@ -1615,12 +1615,9 @@ public class DagUtils {
     } else {
       outputKlass = MROutput.class;
     }
-
-    // If there is a fileSink add a DataSink to the vertex
-    boolean hasFileSink = workUnit.getAllOperators().stream().anyMatch(o -> o instanceof FileSinkOperator);
     // final vertices need to have at least one output
     boolean endVertex = tezWork.getLeaves().contains(workUnit);
-    if (endVertex || hasFileSink) {
+    if (endVertex) {
       OutputCommitterDescriptor ocd = null;
       String committer = HiveConf.getVar(conf, ConfVars.TEZ_MAPREDUCE_OUTPUT_COMMITTER);
       if (committer != null && !committer.isEmpty()) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransform.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransform.java
index 8013ca0..117087a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransform.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransform.java
@@ -17,9 +17,6 @@
  */
 package org.apache.hadoop.hive.ql.parse;
 
-import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.ql.parse.PartitionTransformSpec.TransformType;
-
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
@@ -29,26 +26,15 @@ import java.util.stream.Stream;
 
 public class PartitionTransform {
 
-  private static final Map<Integer, TransformType> TRANSFORMS = Stream
-      .of(new Object[][] { { HiveParser.TOK_IDENTITY, TransformType.IDENTITY },
-          { HiveParser.TOK_YEAR, TransformType.YEAR },
-          { HiveParser.TOK_MONTH, TransformType.MONTH },
-          { HiveParser.TOK_DAY, TransformType.DAY },
-          { HiveParser.TOK_HOUR, TransformType.HOUR },
-          { HiveParser.TOK_TRUNCATE, TransformType.TRUNCATE },
-          { HiveParser.TOK_BUCKET, TransformType.BUCKET } })
-      .collect(Collectors.toMap(e -> (Integer) e[0], e -> (TransformType) e[1]));
-
-  /**
-   * Get the identity transform specification based on the partition columns
-   * @param fields The partition column fields
-   * @return list of partition transforms
-   */
-  public static List<PartitionTransformSpec> getPartitionTransformSpec(List<FieldSchema> fields) {
-    return fields.stream()
-               .map(field -> new PartitionTransformSpec(field.getName(), TransformType.IDENTITY, Optional.empty()))
-               .collect(Collectors.toList());
-  }
+  private static final Map<Integer, PartitionTransformSpec.TransformType> TRANSFORMS = Stream
+      .of(new Object[][] { { HiveParser.TOK_IDENTITY, PartitionTransformSpec.TransformType.IDENTITY },
+          { HiveParser.TOK_YEAR, PartitionTransformSpec.TransformType.YEAR },
+          { HiveParser.TOK_MONTH, PartitionTransformSpec.TransformType.MONTH },
+          { HiveParser.TOK_DAY, PartitionTransformSpec.TransformType.DAY },
+          { HiveParser.TOK_HOUR, PartitionTransformSpec.TransformType.HOUR },
+          { HiveParser.TOK_TRUNCATE, PartitionTransformSpec.TransformType.TRUNCATE },
+          { HiveParser.TOK_BUCKET, PartitionTransformSpec.TransformType.BUCKET } })
+      .collect(Collectors.toMap(e -> (Integer) e[0], e -> (PartitionTransformSpec.TransformType) e[1]));
 
   /**
    * Parse the partition transform specifications from the AST Tree node.
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransformSpec.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransformSpec.java
index 108a006..f97752c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransformSpec.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransformSpec.java
@@ -29,15 +29,6 @@ public class PartitionTransformSpec {
   private TransformType transformType;
   private Optional<Integer> transformParam;
 
-  public PartitionTransformSpec() {
-  }
-
-  public PartitionTransformSpec(String columnName, TransformType transformType, Optional<Integer> transformParam) {
-    this.columnName = columnName;
-    this.transformType = transformType;
-    this.transformParam = transformParam;
-  }
-
   public String getColumnName() {
     return columnName;
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index d9b5f81..c17fa4e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -7868,8 +7868,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     // and it is an insert overwrite or insert into table
     if (conf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER)
         && conf.getBoolVar(ConfVars.HIVESTATSCOLAUTOGATHER)
-        && destinationTable != null
-        && (!destinationTable.isNonNative() || destinationTable.getStorageHandler().commitInMoveTask())
+        && destinationTable != null && !destinationTable.isNonNative()
         && !destTableIsTemporary && !destTableIsMaterialization
         && ColumnStatsAutoGatherContext.canRunAutogatherStats(fso)) {
       if (destType == QBMetaData.DEST_TABLE) {
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezOutputCommitter.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezOutputCommitter.java
index 01df5bc..5bed3d5 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezOutputCommitter.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezOutputCommitter.java
@@ -122,7 +122,6 @@ public class TestTezOutputCommitter {
     conf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
         "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
     conf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false);
-    conf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, false);
     conf.setInt("tez.am.task.max.failed.attempts", MAX_TASK_ATTEMPTS);
     conf.set("mapred.output.committer.class", committerClass);