Posted to commits@impala.apache.org by ta...@apache.org on 2019/08/13 21:49:36 UTC

[impala] 01/02: IMPALA-8839: Remove COLUMN_STATS_ACCURATE from properties

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit dfae1aea540edf75061fb5ba8b44a49b6cb93590
Author: Yongzhi Chen <yc...@cloudera.com>
AuthorDate: Wed Aug 7 23:07:43 2019 -0400

    IMPALA-8839: Remove COLUMN_STATS_ACCURATE from properties
    
    Hive depends on the property COLUMN_STATS_ACCURATE to tell whether
    the stored statistics are accurate. After Impala inserts data, it
    does not bring the statistics values (for example numRows) up to
    date, so Impala should unset COLUMN_STATS_ACCURATE to tell Hive
    that the stored stats are no longer accurate.
    
    This patch implements the following. After Impala inserts data:
    - Remove COLUMN_STATS_ACCURATE from the table properties if it exists.
    - Remove COLUMN_STATS_ACCURATE from the partition params if it exists.
    - Add helper methods to handle alter table/partition for ACID tables.
    
    Implements the stats changes above for both ACID and non-ACID tables.
    
    Tests:
    Ran manual tests.
    Ran core tests.
    Added EE tests to verify interop with Hive for ACID and external
    tables.
    
    Change-Id: I13f4a77022a7112e10a07314359f927eae083deb
    Reviewed-on: http://gerrit.cloudera.org:8080/14037
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../org/apache/impala/compat/MetastoreShim.java    | 24 ++++++
 .../org/apache/impala/compat/MetastoreShim.java    | 92 ++++++++++++++++++++-
 .../apache/impala/service/CatalogOpExecutor.java   | 94 +++++++++++++++++++++-
 .../QueryTest/acid-clear-statsaccurate.test        | 51 ++++++++++++
 .../queries/QueryTest/clear-statsaccurate.test     | 45 +++++++++++
 tests/query_test/test_acid.py                      | 39 ++++++++-
 6 files changed, 340 insertions(+), 5 deletions(-)
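For context, COLUMN_STATS_ACCURATE is an entry in the HMS table/partition
parameters managed by Hive's StatsSetupConst, so clearing it amounts to
removing the parameter and issuing an alter call. A minimal sketch of the
non-transactional table path, assuming a connected IMetaStoreClient and
hypothetical db/table names (not part of this patch):

    import java.util.Map;
    import org.apache.hadoop.hive.common.StatsSetupConst;
    import org.apache.hadoop.hive.metastore.IMetaStoreClient;
    import org.apache.hadoop.hive.metastore.api.Table;

    static void clearStatsAccurate(IMetaStoreClient client) throws Exception {
      // Hypothetical db/table names, for illustration only.
      Table tbl = client.getTable("demo_db", "demo_tbl");
      Map<String, String> params = tbl.getParameters();
      if (params != null
          && params.remove(StatsSetupConst.COLUMN_STATS_ACCURATE) != null) {
        // Guard against Hive 2 dropping basic stats on alter (HIVE-15653).
        params.put(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE);
        client.alter_table("demo_db", "demo_tbl", tbl);
      }
    }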

diff --git a/fe/src/compat-hive-2/java/org/apache/impala/compat/MetastoreShim.java b/fe/src/compat-hive-2/java/org/apache/impala/compat/MetastoreShim.java
index 96bd997..0c02cc1 100644
--- a/fe/src/compat-hive-2/java/org/apache/impala/compat/MetastoreShim.java
+++ b/fe/src/compat-hive-2/java/org/apache/impala/compat/MetastoreShim.java
@@ -74,6 +74,14 @@ public class MetastoreShim {
   }
 
   /**
+   * Hive-3 only function
+   */
+  public static void alterTableWithTransaction(IMetaStoreClient client,
+      Table tbl, long txnId) {
+    throw new UnsupportedOperationException("alterTableWithTransaction");
+  }
+
+  /**
    * Wrapper around IMetaStoreClient.alter_partition() to deal with added
    * arguments.
    */
@@ -93,6 +101,16 @@ public class MetastoreShim {
     client.alter_partitions(dbName, tableName, partitions, null);
   }
 
+
+  /**
+   * Hive-3 only function
+   */
+  public static void alterPartitionsWithTransaction(IMetaStoreClient client,
+      String dbName, String tblName, List<Partition> partitions,
+      long tblWriteId, long txnId) {
+    throw new UnsupportedOperationException("alterPartitionsWithTransaction");
+  }
+
   /**
    * Wrapper around MetaStoreUtils.updatePartitionStatsFast() to deal with added
    * arguments.
@@ -280,6 +298,12 @@ public class MetastoreShim {
 
   /**
    *  Hive-3 only function
+   */
+  public static void setWriteIdForMSPartition(Partition partition, long writeId) {
+    // No-op: per-partition write ids only exist in Hive 3 (ACID v2).
+  }
+
+  /**
+   *  Hive-3 only function
    *  -1 means undefined
    */
   public static long getWriteIdFromMSTable(Table msTbl) {
diff --git a/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java b/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java
index 18cd0a7..7abdf22 100644
--- a/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java
+++ b/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java
@@ -30,11 +30,14 @@ import com.google.common.collect.ImmutableMap;
 import java.net.InetAddress;
 import java.net.UnknownHostException;
 import java.util.Arrays;
+import java.util.BitSet;
 import java.util.EnumSet;
 import java.util.List;
 
 import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.common.ValidReaderWriteIdList;
+import org.apache.hadoop.hive.common.ValidTxnList;
+import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
 import org.apache.hadoop.hive.common.ValidWriteIdList;
 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
@@ -52,6 +55,7 @@ import org.apache.hadoop.hive.metastore.api.NoSuchLockException;
 import org.apache.hadoop.hive.metastore.api.NoSuchTxnException;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.api.TableValidWriteIds;
 import org.apache.hadoop.hive.metastore.api.TxnAbortedException;
 import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
 import org.apache.hadoop.hive.metastore.messaging.AlterTableMessage;
@@ -83,7 +87,7 @@ import org.apache.log4j.Logger;
 import org.apache.thrift.TException;
 
 import com.google.common.base.Preconditions;
-
+import com.google.common.collect.Lists;
 /**
  * A wrapper around some of Hive's Metastore API's to abstract away differences
  * between major versions of Hive. This implements the shimmed methods for Hive 3.
@@ -119,6 +123,21 @@ public class MetastoreShim {
   }
 
   /**
+   * Wrapper around IMetaStoreClient.alter_table with validWriteIds as a param.
+   */
+  public static void alterTableWithTransaction(IMetaStoreClient client,
+     Table tbl, long txnId)
+     throws InvalidOperationException, MetaException, TException {
+    // Get the ValidWriteIdList so that Hive's verification passes when the
+    // table property COLUMN_STATS_ACCURATE is set.
+    String validWriteIds = getValidWriteIdListInTxn(client, tbl.getDbName(),
+        tbl.getTableName(), txnId);
+    client.alter_table(null, tbl.getDbName(), tbl.getTableName(),
+        tbl, null, validWriteIds);
+  }
+
+  /**
    * Wrapper around IMetaStoreClient.alter_partition() to deal with added
    * arguments.
    */
@@ -139,6 +158,19 @@ public class MetastoreShim {
   }
 
   /**
+   * Wrapper around IMetaStoreClient.alter_partitions with transaction information
+   */
+  public static void alterPartitionsWithTransaction(IMetaStoreClient client,
+    String dbName, String tblName, List<Partition> partitions, long tblWriteId,
+    long txnId) throws InvalidOperationException, MetaException, TException {
+    // Get the ValidWriteIdList so that Hive's verification passes when the
+    // property COLUMN_STATS_ACCURATE is set. A correct validWriteIdList is
+    // also needed to commit the alter partitions operation on the HMS side.
+    String validWriteIds = getValidWriteIdListInTxn(client, dbName, tblName, txnId);
+    client.alter_partitions(dbName, tblName, partitions, null,
+         validWriteIds, tblWriteId);
+  }
+
+  /**
    * Wrapper around MetaStoreUtils.updatePartitionStatsFast() to deal with added
    * arguments.
    */
@@ -445,6 +477,28 @@ public class MetastoreShim {
     return new ValidReaderWriteIdList(validWriteIds);
   }
 
+
+  /**
+   * Gets the validWriteIds as a string for the given transaction id and
+   * table name.
+   */
+  private static String getValidWriteIdListInTxn(IMetaStoreClient client, String dbName,
+      String tblName, long txnId)
+      throws InvalidOperationException, MetaException, TException {
+    ValidTxnList txns = client.getValidTxns(txnId);
+    String tableFullName = dbName + "." + tblName;
+    List<TableValidWriteIds> writeIdsObj = client.getValidWriteIds(
+        Lists.newArrayList(tableFullName), txns.toString());
+    ValidTxnWriteIdList validTxnWriteIdList = new ValidTxnWriteIdList(txnId);
+    for (TableValidWriteIds tableWriteIds : writeIdsObj) {
+      validTxnWriteIdList.addTableValidWriteIdList(
+          createValidReaderWriteIdList(tableWriteIds));
+    }
+    String validWriteIds =
+        validTxnWriteIdList.getTableValidWriteIdList(tableFullName).writeToString();
+    return validWriteIds;
+  }
+
   /**
    * Wrapper around HMS Partition object to get writeID
    * WriteID is introduced in ACID 2
@@ -456,6 +510,14 @@ public class MetastoreShim {
   }
 
   /**
+   * Set write ID to HMS partition.
+   */
+  public static void setWriteIdForMSPartition(Partition partition, long writeId) {
+    Preconditions.checkNotNull(partition);
+    partition.setWriteId(writeId);
+  }
+
+  /**
    * Wrapper around HMS Table object to get writeID
    * Per table writeId is introduced in ACID 2
    * It is used to detect changes of the table
@@ -720,4 +782,32 @@ public class MetastoreShim {
   public static long getMajorVersion() {
     return MAJOR_VERSION;
   }
+
+  /**
+   * Borrowed code from Hive. This assumes that the caller intends to read the
+   * files, and thus treats both open and aborted write ids as invalid.
+   * @param tableWriteIds valid write ids for the given table from the metastore
+   * @return a valid write IDs list for the input table
+   */
+  private static ValidReaderWriteIdList createValidReaderWriteIdList(
+      TableValidWriteIds tableWriteIds) {
+    String fullTableName = tableWriteIds.getFullTableName();
+    long highWater = tableWriteIds.getWriteIdHighWaterMark();
+    List<Long> invalids = tableWriteIds.getInvalidWriteIds();
+    BitSet abortedBits = BitSet.valueOf(tableWriteIds.getAbortedBits());
+    long[] exceptions = new long[invalids.size()];
+    int i = 0;
+    for (long writeId : invalids) {
+      exceptions[i++] = writeId;
+    }
+    if (tableWriteIds.isSetMinOpenWriteId()) {
+      return new ValidReaderWriteIdList(fullTableName, exceptions, abortedBits,
+          highWater, tableWriteIds.getMinOpenWriteId());
+    } else {
+      return new ValidReaderWriteIdList(fullTableName, exceptions, abortedBits,
+          highWater);
+    }
+  }
+
 }
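A note on the shim pattern above: call sites reference only
org.apache.impala.compat.MetastoreShim, and the build selects either the
compat-hive-2 or the compat-hive-3 source tree, so the same call compiles
against either Hive version. A sketch of a call site (msClient, msTbl and
txnId are assumed to come from the surrounding catalog operation):

    // On a Hive-2 build the stub throws UnsupportedOperationException;
    // on Hive-3 the alter is sent together with a valid write-id list.
    MetastoreShim.alterTableWithTransaction(msClient.getHiveClient(), msTbl, txnId);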
diff --git a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
index 643a551..cfc5bca 100644
--- a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
+++ b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
@@ -3738,6 +3738,12 @@ public class CatalogOpExecutor {
     }
     final Timer.Context context
         = table.getMetrics().getTimer(HdfsTable.CATALOG_UPDATE_DURATION_METRIC).time();
+
+    long transactionId = -1;
+    if (update.isSetTransaction_id()) transactionId = update.getTransaction_id();
+    long tableWriteId = -1;
+    boolean isAcid = false;
+
     try {
       // Get new catalog version for table in insert.
       long newCatalogVersion = catalog_.incrementAndGetCatalogVersion();
@@ -3760,6 +3766,8 @@ public class CatalogOpExecutor {
       Collection<? extends FeFsPartition> parts =
           FeCatalogUtils.loadAllPartitions((HdfsTable) table);
       List<FeFsPartition> affectedExistingPartitions = new ArrayList<>();
+      List<org.apache.hadoop.hive.metastore.api.Partition> hmsPartitionsStatsUnset =
+          Lists.newArrayList();
       if (table.getNumClusteringCols() > 0) {
         // Set of all partition names targeted by the insert that need to be created
         // in the Metastore (partitions that do not currently exist in the catalog).
@@ -3769,16 +3777,35 @@ public class CatalogOpExecutor {
         HashSet<String> partsToCreate =
             Sets.newHashSet(update.getCreated_partitions());
         partsToLoadMetadata = Sets.newHashSet(partsToCreate);
+        if (AcidUtils.isTransactionalTable(table.getMetaStoreTable().getParameters())) {
+          isAcid = true;
+          try (MetaStoreClient msClient = catalog_.getMetaStoreClient()) {
+            tableWriteId = MetastoreShim.allocateTableWriteId(
+                msClient.getHiveClient(), transactionId,
+                table.getDb().getName(), table.getName());
+          }
+        }
         for (FeFsPartition partition: parts) {
           // TODO: In the BE we build partition names without a trailing char. In FE
           // we build partition name with a trailing char. We should make this
           // consistent.
           String partName = partition.getPartitionName() + "/";
-
           // Attempt to remove this partition name from partsToCreate. If remove
           // returns true, it indicates the partition already exists.
           if (partsToCreate.remove(partName)) {
             affectedExistingPartitions.add(partition);
+            // For existing partitions, we need to unset COLUMN_STATS_ACCURATE
+            // to tell Hive that the statistics are no longer accurate.
+            if (partition.getParameters() != null && partition.getParameters()
+                .containsKey(StatsSetupConst.COLUMN_STATS_ACCURATE)) {
+              org.apache.hadoop.hive.metastore.api.Partition hmsPartition =
+                  ((HdfsPartition) partition).toHmsPartition();
+              hmsPartition.getParameters().remove(StatsSetupConst.COLUMN_STATS_ACCURATE);
+              if (isAcid) {
+                MetastoreShim.setWriteIdForMSPartition(hmsPartition, tableWriteId);
+              }
+              hmsPartitionsStatsUnset.add(hmsPartition);
+            }
             if (partition.isMarkedCached()) {
               // The partition was targeted by the insert and is also cached. Since
               // data was written to the partition, a watch needs to be placed on the
@@ -3832,6 +3859,7 @@ public class CatalogOpExecutor {
                   try {
                     cacheDirIds.add(HdfsCachingUtil.submitCachePartitionDirective(
                         part, cachePoolName, cacheReplication));
+                    StatsSetupConst.setBasicStatsState(part.getParameters(), "false");
                     cachedHmsParts.add(part);
                   } catch (ImpalaRuntimeException e) {
                     String msg = String.format("Partition %s.%s(%s): State: Not " +
@@ -3872,11 +3900,19 @@ public class CatalogOpExecutor {
             throw new InternalException("Error adding partitions", e);
           }
         }
+
+        // Unset COLUMN_STATS_ACCURATE by issuing an alter partition call to HMS.
+        if (!hmsPartitionsStatsUnset.isEmpty()) {
+          unsetPartitionsColStats(table.getMetaStoreTable(), hmsPartitionsStatsUnset,
+              tableWriteId, transactionId);
+        }
       } else {
         // For non-partitioned table, only single part exists
         FeFsPartition singlePart = Iterables.getOnlyElement((List<FeFsPartition>) parts);
         affectedExistingPartitions.add(singlePart);
+
       }
+      unsetTableColStats(table.getMetaStoreTable(), transactionId);
       // Submit the watch request for the given cache directives.
       if (!cacheDirIds.isEmpty()) {
         catalog_.watchCacheDirs(cacheDirIds, tblName.toThrift(),
@@ -3894,6 +3930,7 @@ public class CatalogOpExecutor {
         response.getResult().setStatus(
             new TStatus(TErrorCode.OK, new ArrayList<String>()));
       }
+
       // Commit transactional inserts on success. We don't abort the transaction
       // here in case of failures, because the client, i.e. query coordinator, is
       // always responsible for aborting transactions when queries hit errors.
@@ -4262,4 +4299,59 @@ public class CatalogOpExecutor {
       MetastoreShim.commitTransaction(msClient.getHiveClient(), transactionId);
     }
   }
+
+  /**
+   * Update table properties to remove the COLUMN_STATS_ACCURATE entry if it exists.
+   */
+  private void unsetTableColStats(org.apache.hadoop.hive.metastore.api.Table msTable,
+      long txnId) throws ImpalaRuntimeException {
+    Map<String, String> params = msTable.getParameters();
+    if (params != null && params.containsKey(StatsSetupConst.COLUMN_STATS_ACCURATE)) {
+      params.remove(StatsSetupConst.COLUMN_STATS_ACCURATE);
+      // In Hive 2, some ALTER TABLE operations can drop stats (see HIVE-15653);
+      // set the following property to true to prevent that.
+      // TODO: Investigate further and remove this property if Hive 3 has fixed
+      // the problem.
+      msTable.putToParameters(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE);
+      try (MetaStoreClient msClient = catalog_.getMetaStoreClient()) {
+        try {
+          if (AcidUtils.isTransactionalTable(params)) {
+            MetastoreShim.alterTableWithTransaction(msClient.getHiveClient(),
+                msTable, txnId);
+          } else {
+            msClient.getHiveClient().alter_table(msTable.getDbName(),
+                msTable.getTableName(), msTable);
+          }
+        } catch (TException te) {
+          throw new ImpalaRuntimeException(
+              String.format(HMS_RPC_ERROR_FORMAT_STR, "alter_table"), te);
+        }
+      }
+    }
+  }
+
+  /**
+   * Updates partition properties to remove the COLUMN_STATS_ACCURATE entry from
+   * HMS. This method assumes that the partitions in the input
+   * hmsPartitionsStatsUnset already had COLUMN_STATS_ACCURATE removed from
+   * their parameters.
+   */
+  private void unsetPartitionsColStats(org.apache.hadoop.hive.metastore.api.Table msTable,
+      List<org.apache.hadoop.hive.metastore.api.Partition> hmsPartitionsStatsUnset,
+      long writeId, long txnId) throws ImpalaRuntimeException {
+    try (MetaStoreClient msClient = catalog_.getMetaStoreClient()) {
+      try {
+        if (AcidUtils.isTransactionalTable(msTable.getParameters())) {
+          MetastoreShim.alterPartitionsWithTransaction(
+              msClient.getHiveClient(), msTable.getDbName(), msTable.getTableName(),
+              hmsPartitionsStatsUnset, writeId, txnId);
+        } else {
+          MetastoreShim.alterPartitions(msClient.getHiveClient(), msTable.getDbName(),
+              msTable.getTableName(), hmsPartitionsStatsUnset);
+        }
+      } catch (TException te) {
+        throw new ImpalaRuntimeException(
+            String.format(HMS_RPC_ERROR_FORMAT_STR, "alter_partitions"), te);
+      }
+    }
+  }
+
 }
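Putting the ACID partition path together: the catalog-update path in
CatalogOpExecutor allocates a table write id inside the insert's transaction,
stamps it on each HMS partition whose stats flag was removed, and finally
issues one transactional alter. A condensed sketch using the helpers from
this patch (variable names follow the diff; error handling omitted):

    try (MetaStoreClient msClient = catalog_.getMetaStoreClient()) {
      // One write id per table and transaction covers all altered partitions.
      long tableWriteId = MetastoreShim.allocateTableWriteId(
          msClient.getHiveClient(), transactionId, dbName, tblName);
      for (Partition part : hmsPartitionsStatsUnset) {
        MetastoreShim.setWriteIdForMSPartition(part, tableWriteId);
      }
      MetastoreShim.alterPartitionsWithTransaction(msClient.getHiveClient(),
          dbName, tblName, hmsPartitionsStatsUnset, tableWriteId, transactionId);
    }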
diff --git a/testdata/workloads/functional-query/queries/QueryTest/acid-clear-statsaccurate.test b/testdata/workloads/functional-query/queries/QueryTest/acid-clear-statsaccurate.test
new file mode 100644
index 0000000..09ea3f2
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/acid-clear-statsaccurate.test
@@ -0,0 +1,51 @@
+====
+---- HIVE_QUERY
+use $DATABASE;
+create table insertonly_nopart_colstats (x int) tblproperties (
+  'transactional'='true',
+  'transactional_properties'='insert_only');
+insert into insertonly_nopart_colstats values (1);
+analyze table insertonly_nopart_colstats compute statistics for columns;
+create table insertonly_nopart_colstatschg (x int) tblproperties (
+  'transactional'='true',
+  'transactional_properties'='insert_only');
+insert into insertonly_nopart_colstatschg values (1);
+analyze table insertonly_nopart_colstatschg compute statistics for columns;
+create table insertonly_part_colstats (x int) partitioned by (ds string) tblproperties (
+  'transactional'='true',
+  'transactional_properties'='insert_only');
+alter table insertonly_part_colstats add partition (ds='2010-01-01');
+alter table insertonly_part_colstats add partition (ds='2010-01-02');
+insert into insertonly_part_colstats partition (ds='2010-01-01') values (1);
+analyze table insertonly_part_colstats partition(ds='2010-01-01')
+    compute statistics for columns;
+====
+---- QUERY
+invalidate metadata insertonly_nopart_colstats;
+show create table insertonly_nopart_colstats;
+---- RESULTS
+row_regex: .*COLUMN_STATS_ACCURATE.*
+====
+---- QUERY
+invalidate metadata insertonly_nopart_colstatschg;
+insert into insertonly_nopart_colstatschg values (2);
+show create table insertonly_nopart_colstatschg;
+---- RESULTS
+row_regex: (?!.*COLUMN_STATS_ACCURATE)
+====
+---- QUERY
+select x from insertonly_nopart_colstatschg;
+---- RESULTS
+1
+2
+====
+---- QUERY
+invalidate metadata insertonly_part_colstats;
+insert into insertonly_part_colstats partition (ds='2010-01-01') values (2);
+select x from insertonly_part_colstats where ds='2010-01-01';
+---- RESULTS
+1
+2
+---- TYPES
+int
+====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/clear-statsaccurate.test b/testdata/workloads/functional-query/queries/QueryTest/clear-statsaccurate.test
new file mode 100644
index 0000000..7ccebb0
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/clear-statsaccurate.test
@@ -0,0 +1,45 @@
+====
+---- HIVE_QUERY
+use $DATABASE;
+create external table ext_nopart_colstats (x int);
+insert into ext_nopart_colstats values (1);
+analyze table ext_nopart_colstats compute statistics for columns;
+create external table ext_nopart_colstatschg (x int);
+insert into ext_nopart_colstatschg values (1);
+analyze table ext_nopart_colstatschg compute statistics for columns;
+create external table ext_part_colstats (x int) partitioned by (ds string);
+alter table ext_part_colstats add partition (ds='2010-01-01');
+alter table ext_part_colstats add partition (ds='2010-01-02');
+insert into ext_part_colstats partition (ds='2010-01-01') values (1);
+analyze table ext_part_colstats partition(ds='2010-01-01')
+    compute statistics for columns;
+====
+---- QUERY
+invalidate metadata ext_nopart_colstats;
+show create table ext_nopart_colstats;
+---- RESULTS
+row_regex: .*COLUMN_STATS_ACCURATE.*
+====
+---- QUERY
+invalidate metadata ext_nopart_colstatschg;
+insert into ext_nopart_colstatschg values (2);
+show create table ext_nopart_colstatschg;
+---- RESULTS
+row_regex: (?!.*COLUMN_STATS_ACCURATE)
+====
+---- QUERY
+select x from ext_nopart_colstatschg;
+---- RESULTS
+1
+2
+====
+---- QUERY
+invalidate metadata ext_part_colstats;
+insert into ext_part_colstats partition (ds='2010-01-01') values (2);
+select x from ext_part_colstats where ds='2010-01-01';
+---- RESULTS
+1
+2
+---- TYPES
+int
+====
diff --git a/tests/query_test/test_acid.py b/tests/query_test/test_acid.py
index 8b3ef75..e944f55 100644
--- a/tests/query_test/test_acid.py
+++ b/tests/query_test/test_acid.py
@@ -24,8 +24,6 @@ from tests.common.impala_test_suite import ImpalaTestSuite
 from tests.common.skip import (SkipIfHive2, SkipIfCatalogV2, SkipIfS3, SkipIfABFS,
                                SkipIfADLS, SkipIfIsilon, SkipIfLocal)
 from tests.common.test_dimensions import create_single_exec_option_dimension
-
-
 class TestAcid(ImpalaTestSuite):
   @classmethod
   def get_workload(self):
@@ -84,7 +82,42 @@ class TestAcid(ImpalaTestSuite):
   @SkipIfLocal.hive
   def test_acid_profile(self, vector, unique_database):
     self.run_test_case('QueryTest/acid-profile', vector, use_db=unique_database)
-# TODO(todd): further tests to write:
+
+  @SkipIfHive2.acid
+  @SkipIfS3.hive
+  @SkipIfABFS.hive
+  @SkipIfADLS.hive
+  @SkipIfIsilon.hive
+  @SkipIfLocal.hive
+  def test_acid_insert_statschg(self, vector, unique_database):
+    self.run_test_case('QueryTest/acid-clear-statsaccurate',
+        vector, use_db=unique_database)
+    result = self.run_stmt_in_hive("select count(*) from {0}.{1}".format(unique_database,
+        "insertonly_nopart_colstatschg"))
+    # The text returned by Hive should look like '_c0\n2\n'.
+    assert "2" in result
+    result = self.run_stmt_in_hive("select count(*) from {0}.{1} where ds='2010-01-01'"
+        .format(unique_database, "insertonly_part_colstats"))
+    assert "2" in result
+
+  @SkipIfS3.hive
+  @SkipIfABFS.hive
+  @SkipIfADLS.hive
+  @SkipIfIsilon.hive
+  @SkipIfLocal.hive
+  def test_ext_statschg(self, vector, unique_database):
+    self.run_test_case('QueryTest/clear-statsaccurate',
+        vector, use_db=unique_database)
+    result = self.run_stmt_in_hive("select count(*) from {0}.{1}".format(unique_database,
+        "ext_nopart_colstatschg"))
+    # Hive should return the correct row count after the Impala insert;
+    # the returned text should look like '_c0\n2\n'.
+    assert "2" in result
+    result = self.run_stmt_in_hive("select count(*) from {0}.{1} where ds='2010-01-01'"
+        .format(unique_database, "ext_part_colstats"))
+    assert "2" in result
+
+#  TODO(todd): further tests to write:
 #  TRUNCATE, once HIVE-20137 is implemented.
 #  INSERT OVERWRITE with empty result set, once HIVE-21750 is fixed.
 #  Negative test for LOAD DATA INPATH and all other SQL that we don't support.