You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/08/15 20:48:28 UTC

[impala] 02/02: IMPALA-8842 part 1: (Hive3) Use 'engine' field in HMS stat API

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 60ac7c07511015de130dbd01885421459e96bc3e
Author: Attila Jeges <at...@cloudera.com>
AuthorDate: Tue Aug 6 20:55:13 2019 +0200

    IMPALA-8842 part 1: (Hive3) Use 'engine' field in HMS stat API
    
    HIVE-22046 added 'engine' column to TAB_COL_STATS and PART_COL_STATS
    HMS tables. The new column is used to differentiate among column stats
    computed by different engines. The related HMS API calls were changed
    accordingly.
    
    This change is Step 2 in a series of steps to coordinate the
    introduction of HMS API changes to Hive3 and Impala. Step 4 is also
    Impala related, it will be covered in IMPALA-8842 part 2.
    
    The steps are as follows:
    
    1. Change in Hive3.
    We push new APIs so Impala can use them. New APIs will simply call old
    existing methods so there should not be any change of functionality
    there. Since there were many incompatible changes, new APIs are tagged
    method_name_V2.
    
    2. Change in Impala.
    Push changes to use new methods *V2.
    
    3. Change in Hive3.
    Push patch with complete functionality. *V2 methods contains the new
    logic. The old existing methods are not used anymore by Impala at this
    point, hence they can be removed. For every method_name_V2, I will
    create a corresponding method method_name that calls the former one.
    
    4. Change in Impala
    Replace *V2 calls by *.
    
    5. Change in Hive3.
    Remove *V2 methods from API.
    
    This change also adds exclusions to fe/pom.xml and shaded-deps/pom.xml
    to work around the Hive3 Hadoop2 dependency.
    
    TESTING:
    Only Step 1 has been merged in into Hive3 at this point, which means
    that there's no change in Impala and Hive3 functionality yet.
    
    Change-Id: I9a73f5eeac8e84d63b22aaed5dfbcd8ea39f0af4
    Reviewed-on: http://gerrit.cloudera.org:8080/14043
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 bin/impala-config.sh                               | 12 +++---
 fe/pom.xml                                         |  4 ++
 .../org/apache/impala/compat/MetastoreShim.java    | 33 ++++++++++++++++
 .../org/apache/impala/compat/MetastoreShim.java    | 44 ++++++++++++++++++++++
 .../main/java/org/apache/impala/catalog/Table.java |  3 +-
 .../impala/catalog/local/DirectMetaProvider.java   |  3 +-
 .../apache/impala/service/CatalogOpExecutor.java   |  7 ++--
 .../org/apache/impala/catalog/CatalogTest.java     | 11 +++---
 shaded-deps/pom.xml                                |  6 +++
 9 files changed, 106 insertions(+), 17 deletions(-)

diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index d795748..356eceb 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -168,19 +168,19 @@ fi
 export IMPALA_TOOLCHAIN_HOST
 export CDH_MAJOR_VERSION=6
 export CDH_BUILD_NUMBER=1173663
-export CDP_BUILD_NUMBER=1318335
+export CDP_BUILD_NUMBER=1352353
 export CDH_HADOOP_VERSION=3.0.0-cdh6.x-SNAPSHOT
-export CDP_HADOOP_VERSION=3.1.1.7.0.0.0-365
+export CDP_HADOOP_VERSION=3.1.1.7.1.0.0-33
 export IMPALA_HBASE_VERSION=2.1.0-cdh6.x-SNAPSHOT
 export IMPALA_SENTRY_VERSION=2.1.0-cdh6.x-SNAPSHOT
-export IMPALA_RANGER_VERSION=1.2.0.7.0.0.0-365
+export IMPALA_RANGER_VERSION=1.2.0.7.1.0.0-33
 export IMPALA_PARQUET_VERSION=1.9.0-cdh6.x-SNAPSHOT
 export IMPALA_AVRO_JAVA_VERSION=1.8.2-cdh6.x-SNAPSHOT
 export IMPALA_LLAMA_MINIKDC_VERSION=1.0.0
 export IMPALA_KITE_VERSION=1.0.0-cdh6.x-SNAPSHOT
 export IMPALA_KUDU_JAVA_VERSION=1.10.0-cdh6.x-SNAPSHOT
 export CDH_HIVE_VERSION=2.1.1-cdh6.x-SNAPSHOT
-export CDP_HIVE_VERSION=3.1.0.7.0.0.0-365
+export CDP_HIVE_VERSION=3.1.0.7.1.0.0-33
 
 # When IMPALA_(CDH_COMPONENT)_URL are overridden, they may contain '$(platform_label)'
 # which will be substituted for the CDH platform label in bootstrap_toolchain.py
@@ -207,8 +207,8 @@ if $USE_CDP_HIVE; then
   # When USE_CDP_HIVE is set we use the CDP hive version to build as well as deploy in
   # the minicluster
   export IMPALA_HIVE_VERSION=${CDP_HIVE_VERSION}
-  export IMPALA_TEZ_VERSION=0.9.1.7.0.0.0-365
-  export IMPALA_KNOX_VERSION=1.0.0.7.0.0.0-365
+  export IMPALA_TEZ_VERSION=0.9.1.7.1.0.0-33
+  export IMPALA_KNOX_VERSION=1.0.0.7.1.0.0-33
   export IMPALA_HADOOP_VERSION=${CDP_HADOOP_VERSION}
   export HADOOP_HOME="$CDP_COMPONENTS_HOME/hadoop-${CDP_HADOOP_VERSION}/"
 else
diff --git a/fe/pom.xml b/fe/pom.xml
index 5162b7a..945547d 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -999,6 +999,10 @@ under the License.
               <groupId>org.apache.hive</groupId>
               <artifactId>hive-metastore</artifactId>
             </exclusion>
+            <exclusion>
+              <groupId>org.apache.hive.shims</groupId>
+              <artifactId>hive-shims-0.20</artifactId>
+            </exclusion>
           </exclusions>
         </dependency>
         <dependency>
diff --git a/fe/src/compat-hive-2/java/org/apache/impala/compat/MetastoreShim.java b/fe/src/compat-hive-2/java/org/apache/impala/compat/MetastoreShim.java
index 0c02cc1..b556774 100644
--- a/fe/src/compat-hive-2/java/org/apache/impala/compat/MetastoreShim.java
+++ b/fe/src/compat-hive-2/java/org/apache/impala/compat/MetastoreShim.java
@@ -35,9 +35,14 @@ import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.InvalidInputException;
+import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
 import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
 import org.apache.hadoop.hive.metastore.api.LockComponent;
 import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
@@ -112,6 +117,34 @@ public class MetastoreShim {
   }
 
   /**
+   * Wrapper around IMetaStoreClient.getTableColumnStatistics() to deal with added
+   * arguments.
+   */
+  public static List<ColumnStatisticsObj> getTableColumnStatistics(
+      IMetaStoreClient client, String dbName, String tableName, List<String> colNames)
+      throws NoSuchObjectException, MetaException, TException {
+    return client.getTableColumnStatistics(dbName, tableName, colNames);
+  }
+
+  /**
+   * Wrapper around IMetaStoreClient.deleteTableColumnStatistics() to deal with added
+   * arguments.
+   */
+  public static boolean deleteTableColumnStatistics(IMetaStoreClient client,
+      String dbName, String tableName, String colName)
+      throws NoSuchObjectException, MetaException, InvalidObjectException, TException,
+             InvalidInputException {
+    return client.deleteTableColumnStatistics(dbName, tableName, colName);
+  }
+
+  /**
+   * Wrapper around ColumnStatistics c'tor to deal with the added engine property.
+   */
+  public static ColumnStatistics createNewHiveColStats() {
+    return new ColumnStatistics();
+  }
+
+  /**
    * Wrapper around MetaStoreUtils.updatePartitionStatsFast() to deal with added
    * arguments.
    */
diff --git a/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java b/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java
index 7abdf22..44046b6 100644
--- a/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java
+++ b/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java
@@ -44,7 +44,11 @@ import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.LockRequestBuilder;
 import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.InvalidInputException;
+import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
 import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
 import org.apache.hadoop.hive.metastore.api.LockComponent;
 import org.apache.hadoop.hive.metastore.api.LockRequest;
@@ -52,6 +56,7 @@ import org.apache.hadoop.hive.metastore.api.LockResponse;
 import org.apache.hadoop.hive.metastore.api.LockState;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.NoSuchLockException;
+import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.apache.hadoop.hive.metastore.api.NoSuchTxnException;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.Table;
@@ -116,6 +121,12 @@ public class MetastoreShim {
   private static final int LOCK_RETRY_WAIT_SECONDS = 3;
 
   /**
+   * Constant variable that stores engine value needed to store / access
+   * Impala column statistics.
+   */
+  protected static final String IMPALA_ENGINE = "impala";
+
+  /**
    * Wrapper around MetaStoreUtils.validateName() to deal with added arguments.
    */
   public static boolean validateName(String name) {
@@ -170,6 +181,39 @@ public class MetastoreShim {
     client.alter_partitions(dbName, tblName, partitions, null,
          validWriteIds, tblWriteId);
   }
+
+  /**
+   * Wrapper around IMetaStoreClient.getTableColumnStatistics() to deal with added
+   * arguments.
+   */
+  public static List<ColumnStatisticsObj> getTableColumnStatistics(
+      IMetaStoreClient client, String dbName, String tableName, List<String> colNames)
+      throws NoSuchObjectException, MetaException, TException {
+    return client.getTableColumnStatisticsV2(dbName, tableName, colNames,
+        /*engine*/IMPALA_ENGINE);
+  }
+
+  /**
+   * Wrapper around IMetaStoreClient.deleteTableColumnStatistics() to deal with added
+   * arguments.
+   */
+  public static boolean deleteTableColumnStatistics(IMetaStoreClient client,
+      String dbName, String tableName, String colName)
+      throws NoSuchObjectException, MetaException, InvalidObjectException, TException,
+             InvalidInputException {
+    return client.deleteTableColumnStatisticsV2(dbName, tableName, colName,
+        /*engine*/IMPALA_ENGINE);
+  }
+
+  /**
+   * Wrapper around ColumnStatistics c'tor to deal with the added engine property.
+   */
+  public static ColumnStatistics createNewHiveColStats() {
+    ColumnStatistics colStats = new ColumnStatistics();
+    colStats.setEngine(IMPALA_ENGINE);
+    return colStats;
+  }
+
   /**
    * Wrapper around MetaStoreUtils.updatePartitionStatsFast() to deal with added
    * arguments.
diff --git a/fe/src/main/java/org/apache/impala/catalog/Table.java b/fe/src/main/java/org/apache/impala/catalog/Table.java
index b0264c2..24a6b95 100644
--- a/fe/src/main/java/org/apache/impala/catalog/Table.java
+++ b/fe/src/main/java/org/apache/impala/catalog/Table.java
@@ -272,7 +272,8 @@ public abstract class Table extends CatalogObjectImpl implements FeTable {
     List<String> colNames = getColumnNamesWithHmsStats();
 
     try {
-      colStats = client.getTableColumnStatistics(db_.getName(), name_, colNames);
+      colStats = MetastoreShim.getTableColumnStatistics(client, db_.getName(), name_,
+          colNames);
     } catch (Exception e) {
       LOG.warn("Could not load column statistics for: " + getFullName(), e);
       return;
diff --git a/fe/src/main/java/org/apache/impala/catalog/local/DirectMetaProvider.java b/fe/src/main/java/org/apache/impala/catalog/local/DirectMetaProvider.java
index 83a5ea8..c732d8b 100644
--- a/fe/src/main/java/org/apache/impala/catalog/local/DirectMetaProvider.java
+++ b/fe/src/main/java/org/apache/impala/catalog/local/DirectMetaProvider.java
@@ -278,7 +278,8 @@ class DirectMetaProvider implements MetaProvider {
       List<String> colNames) throws TException {
     Preconditions.checkArgument(table instanceof TableMetaRefImpl);
     try (MetaStoreClient c = msClientPool_.getClient()) {
-      return c.getHiveClient().getTableColumnStatistics(
+      return MetastoreShim.getTableColumnStatistics(
+          c.getHiveClient(),
           ((TableMetaRefImpl)table).dbName_,
           ((TableMetaRefImpl)table).tableName_,
           colNames);
diff --git a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
index cfc5bca..5a19f0f 100644
--- a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
+++ b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
@@ -941,10 +941,9 @@ public class CatalogOpExecutor {
     }
 
     // Update column stats.
-    ColumnStatistics colStats = null;
     numUpdatedColumns.setRef(0L);
     if (params.isSetColumn_stats()) {
-      colStats = createHiveColStats(params, table);
+      ColumnStatistics colStats = createHiveColStats(params, table);
       if (colStats.getStatsObjSize() > 0) {
         try(MetaStoreClient msClient = catalog_.getMetaStoreClient()) {
           msClient.getHiveClient().updateTableColumnStatistics(colStats);
@@ -1071,7 +1070,7 @@ public class CatalogOpExecutor {
       TAlterTableUpdateStatsParams params, Table table) {
     Preconditions.checkState(params.isSetColumn_stats());
     // Collection of column statistics objects to be returned.
-    ColumnStatistics colStats = new ColumnStatistics();
+    ColumnStatistics colStats = MetastoreShim.createNewHiveColStats();
     colStats.setStatsDesc(
         new ColumnStatisticsDesc(true, table.getDb().getName(), table.getName()));
     // Generate Hive column stats objects from the update stats params.
@@ -1364,7 +1363,7 @@ public class CatalogOpExecutor {
         if (!col.getStats().hasStats()) continue;
 
         try {
-          msClient.getHiveClient().deleteTableColumnStatistics(
+          MetastoreShim.deleteTableColumnStatistics(msClient.getHiveClient(),
               table.getDb().getName(), table.getName(), col.getName());
           ++numColsUpdated;
         } catch (NoSuchObjectException e) {
diff --git a/fe/src/test/java/org/apache/impala/catalog/CatalogTest.java b/fe/src/test/java/org/apache/impala/catalog/CatalogTest.java
index 73c47f7..563348e 100644
--- a/fe/src/test/java/org/apache/impala/catalog/CatalogTest.java
+++ b/fe/src/test/java/org/apache/impala/catalog/CatalogTest.java
@@ -54,6 +54,7 @@ import org.apache.impala.authorization.AuthorizationPolicy;
 import org.apache.impala.catalog.MetaStoreClientPool.MetaStoreClient;
 import org.apache.impala.common.ImpalaException;
 import org.apache.impala.common.Reference;
+import org.apache.impala.compat.MetastoreShim;
 import org.apache.impala.testutil.CatalogServiceTestCatalog;
 import org.apache.impala.testutil.TestUtils;
 import org.apache.impala.thrift.TFunctionBinaryType;
@@ -560,9 +561,9 @@ public class CatalogTest {
     try (MetaStoreClient client = catalog_.getMetaStoreClient()) {
       // Load some string stats data and use it to update the stats of different
       // typed columns.
-      ColumnStatisticsData stringColStatsData = client.getHiveClient()
-          .getTableColumnStatistics("functional", "alltypesagg",
-           Lists.newArrayList("string_col")).get(0).getStatsData();
+      ColumnStatisticsData stringColStatsData = MetastoreShim.getTableColumnStatistics(
+          client.getHiveClient(), "functional", "alltypesagg",
+          Lists.newArrayList("string_col")).get(0).getStatsData();
 
       assertTrue(!table.getColumn("int_col").updateStats(stringColStatsData));
       assertStatsUnknown(table.getColumn("int_col"));
@@ -574,8 +575,8 @@ public class CatalogTest {
       assertStatsUnknown(table.getColumn("bool_col"));
 
       // Do the same thing, but apply bigint stats to a string column.
-      ColumnStatisticsData bigIntCol = client.getHiveClient()
-          .getTableColumnStatistics("functional", "alltypes",
+      ColumnStatisticsData bigIntCol = MetastoreShim.getTableColumnStatistics(
+          client.getHiveClient(), "functional", "alltypes",
           Lists.newArrayList("bigint_col")).get(0).getStatsData();
       assertTrue(!table.getColumn("string_col").updateStats(bigIntCol));
       assertStatsUnknown(table.getColumn("string_col"));
diff --git a/shaded-deps/pom.xml b/shaded-deps/pom.xml
index 579758e..5870894 100644
--- a/shaded-deps/pom.xml
+++ b/shaded-deps/pom.xml
@@ -40,6 +40,12 @@ the same dependencies
       <groupId>org.apache.hive</groupId>
       <artifactId>hive-exec</artifactId>
       <version>${hive.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.apache.hive.shims</groupId>
+          <artifactId>hive-shims-0.20</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
   </dependencies>
   <build>