You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by tm...@apache.org on 2019/06/06 22:37:00 UTC

[impala] branch master updated: IMPALA-8506: Support RENAME TABLE statement with Kudu/HMS integration

This is an automated email from the ASF dual-hosted git repository.

tmarshall pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new b7077ff  IMPALA-8506: Support RENAME TABLE statement with Kudu/HMS integration
b7077ff is described below

commit b7077ffd524e31b768e1e8a28a907f995cdeb058
Author: Hao Hao <ha...@cloudera.com>
AuthorDate: Wed May 22 11:48:09 2019 -0700

    IMPALA-8506: Support RENAME TABLE statement with Kudu/HMS integration
    
    This commit intends to support the actual handling of ALTER/RENAME TABLE
    DDL for managed Kudu tables with Kudu's integration with the Hive
    Metastore. However, currently Kudu is considered as the source of
    truth of the table schema, so when ALTER TABLE (ADD/DROP COLUMN/RANGE_PARTITION),
    Impala always directly alters/loads the Kudu tables. Thus, this commit
    only updates RENAME TABLE DDL, so that after the table is renamed in the
    Kudu, relies on Kudu to rename the table in the HMS.
    
    Change-Id: If7155e0b385b8ad81eda0a84277bc85171a88269
    Reviewed-on: http://gerrit.cloudera.org:8080/13409
    Reviewed-by: Grant Henke <gr...@apache.org>
    Reviewed-by: Alexey Serbin <as...@cloudera.com>
    Reviewed-by: Thomas Marshall <tm...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../apache/impala/service/CatalogOpExecutor.java   | 41 +++++++++++++++-------
 .../queries/QueryTest/kudu_alter.test              |  4 +++
 .../{kudu_alter.test => kudu_hms_alter.test}       | 16 +++++----
 tests/custom_cluster/test_kudu.py                  |  7 +++-
 tests/query_test/test_kudu.py                      |  2 --
 5 files changed, 48 insertions(+), 22 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
index 3671f0d..c16bc38 100644
--- a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
+++ b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
@@ -1513,6 +1513,7 @@ public class CatalogOpExecutor {
           throws ImpalaRuntimeException {
     // Check if Kudu's integration with the Hive Metastore is enabled, and validate
     // the configuration.
+    Preconditions.checkState(KuduTable.isKuduTable(msTbl));
     String masterHosts = msTbl.getParameters().get(KuduTable.KEY_MASTER_HOSTS);
     String hmsUris;
     try {
@@ -2583,17 +2584,30 @@ public class CatalogOpExecutor {
     msTbl.setDbName(newTableName.getDb());
     msTbl.setTableName(newTableName.getTbl());
 
-    // If oldTbl is a managed Kudu table, rename the underlying Kudu table
-    if (oldTbl instanceof KuduTable && !Table.isExternalTable(msTbl)) {
+    // If oldTbl is a managed Kudu table, rename the underlying Kudu table.
+    boolean isManagedKuduTable = (oldTbl instanceof KuduTable) &&
+                                 !Table.isExternalTable(msTbl);
+    boolean altersHMSTable = true;
+    if (isManagedKuduTable) {
       Preconditions.checkState(KuduTable.isKuduTable(msTbl));
-      renameKuduTable((KuduTable) oldTbl, msTbl, newTableName);
+      boolean isKuduHmsIntegrationEnabled = isKuduHmsIntegrationEnabled(msTbl);
+      altersHMSTable = !isKuduHmsIntegrationEnabled;
+      renameManagedKuduTable((KuduTable) oldTbl, msTbl, newTableName,
+          isKuduHmsIntegrationEnabled);
     }
 
-    try (MetaStoreClient msClient = catalog_.getMetaStoreClient()) {
-      msClient.getHiveClient().alter_table(tableName.getDb(), tableName.getTbl(), msTbl);
-    } catch (TException e) {
-      throw new ImpalaRuntimeException(
-          String.format(HMS_RPC_ERROR_FORMAT_STR, "alter_table"), e);
+    // Always updates the HMS metadata for non-Kudu tables. For Kudu tables, when
+    // Kudu is not integrated with the Hive Metastore or if this is an external table,
+    // Kudu will not automatically update the HMS metadata, we have to do it
+    // manually.
+    if (altersHMSTable) {
+      try (MetaStoreClient msClient = catalog_.getMetaStoreClient()) {
+        msClient.getHiveClient().alter_table(
+            tableName.getDb(), tableName.getTbl(), msTbl);
+      } catch (TException e) {
+        throw new ImpalaRuntimeException(
+            String.format(HMS_RPC_ERROR_FORMAT_STR, "alter_table"), e);
+      }
     }
     // Rename the table in the Catalog and get the resulting catalog object.
     // ALTER TABLE/VIEW RENAME is implemented as an ADD + DROP.
@@ -2617,16 +2631,16 @@ public class CatalogOpExecutor {
   }
 
   /**
-   * Renames the underlying Kudu table for the given Impala table. If the new Kudu
+   * Renames the underlying Kudu table for the given managed table. If the new Kudu
    * table name is the same as the old Kudu table name, this method does nothing.
    */
-  private void renameKuduTable(KuduTable oldTbl,
-      org.apache.hadoop.hive.metastore.api.Table oldMsTbl, TableName newTableName)
+  private void renameManagedKuduTable(KuduTable oldTbl,
+      org.apache.hadoop.hive.metastore.api.Table oldMsTbl,
+      TableName newTableName, boolean isHMSIntegrationEanbled)
       throws ImpalaRuntimeException {
-    // TODO: update it to properly handle HMS integration.
     String newKuduTableName = KuduUtil.getDefaultKuduTableName(
         newTableName.getDb(), newTableName.getTbl(),
-        /* isHMSIntegrationEnabled= */false);
+        isHMSIntegrationEanbled);
 
     // If the name of the Kudu table has not changed, do nothing
     if (oldTbl.getKuduTableName().equals(newKuduTableName)) return;
@@ -2796,6 +2810,7 @@ public class CatalogOpExecutor {
           if (KuduTable.isKuduTable(msTbl)) {
             // If 'kudu.table_name' is specified and this is a managed table, rename
             // the underlying Kudu table.
+            // TODO(IMPALA-8618): this should be disallowed since IMPALA-5654
             if (properties.containsKey(KuduTable.KEY_TABLE_NAME)
                 && !properties.get(KuduTable.KEY_TABLE_NAME).equals(
                     msTbl.getParameters().get(KuduTable.KEY_TABLE_NAME))
diff --git a/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test b/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test
index 91289fd..cf84138 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test
@@ -1,3 +1,7 @@
+# Test cases on Kudu tables that only expected to be ran without HMS integration enabled.
+# TODO(IMPALA-8614): The test is almost identical to kudu_hms_alter.test, with the
+# difference this test later contains tests with hardcoded legacy table naming 'impala::'.
+# Note that changes that are made to one file should be reflected in the other.
 ====
 ---- QUERY
 create table simple (id int primary key, name string, valf float, vali bigint)
diff --git a/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test b/testdata/workloads/functional-query/queries/QueryTest/kudu_hms_alter.test
similarity index 95%
copy from testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test
copy to testdata/workloads/functional-query/queries/QueryTest/kudu_hms_alter.test
index 91289fd..29efd26 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/kudu_hms_alter.test
@@ -1,3 +1,7 @@
+# Test cases on Kudu tables that only expected to be ran with HMS integration enabled.
+# TODO(IMPALA-8614): The test is almost identical to kudu_alter.test, with the difference
+# the later contains tests with hardcoded legacy table naming 'impala::'.
+# Note that changes that are made to one file should be reflected in the other.
 ====
 ---- QUERY
 create table simple (id int primary key, name string, valf float, vali bigint)
@@ -41,7 +45,7 @@ STRING
 # Try to use an invalid master address
 alter table simple set tblproperties ('kudu.master_addresses' = 'invalid_host')
 ---- CATCH
-ImpalaRuntimeException: Kudu table 'impala::$DATABASE.simple' does not exist on master 'invalid_host'
+ImpalaRuntimeException: Kudu table '$DATABASE.simple' does not exist on master 'invalid_host'
 ====
 ---- QUERY
 alter table simple rename to simple_new;
@@ -385,7 +389,7 @@ BIGINT
 # The Kudu table kudu_tbl_to_alter is used in order to validate that renaming
 # the managed Impala-Kudu table tbl_to_alter renames the underlying Kudu table
 create external table external_tbl stored as kudu
-  tblproperties('kudu.table_name'='impala::$DATABASE.kudu_tbl_to_alter');
+  tblproperties('kudu.table_name'='$DATABASE.kudu_tbl_to_alter');
 select count(*) from external_tbl
 ---- RESULTS
 5
@@ -396,9 +400,9 @@ BIGINT
 # Ensure that after renaming the Impala table that the old Kudu table no longer
 # exists
 create external table external_tbl_on_nonexistent_kudu_tbl stored as kudu
-  tblproperties('kudu.table_name'='impala::$DATABASE.tbl_to_alter');
+  tblproperties('kudu.table_name'='$DATABASE.tbl_to_alter');
 ---- CATCH
-ImpalaRuntimeException: Table does not exist in Kudu: 'impala::$DATABASE.tbl_to_alter'
+ImpalaRuntimeException: Table does not exist in Kudu: '$DATABASE.tbl_to_alter'
 ====
 ---- QUERY
 # Insert an item into the table pointed by the external Kudu table
@@ -433,7 +437,7 @@ BIGINT
 create table temp_kudu_table (i int primary key) stored as kudu;
 insert into temp_kudu_table values (1), (2), (3);
 alter table external_tbl set
-  tblproperties('kudu.table_name'='impala::$DATABASE.temp_kudu_table');
+  tblproperties('kudu.table_name'='$DATABASE.temp_kudu_table');
 select count(*) from external_tbl
 ---- RESULTS
 3
@@ -631,7 +635,7 @@ alter table external_tbl rename to external_tbl_new;
 ---- QUERY
 describe formatted external_tbl_new;
 ---- RESULTS: VERIFY_IS_SUBSET
-'','kudu.table_name     ','impala::$DATABASE.temp_kudu_table'
+'','kudu.table_name     ','$DATABASE.temp_kudu_table'
 ---- TYPES
 STRING,STRING,STRING
 ----
diff --git a/tests/custom_cluster/test_kudu.py b/tests/custom_cluster/test_kudu.py
index 9bf6d3f..e3d6cc0 100644
--- a/tests/custom_cluster/test_kudu.py
+++ b/tests/custom_cluster/test_kudu.py
@@ -116,7 +116,8 @@ class TestKuduClientTimeout(CustomClusterTestSuite, KuduTestSuite):
 
 class TestKuduHMSIntegration(CustomClusterTestSuite, KuduTestSuite):
   # TODO(IMPALA-8614): parameterize the common tests in query_test/test_kudu.py
-  # to run with HMS integration enabled.
+  # to run with HMS integration enabled. Also avoid restarting Impala to reduce
+  # tests time.
   """Tests the different DDL operations when using a kudu table with Kudu's integration
      with the Hive Metastore."""
 
@@ -284,3 +285,7 @@ class TestKuduHMSIntegration(CustomClusterTestSuite, KuduTestSuite):
       cursor.execute("DROP TABLE %s" % (external_table_name))
       cursor.execute("SHOW TABLES")
       assert (external_table_name,) not in cursor.fetchall()
+
+  @SkipIfKudu.no_hybrid_clock
+  def test_kudu_alter_table(self, vector, unique_database):
+    self.run_test_case('QueryTest/kudu_hms_alter', vector, use_db=unique_database)
diff --git a/tests/query_test/test_kudu.py b/tests/query_test/test_kudu.py
index 549da6a..1dc5eba 100644
--- a/tests/query_test/test_kudu.py
+++ b/tests/query_test/test_kudu.py
@@ -119,8 +119,6 @@ class TestKuduOperations(KuduTestSuite):
   def test_kudu_partition_ddl(self, vector, unique_database):
     self.run_test_case('QueryTest/kudu_partition_ddl', vector, use_db=unique_database)
 
-  @pytest.mark.skipif(pytest.config.option.testing_remote_cluster,
-                      reason="Test references hardcoded hostnames: IMPALA-4873")
   @pytest.mark.execute_serially
   @SkipIfKudu.no_hybrid_clock
   @SkipIfKudu.hms_integration_enabled