You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2019/10/25 23:10:14 UTC

[impala] 03/03: IMPALA-9071: Fix wrong table path of transaction table created by CTAS

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 0f70ade0d78a9eb3afafdfd1a3e36cc8a5563cb4
Author: stiga-huang <hu...@gmail.com>
AuthorDate: Fri Oct 25 16:45:08 2019 +0800

    IMPALA-9071: Fix wrong table path of transaction table created by CTAS
    
    The previous patch of IMPALA-9071 assumes that all tables created by
    CTAS statement are non transactional table. This is wrong since CTAS
    statement can also specify tblproperties so can create transactional
    table.
    
    This patch fixs the hard coded external checking. Instead, we judge on
    whether the table is transactional. If not, it will be translated to
    external table by HMS.
    
    Tests:
     - Add coverage for creating transactional tables by CTAS.
    
    Change-Id: I4b585216e33e4f7962b19ae2351165288691eaf2
    Reviewed-on: http://gerrit.cloudera.org:8080/14546
    Reviewed-by: Joe McDonnell <jo...@cloudera.com>
    Reviewed-by: Zoltan Borok-Nagy <bo...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../org/apache/impala/compat/MetastoreShim.java    |  6 +--
 .../org/apache/impala/compat/MetastoreShim.java    | 18 +++++---
 .../impala/analysis/CreateTableAsSelectStmt.java   |  4 +-
 tests/custom_cluster/test_custom_hive_configs.py   | 48 ++++++++++++++--------
 4 files changed, 49 insertions(+), 27 deletions(-)

diff --git a/fe/src/compat-hive-2/java/org/apache/impala/compat/MetastoreShim.java b/fe/src/compat-hive-2/java/org/apache/impala/compat/MetastoreShim.java
index 667d883..5f47d9d 100644
--- a/fe/src/compat-hive-2/java/org/apache/impala/compat/MetastoreShim.java
+++ b/fe/src/compat-hive-2/java/org/apache/impala/compat/MetastoreShim.java
@@ -453,7 +453,7 @@ public class MetastoreShim {
   }
 
   /**
-   * Return the default table path.
+   * Return the default table path for a new table.
    *
    * Hive-3 doesn't allow managed table to be non transactional after HIVE-22158.
    * Creating a non transactional managed table will finally result in an external table
@@ -461,8 +461,8 @@ public class MetastoreShim {
    * EXTERNAL, the location will be under "metastore.warehouse.external.dir" (HIVE-19837,
    * introduces in hive-2.7, not in hive-2.1.x-cdh6.x yet).
    */
-  public static String getNonAcidTablePath(Database db, String tableName)
+  public static String getPathForNewTable(Database db, Table tbl)
       throws MetaException {
-    return new Path(db.getLocationUri(), tableName).toString();
+    return new Path(db.getLocationUri(), tbl.getTableName().toLowerCase()).toString();
   }
 }
diff --git a/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java b/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java
index abdf321..29fe34f 100644
--- a/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java
+++ b/fe/src/compat-hive-3/java/org/apache/impala/compat/MetastoreShim.java
@@ -90,6 +90,7 @@ import org.apache.impala.service.Frontend;
 import org.apache.impala.service.MetadataOp;
 import org.apache.impala.thrift.TMetadataOpRequest;
 import org.apache.impala.thrift.TResultSet;
+import org.apache.impala.util.AcidUtils;
 import org.apache.impala.util.AcidUtils.TblTransaction;
 import org.apache.log4j.Logger;
 import org.apache.thrift.TException;
@@ -932,7 +933,7 @@ public class MetastoreShim {
   }
 
   /**
-   * Return the default table path.
+   * Return the default table path for a new table.
    *
    * Hive-3 doesn't allow managed table to be non transactional after HIVE-22158.
    * Creating a non transactional managed table will finally result in an external table
@@ -940,12 +941,19 @@ public class MetastoreShim {
    * EXTERNAL, the location will be under "metastore.warehouse.external.dir" (HIVE-19837,
    * introduces in hive-2.7, not in hive-2.1.x-cdh6.x yet).
    */
-  public static String getNonAcidTablePath(Database db, String tableName)
+  public static String getPathForNewTable(Database db, Table tbl)
       throws MetaException {
     Warehouse wh = new Warehouse(new HiveConf());
-    // Non ACID managed tables are all translated to external tables by HMS's default
-    // transformer (HIVE-22158).
+    // Non transactional tables are all translated to external tables by HMS's default
+    // transformer (HIVE-22158). Note that external tables can't be transactional.
+    // So the request and result of the default transformer is:
+    //     non transactional managed table => external table
+    //     non transactional external table => external table
+    //     transactional managed table => managed table
+    //     transactional external table (not allowed)
+    boolean isExternal = !AcidUtils.isTransactionalTable(tbl.getParameters());
     // TODO(IMPALA-9088): deal with customized transformer in HMS.
-    return wh.getDefaultTablePath(db, tableName, /*isExternal*/ true).toString();
+    return wh.getDefaultTablePath(db, tbl.getTableName().toLowerCase(), isExternal)
+        .toString();
   }
 }
diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateTableAsSelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateTableAsSelectStmt.java
index e0a632f..2c4e442 100644
--- a/fe/src/main/java/org/apache/impala/analysis/CreateTableAsSelectStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/CreateTableAsSelectStmt.java
@@ -214,8 +214,8 @@ public class CreateTableAsSelectStmt extends StatementBase {
       // Set a valid location of this table using the same rules as the metastore, unless
       // the user specified a path.
       if (msTbl.getSd().getLocation() == null || msTbl.getSd().getLocation().isEmpty()) {
-        msTbl.getSd().setLocation(MetastoreShim.getNonAcidTablePath(
-            db.getMetaStoreDb(), msTbl.getTableName().toLowerCase()));
+        msTbl.getSd().setLocation(
+            MetastoreShim.getPathForNewTable(db.getMetaStoreDb(), msTbl));
       }
 
       FeTable tmpTable = null;
diff --git a/tests/custom_cluster/test_custom_hive_configs.py b/tests/custom_cluster/test_custom_hive_configs.py
index 74186bd..513c86a 100644
--- a/tests/custom_cluster/test_custom_hive_configs.py
+++ b/tests/custom_cluster/test_custom_hive_configs.py
@@ -15,11 +15,12 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
-
 import pytest
 from os import getenv
 
+from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
+from tests.common.skip import SkipIfHive2
+
 HIVE_SITE_EXT_DIR = getenv('IMPALA_HOME') + '/fe/src/test/resources/hive-site-ext'
 
 
@@ -33,6 +34,7 @@ class TestCustomHiveConfigs(CustomClusterTestSuite):
     super(TestCustomHiveConfigs, cls).setup_class()
 
   # TODO: Remove the xfail marker after bumping CDP_BUILD_NUMBER to contain HIVE-22158
+  @SkipIfHive2.acid
   @pytest.mark.xfail(run=True, reason="May fail on Hive3 versions without HIVE-22158")
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(hive_conf_dir=HIVE_SITE_EXT_DIR)
@@ -42,25 +44,37 @@ class TestCustomHiveConfigs(CustomClusterTestSuite):
     'metastore.warehouse.external.dir' is different from 'metastore.warehouse.dir'
     in Hive.
     """
-    self.execute_query_expect_success(
-        self.client, 'create table %s.ctas_tbl as select 1, 2, "name"' %
-                     unique_database)
-    res = self.execute_query_expect_success(
-        self.client, 'select * from %s.ctas_tbl' % unique_database)
-    assert '1\t2\tname' == res.get_data()
+    # Test creating non-ACID managed table by CTAS. The HMS transformer will translate it
+    # into an external table. But we should still be able to read/write it correctly.
+    self.__check_query_results(
+      unique_database + '.ctas_tbl', '1\t2\tname',
+      'create table %s as select 1, 2, "name"')
 
-    self.execute_query_expect_success(
-        self.client, 'create external table %s.ctas_ext_tbl as select 1, 2, "name"' %
-                     unique_database)
+    # Test creating non-ACID external table by CTAS.
+    self.__check_query_results(
+      unique_database + '.ctas_ext_tbl', '1\t2\tname',
+      'create external table %s as select 1, 2, "name"')
     # Set "external.table.purge"="true" so we can clean files of the external table
     # finally.
     self.execute_query_expect_success(
         self.client, 'alter table %s.ctas_ext_tbl set tblproperties'
                      '("external.table.purge"="true")' % unique_database)
-    res = self.execute_query_expect_success(
-        self.client, 'select * from %s.ctas_ext_tbl' % unique_database)
-    assert '1\t2\tname' == res.get_data()
 
-    # Explicitly drop the database with CASCADE to clean files of the external table
-    self.execute_query_expect_success(
-        self.client, 'drop database if exists cascade' + unique_database)
+    # Test creating insert-only ACID managed table by CTAS.
+    self.__check_query_results(
+      unique_database + '.insertonly_acid_ctas', '1\t2\tname',
+      'create table %s '
+      'tblproperties("transactional"="true", "transactional_properties"="insert_only") '
+      'as select 1, 2, "name"')
+
+    # Test creating insert-only ACID external table by CTAS. Should not be allowed.
+    self.execute_query_expect_failure(
+      self.client,
+      'create external table %s.insertonly_acid_ext_ctas '
+      'tblproperties("transactional"="true", "transactional_properties"="insert_only") '
+      'as select 1, 2, "name"' % unique_database)
+
+  def __check_query_results(self, table, expected_results, query_format):
+    self.execute_query_expect_success(self.client, query_format % table)
+    res = self.execute_query_expect_success(self.client, "select * from " + table)
+    assert expected_results == res.get_data()