You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by as...@apache.org on 2021/11/23 05:23:00 UTC

[impala] 03/03: IMPALA-11025: Transactional tables should use /test-warehouse/managed/databasename.db

This is an automated email from the ASF dual-hosted git repository.

asherman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit ee03727971f379fe8fb3161387783ce00a9f6b9d
Author: Andrew Sherman <as...@cloudera.com>
AuthorDate: Fri Nov 19 10:33:02 2021 -0800

    IMPALA-11025: Transactional tables should use /test-warehouse/managed/databasename.db
    
    Recent Hive releases appear to enforce that data for a managed table
    is stored under the path given by the hive.metastore.warehouse.dir
    property, in a folder of the form databasename.db/tablename - see
    https://cwiki.apache.org/confluence/display/Hive/Managed+vs.+External+Tables
    Therefore use the form /test-warehouse/managed/databasename.db in
    generate-schema-statements.py when creating transactional tables.
    
    Testing:
    - A few small changes to tests that verify filesystem changes for acid
      tables.
    - Exhaustive tests pass.
    
    Change-Id: Ib870ca802c9fa180e6be7a6f65bef35b227772db
    Reviewed-on: http://gerrit.cloudera.org:8080/18046
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../test/java/org/apache/impala/catalog/FileMetadataLoaderTest.java  | 1 +
 testdata/bin/generate-schema-statements.py                           | 5 +++--
 testdata/workloads/functional-query/queries/QueryTest/acid.test      | 2 +-
 tests/metadata/test_ddl.py                                           | 3 ++-
 4 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/fe/src/test/java/org/apache/impala/catalog/FileMetadataLoaderTest.java b/fe/src/test/java/org/apache/impala/catalog/FileMetadataLoaderTest.java
index 5fd2f7f..51070e2 100644
--- a/fe/src/test/java/org/apache/impala/catalog/FileMetadataLoaderTest.java
+++ b/fe/src/test/java/org/apache/impala/catalog/FileMetadataLoaderTest.java
@@ -152,6 +152,7 @@ public class FileMetadataLoaderTest {
     ValidWriteIdList writeIds = MetastoreShim.getValidWriteIdListFromString(
         "functional_orc_def.complextypestbl_minor_compacted:10:10::");
     Path tablePath = new Path("hdfs://localhost:20500/test-warehouse/managed/" +
+                              "functional_orc_def.db/" +
                               "complextypestbl_minor_compacted_orc_def/");
     FileMetadataLoader fml = new FileMetadataLoader(tablePath, /* recursive=*/true,
         /* oldFds = */ Collections.emptyList(), hostIndex, new ValidReadTxnList(""),
diff --git a/testdata/bin/generate-schema-statements.py b/testdata/bin/generate-schema-statements.py
index 72c39f0..d0d715d 100755
--- a/testdata/bin/generate-schema-statements.py
+++ b/testdata/bin/generate-schema-statements.py
@@ -698,7 +698,8 @@ def generate_statements(output_name, test_vectors, sections,
         hdfs_location = hdfs_location.split('.')[-1]
       # Transactional tables need to be put under the 'managed' directory.
       if is_transactional(tblproperties):
-        hdfs_location = os.path.join('managed', hdfs_location)
+        db_location = '{0}{1}.db'.format(db_name, db_suffix)
+        hdfs_location = os.path.join('managed', db_location, hdfs_location)
       data_path = os.path.join(options.hive_warehouse_dir, hdfs_location)
 
       output = impala_create
@@ -775,7 +776,7 @@ def generate_statements(output_name, test_vectors, sections,
       if not force_reload and hdfs_location in existing_tables:
         print 'HDFS path:', data_path, 'contains data. Data loading can be skipped.'
       else:
-        print 'HDFS path:', data_path, 'does not exists or is empty. Data will be loaded.'
+        print 'HDFS path:', data_path, 'does not exist or is empty. Data will be loaded.'
         if not db_suffix:
           if load:
             hive_output.load_base.append(build_load_statement(load, db_name,
diff --git a/testdata/workloads/functional-query/queries/QueryTest/acid.test b/testdata/workloads/functional-query/queries/QueryTest/acid.test
index 9d912c4..1b27ee4 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/acid.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/acid.test
@@ -117,7 +117,7 @@ show files in functional_orc_def.complextypestbl_minor_compacted;
 ---- LABELS
 Path,Size,Partition
 ---- RESULTS
-row_regex:'$NAMENODE/test-warehouse/managed/complextypestbl_minor_compacted_orc_def/delta_0000001_0000008_v\d+/bucket_00000','.+KB',''
+row_regex:'$NAMENODE/test-warehouse/managed/functional_orc_def.db/complextypestbl_minor_compacted_orc_def/delta_0000001_0000008_v\d+/bucket_00000','.+KB',''
 ---- TYPES
 STRING,STRING,STRING
 ====
diff --git a/tests/metadata/test_ddl.py b/tests/metadata/test_ddl.py
index 739ba4f..34c2f76 100644
--- a/tests/metadata/test_ddl.py
+++ b/tests/metadata/test_ddl.py
@@ -308,7 +308,8 @@ class TestDdlStatements(TestDdlBase):
   @SkipIfGCS.hive
   @UniqueDatabase.parametrize(sync_ddl=True)
   def test_create_table_like_file_orc(self, vector, unique_database):
-    COMPLEXTYPETBL_PATH = 'test-warehouse/managed/complextypestbl_orc_def/'
+    COMPLEXTYPETBL_PATH = 'test-warehouse/managed/functional_orc_def.db/' \
+                          'complextypestbl_orc_def/'
     base_dir = filter(lambda s: s.startswith('base'),
       self.filesystem_client.ls(COMPLEXTYPETBL_PATH))[0]
     bucket_file = filter(lambda s: s.startswith('bucket'),