You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by as...@apache.org on 2021/11/23 05:23:00 UTC
[impala] 03/03: IMPALA-11025: Transactional tables should use /test-warehouse/managed/databasename.db
This is an automated email from the ASF dual-hosted git repository.
asherman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit ee03727971f379fe8fb3161387783ce00a9f6b9d
Author: Andrew Sherman <as...@cloudera.com>
AuthorDate: Fri Nov 19 10:33:02 2021 -0800
IMPALA-11025: Transactional tables should use /test-warehouse/managed/databasename.db
Recent Hive releases appear to enforce that data for a managed table
is stored under the directory named by the hive.metastore.warehouse.dir
property, in a path of the form databasename.db/tablename - see
https://cwiki.apache.org/confluence/display/Hive/Managed+vs.+External+Tables
Use the form /test-warehouse/managed/databasename.db in
generate-schema-statements.py when creating transactional tables.
Testing:
- A few small changes to tests that verify filesystem changes for ACID
tables.
- Exhaustive tests pass.
Change-Id: Ib870ca802c9fa180e6be7a6f65bef35b227772db
Reviewed-on: http://gerrit.cloudera.org:8080/18046
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
.../test/java/org/apache/impala/catalog/FileMetadataLoaderTest.java | 1 +
testdata/bin/generate-schema-statements.py | 5 +++--
testdata/workloads/functional-query/queries/QueryTest/acid.test | 2 +-
tests/metadata/test_ddl.py | 3 ++-
4 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/fe/src/test/java/org/apache/impala/catalog/FileMetadataLoaderTest.java b/fe/src/test/java/org/apache/impala/catalog/FileMetadataLoaderTest.java
index 5fd2f7f..51070e2 100644
--- a/fe/src/test/java/org/apache/impala/catalog/FileMetadataLoaderTest.java
+++ b/fe/src/test/java/org/apache/impala/catalog/FileMetadataLoaderTest.java
@@ -152,6 +152,7 @@ public class FileMetadataLoaderTest {
ValidWriteIdList writeIds = MetastoreShim.getValidWriteIdListFromString(
"functional_orc_def.complextypestbl_minor_compacted:10:10::");
Path tablePath = new Path("hdfs://localhost:20500/test-warehouse/managed/" +
+ "functional_orc_def.db/" +
"complextypestbl_minor_compacted_orc_def/");
FileMetadataLoader fml = new FileMetadataLoader(tablePath, /* recursive=*/true,
/* oldFds = */ Collections.emptyList(), hostIndex, new ValidReadTxnList(""),
diff --git a/testdata/bin/generate-schema-statements.py b/testdata/bin/generate-schema-statements.py
index 72c39f0..d0d715d 100755
--- a/testdata/bin/generate-schema-statements.py
+++ b/testdata/bin/generate-schema-statements.py
@@ -698,7 +698,8 @@ def generate_statements(output_name, test_vectors, sections,
hdfs_location = hdfs_location.split('.')[-1]
# Transactional tables need to be put under the 'managed' directory.
if is_transactional(tblproperties):
- hdfs_location = os.path.join('managed', hdfs_location)
+ db_location = '{0}{1}.db'.format(db_name, db_suffix)
+ hdfs_location = os.path.join('managed', db_location, hdfs_location)
data_path = os.path.join(options.hive_warehouse_dir, hdfs_location)
output = impala_create
@@ -775,7 +776,7 @@ def generate_statements(output_name, test_vectors, sections,
if not force_reload and hdfs_location in existing_tables:
print 'HDFS path:', data_path, 'contains data. Data loading can be skipped.'
else:
- print 'HDFS path:', data_path, 'does not exists or is empty. Data will be loaded.'
+ print 'HDFS path:', data_path, 'does not exist or is empty. Data will be loaded.'
if not db_suffix:
if load:
hive_output.load_base.append(build_load_statement(load, db_name,
diff --git a/testdata/workloads/functional-query/queries/QueryTest/acid.test b/testdata/workloads/functional-query/queries/QueryTest/acid.test
index 9d912c4..1b27ee4 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/acid.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/acid.test
@@ -117,7 +117,7 @@ show files in functional_orc_def.complextypestbl_minor_compacted;
---- LABELS
Path,Size,Partition
---- RESULTS
-row_regex:'$NAMENODE/test-warehouse/managed/complextypestbl_minor_compacted_orc_def/delta_0000001_0000008_v\d+/bucket_00000','.+KB',''
+row_regex:'$NAMENODE/test-warehouse/managed/functional_orc_def.db/complextypestbl_minor_compacted_orc_def/delta_0000001_0000008_v\d+/bucket_00000','.+KB',''
---- TYPES
STRING,STRING,STRING
====
diff --git a/tests/metadata/test_ddl.py b/tests/metadata/test_ddl.py
index 739ba4f..34c2f76 100644
--- a/tests/metadata/test_ddl.py
+++ b/tests/metadata/test_ddl.py
@@ -308,7 +308,8 @@ class TestDdlStatements(TestDdlBase):
@SkipIfGCS.hive
@UniqueDatabase.parametrize(sync_ddl=True)
def test_create_table_like_file_orc(self, vector, unique_database):
- COMPLEXTYPETBL_PATH = 'test-warehouse/managed/complextypestbl_orc_def/'
+ COMPLEXTYPETBL_PATH = 'test-warehouse/managed/functional_orc_def.db/' \
+ 'complextypestbl_orc_def/'
base_dir = filter(lambda s: s.startswith('base'),
self.filesystem_client.ls(COMPLEXTYPETBL_PATH))[0]
bucket_file = filter(lambda s: s.startswith('bucket'),