You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2020/04/13 16:41:58 UTC
[impala] 02/04: Put transactional tables into 'managed' directory
This is an automated email from the ASF dual-hosted git repository.
tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit b770d2d378d642dcc1bdda733c99cc80ca239dc3
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
AuthorDate: Fri Apr 10 12:22:58 2020 +0200
Put transactional tables into 'managed' directory
HIVE-22794 disallows ACID tables outside of the 'managed' warehouse
directory. This change updates data loading so that transactional tables
are created under the 'managed' directory, conforming to the new rule.
The following tests had to be modified to use the new paths:
* AnalyzeDDLTest.TestCreateTableLikeFileOrc()
* create-table-like-file-orc.test
Change-Id: Id3b65f56bf7f225b1d29aa397f987fdd7eb7176c
Reviewed-on: http://gerrit.cloudera.org:8080/15708
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
.../org/apache/impala/analysis/AnalyzeDDLTest.java | 4 +--
testdata/bin/generate-schema-statements.py | 35 +++++++++++-----------
.../functional/functional_schema_template.sql | 6 ++++
.../QueryTest/create-table-like-file-orc.test | 23 ++------------
4 files changed, 28 insertions(+), 40 deletions(-)
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
index 7b2709a..b5b65fe 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
@@ -2036,10 +2036,10 @@ public class AnalyzeDDLTest extends FrontendTestBase {
// Inferring primitive and complex types
AnalyzesOk("create table if not exists newtbl_DNE like orc " +
- "'/test-warehouse/alltypestiny_orc_def/year=2009/month=1/" +
+ "'/test-warehouse/managed/alltypestiny_orc_def/year=2009/month=1/" +
"base_0000001/bucket_00000_0'");
AnalyzesOk("create table if not exists newtbl_DNE like orc " +
- "'/test-warehouse/complextypestbl_orc_def/base_0000001/bucket_00000_0'");
+ "'/test-warehouse/managed/complextypestbl_orc_def/base_0000001/bucket_00000_0'");
// check invalid paths
AnalysisError("create table if not exists functional.zipcode_incomes like ORC " +
diff --git a/testdata/bin/generate-schema-statements.py b/testdata/bin/generate-schema-statements.py
index 8113498..590445c 100755
--- a/testdata/bin/generate-schema-statements.py
+++ b/testdata/bin/generate-schema-statements.py
@@ -315,13 +315,6 @@ def build_table_template(file_format, columns, partition_columns, row_format,
# Kudu's test tables are managed.
external = ""
- # ORC tables are full ACID by default.
- if (HIVE_MAJOR_VERSION == 3 and
- file_format == 'orc' and
- 'transactional' not in tblproperties):
- external = ""
- tblproperties['transactional'] = 'true'
-
all_tblproperties = []
for key, value in tblproperties.iteritems():
all_tblproperties.append("'{0}' = '{1}'".format(key, value))
@@ -655,15 +648,6 @@ def generate_statements(output_name, test_vectors, sections,
force_reload = options.force_reload or (partition_columns and not alter) or \
file_format == 'kudu'
- hdfs_location = '{0}.{1}{2}'.format(db_name, table_name, db_suffix)
- # hdfs file names for functional datasets are stored
- # directly under /test-warehouse
- # TODO: We should not need to specify the hdfs file path in the schema file.
- # This needs to be done programmatically.
- if data_set == 'functional':
- hdfs_location = hdfs_location.split('.')[-1]
- data_path = os.path.join(options.hive_warehouse_dir, hdfs_location)
-
# Empty tables (tables with no "LOAD" sections) are assumed to be used for insert
# testing. Since Impala currently only supports inserting into TEXT, PARQUET and
# HBASE we need to create these tables with a supported insert format.
@@ -677,6 +661,23 @@ def generate_statements(output_name, test_vectors, sections,
create_file_format = 'text'
tblproperties = parse_table_properties(create_file_format, table_properties)
+ # ORC tables are full ACID by default.
+ if (HIVE_MAJOR_VERSION == 3 and
+ create_file_format == 'orc' and
+ 'transactional' not in tblproperties):
+ tblproperties['transactional'] = 'true'
+
+ hdfs_location = '{0}.{1}{2}'.format(db_name, table_name, db_suffix)
+ # hdfs file names for functional datasets are stored
+ # directly under /test-warehouse
+ # TODO: We should not need to specify the hdfs file path in the schema file.
+ # This needs to be done programmatically.
+ if data_set == 'functional':
+ hdfs_location = hdfs_location.split('.')[-1]
+ # Transactional tables need to be put under the 'managed' directory.
+ if is_transactional(tblproperties):
+ hdfs_location = os.path.join('managed', hdfs_location)
+ data_path = os.path.join(options.hive_warehouse_dir, hdfs_location)
output = impala_create
if create_hive or file_format == 'hbase':
@@ -684,8 +685,6 @@ def generate_statements(output_name, test_vectors, sections,
elif codec == 'lzo':
# Impala CREATE TABLE doesn't allow INPUTFORMAT.
output = hive_output
- elif is_transactional(tblproperties):
- output = hive_output
# TODO: Currently, Kudu does not support partitioned tables via Impala.
# If a CREATE_KUDU section was provided, assume it handles the partition columns
diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql
index 6cf0b4b..e3dc3da 100644
--- a/testdata/datasets/functional/functional_schema_template.sql
+++ b/testdata/datasets/functional/functional_schema_template.sql
@@ -332,6 +332,8 @@ LOCATION '{hdfs_location}';
ALTER TABLE {table_name}_tmp ADD IF NOT EXISTS PARTITION (year=2009, month=1);
ALTER TABLE {table_name}_tmp ADD IF NOT EXISTS PARTITION (year=2009, month=2);
ALTER TABLE {table_name}_tmp ADD IF NOT EXISTS PARTITION (year=2009, month=3);
+---- TABLE_PROPERTIES
+transactional=false
---- DEPENDENT_LOAD
USE {db_name}{db_suffix};
-- Step 4: Stream the data from tmp text table to desired format tmp table
@@ -458,6 +460,8 @@ USE {db_name}{db_suffix};
ALTER TABLE {table_name}_tmp ADD IF NOT EXISTS PARTITION (year=2009, month=1);
ALTER TABLE {table_name}_tmp ADD IF NOT EXISTS PARTITION (year=2009, month=2);
ALTER TABLE {table_name}_tmp ADD IF NOT EXISTS PARTITION (year=2009, month=3);
+---- TABLE_PROPERTIES
+transactional=false
---- DEPENDENT_LOAD
USE {db_name}{db_suffix};
-- Step 4: Stream the data from tmp text table to desired format tmp table
@@ -1526,6 +1530,8 @@ CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS {file_format}
LOCATION '{hdfs_location}';
+---- TABLE_PROPERTIES
+transactional=false
---- DEPENDENT_LOAD
INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name};
---- LOAD
diff --git a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file-orc.test b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file-orc.test
index 5bbd7c1..3d36299 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file-orc.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file-orc.test
@@ -23,7 +23,7 @@ STRING, STRING, STRING
====
---- QUERY
create table $DATABASE.temp_chars_table like ORC
-'$FILESYSTEM_PREFIX/test-warehouse/chars_tiny_orc_def/base_0000001/bucket_00000_0'
+'$NAMENODE/$MANAGED_WAREHOUSE_DIR/chars_tiny_orc_def/base_0000001/bucket_00000_0'
---- RESULTS
'Table has been created.'
====
@@ -115,9 +115,8 @@ STRING, STRING, STRING
====
---- QUERY
create external table transactional_complextypes_clone like ORC
-'$FILESYSTEM_PREFIX/test-warehouse/complextypestbl_orc_def/base_0000001/bucket_00000_0'
-stored as orc
-location '$FILESYSTEM_PREFIX/test-warehouse/complextypestbl_orc_def/';
+'$NAMENODE/$MANAGED_WAREHOUSE_DIR/complextypestbl_orc_def/base_0000001/bucket_00000_0'
+stored as orc;
---- RESULTS
'Table has been created.'
====
@@ -133,19 +132,3 @@ describe transactional_complextypes_clone
---- TYPES
STRING, STRING, STRING
====
----- QUERY
-select originaltransaction, rowid, `row`.id from transactional_complextypes_clone;
----- LABELS
-originaltransaction, rowid, row.id
----- RESULTS
-1,0,8
-1,0,1
-1,1,2
-1,2,3
-1,3,4
-1,4,5
-1,5,6
-1,6,7
----- TYPES
-BIGINT, BIGINT, BIGINT
-====