You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2020/04/13 16:41:58 UTC

[impala] 02/04: Put transactional tables into 'managed' directory

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit b770d2d378d642dcc1bdda733c99cc80ca239dc3
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
AuthorDate: Fri Apr 10 12:22:58 2020 +0200

    Put transactional tables into 'managed' directory
    
    HIVE-22794 disallows ACID tables outside of the 'managed' warehouse
    directory. This change updates data loading to make it conform to
    the new rules.
    
    The following tests had to be modified to use the new paths:
    * AnalyzeDDLTest.TestCreateTableLikeFileOrc()
    * create-table-like-file-orc.test
    
    Change-Id: Id3b65f56bf7f225b1d29aa397f987fdd7eb7176c
    Reviewed-on: http://gerrit.cloudera.org:8080/15708
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../org/apache/impala/analysis/AnalyzeDDLTest.java |  4 +--
 testdata/bin/generate-schema-statements.py         | 35 +++++++++++-----------
 .../functional/functional_schema_template.sql      |  6 ++++
 .../QueryTest/create-table-like-file-orc.test      | 23 ++------------
 4 files changed, 28 insertions(+), 40 deletions(-)

diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
index 7b2709a..b5b65fe 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
@@ -2036,10 +2036,10 @@ public class AnalyzeDDLTest extends FrontendTestBase {
 
     // Inferring primitive and complex types
     AnalyzesOk("create table if not exists newtbl_DNE like orc " +
-        "'/test-warehouse/alltypestiny_orc_def/year=2009/month=1/" +
+        "'/test-warehouse/managed/alltypestiny_orc_def/year=2009/month=1/" +
         "base_0000001/bucket_00000_0'");
     AnalyzesOk("create table if not exists newtbl_DNE like orc " +
-        "'/test-warehouse/complextypestbl_orc_def/base_0000001/bucket_00000_0'");
+        "'/test-warehouse/managed/complextypestbl_orc_def/base_0000001/bucket_00000_0'");
 
     // check invalid paths
     AnalysisError("create table if not exists functional.zipcode_incomes like ORC " +
diff --git a/testdata/bin/generate-schema-statements.py b/testdata/bin/generate-schema-statements.py
index 8113498..590445c 100755
--- a/testdata/bin/generate-schema-statements.py
+++ b/testdata/bin/generate-schema-statements.py
@@ -315,13 +315,6 @@ def build_table_template(file_format, columns, partition_columns, row_format,
     # Kudu's test tables are managed.
     external = ""
 
-  # ORC tables are full ACID by default.
-  if (HIVE_MAJOR_VERSION == 3 and
-      file_format == 'orc' and
-      'transactional' not in tblproperties):
-    external = ""
-    tblproperties['transactional'] = 'true'
-
   all_tblproperties = []
   for key, value in tblproperties.iteritems():
     all_tblproperties.append("'{0}' = '{1}'".format(key, value))
@@ -655,15 +648,6 @@ def generate_statements(output_name, test_vectors, sections,
       force_reload = options.force_reload or (partition_columns and not alter) or \
           file_format == 'kudu'
 
-      hdfs_location = '{0}.{1}{2}'.format(db_name, table_name, db_suffix)
-      # hdfs file names for functional datasets are stored
-      # directly under /test-warehouse
-      # TODO: We should not need to specify the hdfs file path in the schema file.
-      # This needs to be done programmatically.
-      if data_set == 'functional':
-        hdfs_location = hdfs_location.split('.')[-1]
-      data_path = os.path.join(options.hive_warehouse_dir, hdfs_location)
-
       # Empty tables (tables with no "LOAD" sections) are assumed to be used for insert
       # testing. Since Impala currently only supports inserting into TEXT, PARQUET and
       # HBASE we need to create these tables with a supported insert format.
@@ -677,6 +661,23 @@ def generate_statements(output_name, test_vectors, sections,
           create_file_format = 'text'
 
       tblproperties = parse_table_properties(create_file_format, table_properties)
+      # ORC tables are full ACID by default.
+      if (HIVE_MAJOR_VERSION == 3 and
+          create_file_format == 'orc' and
+          'transactional' not in tblproperties):
+        tblproperties['transactional'] = 'true'
+
+      hdfs_location = '{0}.{1}{2}'.format(db_name, table_name, db_suffix)
+      # hdfs file names for functional datasets are stored
+      # directly under /test-warehouse
+      # TODO: We should not need to specify the hdfs file path in the schema file.
+      # This needs to be done programmatically.
+      if data_set == 'functional':
+        hdfs_location = hdfs_location.split('.')[-1]
+      # Transactional tables need to be put under the 'managed' directory.
+      if is_transactional(tblproperties):
+        hdfs_location = os.path.join('managed', hdfs_location)
+      data_path = os.path.join(options.hive_warehouse_dir, hdfs_location)
 
       output = impala_create
       if create_hive or file_format == 'hbase':
@@ -684,8 +685,6 @@ def generate_statements(output_name, test_vectors, sections,
       elif codec == 'lzo':
         # Impala CREATE TABLE doesn't allow INPUTFORMAT.
         output = hive_output
-      elif is_transactional(tblproperties):
-        output = hive_output
 
       # TODO: Currently, Kudu does not support partitioned tables via Impala.
       # If a CREATE_KUDU section was provided, assume it handles the partition columns
diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql
index 6cf0b4b..e3dc3da 100644
--- a/testdata/datasets/functional/functional_schema_template.sql
+++ b/testdata/datasets/functional/functional_schema_template.sql
@@ -332,6 +332,8 @@ LOCATION '{hdfs_location}';
 ALTER TABLE {table_name}_tmp ADD IF NOT EXISTS PARTITION (year=2009, month=1);
 ALTER TABLE {table_name}_tmp ADD IF NOT EXISTS PARTITION (year=2009, month=2);
 ALTER TABLE {table_name}_tmp ADD IF NOT EXISTS PARTITION (year=2009, month=3);
+---- TABLE_PROPERTIES
+transactional=false
 ---- DEPENDENT_LOAD
 USE {db_name}{db_suffix};
 -- Step 4: Stream the data from tmp text table to desired format tmp table
@@ -458,6 +460,8 @@ USE {db_name}{db_suffix};
 ALTER TABLE {table_name}_tmp ADD IF NOT EXISTS PARTITION (year=2009, month=1);
 ALTER TABLE {table_name}_tmp ADD IF NOT EXISTS PARTITION (year=2009, month=2);
 ALTER TABLE {table_name}_tmp ADD IF NOT EXISTS PARTITION (year=2009, month=3);
+---- TABLE_PROPERTIES
+transactional=false
 ---- DEPENDENT_LOAD
 USE {db_name}{db_suffix};
 -- Step 4: Stream the data from tmp text table to desired format tmp table
@@ -1526,6 +1530,8 @@ CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
 ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
 STORED AS {file_format}
 LOCATION '{hdfs_location}';
+---- TABLE_PROPERTIES
+transactional=false
 ---- DEPENDENT_LOAD
 INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name};
 ---- LOAD
diff --git a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file-orc.test b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file-orc.test
index 5bbd7c1..3d36299 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file-orc.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file-orc.test
@@ -23,7 +23,7 @@ STRING, STRING, STRING
 ====
 ---- QUERY
 create table $DATABASE.temp_chars_table like ORC
-'$FILESYSTEM_PREFIX/test-warehouse/chars_tiny_orc_def/base_0000001/bucket_00000_0'
+'$NAMENODE/$MANAGED_WAREHOUSE_DIR/chars_tiny_orc_def/base_0000001/bucket_00000_0'
 ---- RESULTS
 'Table has been created.'
 ====
@@ -115,9 +115,8 @@ STRING, STRING, STRING
 ====
 ---- QUERY
 create external table transactional_complextypes_clone like ORC
-'$FILESYSTEM_PREFIX/test-warehouse/complextypestbl_orc_def/base_0000001/bucket_00000_0'
-stored as orc
-location '$FILESYSTEM_PREFIX/test-warehouse/complextypestbl_orc_def/';
+'$NAMENODE/$MANAGED_WAREHOUSE_DIR/complextypestbl_orc_def/base_0000001/bucket_00000_0'
+stored as orc;
 ---- RESULTS
 'Table has been created.'
 ====
@@ -133,19 +132,3 @@ describe transactional_complextypes_clone
 ---- TYPES
 STRING, STRING, STRING
 ====
----- QUERY
-select originaltransaction, rowid, `row`.id from transactional_complextypes_clone;
----- LABELS
-originaltransaction, rowid, row.id
----- RESULTS
-1,0,8
-1,0,1
-1,1,2
-1,2,3
-1,3,4
-1,4,5
-1,5,6
-1,6,7
----- TYPES
-BIGINT, BIGINT, BIGINT
-====