You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2023/12/17 22:39:21 UTC

(impala) 01/03: IMPALA-12629: Fix Iceberg V2 tests in non-HDFS environment

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 1141a6a80c45bef2d1a354301e20e6ac63aa9e97
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
AuthorDate: Thu Dec 14 14:59:17 2023 +0100

    IMPALA-12629: Fix Iceberg V2 tests in non-HDFS environment
    
    The UPDATE part 2 patch (https://gerrit.cloudera.org/#/c/20677/)
    added new ranger tests that verify that row filtering and column
    masking are working on Iceberg V2 tables with delete files.
    
    The Iceberg V2 tables we load during data loading are already written
    and the position delete files have hard-coded file URIs that refer
    to the data files. Unfortunately, these URIs start with 'hdfs://...'.
    Therefore, these tables cannot be used in a non-HDFS environment.
    
    To quickly fix this, this patch introduces the 'IS_HDFS_ONLY' test
    section. Tests that are annotated with it are only executed when
    the underlying storage system is HDFS. This new test section can
    also be used in the future to make it easier to write Impala/Hive interop
    tests, because currently we can only run Hive in an HDFS environment.
    So we could just annotate the tests that have HIVE_QUERY with
    IS_HDFS_ONLY, and we could also add a HIVE_RESULTS section to check
    the results Hive produces.
    
    Testing:
     * Ran the tests with the '-s' option in an HDFS environment and verified
       that the IS_HDFS_ONLY tests are executed
     * Ran the ranger tests in an Ozone environment and they passed
    
    Change-Id: Ifa72041e281e3d5083adaee2f8a41bb1c2c8d19f
    Reviewed-on: http://gerrit.cloudera.org:8080/20803
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../functional-query/queries/QueryTest/ranger_column_masking.test      | 1 +
 .../functional-query/queries/QueryTest/ranger_row_filtering.test       | 1 +
 tests/common/impala_test_suite.py                                      | 3 +++
 tests/util/test_file_parser.py                                         | 2 +-
 4 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking.test b/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking.test
index dfa9cbf10..e6a766ac2 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking.test
@@ -714,6 +714,7 @@ select id, bigint_col from functional.alltypesagg order by id limit 10
 ---- TYPES
 INT,BIGINT
 ====
+---- IS_HDFS_ONLY
 ---- QUERY
 select * from functional_parquet.iceberg_v2_delete_positional;
 ---- RESULTS
diff --git a/testdata/workloads/functional-query/queries/QueryTest/ranger_row_filtering.test b/testdata/workloads/functional-query/queries/QueryTest/ranger_row_filtering.test
index 0b23fecb8..0558d28c8 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/ranger_row_filtering.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/ranger_row_filtering.test
@@ -593,6 +593,7 @@ select id, b.item from functional_parquet.complextypestbl t, t.nested_struct.b
 ---- TYPES
 BIGINT,INT
 ====
+---- IS_HDFS_ONLY
 ---- QUERY
 select * from functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files;
 ---- RESULTS
diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py
index 4530aacfa..3fa18055f 100644
--- a/tests/common/impala_test_suite.py
+++ b/tests/common/impala_test_suite.py
@@ -688,6 +688,9 @@ class ImpalaTestSuite(BaseTestSuite):
         if needed_hive_major_version != HIVE_MAJOR_VERSION:
           continue
 
+      if 'IS_HDFS_ONLY' in test_section and not IS_HDFS:
+        continue
+
       if 'SHELL' in test_section:
         assert len(test_section) == 1, \
             "SHELL test sections can't contain other sections"
diff --git a/tests/util/test_file_parser.py b/tests/util/test_file_parser.py
index 3fe347f7d..a5ddb9670 100644
--- a/tests/util/test_file_parser.py
+++ b/tests/util/test_file_parser.py
@@ -98,7 +98,7 @@ def parse_query_test_file(file_name, valid_section_names=None, encoding=None):
   if section_names is None:
     section_names = ['QUERY', 'HIVE_QUERY', 'RESULTS', 'TYPES', 'LABELS', 'SETUP',
         'CATCH', 'ERRORS', 'USER', 'RUNTIME_PROFILE', 'SHELL', 'DML_RESULTS',
-        'HS2_TYPES', 'HIVE_MAJOR_VERSION', 'LINEAGE']
+        'HS2_TYPES', 'HIVE_MAJOR_VERSION', 'LINEAGE', 'IS_HDFS_ONLY']
   return parse_test_file(file_name, section_names, encoding=encoding,
       skip_unknown_sections=False)