You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by wz...@apache.org on 2022/12/22 02:33:08 UTC

[impala] branch master updated: IMPALA-11806: Fix TestIcebergTable.test_load E2E test

This is an automated email from the ASF dual-hosted git repository.

wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 6ff99431a IMPALA-11806: Fix TestIcebergTable.test_load E2E test
6ff99431a is described below

commit 6ff99431a6eab1841f1bf46b923ad4664772e185
Author: Tamas Mate <tm...@apache.org>
AuthorDate: Wed Dec 21 13:00:58 2022 +0100

    IMPALA-11806: Fix TestIcebergTable.test_load E2E test
    
    The test had a flaky part: it referred to a directory whose name was
    randomly generated. Removed the reference to this directory.
    
    The test was failing on filesystems other than HDFS due to the
    hdfs_client dependency; replaced the hdfs_client calls with
    filesystem_client calls instead.
    
    Testing:
     - Executed the test locally (HDFS/Minicluster)
     - Triggered an Ozone build to verify it with different FS
    
    Change-Id: Id95523949aab7dc2417a3d06cf780d3de2e44ee3
    Reviewed-on: http://gerrit.cloudera.org:8080/19385
    Reviewed-by: Tamas Mate <tm...@apache.org>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../queries/QueryTest/iceberg-load.test            |  2 +-
 tests/query_test/test_iceberg.py                   | 27 +++++++++++-----------
 2 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-load.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-load.test
index a0c87aa65..c5ad08e6a 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-load.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-load.test
@@ -118,7 +118,7 @@ stored as iceberg;
 ---- QUERY
 load data inpath '/tmp/$DATABASE/mismatching_schema/' overwrite into table test_iceberg_load_schema_mismatch;
 ---- CATCH
-AnalysisException: Target table 'test_load_a61184e9.test_iceberg_load_schema_mismatch' has fewer columns (1) than the SELECT / VALUES clause returns (4)
+row_regex:.*AnalysisException: Target table .* has fewer columns \(1\) than the SELECT / VALUES clause returns \(4\).*
 ====
 ---- QUERY
 # Test 9: Partitioned Iceberg table
diff --git a/tests/query_test/test_iceberg.py b/tests/query_test/test_iceberg.py
index 1a8b8188a..fe41d2487 100644
--- a/tests/query_test/test_iceberg.py
+++ b/tests/query_test/test_iceberg.py
@@ -790,7 +790,6 @@ class TestIcebergTable(IcebergTestSuite):
     self.run_test_case('QueryTest/iceberg-mixed-file-format', vector,
                       unique_database)
 
-  @SkipIfLocal.hdfs_client
   def test_load(self, vector, unique_database):
     """Test LOAD DATA INPATH for Iceberg tables, the first part of this method inits the
     target directory, copies existing test data to HDFS. The second part runs the test
@@ -800,36 +799,36 @@ class TestIcebergTable(IcebergTestSuite):
     SRC_DIR = os.path.join(os.environ['IMPALA_HOME'],
         "testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/{0}")
     DST_DIR = "/tmp/" + unique_database + "/parquet/"
-    self.hdfs_client.make_dir(DST_DIR, permission=777)
+    self.filesystem_client.make_dir(DST_DIR, permission=777)
     file_parq1 = "00000-0-data-gfurnstahl_20220906113044_157fc172-f5d3-4c70-8653-" \
         "fff150b6136a-job_16619542960420_0002-1-00001.parquet"
     file_parq2 = "00000-0-data-gfurnstahl_20220906114830_907f72c7-36ac-4135-8315-" \
         "27ff880faff0-job_16619542960420_0004-1-00001.parquet"
-    self.hdfs_client.copy_from_local(SRC_DIR.format(file_parq1), DST_DIR)
-    self.hdfs_client.copy_from_local(SRC_DIR.format(file_parq2), DST_DIR)
+    self.filesystem_client.copy_from_local(SRC_DIR.format(file_parq1), DST_DIR)
+    self.filesystem_client.copy_from_local(SRC_DIR.format(file_parq2), DST_DIR)
     DST_DIR = "/tmp/" + unique_database + "/orc/"
-    self.hdfs_client.make_dir(DST_DIR, permission=777)
+    self.filesystem_client.make_dir(DST_DIR, permission=777)
     file_orc1 = "00000-0-data-gfurnstahl_20220906113255_8d49367d-e338-4996-ade5-" \
         "ee500a19c1d1-job_16619542960420_0003-1-00001.orc"
     file_orc2 = "00000-0-data-gfurnstahl_20220906114900_9c1b7b46-5643-428f-a007-" \
         "519c5500ed04-job_16619542960420_0004-1-00001.orc"
-    self.hdfs_client.copy_from_local(SRC_DIR.format(file_orc1), DST_DIR)
-    self.hdfs_client.copy_from_local(SRC_DIR.format(file_orc2), DST_DIR)
+    self.filesystem_client.copy_from_local(SRC_DIR.format(file_orc1), DST_DIR)
+    self.filesystem_client.copy_from_local(SRC_DIR.format(file_orc2), DST_DIR)
     # Test 7 init: overwrite
     DST_DIR = "/tmp/" + unique_database + "/overwrite/"
-    self.hdfs_client.make_dir(DST_DIR, permission=777)
-    self.hdfs_client.copy_from_local(SRC_DIR.format(file_parq1), DST_DIR)
+    self.filesystem_client.make_dir(DST_DIR, permission=777)
+    self.filesystem_client.copy_from_local(SRC_DIR.format(file_parq1), DST_DIR)
     # Test 8 init: mismatching parquet schema format
     SRC_DIR = os.path.join(os.environ['IMPALA_HOME'], "testdata/data/iceberg_test/"
         "iceberg_partitioned/data/event_time_hour=2020-01-01-08/action=view/{0}")
     DST_DIR = "/tmp/" + unique_database + "/mismatching_schema/"
-    self.hdfs_client.make_dir(DST_DIR, permission=777)
+    self.filesystem_client.make_dir(DST_DIR, permission=777)
     file = "00001-1-b975a171-0911-47c2-90c8-300f23c28772-00000.parquet"
-    self.hdfs_client.copy_from_local(SRC_DIR.format(file), DST_DIR)
+    self.filesystem_client.copy_from_local(SRC_DIR.format(file), DST_DIR)
     # Test 9 init: partitioned
     DST_DIR = "/tmp/" + unique_database + "/partitioned/"
-    self.hdfs_client.make_dir(DST_DIR, permission=777)
-    self.hdfs_client.copy_from_local(SRC_DIR.format(file), DST_DIR)
+    self.filesystem_client.make_dir(DST_DIR, permission=777)
+    self.filesystem_client.copy_from_local(SRC_DIR.format(file), DST_DIR)
 
     # Init test table
     create_iceberg_table_from_directory(self.client, unique_database,
@@ -838,7 +837,7 @@ class TestIcebergTable(IcebergTestSuite):
     # Execute tests
     self.run_test_case('QueryTest/iceberg-load', vector, use_db=unique_database)
     # Clean up temporary directory
-    self.hdfs_client.delete_file_dir("/tmp/{0}".format(unique_database), True)
+    self.filesystem_client.delete_file_dir("/tmp/{0}".format(unique_database), True)
 
   def test_table_sampling(self, vector):
     self.run_test_case('QueryTest/iceberg-tablesample', vector,