You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by wz...@apache.org on 2022/12/22 02:33:08 UTC
[impala] branch master updated: IMPALA-11806: Fix TestIcebergTable.test_load E2E test
This is an automated email from the ASF dual-hosted git repository.
wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 6ff99431a IMPALA-11806: Fix TestIcebergTable.test_load E2E test
6ff99431a is described below
commit 6ff99431a6eab1841f1bf46b923ad4664772e185
Author: Tamas Mate <tm...@apache.org>
AuthorDate: Wed Dec 21 13:00:58 2022 +0100
IMPALA-11806: Fix TestIcebergTable.test_load E2E test
The test had a flaky part: it referred to a directory whose name was
randomly generated. Removed the reference to this directory.
The test was failing on filesystems other than HDFS due to the
hdfs_client dependency; replaced the hdfs_client calls with
filesystem_client calls.
Testing:
- Executed the test locally (HDFS/Minicluster)
- Triggered an Ozone build to verify it with different FS
Change-Id: Id95523949aab7dc2417a3d06cf780d3de2e44ee3
Reviewed-on: http://gerrit.cloudera.org:8080/19385
Reviewed-by: Tamas Mate <tm...@apache.org>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
.../queries/QueryTest/iceberg-load.test | 2 +-
tests/query_test/test_iceberg.py | 27 +++++++++++-----------
2 files changed, 14 insertions(+), 15 deletions(-)
diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-load.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-load.test
index a0c87aa65..c5ad08e6a 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-load.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-load.test
@@ -118,7 +118,7 @@ stored as iceberg;
---- QUERY
load data inpath '/tmp/$DATABASE/mismatching_schema/' overwrite into table test_iceberg_load_schema_mismatch;
---- CATCH
-AnalysisException: Target table 'test_load_a61184e9.test_iceberg_load_schema_mismatch' has fewer columns (1) than the SELECT / VALUES clause returns (4)
+row_regex:.*AnalysisException: Target table .* has fewer columns \(1\) than the SELECT / VALUES clause returns \(4\).*
====
---- QUERY
# Test 9: Partitioned Iceberg table
diff --git a/tests/query_test/test_iceberg.py b/tests/query_test/test_iceberg.py
index 1a8b8188a..fe41d2487 100644
--- a/tests/query_test/test_iceberg.py
+++ b/tests/query_test/test_iceberg.py
@@ -790,7 +790,6 @@ class TestIcebergTable(IcebergTestSuite):
self.run_test_case('QueryTest/iceberg-mixed-file-format', vector,
unique_database)
- @SkipIfLocal.hdfs_client
def test_load(self, vector, unique_database):
"""Test LOAD DATA INPATH for Iceberg tables, the first part of this method inits the
target directory, copies existing test data to HDFS. The second part runs the test
@@ -800,36 +799,36 @@ class TestIcebergTable(IcebergTestSuite):
SRC_DIR = os.path.join(os.environ['IMPALA_HOME'],
"testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/{0}")
DST_DIR = "/tmp/" + unique_database + "/parquet/"
- self.hdfs_client.make_dir(DST_DIR, permission=777)
+ self.filesystem_client.make_dir(DST_DIR, permission=777)
file_parq1 = "00000-0-data-gfurnstahl_20220906113044_157fc172-f5d3-4c70-8653-" \
"fff150b6136a-job_16619542960420_0002-1-00001.parquet"
file_parq2 = "00000-0-data-gfurnstahl_20220906114830_907f72c7-36ac-4135-8315-" \
"27ff880faff0-job_16619542960420_0004-1-00001.parquet"
- self.hdfs_client.copy_from_local(SRC_DIR.format(file_parq1), DST_DIR)
- self.hdfs_client.copy_from_local(SRC_DIR.format(file_parq2), DST_DIR)
+ self.filesystem_client.copy_from_local(SRC_DIR.format(file_parq1), DST_DIR)
+ self.filesystem_client.copy_from_local(SRC_DIR.format(file_parq2), DST_DIR)
DST_DIR = "/tmp/" + unique_database + "/orc/"
- self.hdfs_client.make_dir(DST_DIR, permission=777)
+ self.filesystem_client.make_dir(DST_DIR, permission=777)
file_orc1 = "00000-0-data-gfurnstahl_20220906113255_8d49367d-e338-4996-ade5-" \
"ee500a19c1d1-job_16619542960420_0003-1-00001.orc"
file_orc2 = "00000-0-data-gfurnstahl_20220906114900_9c1b7b46-5643-428f-a007-" \
"519c5500ed04-job_16619542960420_0004-1-00001.orc"
- self.hdfs_client.copy_from_local(SRC_DIR.format(file_orc1), DST_DIR)
- self.hdfs_client.copy_from_local(SRC_DIR.format(file_orc2), DST_DIR)
+ self.filesystem_client.copy_from_local(SRC_DIR.format(file_orc1), DST_DIR)
+ self.filesystem_client.copy_from_local(SRC_DIR.format(file_orc2), DST_DIR)
# Test 7 init: overwrite
DST_DIR = "/tmp/" + unique_database + "/overwrite/"
- self.hdfs_client.make_dir(DST_DIR, permission=777)
- self.hdfs_client.copy_from_local(SRC_DIR.format(file_parq1), DST_DIR)
+ self.filesystem_client.make_dir(DST_DIR, permission=777)
+ self.filesystem_client.copy_from_local(SRC_DIR.format(file_parq1), DST_DIR)
# Test 8 init: mismatching parquet schema format
SRC_DIR = os.path.join(os.environ['IMPALA_HOME'], "testdata/data/iceberg_test/"
"iceberg_partitioned/data/event_time_hour=2020-01-01-08/action=view/{0}")
DST_DIR = "/tmp/" + unique_database + "/mismatching_schema/"
- self.hdfs_client.make_dir(DST_DIR, permission=777)
+ self.filesystem_client.make_dir(DST_DIR, permission=777)
file = "00001-1-b975a171-0911-47c2-90c8-300f23c28772-00000.parquet"
- self.hdfs_client.copy_from_local(SRC_DIR.format(file), DST_DIR)
+ self.filesystem_client.copy_from_local(SRC_DIR.format(file), DST_DIR)
# Test 9 init: partitioned
DST_DIR = "/tmp/" + unique_database + "/partitioned/"
- self.hdfs_client.make_dir(DST_DIR, permission=777)
- self.hdfs_client.copy_from_local(SRC_DIR.format(file), DST_DIR)
+ self.filesystem_client.make_dir(DST_DIR, permission=777)
+ self.filesystem_client.copy_from_local(SRC_DIR.format(file), DST_DIR)
# Init test table
create_iceberg_table_from_directory(self.client, unique_database,
@@ -838,7 +837,7 @@ class TestIcebergTable(IcebergTestSuite):
# Execute tests
self.run_test_case('QueryTest/iceberg-load', vector, use_db=unique_database)
# Clean up temporary directory
- self.hdfs_client.delete_file_dir("/tmp/{0}".format(unique_database), True)
+ self.filesystem_client.delete_file_dir("/tmp/{0}".format(unique_database), True)
def test_table_sampling(self, vector):
self.run_test_case('QueryTest/iceberg-tablesample', vector,