You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2024/01/16 01:01:23 UTC

(impala) branch master updated (6ddd69c60 -> e071e6f05)

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


    from 6ddd69c60 IMPALA-12665: Adjust complete_micro_batch_ length to new scratch_batch_->capacity after ScratchTupleBatch::Reset
     new 02d004a12 IMPALA-12698: Restrict check_deleted_file_fd() for fixing flaky tests
     new ad2e7ff6b IMPALA-12704: Fix NPE when querying empty iceberg table's metadata
     new e071e6f05 IMPALA-12714: Fix test_reduced_cardinality_by_filter for non-HDFS

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../apache/impala/util/IcebergMetadataScanner.java |  4 +--
 .../queries/QueryTest/iceberg-metadata-tables.test | 35 ++++++++++++++--------
 tests/custom_cluster/test_scratch_disk.py          |  7 ++++-
 tests/query_test/test_observability.py             |  3 +-
 4 files changed, 33 insertions(+), 16 deletions(-)


(impala) 02/03: IMPALA-12704: Fix NPE when querying empty iceberg table's metadata

Posted by st...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit ad2e7ff6bce6048ac1ee58c9bfc911fd392d655c
Author: Eyizoha <ey...@163.com>
AuthorDate: Fri Jan 12 10:52:07 2024 +0800

    IMPALA-12704: Fix NPE when querying empty iceberg table's metadata
    
    Currently, when querying some metadata tables of an empty iceberg table,
    a null pointer exception occurs. This patch fixes the issue and adds
    corresponding test cases in test_metadata_tables.
    
    Testing:
     - Added E2E test to cover this case
    
    Change-Id: I6b4d4fb81a45214045b8809a4bdd910a1f1f3843
    Reviewed-on: http://gerrit.cloudera.org:8080/20890
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../apache/impala/util/IcebergMetadataScanner.java |  4 +--
 .../queries/QueryTest/iceberg-metadata-tables.test | 35 ++++++++++++++--------
 2 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/util/IcebergMetadataScanner.java b/fe/src/main/java/org/apache/impala/util/IcebergMetadataScanner.java
index 18f3d8981..37087da43 100644
--- a/fe/src/main/java/org/apache/impala/util/IcebergMetadataScanner.java
+++ b/fe/src/main/java/org/apache/impala/util/IcebergMetadataScanner.java
@@ -102,11 +102,11 @@ public class IcebergMetadataScanner {
    */
   public StructLike GetNext() {
     // Return the next row in the DataRows iterator
-    if (dataRowsIterator_.hasNext()) {
+    if (dataRowsIterator_ != null && dataRowsIterator_.hasNext()) {
       return dataRowsIterator_.next();
     }
     // Otherwise this DataTask is empty, find a FileScanTask that has a non-empty DataTask
-    if(FindFileScanTaskWithRows()) {
+    if (FindFileScanTaskWithRows()) {
       return dataRowsIterator_.next();
     }
     return null;
diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test
index a559e13b6..e678efe9a 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test
@@ -176,7 +176,18 @@ row_regex:1,[1-9]\d*|0,[1-9]\d*|0,[1-9]\d*|0
 INT,BIGINT,BIGINT,BIGINT
 
 ####
-# Test 1 : Test select list
+# Test 1 : Test query empty table's metadata
+####
+====
+---- QUERY
+create table empty_ice_tbl (id int) stored by iceberg;
+select * from $DATABASE.empty_ice_tbl.entries;
+---- RESULTS
+---- TYPES
+INT,BIGINT,BIGINT,BIGINT
+
+####
+# Test 2 : Test select list
 ####
 ====
 ---- QUERY
@@ -218,7 +229,7 @@ select record_count + file_count from functional_parquet.iceberg_query_metadata.
 BIGINT
 
 ####
-# Test 2 : Test filtering
+# Test 3 : Test filtering
 ####
 ====
 ---- QUERY
@@ -285,7 +296,7 @@ row_regex:$OVERWRITE_SNAPSHOT_TS,$OVERWRITE_SNAPSHOT_ID,[1-9]\d*|0,true
 TIMESTAMP,BIGINT,BIGINT,BOOLEAN
 
 ####
-# Test 2 : Test joins
+# Test 4 : Test joins
 ####
 ====
 ---- QUERY
@@ -328,7 +339,7 @@ $OVERWRITE_SNAPSHOT_ID
 BIGINT
 
 ####
-# Test 3 : Inline query
+# Test 5 : Inline query
 ####
 ====
 ---- QUERY
@@ -343,7 +354,7 @@ row_regex:[1-9]\d*|0
 BIGINT
 
 ####
-# Test 4 : Complex types
+# Test 6 : Complex types
 # Currently not supported, complex type slots are set to NULL (IMPALA-12205)
 ####
 ====
@@ -358,7 +369,7 @@ row_regex:[1-9]\d*|0,'NULL'
 BIGINT,STRING
 
 ####
-# Test 5 : Multiple RowBatch results
+# Test 7 : Multiple RowBatch results
 ####
 ====
 ---- QUERY
@@ -372,9 +383,8 @@ row_regex:\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}(\.\d{9})?,[1-9]\d*|0,[1-9]\d*|0,t
 ---- TYPES
 TIMESTAMP,BIGINT,BIGINT,BOOLEAN
 
-
 ####
-# Test 6 : Timetravel
+# Test 8 : Timetravel
 # Timetravel is not supported currently, related Jira IMPALA-11991.
 ####
 ====
@@ -385,7 +395,7 @@ AnalysisException: FOR SYSTEM_VERSION AS OF clause is only supported for Iceberg
 ====
 
 ####
-# Test 7 : Use-cases
+# Test 9 : Use-cases
 ####
 ====
 ---- QUERY
@@ -415,7 +425,7 @@ row_regex:[1-9]\d*|0,'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/ice/i
 INT,STRING,BIGINT
 
 ####
-# Test 8 : Invalid operations
+# Test 10 : Invalid operations
 # In most cases the parser catches the table reference.
 ####
 ====
@@ -456,7 +466,7 @@ ParseException: Syntax error in line 1
 ====
 
 ####
-# Test 9 : Query nested type columns
+# Test 11 : Query nested type columns
 ####
 ====
 ---- QUERY
@@ -563,8 +573,9 @@ AnalysisException: Querying collection types (ARRAY/MAP) is not supported for Ic
 ====
 
 ####
-# Test 10 : Describe all the metadata tables once
+# Test 12 : Describe all the metadata tables once
 ####
+====
 ---- QUERY
 describe functional_parquet.iceberg_query_metadata.snapshots;
 ---- RESULTS


(impala) 01/03: IMPALA-12698: Restrict check_deleted_file_fd() for fixing flaky tests

Posted by st...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 02d004a12166c5549d591b7352ec1463c5ee8ba3
Author: Yida Wu <yi...@cloudera.com>
AuthorDate: Sun Jan 14 17:51:02 2024 -0800

    IMPALA-12698: Restrict check_deleted_file_fd() for fixing flaky tests
    
    The introduction of check_deleted_file_fd() in IMPALA-12681 aimed
    to detect a bug related to remote spilling where local temporary file
    handles were not being released after deletion. However, the tests
    associated with this function seem flaky in exhaustive builds:
    occasionally, some HDFS files are not promptly released after
    deletion. Locally, I observed that these files are eventually
    removed from /proc/xx/fd within a few minutes; the reason is not
    yet clear.
    
    To fix the flaky build failure, this patch confines the scope of
    check_deleted_file_fd() to detect files containing the keyword
    "scratch" only. Given that the HDFS files eventually get removed, and
    this does not seem to be an urgent issue, a separate Jira will be
    filed to track and investigate this behavior further.
    
    Testing:
    Reran the tests a couple of times and they passed.
    
    Change-Id: I55f5aa1cdbc0c74f6c7ebd25575e71d2b238bf98
    Reviewed-on: http://gerrit.cloudera.org:8080/20898
    Reviewed-by: Csaba Ringhofer <cs...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 tests/custom_cluster/test_scratch_disk.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tests/custom_cluster/test_scratch_disk.py b/tests/custom_cluster/test_scratch_disk.py
index a00fff04d..d1ab74fc8 100644
--- a/tests/custom_cluster/test_scratch_disk.py
+++ b/tests/custom_cluster/test_scratch_disk.py
@@ -283,7 +283,12 @@ class TestScratchDir(CustomClusterTestSuite):
 
   def find_deleted_files_in_fd(self, pid):
     fd_path = "/proc/{}/fd".format(pid)
-    command = "find {} -ls | grep '(deleted)'".format(fd_path)
+    # Look for the files with keywords 'scratch' and '(deleted)'.
+    # Limited to keyword 'scratch' because in IMPALA-12698 the process may
+    # create some reference deleted hdfs files, but the files are eventually
+    # removed in a few minutes. This limitation helps to mitigate false-positive
+    # checks.
+    command = "find {} -ls | grep -E 'scratch.*(deleted)'".format(fd_path)
     try:
       result = subprocess.check_output(command, shell=True)
       return result.strip()


(impala) 03/03: IMPALA-12714: Fix test_reduced_cardinality_by_filter for non-HDFS

Posted by st...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit e071e6f053ceb8cb4536837f76e76610a34aa8f2
Author: Riza Suminto <ri...@cloudera.com>
AuthorDate: Mon Jan 15 08:45:34 2024 -0800

    IMPALA-12714: Fix test_reduced_cardinality_by_filter for non-HDFS
    
    test_reduced_cardinality_by_filter failed in non-HDFS environments
    because it asserted the existence of '00:SCAN HDFS' in the ExecSummary.
    This patch changes that assertion to ignore the type of the scan node
    in the test query. It also marks the test with the
    SkipIfNotHdfsMinicluster.plans decorator.
    
    Testing:
    - Pass test_reduced_cardinality_by_filter
    
    Change-Id: Icbf72687cc3c5a99aa0a0a74e229ed8c88ed06ef
    Reviewed-on: http://gerrit.cloudera.org:8080/20902
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 tests/query_test/test_observability.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/query_test/test_observability.py b/tests/query_test/test_observability.py
index 597dea515..27dcf8d10 100644
--- a/tests/query_test/test_observability.py
+++ b/tests/query_test/test_observability.py
@@ -872,6 +872,7 @@ class TestObservability(ImpalaTestSuite):
     assert len(re.findall('Single node plan created:', runtime_profile, re.M)) == 2
     assert len(re.findall('Distributed plan created:', runtime_profile, re.M)) == 2
 
+  @SkipIfNotHdfsMinicluster.plans
   def test_reduced_cardinality_by_filter(self):
     """IMPALA-12702: Check that ExecSummary shows the reduced cardinality estimation."""
     query_opts = {'compute_processing_cost': True}
@@ -882,7 +883,7 @@ class TestObservability(ImpalaTestSuite):
         where l1.o_custkey < 1000"""
     result = self.execute_query(query, query_opts)
     scan = result.exec_summary[10]
-    assert scan['operator'] == '00:SCAN HDFS'
+    assert '00:SCAN' in scan['operator']
     assert scan['num_rows'] == 39563
     assert scan['est_num_rows'] == 575771
     assert scan['detail'] == 'tpch_parquet.lineitem'