You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by wz...@apache.org on 2023/12/13 01:59:47 UTC

(impala) branch master updated: IMPALA-12589: Ignore FbFileDesc.absolutePath() if it is null/empty

This is an automated email from the ASF dual-hosted git repository.

wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 69aad8b5d IMPALA-12589: Ignore FbFileDesc.absolutePath() if it is null/empty
69aad8b5d is described below

commit 69aad8b5df52133bb349ca046ce6e13d2a0cf217
Author: Riza Suminto <ri...@cloudera.com>
AuthorDate: Mon Dec 11 13:33:18 2023 -0800

    IMPALA-12589: Ignore FbFileDesc.absolutePath() if it is null/empty
    
    IMPALA-11507 add absolute_path field to FbFileDesc in
    CatalogObjects.fbs. This field is used as a fallback for the
    relative_path field when the relative path of an Iceberg data file
    cannot be obtained by the table location path. relative_path is never
    set with null, while absolute_path may not be set and left as null.
    
    IMPALA-11507 missed a case where both FileDescriptor.getRelativePath()
    and FbFileDesc.absolutePath() are null/empty. This patch fixes the bug
    so that FileDescriptor.getPath() and FileDescriptor.getAbsolutePath() do
    not fall back to FbFileDesc.absolutePath() if it is null/empty. With
    this fix, FileDescriptor.getPath() and FileDescriptor.getAbsolutePath()
    will not return null.
    
    Testing:
    - Add test_scanner.py::TestSingleFileTable
    - Pass core tests
    
    Change-Id: I15d811430871cd1319d9560901318c1c3b89403c
    Reviewed-on: http://gerrit.cloudera.org:8080/20769
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../org/apache/impala/catalog/HdfsPartition.java   | 23 ++++++++---
 tests/query_test/test_scanners.py                  | 46 ++++++++++++++++++++++
 2 files changed, 63 insertions(+), 6 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java b/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java
index ca77e75cc..861a01adc 100644
--- a/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java
+++ b/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java
@@ -260,17 +260,28 @@ public class HdfsPartition extends CatalogObjectImpl
 
     public String getRelativePath() { return fbFileDescriptor_.relativePath(); }
 
-    public String getAbsolutePath() { return fbFileDescriptor_.absolutePath(); }
+    public String getAbsolutePath() {
+      return StringUtils.isEmpty(fbFileDescriptor_.absolutePath()) ?
+          StringUtils.EMPTY :
+          fbFileDescriptor_.absolutePath();
+    }
 
     public String getAbsolutePath(String rootPath) {
-      return StringUtils.isNotEmpty(fbFileDescriptor_.relativePath())
-          ? rootPath + Path.SEPARATOR + fbFileDescriptor_.relativePath()
-          : fbFileDescriptor_.absolutePath();
+      if (StringUtils.isEmpty(fbFileDescriptor_.relativePath())
+          && StringUtils.isNotEmpty(fbFileDescriptor_.absolutePath())) {
+        return fbFileDescriptor_.absolutePath();
+      } else {
+        return rootPath + Path.SEPARATOR + fbFileDescriptor_.relativePath();
+      }
     }
 
     public String getPath() {
-      return StringUtils.isNotEmpty(fbFileDescriptor_.relativePath())
-          ? fbFileDescriptor_.relativePath() : fbFileDescriptor_.absolutePath();
+      if (StringUtils.isEmpty(fbFileDescriptor_.relativePath())
+          && StringUtils.isNotEmpty(fbFileDescriptor_.absolutePath())) {
+        return fbFileDescriptor_.absolutePath();
+      } else {
+        return fbFileDescriptor_.relativePath();
+      }
     }
 
     public long getFileLength() { return fbFileDescriptor_.length(); }
diff --git a/tests/query_test/test_scanners.py b/tests/query_test/test_scanners.py
index 1cd0ea298..6a5e2b8d6 100644
--- a/tests/query_test/test_scanners.py
+++ b/tests/query_test/test_scanners.py
@@ -1949,3 +1949,49 @@ class TestParquetV2(ImpalaTestSuite):
 
   def test_parquet_v2(self, vector):
     self.run_test_case('QueryTest/parquet-v2', vector)
+
+
+class TestSingleFileTable(ImpalaTestSuite):
+  """IMPALA-12589: Regression test for corner case behavior where a table LOCATION might
+  point to a file instead of a directory. Expect SELECT and SHOW FILES to still work in
+  that case."""
+
+  @classmethod
+  def get_workload(cls):
+    return 'functional-query'
+
+  @classmethod
+  def add_test_dimensions(cls):
+    super(TestSingleFileTable, cls).add_test_dimensions()
+    cls.ImpalaTestMatrix.add_constraint(
+      lambda v: v.get_value('table_format').file_format == 'text')
+
+  def test_single_file_table(self, vector, unique_database):
+    # Create a simple table with one column.
+    params = {"db": unique_database, "tbl": "single_file_table"}
+    create_tbl_ddl = ("create external table {db}.{tbl} (c1 int) "
+                      "stored as textfile").format(**params)
+    self.execute_query_expect_success(self.client, create_tbl_ddl)
+
+    # Insert one value to the table.
+    insert_stmt = "insert into {db}.{tbl} values (1)".format(**params)
+    self.execute_query_expect_success(self.client, insert_stmt)
+
+    # Show files and get the path to the first data file.
+    show_files_stmt = "show files in {db}.{tbl}".format(**params)
+    res = self.execute_query_expect_success(self.client, show_files_stmt)
+    assert len(res.data) == 1
+    hdfs_file_path = res.data[0].split("\t")[0]
+    params['new_location'] = hdfs_file_path
+
+    # Alter location to point a data file.
+    alter_stmt = "alter table {db}.{tbl} set location '{new_location}'".format(**params)
+    self.execute_query_expect_success(self.client, alter_stmt)
+
+    # Show files and count star should still work.
+    res = self.execute_query_expect_success(self.client, show_files_stmt)
+    assert res.data[0].split("\t")[0] == (hdfs_file_path + '/')
+    select_stmt = "select count(*) from {db}.{tbl}".format(**params)
+    res = self.execute_query_expect_success(self.client, select_stmt)
+    assert res.data[0].split("\t")[0] == '1'
+