You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by wz...@apache.org on 2023/12/13 01:59:47 UTC
(impala) branch master updated: IMPALA-12589: Ignore FbFileDesc.absolutePath() if it is null/empty
This is an automated email from the ASF dual-hosted git repository.
wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 69aad8b5d IMPALA-12589: Ignore FbFileDesc.absolutePath() if it is null/empty
69aad8b5d is described below
commit 69aad8b5df52133bb349ca046ce6e13d2a0cf217
Author: Riza Suminto <ri...@cloudera.com>
AuthorDate: Mon Dec 11 13:33:18 2023 -0800
IMPALA-12589: Ignore FbFileDesc.absolutePath() if it is null/empty
IMPALA-11507 add absolute_path field to FbFileDesc in
CatalogObjects.fbs. This field is used as a fallback for the
relative_path field when the relative path of an Iceberg data file
cannot be obtained by the table location path. relative_path is never
set with null, while absolute_path may not be set and left as null.
IMPALA-11507 missed a case where both FileDescriptor.getRelativePath()
and FbFileDesc.absolutePath() are null/empty. This patch fixes the bug
so that FileDescriptor.getPath() and FileDescriptor.getAbsolutePath() do
not fall back to FbFileDesc.absolutePath() if it is null/empty. With
this fix, FileDescriptor.getPath() and FileDescriptor.getAbsolutePath()
will not return null.
Testing:
- Add test_scanner.py::TestSingleFileTable
- Pass core tests
Change-Id: I15d811430871cd1319d9560901318c1c3b89403c
Reviewed-on: http://gerrit.cloudera.org:8080/20769
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
.../org/apache/impala/catalog/HdfsPartition.java | 23 ++++++++---
tests/query_test/test_scanners.py | 46 ++++++++++++++++++++++
2 files changed, 63 insertions(+), 6 deletions(-)
diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java b/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java
index ca77e75cc..861a01adc 100644
--- a/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java
+++ b/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java
@@ -260,17 +260,28 @@ public class HdfsPartition extends CatalogObjectImpl
public String getRelativePath() { return fbFileDescriptor_.relativePath(); }
- public String getAbsolutePath() { return fbFileDescriptor_.absolutePath(); }
+ public String getAbsolutePath() {
+ return StringUtils.isEmpty(fbFileDescriptor_.absolutePath()) ?
+ StringUtils.EMPTY :
+ fbFileDescriptor_.absolutePath();
+ }
public String getAbsolutePath(String rootPath) {
- return StringUtils.isNotEmpty(fbFileDescriptor_.relativePath())
- ? rootPath + Path.SEPARATOR + fbFileDescriptor_.relativePath()
- : fbFileDescriptor_.absolutePath();
+ if (StringUtils.isEmpty(fbFileDescriptor_.relativePath())
+ && StringUtils.isNotEmpty(fbFileDescriptor_.absolutePath())) {
+ return fbFileDescriptor_.absolutePath();
+ } else {
+ return rootPath + Path.SEPARATOR + fbFileDescriptor_.relativePath();
+ }
}
public String getPath() {
- return StringUtils.isNotEmpty(fbFileDescriptor_.relativePath())
- ? fbFileDescriptor_.relativePath() : fbFileDescriptor_.absolutePath();
+ if (StringUtils.isEmpty(fbFileDescriptor_.relativePath())
+ && StringUtils.isNotEmpty(fbFileDescriptor_.absolutePath())) {
+ return fbFileDescriptor_.absolutePath();
+ } else {
+ return fbFileDescriptor_.relativePath();
+ }
}
public long getFileLength() { return fbFileDescriptor_.length(); }
diff --git a/tests/query_test/test_scanners.py b/tests/query_test/test_scanners.py
index 1cd0ea298..6a5e2b8d6 100644
--- a/tests/query_test/test_scanners.py
+++ b/tests/query_test/test_scanners.py
@@ -1949,3 +1949,49 @@ class TestParquetV2(ImpalaTestSuite):
def test_parquet_v2(self, vector):
self.run_test_case('QueryTest/parquet-v2', vector)
+
+
+class TestSingleFileTable(ImpalaTestSuite):
+ """IMPALA-12589: Regression test for corner case behavior where a table LOCATION might
+ point to a file instead of a directory. Expect SELECT and SHOW FILES to still work in
+ that case."""
+
+ @classmethod
+ def get_workload(cls):
+ return 'functional-query'
+
+ @classmethod
+ def add_test_dimensions(cls):
+ super(TestSingleFileTable, cls).add_test_dimensions()
+ cls.ImpalaTestMatrix.add_constraint(
+ lambda v: v.get_value('table_format').file_format == 'text')
+
+ def test_single_file_table(self, vector, unique_database):
+ # Create a simple table with one column.
+ params = {"db": unique_database, "tbl": "single_file_table"}
+ create_tbl_ddl = ("create external table {db}.{tbl} (c1 int) "
+ "stored as textfile").format(**params)
+ self.execute_query_expect_success(self.client, create_tbl_ddl)
+
+ # Insert one value to the table.
+ insert_stmt = "insert into {db}.{tbl} values (1)".format(**params)
+ self.execute_query_expect_success(self.client, insert_stmt)
+
+ # Show files and get the path to the first data file.
+ show_files_stmt = "show files in {db}.{tbl}".format(**params)
+ res = self.execute_query_expect_success(self.client, show_files_stmt)
+ assert len(res.data) == 1
+ hdfs_file_path = res.data[0].split("\t")[0]
+ params['new_location'] = hdfs_file_path
+
+ # Alter location to point a data file.
+ alter_stmt = "alter table {db}.{tbl} set location '{new_location}'".format(**params)
+ self.execute_query_expect_success(self.client, alter_stmt)
+
+ # Show files and count star should still work.
+ res = self.execute_query_expect_success(self.client, show_files_stmt)
+ assert res.data[0].split("\t")[0] == (hdfs_file_path + '/')
+ select_stmt = "select count(*) from {db}.{tbl}".format(**params)
+ res = self.execute_query_expect_success(self.client, select_stmt)
+ assert res.data[0].split("\t")[0] == '1'
+