You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by bo...@apache.org on 2021/02/01 17:03:31 UTC

[impala] 02/02: IMPALA-10460: Impala should write normalized paths in Iceberg manifests

This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit a81c6a78294d1da72b57ed90ec4e365de8c4e54b
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
AuthorDate: Thu Jan 28 11:51:44 2021 +0100

    IMPALA-10460: Impala should write normalized paths in Iceberg manifests
    
    Currently Impala writes double slashes in the paths of data files
    for non-partitioned Iceberg tables: the partition name is empty, so
    the "$0/data/$1/" prefix expands to "<table location>/data//".
    Unnormalized paths can cause problems later.
    
    This patch removes the redundant slash by omitting the partition
    component from the prefix when the partition name is empty.
    
    Testing:
     * Tested manually by inspecting the manifest files of the
       Iceberg tables. Used both non-partitioned and partitioned tables.
    
    Change-Id: If5ecac78102ed35710dd70a18edc71f6e891e748
    Reviewed-on: http://gerrit.cloudera.org:8080/16993
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/exec/hdfs-table-sink.cc | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/be/src/exec/hdfs-table-sink.cc b/be/src/exec/hdfs-table-sink.cc
index ae4ae6e..ffc79af 100644
--- a/be/src/exec/hdfs-table-sink.cc
+++ b/be/src/exec/hdfs-table-sink.cc
@@ -258,9 +258,14 @@ void HdfsTableSink::BuildHdfsFileNames(
   }
   if (IsIceberg()) {
     //TODO: implement LocationProviders.
-    output_partition->final_hdfs_file_name_prefix =
-        Substitute("$0/data/$1/", table_desc_->IcebergTableLocation(),
-            output_partition->partition_name);
+    if (output_partition->partition_name.empty()) {
+      output_partition->final_hdfs_file_name_prefix =
+          Substitute("$0/data/", table_desc_->IcebergTableLocation());
+    } else {
+      output_partition->final_hdfs_file_name_prefix =
+          Substitute("$0/data/$1/", table_desc_->IcebergTableLocation(),
+              output_partition->partition_name);
+    }
   }
   output_partition->final_hdfs_file_name_prefix += query_suffix;