You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by fo...@apache.org on 2023/05/24 11:01:30 UTC

[iceberg] branch master updated: Nessie: Disable table metadata files cleanup during commits (#7641)

This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new 8fe6248736 Nessie: Disable table metadata files cleanup during commits (#7641)
8fe6248736 is described below

commit 8fe6248736a8a15a8dc85376c8741cbce62e6b47
Author: Ajantha Bhat <aj...@gmail.com>
AuthorDate: Wed May 24 16:31:23 2023 +0530

    Nessie: Disable table metadata files cleanup during commits (#7641)
---
 .../iceberg/nessie/NessieTableOperations.java      | 13 +++++
 .../org/apache/iceberg/nessie/TestNessieTable.java | 57 ++++++++++++++++++++++
 2 files changed, 70 insertions(+)

diff --git a/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java b/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java
index c9486e2e27..43a453c78a 100644
--- a/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java
+++ b/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java
@@ -92,6 +92,19 @@ public class NessieTableOperations extends BaseMetastoreTableOperations {
     // Nessie CLI will provide a reference aware GC functionality for the expired/unreferenced
     // files.
     newProperties.put(TableProperties.GC_ENABLED, "false");
+
+    boolean metadataCleanupEnabled =
+        newProperties
+            .getOrDefault(TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED, "false")
+            .equalsIgnoreCase("true");
+    if (metadataCleanupEnabled) {
+      newProperties.put(TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED, "false");
+      LOG.warn(
+          "Automatic table metadata files cleanup was requested, but disabled because "
+              + "the Nessie catalog can use historical metadata files from other references. "
+              + "Use the 'nessie-gc' tool for history-aware GC");
+    }
+
     TableMetadata.Builder builder =
         TableMetadata.buildFrom(deserialized)
             .setPreviousFileLocation(null)
diff --git a/nessie/src/test/java/org/apache/iceberg/nessie/TestNessieTable.java b/nessie/src/test/java/org/apache/iceberg/nessie/TestNessieTable.java
index 9881af5cfd..be3f31ac3d 100644
--- a/nessie/src/test/java/org/apache/iceberg/nessie/TestNessieTable.java
+++ b/nessie/src/test/java/org/apache/iceberg/nessie/TestNessieTable.java
@@ -25,18 +25,23 @@ import static org.apache.iceberg.types.Types.NestedField.required;
 import java.io.File;
 import java.io.IOException;
 import java.net.URI;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Objects;
+import java.util.Set;
 import java.util.stream.Collectors;
 import org.apache.avro.generic.GenericData;
 import org.apache.avro.generic.GenericRecordBuilder;
 import org.apache.commons.io.FileUtils;
+import org.apache.iceberg.BaseTable;
 import org.apache.iceberg.DataFile;
 import org.apache.iceberg.HasTableOperations;
 import org.apache.iceberg.ManifestFile;
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.Table;
+import org.apache.iceberg.TableMetadata;
 import org.apache.iceberg.TableMetadataParser;
 import org.apache.iceberg.TableOperations;
 import org.apache.iceberg.TableProperties;
@@ -579,6 +584,58 @@ public class TestNessieTable extends BaseTestIceberg {
             "Cannot expire snapshots: GC is disabled (deleting files may corrupt other tables)");
   }
 
+  @Test
+  public void testTableMetadataFilesCleanupDisable() throws NessieNotFoundException {
+    Table icebergTable = catalog.loadTable(TABLE_IDENTIFIER);
+
+    // Forcefully setting the property should also get overridden with false
+    icebergTable
+        .updateProperties()
+        .set(TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED, "true")
+        .commit();
+    Assertions.assertThat(
+            icebergTable.properties().get(TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED))
+        .isNotNull()
+        .isEqualTo("false");
+
+    icebergTable
+        .updateProperties()
+        .set(TableProperties.METADATA_PREVIOUS_VERSIONS_MAX, "1")
+        .commit();
+
+    String hash = api.getReference().refName(BRANCH).get().getHash();
+    String metadataFileLocation =
+        ((BaseTable) icebergTable).operations().current().metadataFileLocation();
+    Path metadataFileLocationPath = Paths.get(metadataFileLocation.replaceFirst("file:", ""));
+
+    Assertions.assertThat(metadataFileLocationPath).exists();
+
+    icebergTable.updateSchema().addColumn("x1", Types.LongType.get()).commit();
+    icebergTable.updateSchema().addColumn("x2", Types.LongType.get()).commit();
+
+    // old table metadata file should still exist after commits.
+    Assertions.assertThat(metadataFileLocationPath).exists();
+
+    // load the table from the specific hash, which reads the corresponding metadataFileLocation
+    ImmutableTableReference tableReference =
+        ImmutableTableReference.builder().reference(BRANCH).hash(hash).name(TABLE_NAME).build();
+    TableIdentifier identifier = TableIdentifier.of(DB_NAME, tableReference.toString());
+    Assertions.assertThat(
+            ((BaseTable) catalog.loadTable(identifier))
+                .operations()
+                .current()
+                .metadataFileLocation())
+        .isEqualTo(metadataFileLocation);
+
+    // table at the latest hash should not contain `metadataFileLocation` in previousFiles.
+    Set<String> tableMetadataFiles =
+        ((BaseTable) icebergTable)
+            .operations().current().previousFiles().stream()
+                .map(TableMetadata.MetadataLogEntry::file)
+                .collect(Collectors.toSet());
+    Assertions.assertThat(tableMetadataFiles).hasSize(1).doesNotContain(metadataFileLocation);
+  }
+
   private String getTableBasePath(String tableName) {
     return temp.toUri() + DB_NAME + "/" + tableName;
   }