You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by fo...@apache.org on 2023/05/24 11:01:30 UTC
[iceberg] branch master updated: Nessie: Disable table metadata files cleanup during commits (#7641)
This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new 8fe6248736 Nessie: Disable table metadata files cleanup during commits (#7641)
8fe6248736 is described below
commit 8fe6248736a8a15a8dc85376c8741cbce62e6b47
Author: Ajantha Bhat <aj...@gmail.com>
AuthorDate: Wed May 24 16:31:23 2023 +0530
Nessie: Disable table metadata files cleanup during commits (#7641)
---
.../iceberg/nessie/NessieTableOperations.java | 13 +++++
.../org/apache/iceberg/nessie/TestNessieTable.java | 57 ++++++++++++++++++++++
2 files changed, 70 insertions(+)
diff --git a/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java b/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java
index c9486e2e27..43a453c78a 100644
--- a/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java
+++ b/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java
@@ -92,6 +92,19 @@ public class NessieTableOperations extends BaseMetastoreTableOperations {
// Nessie CLI will provide a reference aware GC functionality for the expired/unreferenced
// files.
newProperties.put(TableProperties.GC_ENABLED, "false");
+
+ boolean metadataCleanupEnabled =
+ newProperties
+ .getOrDefault(TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED, "false")
+ .equalsIgnoreCase("true");
+ if (metadataCleanupEnabled) {
+ newProperties.put(TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED, "false");
+ LOG.warn(
+ "Automatic table metadata files cleanup was requested, but disabled because "
+ + "the Nessie catalog can use historical metadata files from other references. "
+ + "Use the 'nessie-gc' tool for history-aware GC");
+ }
+
TableMetadata.Builder builder =
TableMetadata.buildFrom(deserialized)
.setPreviousFileLocation(null)
diff --git a/nessie/src/test/java/org/apache/iceberg/nessie/TestNessieTable.java b/nessie/src/test/java/org/apache/iceberg/nessie/TestNessieTable.java
index 9881af5cfd..be3f31ac3d 100644
--- a/nessie/src/test/java/org/apache/iceberg/nessie/TestNessieTable.java
+++ b/nessie/src/test/java/org/apache/iceberg/nessie/TestNessieTable.java
@@ -25,18 +25,23 @@ import static org.apache.iceberg.types.Types.NestedField.required;
import java.io.File;
import java.io.IOException;
import java.net.URI;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
+import java.util.Set;
import java.util.stream.Collectors;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecordBuilder;
import org.apache.commons.io.FileUtils;
+import org.apache.iceberg.BaseTable;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.HasTableOperations;
import org.apache.iceberg.ManifestFile;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
+import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.TableMetadataParser;
import org.apache.iceberg.TableOperations;
import org.apache.iceberg.TableProperties;
@@ -579,6 +584,58 @@ public class TestNessieTable extends BaseTestIceberg {
"Cannot expire snapshots: GC is disabled (deleting files may corrupt other tables)");
}
+ // Verifies that the Nessie catalog force-disables automatic metadata-file cleanup:
+ // an explicit METADATA_DELETE_AFTER_COMMIT_ENABLED=true is rewritten to "false" on
+ // commit, so metadata files that other Nessie references may still point at survive.
+ @Test
+ public void testTableMetadataFilesCleanupDisable() throws NessieNotFoundException {
+ Table icebergTable = catalog.loadTable(TABLE_IDENTIFIER);
+
+ // Even explicitly setting the property to "true" should be overridden to "false".
+ icebergTable
+ .updateProperties()
+ .set(TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED, "true")
+ .commit();
+ Assertions.assertThat(
+ icebergTable.properties().get(TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED))
+ .isNotNull()
+ .isEqualTo("false");
+
+ // Keep only one previous metadata entry so that, if cleanup were actually active,
+ // the old metadata file would be deleted by the commits below.
+ icebergTable
+ .updateProperties()
+ .set(TableProperties.METADATA_PREVIOUS_VERSIONS_MAX, "1")
+ .commit();
+
+ // Remember the branch hash and current metadata file before making more commits.
+ String hash = api.getReference().refName(BRANCH).get().getHash();
+ String metadataFileLocation =
+ ((BaseTable) icebergTable).operations().current().metadataFileLocation();
+ Path metadataFileLocationPath = Paths.get(metadataFileLocation.replaceFirst("file:", ""));
+
+ Assertions.assertThat(metadataFileLocationPath).exists();
+
+ icebergTable.updateSchema().addColumn("x1", Types.LongType.get()).commit();
+ icebergTable.updateSchema().addColumn("x2", Types.LongType.get()).commit();
+
+ // old table metadata file should still exist after commits.
+ Assertions.assertThat(metadataFileLocationPath).exists();
+
+ // load the table from the specific hash which reads the mapping metadataFileLocation
+ ImmutableTableReference tableReference =
+ ImmutableTableReference.builder().reference(BRANCH).hash(hash).name(TABLE_NAME).build();
+ TableIdentifier identifier = TableIdentifier.of(DB_NAME, tableReference.toString());
+ Assertions.assertThat(
+ ((BaseTable) catalog.loadTable(identifier))
+ .operations()
+ .current()
+ .metadataFileLocation())
+ .isEqualTo(metadataFileLocation);
+
+ // table at the latest hash should not contain `metadataFileLocation` in previousFiles.
+ // (exactly one entry, because METADATA_PREVIOUS_VERSIONS_MAX was set to "1" above)
+ Set<String> tableMetadataFiles =
+ ((BaseTable) icebergTable)
+ .operations().current().previousFiles().stream()
+ .map(TableMetadata.MetadataLogEntry::file)
+ .collect(Collectors.toSet());
+ Assertions.assertThat(tableMetadataFiles).hasSize(1).doesNotContain(metadataFileLocation);
+ }
+
private String getTableBasePath(String tableName) {
return temp.toUri() + DB_NAME + "/" + tableName;
}