You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@iceberg.apache.org by GitBox <gi...@apache.org> on 2018/12/13 21:18:39 UTC

[GitHub] rdblue closed pull request #48: Fix commit retry with manifest lists.

rdblue closed pull request #48: Fix commit retry with manifest lists.
URL: https://github.com/apache/incubator-iceberg/pull/48
 
 
   

This is a PR merged from a forked repository.
Because GitHub hides the original diff of a foreign pull request
(one opened from a fork) once it has been merged, the diff is
reproduced below for the sake of provenance:

diff --git a/core/src/main/java/com/netflix/iceberg/SnapshotUpdate.java b/core/src/main/java/com/netflix/iceberg/SnapshotUpdate.java
index ce9d59c..796df2f 100644
--- a/core/src/main/java/com/netflix/iceberg/SnapshotUpdate.java
+++ b/core/src/main/java/com/netflix/iceberg/SnapshotUpdate.java
@@ -22,6 +22,7 @@
 import com.google.common.cache.CacheBuilder;
 import com.google.common.cache.CacheLoader;
 import com.google.common.cache.LoadingCache;
+import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
 import com.netflix.iceberg.exceptions.CommitFailedException;
 import com.netflix.iceberg.exceptions.RuntimeIOException;
@@ -35,6 +36,7 @@
 import java.util.List;
 import java.util.Set;
 import java.util.UUID;
+import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
 
 import static com.netflix.iceberg.TableProperties.COMMIT_MAX_RETRY_WAIT_MS;
@@ -70,6 +72,8 @@ public ManifestFile load(ManifestFile file) {
 
   private final TableOperations ops;
   private final String commitUUID = UUID.randomUUID().toString();
+  private final AtomicInteger attempt = new AtomicInteger(0);
+  private final List<String> manifestLists = Lists.newArrayList();
   private Long snapshotId = null;
   private TableMetadata base = null;
 
@@ -110,7 +114,11 @@ public Snapshot apply() {
       OutputFile manifestList = manifestListPath();
 
       try (ManifestListWriter writer = new ManifestListWriter(
-          manifestListPath(), snapshotId(), parentSnapshotId)) {
+          manifestList, snapshotId(), parentSnapshotId)) {
+
+        // keep track of the manifest lists created
+        manifestLists.add(manifestList.location());
+
         ManifestFile[] manifestFiles = new ManifestFile[manifests.size()];
 
         Tasks.range(manifestFiles.length)
@@ -172,6 +180,12 @@ public void commit() {
       Snapshot saved = ops.refresh().snapshot(newSnapshotId.get());
       if (saved != null) {
         cleanUncommitted(Sets.newHashSet(saved.manifests()));
+        // also clean up unused manifest lists created by multiple attempts
+        for (String manifestList : manifestLists) {
+          if (!saved.manifestListLocation().equals(manifestList)) {
+            ops.io().deleteFile(manifestList);
+          }
+        }
       } else {
         // saved may not be present if the latest metadata couldn't be loaded due to eventual
         // consistency problems in refresh. in that case, don't clean up.
@@ -184,6 +198,10 @@ public void commit() {
   }
 
   protected void cleanAll() {
+    for (String manifestList : manifestLists) {
+      ops.io().deleteFile(manifestList);
+    }
+    manifestLists.clear();
     cleanUncommitted(EMPTY_SET);
   }
 
@@ -193,7 +211,7 @@ protected void deleteFile(String path) {
 
   protected OutputFile manifestListPath() {
     return ops.io().newOutputFile(ops.metadataFileLocation(FileFormat.AVRO.addExtension(
-        String.format("snap-%d-%s", snapshotId(), commitUUID))));
+        String.format("snap-%d-%d-%s", snapshotId(), attempt.incrementAndGet(), commitUUID))));
   }
 
   protected OutputFile manifestPath(int i) {
diff --git a/core/src/test/java/com/netflix/iceberg/TestFastAppend.java b/core/src/test/java/com/netflix/iceberg/TestFastAppend.java
index 4d9e174..88252bb 100644
--- a/core/src/test/java/com/netflix/iceberg/TestFastAppend.java
+++ b/core/src/test/java/com/netflix/iceberg/TestFastAppend.java
@@ -171,7 +171,32 @@ public void testFailure() {
   }
 
   @Test
-  public void testRecovery() {
+  public void testRecoveryWithManifestList() {
+    table.updateProperties().set(TableProperties.MANIFEST_LISTS_ENABLED, "true").commit();
+
+    // inject 3 failures, the last try will succeed
+    TestTables.TestTableOperations ops = table.ops();
+    ops.failCommits(3);
+
+    AppendFiles append = table.newFastAppend().appendFile(FILE_B);
+    Snapshot pending = append.apply();
+    ManifestFile newManifest = pending.manifests().get(0);
+    Assert.assertTrue("Should create new manifest", new File(newManifest.path()).exists());
+
+    append.commit();
+
+    TableMetadata metadata = readMetadata();
+
+    validateSnapshot(null, metadata.currentSnapshot(), FILE_B);
+    Assert.assertTrue("Should commit same new manifest", new File(newManifest.path()).exists());
+    Assert.assertTrue("Should commit the same new manifest",
+        metadata.currentSnapshot().manifests().contains(newManifest));
+  }
+
+  @Test
+  public void testRecoveryWithoutManifestList() {
+    table.updateProperties().set(TableProperties.MANIFEST_LISTS_ENABLED, "false").commit();
+
     // inject 3 failures, the last try will succeed
     TestTables.TestTableOperations ops = table.ops();
     ops.failCommits(3);


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services