You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by dw...@apache.org on 2022/06/28 00:10:34 UTC

[iceberg] branch master updated: Core: Fix tag ancestor snapshot handling (#5034)

This is an automated email from the ASF dual-hosted git repository.

dweeks pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new 5590d1e43 Core: Fix tag ancestor snapshot handling (#5034)
5590d1e43 is described below

commit 5590d1e4338be8eb0032ec8442a52c324994ef7c
Author: Ryan Blue <bl...@apache.org>
AuthorDate: Mon Jun 27 17:10:29 2022 -0700

    Core: Fix tag ancestor snapshot handling (#5034)
    
    * Core: Fix tag parent snapshot handling.
    
    * Fix import order.
---
 .../java/org/apache/iceberg/RemoveSnapshots.java   |  8 ++-
 .../org/apache/iceberg/TestRemoveSnapshots.java    | 76 ++++++++++++++++++++++
 2 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/core/src/main/java/org/apache/iceberg/RemoveSnapshots.java b/core/src/main/java/org/apache/iceberg/RemoveSnapshots.java
index 2f9fa023d..cfad2a4b3 100644
--- a/core/src/main/java/org/apache/iceberg/RemoveSnapshots.java
+++ b/core/src/main/java/org/apache/iceberg/RemoveSnapshots.java
@@ -273,8 +273,12 @@ class RemoveSnapshots implements ExpireSnapshots {
   private Set<Long> unreferencedSnapshotsToRetain(Collection<SnapshotRef> refs) {
     Set<Long> referencedSnapshots = Sets.newHashSet();
     for (SnapshotRef ref : refs) {
-      for (Snapshot snapshot : SnapshotUtil.ancestorsOf(ref.snapshotId(), base::snapshot)) {
-        referencedSnapshots.add(snapshot.snapshotId());
+      if (ref.isBranch()) {
+        for (Snapshot snapshot : SnapshotUtil.ancestorsOf(ref.snapshotId(), base::snapshot)) {
+          referencedSnapshots.add(snapshot.snapshotId());
+        }
+      } else {
+        referencedSnapshots.add(ref.snapshotId());
       }
     }
 
diff --git a/core/src/test/java/org/apache/iceberg/TestRemoveSnapshots.java b/core/src/test/java/org/apache/iceberg/TestRemoveSnapshots.java
index 99cba44d3..5e492e397 100644
--- a/core/src/test/java/org/apache/iceberg/TestRemoveSnapshots.java
+++ b/core/src/test/java/org/apache/iceberg/TestRemoveSnapshots.java
@@ -1360,6 +1360,82 @@ public class TestRemoveSnapshots extends TableTestBase {
     Assert.assertEquals(2, table.ops().current().snapshots().size());
   }
 
+  @Test
+  public void testUnreferencedSnapshotParentOfTag() {
+    table.newAppend()
+        .appendFile(FILE_A)
+        .commit();
+
+    long initialSnapshotId = table.currentSnapshot().snapshotId();
+
+    // this will be expired because it is still unreferenced with a tag on its child snapshot
+    table.newAppend()
+        .appendFile(FILE_B)
+        .commit();
+
+    long expiredSnapshotId = table.currentSnapshot().snapshotId();
+
+    long expireTimestampSnapshotB = waitUntilAfter(table.currentSnapshot().timestampMillis());
+    waitUntilAfter(expireTimestampSnapshotB);
+
+    table.newAppend()
+        .appendFile(FILE_C)
+        .commit();
+
+    // create a tag that references the current history and rewrite main to point to the initial snapshot
+    table.manageSnapshots()
+        .createTag("tag", table.currentSnapshot().snapshotId())
+        .replaceBranch("main", initialSnapshotId)
+        .commit();
+
+    table.expireSnapshots()
+        .expireOlderThan(expireTimestampSnapshotB)
+        .cleanExpiredFiles(false)
+        .commit();
+
+    Assert.assertNull("Should remove unreferenced snapshot beneath a tag", table.snapshot(expiredSnapshotId));
+    Assert.assertEquals(2, table.ops().current().snapshots().size());
+  }
+
+  @Test
+  public void testSnapshotParentOfBranchNotUnreferenced() {
+    // similar to testUnreferencedSnapshotParentOfTag, but checks that branch history is not considered unreferenced
+    table.newAppend()
+        .appendFile(FILE_A)
+        .commit();
+
+    long initialSnapshotId = table.currentSnapshot().snapshotId();
+
+    // this will be expired because it is still unreferenced with a tag on its child snapshot
+    table.newAppend()
+        .appendFile(FILE_B)
+        .commit();
+
+    long snapshotId = table.currentSnapshot().snapshotId();
+
+    long expireTimestampSnapshotB = waitUntilAfter(table.currentSnapshot().timestampMillis());
+    waitUntilAfter(expireTimestampSnapshotB);
+
+    table.newAppend()
+        .appendFile(FILE_C)
+        .commit();
+
+    // create a branch that references the current history and rewrite main to point to the initial snapshot
+    table.manageSnapshots()
+        .createBranch("branch", table.currentSnapshot().snapshotId())
+        .setMaxSnapshotAgeMs("branch", Long.MAX_VALUE)
+        .replaceBranch("main", initialSnapshotId)
+        .commit();
+
+    table.expireSnapshots()
+        .expireOlderThan(expireTimestampSnapshotB)
+        .cleanExpiredFiles(false)
+        .commit();
+
+    Assert.assertNotNull("Should not remove snapshot beneath a branch", table.snapshot(snapshotId));
+    Assert.assertEquals(3, table.ops().current().snapshots().size());
+  }
+
   // ToDo: Add tests which commit to branches once committing snapshots to a branch is supported
 
   @Test