You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by ao...@apache.org on 2021/03/29 22:06:19 UTC

[iceberg] 01/02: Core: Don't delete data files on DROP if GC is disabled (#2367)

This is an automated email from the ASF dual-hosted git repository.

aokolnychyi pushed a commit to branch 0.11.x
in repository https://gitbox.apache.org/repos/asf/iceberg.git

commit 64e97153c85823a371c30d38bdcc0867497bacc8
Author: Anton Okolnychyi <ao...@apple.com>
AuthorDate: Wed Mar 24 17:25:14 2021 -0700

    Core: Don't delete data files on DROP if GC is disabled (#2367)
---
 .../main/java/org/apache/iceberg/CatalogUtil.java    | 11 ++++++++++-
 .../spark/extensions/TestSnapshotTableProcedure.java | 20 ++++++++++++++++++++
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/core/src/main/java/org/apache/iceberg/CatalogUtil.java b/core/src/main/java/org/apache/iceberg/CatalogUtil.java
index 776cc2f..cb6a0d8 100644
--- a/core/src/main/java/org/apache/iceberg/CatalogUtil.java
+++ b/core/src/main/java/org/apache/iceberg/CatalogUtil.java
@@ -34,11 +34,15 @@ import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
 import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
 import org.apache.iceberg.relocated.com.google.common.collect.MapMaker;
 import org.apache.iceberg.relocated.com.google.common.collect.Sets;
+import org.apache.iceberg.util.PropertyUtil;
 import org.apache.iceberg.util.Tasks;
 import org.apache.iceberg.util.ThreadPools;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static org.apache.iceberg.TableProperties.GC_ENABLED;
+import static org.apache.iceberg.TableProperties.GC_ENABLED_DEFAULT;
+
 public class CatalogUtil {
   private static final Logger LOG = LoggerFactory.getLogger(CatalogUtil.class);
   public static final String ICEBERG_CATALOG_TYPE = "type";
@@ -78,7 +82,12 @@ public class CatalogUtil {
 
     // run all of the deletes
 
-    deleteFiles(io, manifestsToDelete);
+    boolean gcEnabled = PropertyUtil.propertyAsBoolean(metadata.properties(), GC_ENABLED, GC_ENABLED_DEFAULT);
+
+    if (gcEnabled) {
+      // delete data files only if we are sure this won't corrupt other tables
+      deleteFiles(io, manifestsToDelete);
+    }
 
     Tasks.foreach(Iterables.transform(manifestsToDelete, ManifestFile::path))
         .noRetry().suppressFailureWhenFinished()
diff --git a/spark3-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestSnapshotTableProcedure.java b/spark3-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestSnapshotTableProcedure.java
index eced46c..cbfa2d5 100644
--- a/spark3-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestSnapshotTableProcedure.java
+++ b/spark3-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestSnapshotTableProcedure.java
@@ -119,6 +119,26 @@ public class TestSnapshotTableProcedure extends SparkExtensionsTestBase {
   }
 
   @Test
+  public void testDropTable() throws IOException {
+    String location = temp.newFolder().toString();
+    sql("CREATE TABLE %s (id bigint NOT NULL, data string) USING parquet LOCATION '%s'", sourceName, location);
+    sql("INSERT INTO TABLE %s VALUES (1, 'a')", sourceName);
+
+    Object result = scalarSql("CALL %s.system.snapshot('%s', '%s')", catalogName, sourceName, tableName);
+    Assert.assertEquals("Should have added one file", 1L, result);
+
+    assertEquals("Should have expected rows",
+        ImmutableList.of(row(1L, "a")),
+        sql("SELECT * FROM %s", tableName));
+
+    sql("DROP TABLE %s", tableName);
+
+    assertEquals("Source table should be intact",
+        ImmutableList.of(row(1L, "a")),
+        sql("SELECT * FROM %s", sourceName));
+  }
+
+  @Test
   public void testInvalidSnapshotsCases() throws IOException {
     String location = temp.newFolder().toString();
     sql("CREATE TABLE %s (id bigint NOT NULL, data string) USING parquet LOCATION '%s'", sourceName, location);