You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by ao...@apache.org on 2021/03/29 22:06:19 UTC
[iceberg] 01/02: Core: Don't delete data files on DROP if GC is disabled (#2367)
This is an automated email from the ASF dual-hosted git repository.
aokolnychyi pushed a commit to branch 0.11.x
in repository https://gitbox.apache.org/repos/asf/iceberg.git
commit 64e97153c85823a371c30d38bdcc0867497bacc8
Author: Anton Okolnychyi <ao...@apple.com>
AuthorDate: Wed Mar 24 17:25:14 2021 -0700
Core: Don't delete data files on DROP if GC is disabled (#2367)
---
.../main/java/org/apache/iceberg/CatalogUtil.java | 11 ++++++++++-
.../spark/extensions/TestSnapshotTableProcedure.java | 20 ++++++++++++++++++++
2 files changed, 30 insertions(+), 1 deletion(-)
diff --git a/core/src/main/java/org/apache/iceberg/CatalogUtil.java b/core/src/main/java/org/apache/iceberg/CatalogUtil.java
index 776cc2f..cb6a0d8 100644
--- a/core/src/main/java/org/apache/iceberg/CatalogUtil.java
+++ b/core/src/main/java/org/apache/iceberg/CatalogUtil.java
@@ -34,11 +34,15 @@ import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
import org.apache.iceberg.relocated.com.google.common.collect.MapMaker;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
+import org.apache.iceberg.util.PropertyUtil;
import org.apache.iceberg.util.Tasks;
import org.apache.iceberg.util.ThreadPools;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import static org.apache.iceberg.TableProperties.GC_ENABLED;
+import static org.apache.iceberg.TableProperties.GC_ENABLED_DEFAULT;
+
public class CatalogUtil {
private static final Logger LOG = LoggerFactory.getLogger(CatalogUtil.class);
public static final String ICEBERG_CATALOG_TYPE = "type";
@@ -78,7 +82,12 @@ public class CatalogUtil {
// run all of the deletes
- deleteFiles(io, manifestsToDelete);
+ boolean gcEnabled = PropertyUtil.propertyAsBoolean(metadata.properties(), GC_ENABLED, GC_ENABLED_DEFAULT);
+
+ if (gcEnabled) {
+ // delete data files only if we are sure this won't corrupt other tables
+ deleteFiles(io, manifestsToDelete);
+ }
Tasks.foreach(Iterables.transform(manifestsToDelete, ManifestFile::path))
.noRetry().suppressFailureWhenFinished()
diff --git a/spark3-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestSnapshotTableProcedure.java b/spark3-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestSnapshotTableProcedure.java
index eced46c..cbfa2d5 100644
--- a/spark3-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestSnapshotTableProcedure.java
+++ b/spark3-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestSnapshotTableProcedure.java
@@ -119,6 +119,26 @@ public class TestSnapshotTableProcedure extends SparkExtensionsTestBase {
}
@Test
+ public void testDropTable() throws IOException {
+ String location = temp.newFolder().toString();
+ sql("CREATE TABLE %s (id bigint NOT NULL, data string) USING parquet LOCATION '%s'", sourceName, location);
+ sql("INSERT INTO TABLE %s VALUES (1, 'a')", sourceName);
+
+ Object result = scalarSql("CALL %s.system.snapshot('%s', '%s')", catalogName, sourceName, tableName);
+ Assert.assertEquals("Should have added one file", 1L, result);
+
+ assertEquals("Should have expected rows",
+ ImmutableList.of(row(1L, "a")),
+ sql("SELECT * FROM %s", tableName));
+
+ sql("DROP TABLE %s", tableName);
+
+ assertEquals("Source table should be intact",
+ ImmutableList.of(row(1L, "a")),
+ sql("SELECT * FROM %s", sourceName));
+ }
+
+ @Test
public void testInvalidSnapshotsCases() throws IOException {
String location = temp.newFolder().toString();
sql("CREATE TABLE %s (id bigint NOT NULL, data string) USING parquet LOCATION '%s'", sourceName, location);