Posted to commits@iceberg.apache.org by ao...@apache.org on 2021/03/29 22:06:18 UTC

[iceberg] branch 0.11.x updated (aba3881 -> 1c007f2)

This is an automated email from the ASF dual-hosted git repository.

aokolnychyi pushed a change to branch 0.11.x
in repository https://gitbox.apache.org/repos/asf/iceberg.git.


    from aba3881  Spark: Remove softValues for Spark 2 catalog cache (#2363)
     new 64e9715  Core: Don't delete data files on DROP if GC is disabled (#2367)
     new 1c007f2  Core: Support dynamic loading for HadoopFileIO (#2333)

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../main/java/org/apache/iceberg/CatalogUtil.java  | 11 ++++++++++-
 .../org/apache/iceberg/hadoop/HadoopFileIO.java    | 23 ++++++++++++++++++++--
 .../java/org/apache/iceberg/TestCatalogUtil.java   | 10 ++++++++++
 .../extensions/TestSnapshotTableProcedure.java     | 20 +++++++++++++++++++
 4 files changed, 61 insertions(+), 3 deletions(-)

[iceberg] 01/02: Core: Don't delete data files on DROP if GC is disabled (#2367)

Posted by ao...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

aokolnychyi pushed a commit to branch 0.11.x
in repository https://gitbox.apache.org/repos/asf/iceberg.git

commit 64e97153c85823a371c30d38bdcc0867497bacc8
Author: Anton Okolnychyi <ao...@apple.com>
AuthorDate: Wed Mar 24 17:25:14 2021 -0700

    Core: Don't delete data files on DROP if GC is disabled (#2367)
---
 .../main/java/org/apache/iceberg/CatalogUtil.java    | 11 ++++++++++-
 .../spark/extensions/TestSnapshotTableProcedure.java | 20 ++++++++++++++++++++
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/core/src/main/java/org/apache/iceberg/CatalogUtil.java b/core/src/main/java/org/apache/iceberg/CatalogUtil.java
index 776cc2f..cb6a0d8 100644
--- a/core/src/main/java/org/apache/iceberg/CatalogUtil.java
+++ b/core/src/main/java/org/apache/iceberg/CatalogUtil.java
@@ -34,11 +34,15 @@ import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
 import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
 import org.apache.iceberg.relocated.com.google.common.collect.MapMaker;
 import org.apache.iceberg.relocated.com.google.common.collect.Sets;
+import org.apache.iceberg.util.PropertyUtil;
 import org.apache.iceberg.util.Tasks;
 import org.apache.iceberg.util.ThreadPools;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static org.apache.iceberg.TableProperties.GC_ENABLED;
+import static org.apache.iceberg.TableProperties.GC_ENABLED_DEFAULT;
+
 public class CatalogUtil {
   private static final Logger LOG = LoggerFactory.getLogger(CatalogUtil.class);
   public static final String ICEBERG_CATALOG_TYPE = "type";
@@ -78,7 +82,12 @@ public class CatalogUtil {
 
     // run all of the deletes
 
-    deleteFiles(io, manifestsToDelete);
+    boolean gcEnabled = PropertyUtil.propertyAsBoolean(metadata.properties(), GC_ENABLED, GC_ENABLED_DEFAULT);
+
+    if (gcEnabled) {
+      // delete data files only if we are sure this won't corrupt other tables
+      deleteFiles(io, manifestsToDelete);
+    }
 
     Tasks.foreach(Iterables.transform(manifestsToDelete, ManifestFile::path))
         .noRetry().suppressFailureWhenFinished()
diff --git a/spark3-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestSnapshotTableProcedure.java b/spark3-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestSnapshotTableProcedure.java
index eced46c..cbfa2d5 100644
--- a/spark3-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestSnapshotTableProcedure.java
+++ b/spark3-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestSnapshotTableProcedure.java
@@ -119,6 +119,26 @@ public class TestSnapshotTableProcedure extends SparkExtensionsTestBase {
   }
 
   @Test
+  public void testDropTable() throws IOException {
+    String location = temp.newFolder().toString();
+    sql("CREATE TABLE %s (id bigint NOT NULL, data string) USING parquet LOCATION '%s'", sourceName, location);
+    sql("INSERT INTO TABLE %s VALUES (1, 'a')", sourceName);
+
+    Object result = scalarSql("CALL %s.system.snapshot('%s', '%s')", catalogName, sourceName, tableName);
+    Assert.assertEquals("Should have added one file", 1L, result);
+
+    assertEquals("Should have expected rows",
+        ImmutableList.of(row(1L, "a")),
+        sql("SELECT * FROM %s", tableName));
+
+    sql("DROP TABLE %s", tableName);
+
+    assertEquals("Source table should be intact",
+        ImmutableList.of(row(1L, "a")),
+        sql("SELECT * FROM %s", sourceName));
+  }
+
+  @Test
   public void testInvalidSnapshotsCases() throws IOException {
     String location = temp.newFolder().toString();
     sql("CREATE TABLE %s (id bigint NOT NULL, data string) USING parquet LOCATION '%s'", sourceName, location);

[iceberg] 02/02: Core: Support dynamic loading for HadoopFileIO (#2333)

Posted by ao...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

aokolnychyi pushed a commit to branch 0.11.x
in repository https://gitbox.apache.org/repos/asf/iceberg.git

commit 1c007f29a2b2f4c55d98ab5e809b0d24f05c0396
Author: Jack Ye <yz...@amazon.com>
AuthorDate: Fri Mar 26 14:28:30 2021 -0700

    Core: Support dynamic loading for HadoopFileIO (#2333)
---
 .../org/apache/iceberg/hadoop/HadoopFileIO.java    | 23 ++++++++++++++++++++--
 .../java/org/apache/iceberg/TestCatalogUtil.java   | 10 ++++++++++
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/core/src/main/java/org/apache/iceberg/hadoop/HadoopFileIO.java b/core/src/main/java/org/apache/iceberg/hadoop/HadoopFileIO.java
index fbaa75d..34d66bf 100644
--- a/core/src/main/java/org/apache/iceberg/hadoop/HadoopFileIO.java
+++ b/core/src/main/java/org/apache/iceberg/hadoop/HadoopFileIO.java
@@ -20,6 +20,7 @@
 package org.apache.iceberg.hadoop;
 
 import java.io.IOException;
+import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -29,9 +30,17 @@ import org.apache.iceberg.io.InputFile;
 import org.apache.iceberg.io.OutputFile;
 import org.apache.iceberg.util.SerializableSupplier;
 
-public class HadoopFileIO implements FileIO {
+public class HadoopFileIO implements FileIO, Configurable {
 
-  private final SerializableSupplier<Configuration> hadoopConf;
+  private SerializableSupplier<Configuration> hadoopConf;
+
+  /**
+   * Constructor used for dynamic FileIO loading.
+   * <p>
+   * {@link Configuration Hadoop configuration} must be set through {@link HadoopFileIO#setConf(Configuration)}
+   */
+  public HadoopFileIO() {
+  }
 
   public HadoopFileIO(Configuration hadoopConf) {
     this(new SerializableConfiguration(hadoopConf)::get);
@@ -65,4 +74,14 @@ public class HadoopFileIO implements FileIO {
       throw new RuntimeIOException(e, "Failed to delete file: %s", path);
     }
   }
+
+  @Override
+  public void setConf(Configuration conf) {
+    this.hadoopConf = new SerializableConfiguration(conf)::get;
+  }
+
+  @Override
+  public Configuration getConf() {
+    return hadoopConf.get();
+  }
 }
diff --git a/core/src/test/java/org/apache/iceberg/TestCatalogUtil.java b/core/src/test/java/org/apache/iceberg/TestCatalogUtil.java
index 37d66c5..c2487eb 100644
--- a/core/src/test/java/org/apache/iceberg/TestCatalogUtil.java
+++ b/core/src/test/java/org/apache/iceberg/TestCatalogUtil.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.iceberg.catalog.Catalog;
 import org.apache.iceberg.catalog.Namespace;
 import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.hadoop.HadoopFileIO;
 import org.apache.iceberg.io.FileIO;
 import org.apache.iceberg.io.InputFile;
 import org.apache.iceberg.io.OutputFile;
@@ -124,6 +125,15 @@ public class TestCatalogUtil {
   }
 
   @Test
+  public void loadCustomFileIO_hadoopConfigConstructor() {
+    Configuration configuration = new Configuration();
+    configuration.set("key", "val");
+    FileIO fileIO = CatalogUtil.loadFileIO(HadoopFileIO.class.getName(), Maps.newHashMap(), configuration);
+    Assert.assertTrue(fileIO instanceof HadoopFileIO);
+    Assert.assertEquals("val", ((HadoopFileIO) fileIO).conf().get("key"));
+  }
+
+  @Test
   public void loadCustomFileIO_configurable() {
     Configuration configuration = new Configuration();
     configuration.set("key", "val");
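
To round out the second change, a minimal sketch (assuming the loadFileIO signature
shown in the test above) of dynamically instantiating HadoopFileIO by class name.
loadFileIO reflects the new no-arg constructor, then passes the Hadoop Configuration
through setConf because HadoopFileIO now implements Configurable.

    import java.util.Collections;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.iceberg.CatalogUtil;
    import org.apache.iceberg.io.FileIO;

    public class DynamicFileIOExample {
      public static FileIO load() {
        Configuration conf = new Configuration();
        conf.set("key", "val");
        // Instantiate by class name rather than calling the constructor directly;
        // this is the path catalogs take when an io-impl is configured.
        return CatalogUtil.loadFileIO(
            "org.apache.iceberg.hadoop.HadoopFileIO", Collections.emptyMap(), conf);
      }
    }

Making the hadoopConf field non-final and settable after construction is what allows
the same reflection-based loading path used for custom FileIO implementations to work
for the built-in Hadoop one.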