You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2020/04/23 23:28:58 UTC

[incubator-iceberg] branch master updated: Spark: Default staging location in RewriteManifestsAction to metadata location (#959)

This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new 57c56d3  Spark: Default staging location in RewriteManifestsAction to metadata location (#959)
57c56d3 is described below

commit 57c56d3375db3572e9c98ca5c76009fb12965a1d
Author: Anton Okolnychyi <ao...@apple.com>
AuthorDate: Thu Apr 23 16:28:48 2020 -0700

    Spark: Default staging location in RewriteManifestsAction to metadata location (#959)
---
 .../apache/iceberg/actions/RewriteManifestsAction.java  | 17 +++++++++++------
 .../org/apache/iceberg/TestRewriteManifestsAction.java  |  2 --
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/spark/src/main/java/org/apache/iceberg/actions/RewriteManifestsAction.java b/spark/src/main/java/org/apache/iceberg/actions/RewriteManifestsAction.java
index b381746..b726fde 100644
--- a/spark/src/main/java/org/apache/iceberg/actions/RewriteManifestsAction.java
+++ b/spark/src/main/java/org/apache/iceberg/actions/RewriteManifestsAction.java
@@ -33,6 +33,7 @@ import java.util.stream.Collectors;
 import org.apache.hadoop.fs.Path;
 import org.apache.iceberg.DataFile;
 import org.apache.iceberg.FileFormat;
+import org.apache.iceberg.HasTableOperations;
 import org.apache.iceberg.ManifestFile;
 import org.apache.iceberg.ManifestFiles;
 import org.apache.iceberg.ManifestWriter;
@@ -41,6 +42,7 @@ import org.apache.iceberg.PartitionSpec;
 import org.apache.iceberg.RewriteManifests;
 import org.apache.iceberg.Snapshot;
 import org.apache.iceberg.Table;
+import org.apache.iceberg.TableOperations;
 import org.apache.iceberg.TableProperties;
 import org.apache.iceberg.exceptions.ValidationException;
 import org.apache.iceberg.hadoop.HadoopFileIO;
@@ -69,10 +71,10 @@ import org.slf4j.LoggerFactory;
 /**
  * An action that rewrites manifests in a distributed manner and co-locates metadata for partitions.
  * <p>
- * By default, this action rewrites all manifests for the current partition spec. The behavior can
- * be modified by passing a custom predicate to {@link #rewriteIf(Predicate)} and a custom spec id
- * to {@link #specId(int)}. In addition, this action requires a staging location for new manifests
- * that should be configured via {@link #stagingLocation}.
+ * By default, this action rewrites all manifests for the current partition spec and writes the result
+ * to the metadata folder. The behavior can be modified by passing a custom predicate to {@link #rewriteIf(Predicate)}
+ * and a custom spec id to {@link #specId(int)}. In addition, there is a way to configure a custom location
+ * for new manifests via {@link #stagingLocation}.
  */
 public class RewriteManifestsAction
     extends BaseSnapshotUpdateAction<RewriteManifestsAction, RewriteManifestsActionResult> {
@@ -109,6 +111,11 @@ public class RewriteManifestsAction
     } else {
       this.fileIO = table.io();
     }
+
+    // default the staging location to the metadata location
+    TableOperations ops = ((HasTableOperations) table).operations();
+    Path metadataFilePath = new Path(ops.metadataFileLocation("file"));
+    this.stagingLocation = metadataFilePath.getParent().toString();
   }
 
   @Override
@@ -162,8 +169,6 @@ public class RewriteManifestsAction
 
   @Override
   public RewriteManifestsActionResult execute() {
-    Preconditions.checkArgument(stagingLocation != null, "Staging location must be set");
-
     List<ManifestFile> matchingManifests = findMatchingManifests();
     if (matchingManifests.isEmpty()) {
       return RewriteManifestsActionResult.empty();
diff --git a/spark/src/test/java/org/apache/iceberg/TestRewriteManifestsAction.java b/spark/src/test/java/org/apache/iceberg/TestRewriteManifestsAction.java
index df35590..0554d46 100644
--- a/spark/src/test/java/org/apache/iceberg/TestRewriteManifestsAction.java
+++ b/spark/src/test/java/org/apache/iceberg/TestRewriteManifestsAction.java
@@ -146,7 +146,6 @@ public class TestRewriteManifestsAction {
 
     RewriteManifestsActionResult result = actions.rewriteManifests()
         .rewriteIf(manifest -> true)
-        .stagingLocation(temp.newFolder().toString())
         .execute();
 
     Assert.assertEquals("Action should rewrite 2 manifests", 2, result.deletedManifests().size());
@@ -224,7 +223,6 @@ public class TestRewriteManifestsAction {
 
     RewriteManifestsActionResult result = actions.rewriteManifests()
         .rewriteIf(manifest -> true)
-        .stagingLocation(temp.newFolder().toString())
         .execute();
 
     Assert.assertEquals("Action should rewrite 4 manifests", 4, result.deletedManifests().size());