You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2020/04/23 23:28:58 UTC
[incubator-iceberg] branch master updated: Spark: Default staging location in RewriteManifestsAction to metadata location (#959)
This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new 57c56d3 Spark: Default staging location in RewriteManifestsAction to metadata location (#959)
57c56d3 is described below
commit 57c56d3375db3572e9c98ca5c76009fb12965a1d
Author: Anton Okolnychyi <ao...@apple.com>
AuthorDate: Thu Apr 23 16:28:48 2020 -0700
Spark: Default staging location in RewriteManifestsAction to metadata location (#959)
---
.../apache/iceberg/actions/RewriteManifestsAction.java | 17 +++++++++++------
.../org/apache/iceberg/TestRewriteManifestsAction.java | 2 --
2 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/spark/src/main/java/org/apache/iceberg/actions/RewriteManifestsAction.java b/spark/src/main/java/org/apache/iceberg/actions/RewriteManifestsAction.java
index b381746..b726fde 100644
--- a/spark/src/main/java/org/apache/iceberg/actions/RewriteManifestsAction.java
+++ b/spark/src/main/java/org/apache/iceberg/actions/RewriteManifestsAction.java
@@ -33,6 +33,7 @@ import java.util.stream.Collectors;
import org.apache.hadoop.fs.Path;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.FileFormat;
+import org.apache.iceberg.HasTableOperations;
import org.apache.iceberg.ManifestFile;
import org.apache.iceberg.ManifestFiles;
import org.apache.iceberg.ManifestWriter;
@@ -41,6 +42,7 @@ import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.RewriteManifests;
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.Table;
+import org.apache.iceberg.TableOperations;
import org.apache.iceberg.TableProperties;
import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.hadoop.HadoopFileIO;
@@ -69,10 +71,10 @@ import org.slf4j.LoggerFactory;
/**
* An action that rewrites manifests in a distributed manner and co-locates metadata for partitions.
* <p>
- * By default, this action rewrites all manifests for the current partition spec. The behavior can
- * be modified by passing a custom predicate to {@link #rewriteIf(Predicate)} and a custom spec id
- * to {@link #specId(int)}. In addition, this action requires a staging location for new manifests
- * that should be configured via {@link #stagingLocation}.
+ * By default, this action rewrites all manifests for the current partition spec and writes the result
+ * to the metadata folder. The behavior can be modified by passing a custom predicate to {@link #rewriteIf(Predicate)}
+ * and a custom spec id to {@link #specId(int)}. In addition, there is a way to configure a custom location
+ * for new manifests via {@link #stagingLocation}.
*/
public class RewriteManifestsAction
extends BaseSnapshotUpdateAction<RewriteManifestsAction, RewriteManifestsActionResult> {
@@ -109,6 +111,11 @@ public class RewriteManifestsAction
} else {
this.fileIO = table.io();
}
+
+ // default the staging location to the metadata location
+ TableOperations ops = ((HasTableOperations) table).operations();
+ Path metadataFilePath = new Path(ops.metadataFileLocation("file"));
+ this.stagingLocation = metadataFilePath.getParent().toString();
}
@Override
@@ -162,8 +169,6 @@ public class RewriteManifestsAction
@Override
public RewriteManifestsActionResult execute() {
- Preconditions.checkArgument(stagingLocation != null, "Staging location must be set");
-
List<ManifestFile> matchingManifests = findMatchingManifests();
if (matchingManifests.isEmpty()) {
return RewriteManifestsActionResult.empty();
diff --git a/spark/src/test/java/org/apache/iceberg/TestRewriteManifestsAction.java b/spark/src/test/java/org/apache/iceberg/TestRewriteManifestsAction.java
index df35590..0554d46 100644
--- a/spark/src/test/java/org/apache/iceberg/TestRewriteManifestsAction.java
+++ b/spark/src/test/java/org/apache/iceberg/TestRewriteManifestsAction.java
@@ -146,7 +146,6 @@ public class TestRewriteManifestsAction {
RewriteManifestsActionResult result = actions.rewriteManifests()
.rewriteIf(manifest -> true)
- .stagingLocation(temp.newFolder().toString())
.execute();
Assert.assertEquals("Action should rewrite 2 manifests", 2, result.deletedManifests().size());
@@ -224,7 +223,6 @@ public class TestRewriteManifestsAction {
RewriteManifestsActionResult result = actions.rewriteManifests()
.rewriteIf(manifest -> true)
- .stagingLocation(temp.newFolder().toString())
.execute();
Assert.assertEquals("Action should rewrite 4 manifests", 4, result.deletedManifests().size());