You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by st...@apache.org on 2020/09/10 16:07:33 UTC
[hadoop] branch trunk updated: HADOOP-17244. S3A directory delete
tombstones dir markers prematurely. (#2280)
This is an automated email from the ASF dual-hosted git repository.
stevel pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
new 9960c01 HADOOP-17244. S3A directory delete tombstones dir markers prematurely. (#2280)
9960c01 is described below
commit 9960c01a25c6025e81559a8cf32e9f3cea70d2cc
Author: Steve Loughran <st...@cloudera.com>
AuthorDate: Thu Sep 10 17:03:52 2020 +0100
HADOOP-17244. S3A directory delete tombstones dir markers prematurely. (#2280)
This changes directory tree deletion so that only files are incrementally deleted
from S3Guard after the objects are deleted; the directories are left alone
until metadataStore.deleteSubtree(path) is invoked.
This avoids directory tombstones being added above files/child directories,
which stop the treewalk and delete phase from working.
Also:
* Callback to delete objects splits files and dirs so that
any problems deleting the dirs don't trigger S3Guard updates
* New statistic to measure #of objects deleted, alongside request count.
* Callback listFilesAndEmptyDirectories renamed listFilesAndDirectoryMarkers
to clarify behavior.
* Test enhancements to replicate the failure and verify the fix
Contributed by Steve Loughran
---
.../org/apache/hadoop/fs/AbstractFileSystem.java | 7 +-
.../fs/FileContextMainOperationsBaseTest.java | 23 ++++-
.../org/apache/hadoop/fs/s3a/S3AFileSystem.java | 16 +++-
.../apache/hadoop/fs/s3a/S3AInstrumentation.java | 1 +
.../java/org/apache/hadoop/fs/s3a/Statistic.java | 2 +
.../apache/hadoop/fs/s3a/impl/DeleteOperation.java | 97 ++++++++++++++++++----
.../hadoop/fs/s3a/impl/OperationCallbacks.java | 4 +-
.../apache/hadoop/fs/s3a/impl/RenameOperation.java | 3 +-
.../fs/s3a/s3guard/DynamoDBMetadataStore.java | 2 +-
.../hadoop/fs/s3a/ITestS3GuardEmptyDirs.java | 24 ++++++
.../fs/s3a/impl/TestPartialDeleteFailures.java | 2 +-
.../fs/s3a/performance/AbstractS3ACostTest.java | 1 +
.../fs/s3a/performance/ITestS3ADeleteCost.java | 49 ++++++++++-
13 files changed, 201 insertions(+), 30 deletions(-)
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java
index ec346b4..785e783 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java
@@ -773,8 +773,13 @@ public abstract class AbstractFileSystem implements PathCapabilities {
if (dstStatus.isDirectory()) {
RemoteIterator<FileStatus> list = listStatusIterator(dst);
if (list != null && list.hasNext()) {
+ FileStatus child = list.next();
+ LOG.debug("Rename {}, {} failing as destination has"
+ + " at least one child: {}",
+ src, dst, child);
throw new IOException(
- "Rename cannot overwrite non empty destination directory " + dst);
+ "Rename cannot overwrite non empty destination directory " + dst
+ + " containing child: " + child.getPath());
}
}
delete(dst, false);
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextMainOperationsBaseTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextMainOperationsBaseTest.java
index 4c90490..970e7bb 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextMainOperationsBaseTest.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextMainOperationsBaseTest.java
@@ -1306,14 +1306,33 @@ public abstract class FileContextMainOperationsBaseTest {
protected void rename(Path src, Path dst, boolean srcExists,
boolean dstExists, Rename... options) throws IOException {
+ IOException ioe = null;
try {
fc.rename(src, dst, options);
- } finally {
+ } catch (IOException ex) {
+ // lets not swallow this completely.
+ LOG.warn("Rename result: " + ex, ex);
+ ioe = ex;
+ }
+
+ // There's a bit of extra work in these assertions, as if they fail
+ // any IOE caught earlier is added as the cause. This
+ // gets the likely root cause to the test report
+ try {
+ LOG.debug("Probing source and destination");
Assert.assertEquals("Source exists", srcExists, exists(fc, src));
Assert.assertEquals("Destination exists", dstExists, exists(fc, dst));
+ } catch (AssertionError e) {
+ if (ioe != null && e.getCause() == null) {
+ e.initCause(ioe);
+ }
+ throw e;
+ }
+ if (ioe != null) {
+ throw ioe;
}
}
-
+
private boolean containsPath(Path path, FileStatus[] filteredPaths)
throws IOException {
for(int i = 0; i < filteredPaths.length; i ++) {
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index 63c80bd..e8cfad2 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -1574,7 +1574,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
@Override
@Retries.RetryTranslated
- public RemoteIterator<S3ALocatedFileStatus> listFilesAndEmptyDirectories(
+ public RemoteIterator<S3ALocatedFileStatus> listFilesAndDirectoryMarkers(
final Path path,
final S3AFileStatus status,
final boolean collectTombstones,
@@ -2079,6 +2079,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
DELETE_CONSIDERED_IDEMPOTENT,
()-> {
incrementStatistic(OBJECT_DELETE_REQUESTS);
+ incrementStatistic(OBJECT_DELETE_OBJECTS);
s3.deleteObject(bucket, key);
return null;
});
@@ -2125,9 +2126,14 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
}
/**
- * Perform a bulk object delete operation.
+ * Perform a bulk object delete operation against S3; leaves S3Guard
+ * alone.
* Increments the {@code OBJECT_DELETE_REQUESTS} and write
- * operation statistics.
+ * operation statistics
+ * <p></p>
+ * {@code OBJECT_DELETE_OBJECTS} is updated with the actual number
+ * of objects deleted in the request.
+ * <p></p>
* Retry policy: retry untranslated; delete considered idempotent.
* If the request is throttled, this is logged in the throttle statistics,
* with the counter set to the number of keys, rather than the number
@@ -2148,9 +2154,10 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
incrementWriteOperations();
BulkDeleteRetryHandler retryHandler =
new BulkDeleteRetryHandler(createStoreContext());
+ int keyCount = deleteRequest.getKeys().size();
try(DurationInfo ignored =
new DurationInfo(LOG, false, "DELETE %d keys",
- deleteRequest.getKeys().size())) {
+ keyCount)) {
return invoker.retryUntranslated("delete",
DELETE_CONSIDERED_IDEMPOTENT,
(text, e, r, i) -> {
@@ -2159,6 +2166,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
},
() -> {
incrementStatistic(OBJECT_DELETE_REQUESTS, 1);
+ incrementStatistic(OBJECT_DELETE_OBJECTS, keyCount);
return s3.deleteObjects(deleteRequest);
});
} catch (MultiObjectDeleteException e) {
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java
index cb0a434..f491a12 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java
@@ -156,6 +156,7 @@ public class S3AInstrumentation implements Closeable, MetricsSource {
INVOCATION_RENAME,
OBJECT_COPY_REQUESTS,
OBJECT_DELETE_REQUESTS,
+ OBJECT_DELETE_OBJECTS,
OBJECT_LIST_REQUESTS,
OBJECT_CONTINUE_LIST_REQUESTS,
OBJECT_METADATA_REQUESTS,
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
index 8153169..1addfbe 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
@@ -85,6 +85,8 @@ public enum Statistic {
"Calls of rename()"),
OBJECT_COPY_REQUESTS("object_copy_requests", "Object copy requests"),
OBJECT_DELETE_REQUESTS("object_delete_requests", "Object delete requests"),
+ OBJECT_DELETE_OBJECTS("object_delete_objects",
+ "Objects deleted in delete requests"),
OBJECT_LIST_REQUESTS("object_list_requests",
"Number of object listings made"),
OBJECT_CONTINUE_LIST_REQUESTS("object_continue_list_requests",
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DeleteOperation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DeleteOperation.java
index daf93d9..9b05ff4 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DeleteOperation.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DeleteOperation.java
@@ -23,6 +23,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CompletableFuture;
+import java.util.stream.Collectors;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
import com.amazonaws.services.s3.model.DeleteObjectsResult;
@@ -152,10 +153,13 @@ public class DeleteOperation extends ExecutingStoreOperation<Boolean> {
/**
* List of keys built up for the next delete batch.
*/
- private List<DeleteObjectsRequest.KeyVersion> keys;
+ private List<DeleteEntry> keys;
/**
- * List of paths built up for deletion.
+ * List of paths built up for incremental deletion on tree delete.
+ * At the end of the entire delete the full tree is scanned in S3Guard
+ * and tombstones added. For this reason this list of paths <i>must not</i>
+ * include directory markers, as that will break the scan.
*/
private List<Path> paths;
@@ -323,7 +327,7 @@ public class DeleteOperation extends ExecutingStoreOperation<Boolean> {
// list files including any under tombstones through S3Guard
LOG.debug("Getting objects for directory prefix {} to delete", dirKey);
final RemoteIterator<S3ALocatedFileStatus> locatedFiles =
- callbacks.listFilesAndEmptyDirectories(path, status,
+ callbacks.listFilesAndDirectoryMarkers(path, status,
false, true);
// iterate through and delete. The next() call will block when a new S3
@@ -359,7 +363,10 @@ public class DeleteOperation extends ExecutingStoreOperation<Boolean> {
while (objects.hasNext()) {
// get the next entry in the listing.
extraFilesDeleted++;
- queueForDeletion(deletionKey(objects.next()), null);
+ S3AFileStatus next = objects.next();
+ LOG.debug("Found Unlisted entry {}", next);
+ queueForDeletion(deletionKey(next), null,
+ next.isDirectory());
}
if (extraFilesDeleted > 0) {
LOG.debug("Raw S3 Scan found {} extra file(s) to delete",
@@ -402,7 +409,7 @@ public class DeleteOperation extends ExecutingStoreOperation<Boolean> {
*/
private void queueForDeletion(
final S3AFileStatus stat) throws IOException {
- queueForDeletion(deletionKey(stat), stat.getPath());
+ queueForDeletion(deletionKey(stat), stat.getPath(), stat.isDirectory());
}
/**
@@ -413,14 +420,18 @@ public class DeleteOperation extends ExecutingStoreOperation<Boolean> {
*
* @param key key to delete
* @param deletePath nullable path of the key
+ * @param isDirMarker is the entry a directory?
* @throws IOException failure of the previous batch of deletions.
*/
private void queueForDeletion(final String key,
- @Nullable final Path deletePath) throws IOException {
+ @Nullable final Path deletePath,
+ boolean isDirMarker) throws IOException {
LOG.debug("Adding object to delete: \"{}\"", key);
- keys.add(new DeleteObjectsRequest.KeyVersion(key));
+ keys.add(new DeleteEntry(key, isDirMarker));
if (deletePath != null) {
- paths.add(deletePath);
+ if (!isDirMarker) {
+ paths.add(deletePath);
+ }
}
if (keys.size() == pageSize) {
@@ -484,7 +495,7 @@ public class DeleteOperation extends ExecutingStoreOperation<Boolean> {
* @return the submitted future or null
*/
private CompletableFuture<Void> submitDelete(
- final List<DeleteObjectsRequest.KeyVersion> keyList,
+ final List<DeleteEntry> keyList,
final List<Path> pathList) {
if (keyList.isEmpty() && pathList.isEmpty()) {
@@ -514,31 +525,59 @@ public class DeleteOperation extends ExecutingStoreOperation<Boolean> {
@Retries.RetryTranslated
private void asyncDeleteAction(
final BulkOperationState state,
- final List<DeleteObjectsRequest.KeyVersion> keyList,
+ final List<DeleteEntry> keyList,
final List<Path> pathList,
final boolean auditDeletedKeys)
throws IOException {
+ List<DeleteObjectsResult.DeletedObject> deletedObjects = new ArrayList<>();
try (DurationInfo ignored =
new DurationInfo(LOG, false, "Delete page of keys")) {
DeleteObjectsResult result = null;
List<Path> undeletedObjects = new ArrayList<>();
if (!keyList.isEmpty()) {
- result = Invoker.once("Remove S3 Keys",
+ // first delete the files.
+ List<DeleteObjectsRequest.KeyVersion> files = keyList.stream()
+ .filter(e -> !e.isDirMarker)
+ .map(e -> e.keyVersion)
+ .collect(Collectors.toList());
+ result = Invoker.once("Remove S3 Files",
status.getPath().toString(),
() -> callbacks.removeKeys(
- keyList,
+ files,
false,
undeletedObjects,
state,
!auditDeletedKeys));
+ if (result != null) {
+ deletedObjects.addAll(result.getDeletedObjects());
+ }
+ // now the dirs
+ List<DeleteObjectsRequest.KeyVersion> dirs = keyList.stream()
+ .filter(e -> e.isDirMarker)
+ .map(e -> e.keyVersion)
+ .collect(Collectors.toList());
+ // This is invoked with deleteFakeDir = true, so
+ // S3Guard is not updated.
+ result = Invoker.once("Remove S3 Dir Markers",
+ status.getPath().toString(),
+ () -> callbacks.removeKeys(
+ dirs,
+ true,
+ undeletedObjects,
+ state,
+ !auditDeletedKeys));
+ if (result != null) {
+ deletedObjects.addAll(result.getDeletedObjects());
+ }
}
if (!pathList.isEmpty()) {
+ // delete file paths only. This stops tombstones
+ // being added until the final directory cleanup
+ // (HADOOP-17244)
metadataStore.deletePaths(pathList, state);
}
- if (auditDeletedKeys && result != null) {
+ if (auditDeletedKeys) {
// audit the deleted keys
- List<DeleteObjectsResult.DeletedObject> deletedObjects =
- result.getDeletedObjects();
if (deletedObjects.size() != keyList.size()) {
// size mismatch
LOG.warn("Size mismatch in deletion operation. "
@@ -549,7 +588,7 @@ public class DeleteOperation extends ExecutingStoreOperation<Boolean> {
for (DeleteObjectsResult.DeletedObject del : deletedObjects) {
keyList.removeIf(kv -> kv.getKey().equals(del.getKey()));
}
- for (DeleteObjectsRequest.KeyVersion kv : keyList) {
+ for (DeleteEntry kv : keyList) {
LOG.debug("{}", kv.getKey());
}
}
@@ -557,5 +596,31 @@ public class DeleteOperation extends ExecutingStoreOperation<Boolean> {
}
}
+ /**
+ * Deletion entry; dir marker state is tracked to control S3Guard
+ * update policy.
+ */
+ private static final class DeleteEntry {
+ private final DeleteObjectsRequest.KeyVersion keyVersion;
+
+ private final boolean isDirMarker;
+
+ private DeleteEntry(final String key, final boolean isDirMarker) {
+ this.keyVersion = new DeleteObjectsRequest.KeyVersion(key);
+ this.isDirMarker = isDirMarker;
+ }
+
+ public String getKey() {
+ return keyVersion.getKey();
+ }
+
+ @Override
+ public String toString() {
+ return "DeleteEntry{" +
+ "key='" + getKey() + '\'' +
+ ", isDirMarker=" + isDirMarker +
+ '}';
+ }
+ }
}
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java
index 0fcf645..3391097 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java
@@ -105,7 +105,7 @@ public interface OperationCallbacks {
throws IOException;
/**
- * Recursive list of files and empty directories.
+ * Recursive list of files and directory markers.
*
* @param path path to list from
* @param status optional status of path to list.
@@ -115,7 +115,7 @@ public interface OperationCallbacks {
* @throws IOException failure
*/
@Retries.RetryTranslated
- RemoteIterator<S3ALocatedFileStatus> listFilesAndEmptyDirectories(
+ RemoteIterator<S3ALocatedFileStatus> listFilesAndDirectoryMarkers(
Path path,
S3AFileStatus status,
boolean collectTombstones,
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RenameOperation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RenameOperation.java
index beb1950..615d014 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RenameOperation.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RenameOperation.java
@@ -400,6 +400,7 @@ Are * @throws IOException failure
destStatus.getPath());
// Although the dir marker policy doesn't always need to do this,
// it's simplest just to be consistent here.
+ // note: updates the metastore as well a S3.
callbacks.deleteObjectAtPath(destStatus.getPath(), dstKey, false, null);
}
@@ -411,7 +412,7 @@ Are * @throws IOException failure
false);
final RemoteIterator<S3ALocatedFileStatus> iterator =
- callbacks.listFilesAndEmptyDirectories(parentPath,
+ callbacks.listFilesAndDirectoryMarkers(parentPath,
sourceStatus,
true,
true);
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
index b131320..ac87456 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
@@ -717,7 +717,7 @@ public class DynamoDBMetadataStore implements MetadataStore,
public DDBPathMetadata get(Path path, boolean wantEmptyDirectoryFlag)
throws IOException {
checkPath(path);
- LOG.debug("Get from table {} in region {}: {}. wantEmptyDirectory={}",
+ LOG.debug("Get from table {} in region {}: {} ; wantEmptyDirectory={}",
tableName, region, path, wantEmptyDirectoryFlag);
DDBPathMetadata result = innerGet(path, wantEmptyDirectoryFlag);
LOG.debug("result of get {} is: {}", path, result);
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardEmptyDirs.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardEmptyDirs.java
index db3c2b6..41110b9 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardEmptyDirs.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardEmptyDirs.java
@@ -32,6 +32,7 @@ import com.amazonaws.services.s3.model.S3ObjectSummary;
import org.assertj.core.api.Assertions;
import org.junit.Test;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.impl.StatusProbeEnum;
import org.apache.hadoop.fs.s3a.impl.StoreContext;
@@ -286,4 +287,27 @@ public class ITestS3GuardEmptyDirs extends AbstractS3ATestBase {
s3.putObject(putObjectRequest);
}
+ @Test
+ public void testDirMarkerDelete() throws Throwable {
+ S3AFileSystem fs = getFileSystem();
+ assumeFilesystemHasMetadatastore(getFileSystem());
+ Path baseDir = methodPath();
+ Path subFile = new Path(baseDir, "subdir/file.txt");
+ // adds the s3guard entry
+ fs.mkdirs(baseDir);
+ touch(fs, subFile);
+ // PUT a marker
+ createEmptyObject(fs, fs.pathToKey(baseDir) + "/");
+ fs.delete(baseDir, true);
+ assertPathDoesNotExist("Should have been deleted", baseDir);
+
+ // now create the dir again
+ fs.mkdirs(baseDir);
+ FileStatus fileStatus = fs.getFileStatus(baseDir);
+ Assertions.assertThat(fileStatus)
+ .matches(FileStatus::isDirectory, "Not a directory");
+ Assertions.assertThat(fs.listStatus(baseDir))
+ .describedAs("listing of %s", baseDir)
+ .isEmpty();
+ }
}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestPartialDeleteFailures.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestPartialDeleteFailures.java
index 0729f2a..067d813 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestPartialDeleteFailures.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestPartialDeleteFailures.java
@@ -333,7 +333,7 @@ public class TestPartialDeleteFailures {
}
@Override
- public RemoteIterator<S3ALocatedFileStatus> listFilesAndEmptyDirectories(
+ public RemoteIterator<S3ALocatedFileStatus> listFilesAndDirectoryMarkers(
Path path,
S3AFileStatus status,
boolean collectTombstones,
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/AbstractS3ACostTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/AbstractS3ACostTest.java
index db0542d..abf3567 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/AbstractS3ACostTest.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/AbstractS3ACostTest.java
@@ -148,6 +148,7 @@ public class AbstractS3ACostTest extends AbstractS3ATestBase {
INVOCATION_COPY_FROM_LOCAL_FILE,
OBJECT_COPY_REQUESTS,
OBJECT_DELETE_REQUESTS,
+ OBJECT_DELETE_OBJECTS,
OBJECT_LIST_REQUESTS,
OBJECT_METADATA_REQUESTS,
OBJECT_PUT_BYTES,
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestS3ADeleteCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestS3ADeleteCost.java
index d3d976e..6ccfcdc 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestS3ADeleteCost.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestS3ADeleteCost.java
@@ -19,15 +19,18 @@
package org.apache.hadoop.fs.s3a.performance;
+import java.io.FileNotFoundException;
import java.util.Arrays;
import java.util.Collection;
+import org.assertj.core.api.Assertions;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
@@ -37,6 +40,7 @@ import org.apache.hadoop.fs.s3a.impl.StatusProbeEnum;
import static org.apache.hadoop.fs.s3a.Statistic.*;
import static org.apache.hadoop.fs.s3a.performance.OperationCost.*;
import static org.apache.hadoop.fs.s3a.performance.OperationCostValidator.probe;
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
/**
* Use metrics to assert about the cost of file API calls.
@@ -145,7 +149,7 @@ public class ITestS3ADeleteCost extends AbstractS3ACostTest {
boolean rawAndDeleting = isRaw() && isDeleting();
verifyMetrics(() -> {
fs.delete(file1, false);
- return "after fs.delete(file1simpleFile) " + getMetricSummary();
+ return "after fs.delete(file1) " + getMetricSummary();
},
// delete file. For keeping: that's it
probe(rawAndKeeping, OBJECT_METADATA_REQUESTS,
@@ -173,7 +177,11 @@ public class ITestS3ADeleteCost extends AbstractS3ACostTest {
public void testDirMarkersSubdir() throws Throwable {
describe("verify cost of deep subdir creation");
- Path subDir = new Path(methodPath(), "1/2/3/4/5/6");
+ Path parent = methodPath();
+ Path subDir = new Path(parent, "1/2/3/4/5/6");
+ S3AFileSystem fs = getFileSystem();
+ int dirsCreated = 2;
+ fs.mkdirs(parent);
// one dir created, possibly a parent removed
verifyMetrics(() -> {
mkdirs(subDir);
@@ -187,6 +195,43 @@ public class ITestS3ADeleteCost extends AbstractS3ACostTest {
// delete all possible fake dirs above the subdirectory
withWhenDeleting(FAKE_DIRECTORIES_DELETED,
directoriesInPath(subDir) - 1));
+
+ int dirDeleteRequests = 1;
+ int fileDeleteRequests = 0;
+ int totalDeleteRequests = dirDeleteRequests + fileDeleteRequests;
+
+ // now delete the deep tree.
+ verifyMetrics(() ->
+ fs.delete(parent, true),
+
+ // keeping: the parent dir marker needs deletion alongside
+ // the subdir one.
+ with(OBJECT_DELETE_REQUESTS, totalDeleteRequests),
+ withWhenKeeping(OBJECT_DELETE_OBJECTS, dirsCreated),
+
+ // deleting: only the marker at the bottom needs deleting
+ withWhenDeleting(OBJECT_DELETE_OBJECTS, 1));
+
+ // followup with list calls to make sure all is clear.
+ verifyNoListing(parent);
+ verifyNoListing(subDir);
+ // now reinstate the directory, which in HADOOP-17244 hitting problems
+ fs.mkdirs(parent);
+ FileStatus[] children = fs.listStatus(parent);
+ Assertions.assertThat(children)
+ .describedAs("Children of %s", parent)
+ .isEmpty();
+ }
+
+ /**
+ * List a path, verify that there are no direct child entries.
+ * @param path path to scan
+ */
+ protected void verifyNoListing(final Path path) throws Exception {
+ intercept(FileNotFoundException.class, () -> {
+ FileStatus[] statuses = getFileSystem().listStatus(path);
+ return Arrays.deepToString(statuses);
+ });
}
@Test
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org