You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ma...@apache.org on 2017/06/15 18:57:44 UTC
hadoop git commit: HADOOP-14488. S3Guard: inconsistency injection not
handling deleted paths properly. Contributed by Sean Mackrory and Aaron
Fabbri.
Repository: hadoop
Updated Branches:
refs/heads/HADOOP-13345 6a06ed834 -> 0db7176ba
HADOOP-14488. S3Guard: inconsistency injection not handling deleted paths properly. Contributed by Sean Mackrory and Aaron Fabbri.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/0db7176b
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/0db7176b
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/0db7176b
Branch: refs/heads/HADOOP-13345
Commit: 0db7176baaa2a89c61f6f5043ca15c28c3f89831
Parents: 6a06ed8
Author: Sean Mackrory <ma...@apache.org>
Authored: Thu Jun 15 09:26:26 2017 -0600
Committer: Sean Mackrory <ma...@apache.org>
Committed: Thu Jun 15 12:57:00 2017 -0600
----------------------------------------------------------------------
.../fs/s3a/InconsistentAmazonS3Client.java | 117 ++++++++++++++++---
.../org/apache/hadoop/fs/s3a/S3AFileSystem.java | 3 +-
.../fs/s3a/ITestS3GuardListConsistency.java | 84 +++++++++++++
3 files changed, 184 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/0db7176b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java
index 5b62c66..85f4a2f 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java
@@ -24,14 +24,18 @@ import com.amazonaws.ClientConfiguration;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.DeleteObjectRequest;
+import com.amazonaws.services.s3.model.DeleteObjectsRequest;
+import com.amazonaws.services.s3.model.DeleteObjectsResult;
import com.amazonaws.services.s3.model.ListObjectsRequest;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.services.s3.model.PutObjectResult;
import com.amazonaws.services.s3.model.S3ObjectSummary;
+import com.google.common.base.Preconditions;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -134,10 +138,24 @@ public class InconsistentAmazonS3Client extends AmazonS3Client {
}
@Override
+ public DeleteObjectsResult deleteObjects(DeleteObjectsRequest
+ deleteObjectsRequest) throws AmazonClientException,
+ AmazonServiceException
+ {
+ for (DeleteObjectsRequest.KeyVersion keyVersion :
+ deleteObjectsRequest.getKeys()) {
+ registerDeleteObject(keyVersion.getKey(), deleteObjectsRequest
+ .getBucketName());
+ }
+ return super.deleteObjects(deleteObjectsRequest);
+ }
+
+ @Override
public void deleteObject(DeleteObjectRequest deleteObjectRequest)
throws AmazonClientException, AmazonServiceException {
- LOG.debug("key {}", deleteObjectRequest.getKey());
- registerDeleteObject(deleteObjectRequest);
+ String key = deleteObjectRequest.getKey();
+ LOG.debug("key {}", key);
+ registerDeleteObject(key, deleteObjectRequest.getBucketName());
super.deleteObject(deleteObjectRequest);
}
@@ -161,38 +179,100 @@ public class InconsistentAmazonS3Client extends AmazonS3Client {
return listing;
}
- private boolean addIfNotPresent(List<S3ObjectSummary> list,
+ private void addSummaryIfNotPresent(List<S3ObjectSummary> list,
S3ObjectSummary item) {
// Behavior of S3ObjectSummary
String key = item.getKey();
for (S3ObjectSummary member : list) {
if (member.getKey().equals(key)) {
- return false;
+ return;
+ }
+ }
+ list.add(item);
+ }
+
+ /**
+ * Add prefix of child to given list. The added prefix will be equal to
+ * ancestor plus one directory past ancestor. e.g.:
+ * if ancestor is "/a/b/c" and child is "/a/b/c/d/e/file" then "/a/b/c/d" is
+ * added to list.
+ * @param prefixes list to add to
+ * @param ancestor path we are listing in
+ * @param child full path to get prefix from
+ */
+ private void addPrefixIfNotPresent(List<String> prefixes, String ancestor,
+ String child) {
+ Path prefixCandidate = new Path(child).getParent();
+ Path ancestorPath = new Path(ancestor);
+ Preconditions.checkArgument(child.startsWith(ancestor), "%s does not " +
+ "start with %s", child, ancestor);
+ while (!prefixCandidate.isRoot()) {
+ Path nextParent = prefixCandidate.getParent();
+ if (nextParent.equals(ancestorPath)) {
+ String prefix = prefixCandidate.toString();
+ if (!prefixes.contains(prefix)) {
+ prefixes.add(prefix);
+ }
+ return;
+ }
+ prefixCandidate = nextParent;
+ }
+ }
+
+ /**
+ * Checks whether the parent key is an ancestor of the child key.
+ * @param parent key that may be the parent.
+ * @param child key that may be the child.
+ * @param recursive if false, only return true for direct children. If
+ * true, any descendant will count.
+ * @return true if parent is an ancestor of child
+ */
+ private boolean isDescendant(String parent, String child, boolean recursive) {
+ if (recursive) {
+ if (!parent.endsWith("/")) {
+ parent = parent + "/";
}
+ return child.startsWith(parent);
+ } else {
+ Path actualParentPath = new Path(child).getParent();
+ Path expectedParentPath = new Path(parent);
+ return actualParentPath.equals(expectedParentPath);
}
- return list.add(item);
}
+ /**
+ * Simulate eventual consistency of delete for this list operation: Any
+ * recently-deleted keys will be added.
+ * @param request List request
+ * @param rawListing listing returned from underlying S3
+ * @return listing with recently-deleted items restored
+ */
private ObjectListing restoreListObjects(ListObjectsRequest request,
- ObjectListing rawListing) {
+ ObjectListing rawListing) {
List<S3ObjectSummary> outputList = rawListing.getObjectSummaries();
List<String> outputPrefixes = rawListing.getCommonPrefixes();
+ // recursive list has no delimiter, returns everything that matches a
+ // prefix.
+ boolean recursiveObjectList = !("/".equals(request.getDelimiter()));
+
+ // Go through all deleted keys
for (String key : new HashSet<>(delayedDeletes.keySet())) {
Delete delete = delayedDeletes.get(key);
if (isKeyDelayed(delete.time(), key)) {
- // TODO this works fine for flat directories but:
- // if you have a delayed key /a/b/c/d and you are listing /a/b,
- // this incorrectly will add /a/b/c/d to the listing for b
- if (key.startsWith(request.getPrefix())) {
- if (delete.summary == null) {
- if (!outputPrefixes.contains(key)) {
- outputPrefixes.add(key);
- }
- } else {
- addIfNotPresent(outputList, delete.summary());
+ if (isDescendant(request.getPrefix(), key, recursiveObjectList)) {
+ if (delete.summary() != null) {
+ addSummaryIfNotPresent(outputList, delete.summary());
+ }
+ }
+ // Non-recursive list has delimiter: will return rolled-up prefixes for
+ // all keys that are not direct children
+ if (!recursiveObjectList) {
+ if (isDescendant(request.getPrefix(), key, true)) {
+ addPrefixIfNotPresent(outputPrefixes, request.getPrefix(), key);
}
}
} else {
+ // Clean up any expired entries
delayedDeletes.remove(key);
}
}
@@ -240,12 +320,11 @@ public class InconsistentAmazonS3Client extends AmazonS3Client {
}
}
- private void registerDeleteObject(DeleteObjectRequest req) {
- String key = req.getKey();
+ private void registerDeleteObject(String key, String bucket) {
if (shouldDelay(key)) {
// Record summary so we can add it back for some time post-deletion
S3ObjectSummary summary = null;
- ObjectListing list = listObjects(req.getBucketName(), key);
+ ObjectListing list = listObjects(bucket, key);
for (S3ObjectSummary result : list.getObjectSummaries()) {
if (result.getKey().equals(key)) {
summary = result;
http://git-wip-us.apache.org/repos/asf/hadoop/blob/0db7176b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index 4eb94ad..51543f8 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -1595,7 +1595,8 @@ public class S3AFileSystem extends FileSystem {
* @param delimiter any delimiter
* @return the request
*/
- private ListObjectsRequest createListObjectsRequest(String key,
+ @VisibleForTesting
+ ListObjectsRequest createListObjectsRequest(String key,
String delimiter) {
ListObjectsRequest request = new ListObjectsRequest();
request.setBucketName(bucket);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/0db7176b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java
index e06afd0..b0da172 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java
@@ -18,7 +18,11 @@
package org.apache.hadoop.fs.s3a;
+import com.amazonaws.services.s3.model.ObjectListing;
+import com.amazonaws.services.s3.AmazonS3;
+
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
@@ -34,6 +38,7 @@ import java.util.Collection;
import java.util.HashSet;
import java.util.List;
+import static org.apache.hadoop.fs.contract.ContractTestUtils.touch;
import static org.apache.hadoop.fs.contract.ContractTestUtils.writeTextFile;
import static org.apache.hadoop.fs.s3a.Constants.*;
import static org.apache.hadoop.fs.s3a.InconsistentAmazonS3Client.*;
@@ -452,4 +457,83 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
}
}
+ @Test
+ public void testCommitByRenameOperations() throws Throwable {
+ S3AFileSystem fs = getFileSystem();
+ Assume.assumeTrue(fs.hasMetadataStore());
+ Path work = path("test-commit-by-rename-" + DEFAULT_DELAY_KEY_SUBSTRING);
+ Path task00 = new Path(work, "task00");
+ fs.mkdirs(task00);
+ String name = "part-00";
+ try (FSDataOutputStream out =
+ fs.create(new Path(task00, name), false)) {
+ out.writeChars("hello");
+ }
+ for (FileStatus stat : fs.listStatus(task00)) {
+ fs.rename(stat.getPath(), work);
+ }
+ List<FileStatus> files = new ArrayList<>(2);
+ for (FileStatus stat : fs.listStatus(work)) {
+ if (stat.isFile()) {
+ files.add(stat);
+ }
+ }
+ assertFalse("renamed file " + name + " not found in " + work,
+ files.isEmpty());
+ assertEquals("more files found than expected in " + work
+ + " " + ls(work), 1, files.size());
+ FileStatus status = files.get(0);
+ assertEquals("Wrong filename in " + status,
+ name, status.getPath().getName());
+ }
+
+ @Test
+ public void testInconsistentS3ClientDeletes() throws Throwable {
+ S3AFileSystem fs = getFileSystem();
+ Path root = path("testInconsistentClient" + DEFAULT_DELAY_KEY_SUBSTRING);
+ for (int i = 0; i < 3; i++) {
+ fs.mkdirs(new Path(root, "dir" + i));
+ touch(fs, new Path(root, "file" + i));
+ for (int j = 0; j < 3; j++) {
+ touch(fs, new Path(new Path(root, "dir" + i), "file" + i + "-" + j));
+ }
+ }
+ Thread.sleep(2 * DEFAULT_DELAY_KEY_MSEC);
+
+ AmazonS3 client = fs.getAmazonS3Client();
+ String key = fs.pathToKey(root) + "/";
+
+ ObjectListing preDeleteDelimited = client.listObjects(
+ fs.createListObjectsRequest(key, "/"));
+ ObjectListing preDeleteUndelimited = client.listObjects(
+ fs.createListObjectsRequest(key, null));
+
+ fs.delete(root, true);
+
+ ObjectListing postDeleteDelimited = client.listObjects(
+ fs.createListObjectsRequest(key, "/"));
+ ObjectListing postDeleteUndelimited = client.listObjects(
+ fs.createListObjectsRequest(key, null));
+
+ assertEquals("InconsistentAmazonS3Client added back objects incorrectly " +
+ "in a non-recursive listing",
+ preDeleteDelimited.getObjectSummaries().size(),
+ postDeleteDelimited.getObjectSummaries().size()
+ );
+ assertEquals("InconsistentAmazonS3Client added back prefixes incorrectly " +
+ "in a non-recursive listing",
+ preDeleteDelimited.getCommonPrefixes().size(),
+ postDeleteDelimited.getCommonPrefixes().size()
+ );
+ assertEquals("InconsistentAmazonS3Client added back objects incorrectly " +
+ "in a recursive listing",
+ preDeleteUndelimited.getObjectSummaries().size(),
+ postDeleteUndelimited.getObjectSummaries().size()
+ );
+ assertEquals("InconsistentAmazonS3Client added back prefixes incorrectly " +
+ "in a recursive listing",
+ preDeleteUndelimited.getCommonPrefixes().size(),
+ postDeleteUndelimited.getCommonPrefixes().size()
+ );
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org