You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ma...@apache.org on 2017/05/25 12:13:18 UTC
[6/6] hadoop git commit: Revert "HADOOP-13760. S3Guard: add delete
tracking."
Revert "HADOOP-13760. S3Guard: add delete tracking."
This reverts commit 2f3305db768e4159b9216de88adbd73b686d9a02.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/c1775960
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/c1775960
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/c1775960
Branch: refs/heads/HADOOP-13345
Commit: c17759607e7ce770aa4635a4896ac1233b003f33
Parents: 447d7b8
Author: Sean Mackrory <ma...@apache.org>
Authored: Wed May 24 20:06:28 2017 -0600
Committer: Sean Mackrory <ma...@apache.org>
Committed: Wed May 24 20:06:28 2017 -0600
----------------------------------------------------------------------
.../fs/s3a/InconsistentAmazonS3Client.java | 122 +-------
.../java/org/apache/hadoop/fs/s3a/Listing.java | 91 ------
.../org/apache/hadoop/fs/s3a/S3AFileSystem.java | 310 ++++++-------------
.../fs/s3a/s3guard/DescendantsIterator.java | 14 +-
.../fs/s3a/s3guard/DirListingMetadata.java | 32 --
.../fs/s3a/s3guard/DynamoDBMetadataStore.java | 86 ++---
.../fs/s3a/s3guard/LocalMetadataStore.java | 61 ++--
.../hadoop/fs/s3a/s3guard/MetadataStore.java | 19 +-
.../s3guard/MetadataStoreListFilesIterator.java | 168 ----------
.../fs/s3a/s3guard/NullMetadataStore.java | 5 -
.../hadoop/fs/s3a/s3guard/PathMetadata.java | 27 +-
.../PathMetadataDynamoDBTranslation.java | 7 +-
.../apache/hadoop/fs/s3a/s3guard/S3Guard.java | 26 +-
.../hadoop/fs/s3a/s3guard/S3GuardTool.java | 5 +-
.../hadoop/fs/s3a/ITestS3GuardEmptyDirs.java | 62 ++--
.../fs/s3a/ITestS3GuardListConsistency.java | 213 +------------
.../org/apache/hadoop/fs/s3a/TestListing.java | 94 ------
.../fs/s3a/s3guard/MetadataStoreTestBase.java | 150 +++------
.../s3a/s3guard/TestDynamoDBMetadataStore.java | 4 +-
.../fs/s3a/s3guard/TestLocalMetadataStore.java | 38 +--
.../AbstractITestS3AMetadataStoreScale.java | 2 +-
21 files changed, 276 insertions(+), 1260 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c1775960/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java
index aadcc37..ebca268 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java
@@ -23,7 +23,6 @@ import com.amazonaws.AmazonServiceException;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.services.s3.AmazonS3Client;
-import com.amazonaws.services.s3.model.DeleteObjectRequest;
import com.amazonaws.services.s3.model.ListObjectsRequest;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.PutObjectRequest;
@@ -34,7 +33,6 @@ import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.HashMap;
-import java.util.HashSet;
import java.util.List;
import java.util.Map;
@@ -59,50 +57,14 @@ public class InconsistentAmazonS3Client extends AmazonS3Client {
private static final Logger LOG =
LoggerFactory.getLogger(InconsistentAmazonS3Client.class);
- /**
- * Composite of data we need to track about recently deleted objects:
- * when it was deleted (same was with recently put objects) and the object
- * summary (since we should keep returning it for sometime after its
- * deletion).
- */
- private static class Delete {
- private Long time;
- private S3ObjectSummary summary;
-
- Delete(Long time, S3ObjectSummary summary) {
- this.time = time;
- this.summary = summary;
- }
-
- public Long time() {
- return time;
- }
-
- public S3ObjectSummary summary() {
- return summary;
- }
- }
-
- /** Map of key to delay -> time it was deleted + object summary (object
- * summary is null for prefixes. */
- private Map<String, Delete> delayedDeletes = new HashMap<>();
-
/** Map of key to delay -> time it was created. */
- private Map<String, Long> delayedPutKeys = new HashMap<>();
+ private Map<String, Long> delayedKeys = new HashMap<>();
public InconsistentAmazonS3Client(AWSCredentialsProvider credentials,
ClientConfiguration clientConfiguration) {
super(credentials, clientConfiguration);
}
- @Override
- public void deleteObject(DeleteObjectRequest deleteObjectRequest)
- throws AmazonClientException, AmazonServiceException {
- LOG.debug("key {}", deleteObjectRequest.getKey());
- registerDeleteObject(deleteObjectRequest);
- super.deleteObject(deleteObjectRequest);
- }
-
/* We should only need to override this version of putObject() */
@Override
public PutObjectResult putObject(PutObjectRequest putObjectRequest)
@@ -118,46 +80,10 @@ public class InconsistentAmazonS3Client extends AmazonS3Client {
throws AmazonClientException, AmazonServiceException {
LOG.debug("prefix {}", listObjectsRequest.getPrefix());
ObjectListing listing = super.listObjects(listObjectsRequest);
- listing = filterListObjects(listObjectsRequest, listing);
- listing = restoreListObjects(listObjectsRequest, listing);
- return listing;
+ return filterListObjects(listObjectsRequest,
+ listing);
}
- private boolean addIfNotPresent(List<S3ObjectSummary> list,
- S3ObjectSummary item) {
- // Behavior of S3ObjectSummary
- String key = item.getKey();
- for (S3ObjectSummary member : list) {
- if (member.getKey().equals(key)) {
- return false;
- }
- }
- return list.add(item);
- }
-
- private ObjectListing restoreListObjects(ListObjectsRequest request,
- ObjectListing rawListing) {
- List<S3ObjectSummary> outputList = rawListing.getObjectSummaries();
- List<String> outputPrefixes = rawListing.getCommonPrefixes();
- for (String key : new HashSet<>(delayedDeletes.keySet())) {
- Delete delete = delayedDeletes.get(key);
- if (isKeyDelayed(delete.time(), key)) {
- if (key.startsWith(request.getPrefix())) {
- if (delete.summary == null) {
- if (!outputPrefixes.contains(key)) {
- outputPrefixes.add(key);
- }
- } else {
- addIfNotPresent(outputList, delete.summary());
- }
- }
- } else {
- delayedDeletes.remove(key);
- }
- }
-
- return new CustomObjectListing(rawListing, outputList, outputPrefixes);
- }
private ObjectListing filterListObjects(ListObjectsRequest request,
ObjectListing rawListing) {
@@ -165,8 +91,7 @@ public class InconsistentAmazonS3Client extends AmazonS3Client {
// Filter object listing
List<S3ObjectSummary> outputList = new ArrayList<>();
for (S3ObjectSummary s : rawListing.getObjectSummaries()) {
- String key = s.getKey();
- if (!isKeyDelayed(delayedPutKeys.get(key), key)) {
+ if (!isVisibilityDelayed(s.getKey())) {
outputList.add(s);
}
}
@@ -174,7 +99,7 @@ public class InconsistentAmazonS3Client extends AmazonS3Client {
// Filter prefixes (directories)
List<String> outputPrefixes = new ArrayList<>();
for (String key : rawListing.getCommonPrefixes()) {
- if (!isKeyDelayed(delayedPutKeys.get(key), key)) {
+ if (!isVisibilityDelayed(key)) {
outputPrefixes.add(key);
}
}
@@ -182,43 +107,28 @@ public class InconsistentAmazonS3Client extends AmazonS3Client {
return new CustomObjectListing(rawListing, outputList, outputPrefixes);
}
- private boolean isKeyDelayed(Long enqueueTime, String key) {
- if (enqueueTime == null) {
+ private boolean isVisibilityDelayed(String key) {
+ Long createTime = delayedKeys.get(key);
+ if (createTime == null) {
LOG.debug("no delay for key {}", key);
return false;
}
long currentTime = System.currentTimeMillis();
- long deadline = enqueueTime + DELAY_KEY_MILLIS;
+ long deadline = createTime + DELAY_KEY_MILLIS;
if (currentTime >= deadline) {
- delayedDeletes.remove(key);
- LOG.debug("no longer delaying {}", key);
+ delayedKeys.remove(key);
+ LOG.debug("{} no longer delayed", key);
return false;
} else {
- LOG.info("delaying {}", key);
+ LOG.info("{} delaying visibility", key);
return true;
}
}
- private void registerDeleteObject(DeleteObjectRequest req) {
- String key = req.getKey();
- if (shouldDelay(key)) {
- // Record summary so we can add it back for some time post-deletion
- S3ObjectSummary summary = null;
- ObjectListing list = listObjects(req.getBucketName(), key);
- for (S3ObjectSummary result : list.getObjectSummaries()) {
- if (result.getKey().equals(key)) {
- summary = result;
- break;
- }
- }
- delayedDeletes.put(key, new Delete(System.currentTimeMillis(), summary));
- }
- }
-
private void registerPutObject(PutObjectRequest req) {
String key = req.getKey();
if (shouldDelay(key)) {
- enqueueDelayedPut(key);
+ enqueueDelayKey(key);
}
}
@@ -238,9 +148,9 @@ public class InconsistentAmazonS3Client extends AmazonS3Client {
* listObject replies for a while, to simulate eventual list consistency.
* @param key key to delay visibility of
*/
- private void enqueueDelayedPut(String key) {
- LOG.debug("delaying put of {}", key);
- delayedPutKeys.put(key, System.currentTimeMillis());
+ private void enqueueDelayKey(String key) {
+ LOG.debug("key {}", key);
+ delayedKeys.put(key, System.currentTimeMillis());
}
/** Since ObjectListing is immutable, we just override it with wrapper. */
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c1775960/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java
index 5da99b8..e91f2ec 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java
@@ -22,7 +22,6 @@ import com.amazonaws.AmazonClientException;
import com.amazonaws.services.s3.model.ListObjectsRequest;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.S3ObjectSummary;
-import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
@@ -34,7 +33,6 @@ import org.slf4j.Logger;
import java.io.IOException;
import java.util.ArrayList;
-import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
@@ -127,27 +125,12 @@ public class Listing {
* @param statusIterator an iterator over the remote status entries
* @return a new remote iterator
*/
- @VisibleForTesting
LocatedFileStatusIterator createLocatedFileStatusIterator(
RemoteIterator<FileStatus> statusIterator) {
return new LocatedFileStatusIterator(statusIterator);
}
/**
- * Create an located status iterator that wraps another to filter out a set
- * of recently deleted items.
- * @param iterator an iterator over the remote located status entries.
- * @param tombstones set of paths that are recently deleted and should be
- * filtered.
- * @return a new remote iterator.
- */
- @VisibleForTesting
- TombstoneReconcilingIterator createTombstoneReconcilingIterator(
- RemoteIterator<LocatedFileStatus> iterator, Set<Path> tombstones) {
- return new TombstoneReconcilingIterator(iterator, tombstones);
- }
-
- /**
* Interface to implement by the logic deciding whether to accept a summary
* entry or path as a valid file or directory.
*/
@@ -685,80 +668,6 @@ public class Listing {
}
/**
- * Wraps another iterator and filters out files that appear in the provided
- * set of tombstones. Will read ahead in the iterator when necessary to
- * ensure that emptiness is detected early enough if only deleted objects
- * remain in the source iterator.
- */
- static class TombstoneReconcilingIterator implements
- RemoteIterator<LocatedFileStatus> {
- private LocatedFileStatus next = null;
- private final RemoteIterator<LocatedFileStatus> iterator;
- private final Set<Path> tombstones;
-
- /**
- * @param iterator Source iterator to filter
- * @param tombstones set of tombstone markers to filter out of results
- */
- public TombstoneReconcilingIterator(RemoteIterator<LocatedFileStatus>
- iterator, Set<Path> tombstones) {
- this.iterator = iterator;
- if (tombstones != null) {
- this.tombstones = tombstones;
- } else {
- this.tombstones = Collections.EMPTY_SET;
- }
- }
-
- private boolean fetch() throws IOException {
- while (next == null && iterator.hasNext()) {
- LocatedFileStatus candidate = iterator.next();
- if (!tombstones.contains(candidate.getPath())) {
- next = candidate;
- return true;
- }
- }
- return false;
- }
-
- public boolean hasNext() throws IOException {
- if (next != null) {
- return true;
- }
- return fetch();
- }
-
- public LocatedFileStatus next() throws IOException {
- if (hasNext()) {
- LocatedFileStatus result = next;
- next = null;
- fetch();
- return result;
- }
- throw new NoSuchElementException();
- }
- }
-
- /**
- * Accept all entries except those which map to S3N pseudo directory markers.
- */
- static class AcceptAllButS3nDirs implements FileStatusAcceptor {
-
- public boolean accept(Path keyPath, S3ObjectSummary summary) {
- return !summary.getKey().endsWith(S3N_FOLDER_SUFFIX);
- }
-
- public boolean accept(Path keyPath, String prefix) {
- return !keyPath.toString().endsWith(S3N_FOLDER_SUFFIX);
- }
-
- public boolean accept(FileStatus status) {
- return !status.getPath().toString().endsWith(S3N_FOLDER_SUFFIX);
- }
-
- }
-
- /**
* Accept all entries except the base path and those which map to S3N
* pseudo directory markers.
*/
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c1775960/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index 7de94cc..78b3970 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -26,13 +26,11 @@ import java.io.InterruptedIOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.Collections;
import java.util.Date;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
-import java.util.Set;
import java.util.Objects;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
@@ -95,8 +93,8 @@ import org.apache.hadoop.fs.PathIsNotEmptyDirectoryException;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.StorageStatistics;
import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.fs.s3a.s3guard.DescendantsIterator;
import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata;
-import org.apache.hadoop.fs.s3a.s3guard.MetadataStoreListFilesIterator;
import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
import org.apache.hadoop.fs.s3a.s3guard.PathMetadata;
import org.apache.hadoop.fs.s3a.s3guard.S3Guard;
@@ -880,44 +878,51 @@ public class S3AFileSystem extends FileSystem {
keysToDelete.add(new DeleteObjectsRequest.KeyVersion(dstKey));
}
- Path parentPath = keyToPath(srcKey);
- RemoteIterator<LocatedFileStatus> iterator = listFilesAndEmptyDirectories(
- parentPath, true);
- while (iterator.hasNext()) {
- LocatedFileStatus status = iterator.next();
- long length = status.getLen();
- String key = pathToKey(status.getPath());
- if (status.isDirectory() && !key.endsWith("/")) {
- key += "/";
- }
- keysToDelete
- .add(new DeleteObjectsRequest.KeyVersion(key));
- String newDstKey =
- dstKey + key.substring(srcKey.length());
- copyFile(key, newDstKey, length);
-
- if (hasMetadataStore()) {
- Path childSrc = keyToQualifiedPath(key);
- Path childDst = keyToQualifiedPath(newDstKey);
- if (objectRepresentsDirectory(key, length)) {
- S3Guard.addMoveDir(metadataStore, srcPaths, dstMetas, childSrc,
- childDst, username);
- } else {
- S3Guard.addMoveFile(metadataStore, srcPaths, dstMetas, childSrc,
- childDst, length, getDefaultBlockSize(childDst), username);
+ ListObjectsRequest request = new ListObjectsRequest();
+ request.setBucketName(bucket);
+ request.setPrefix(srcKey);
+ request.setMaxKeys(maxKeys);
+
+ ObjectListing objects = listObjects(request);
+
+ while (true) {
+ for (S3ObjectSummary summary : objects.getObjectSummaries()) {
+ long length = summary.getSize();
+ keysToDelete
+ .add(new DeleteObjectsRequest.KeyVersion(summary.getKey()));
+ String newDstKey =
+ dstKey + summary.getKey().substring(srcKey.length());
+ copyFile(summary.getKey(), newDstKey, length);
+
+ if (hasMetadataStore()) {
+ Path childSrc = keyToQualifiedPath(summary.getKey());
+ Path childDst = keyToQualifiedPath(newDstKey);
+ if (objectRepresentsDirectory(summary.getKey(), length)) {
+ S3Guard.addMoveDir(metadataStore, srcPaths, dstMetas, childSrc,
+ childDst, username);
+ } else {
+ S3Guard.addMoveFile(metadataStore, srcPaths, dstMetas, childSrc,
+ childDst, length, getDefaultBlockSize(childDst), username);
+ }
+ // Ancestor directories may not be listed, so we explicitly add them
+ S3Guard.addMoveAncestors(metadataStore, srcPaths, dstMetas,
+ keyToQualifiedPath(srcKey), childSrc, childDst, username);
+ }
+
+ if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) {
+ removeKeys(keysToDelete, true, false);
}
- // Ancestor directories may not be listed, so we explicitly add them
- S3Guard.addMoveAncestors(metadataStore, srcPaths, dstMetas,
- keyToQualifiedPath(srcKey), childSrc, childDst, username);
}
- if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) {
- removeKeys(keysToDelete, true, false);
+ if (objects.isTruncated()) {
+ objects = continueListObjects(objects);
+ } else {
+ if (!keysToDelete.isEmpty()) {
+ removeKeys(keysToDelete, false, false);
+ }
+ break;
}
}
- if (!keysToDelete.isEmpty()) {
- removeKeys(keysToDelete, false, false);
- }
// We moved all the children, now move the top-level dir
// Empty directory should have been added as the object summary
@@ -1627,42 +1632,6 @@ public class S3AFileSystem extends FileSystem {
throw translateException("innerMkdirs", path, e);
}
}
-
- private enum DirectoryStatus {
- DOES_NOT_EXIST, EXISTS_AND_IS_DIRECTORY_ON_S3_ONLY,
- EXISTS_AND_IS_DIRECTORY_ON_METADATASTORE, EXISTS_AND_IS_FILE
- }
-
- private DirectoryStatus checkPathForDirectory(Path path) throws
- IOException {
- try {
- if (path.isRoot()) {
- return DirectoryStatus.EXISTS_AND_IS_DIRECTORY_ON_METADATASTORE;
- }
- String key = pathToKey(path);
-
- // Check MetadataStore, if any.
- FileStatus status = null;
- PathMetadata pm = metadataStore.get(path, false);
- if (pm != null) {
- if (pm.isDeleted()) {
- return DirectoryStatus.DOES_NOT_EXIST;
- }
- status = pm.getFileStatus();
- if (status != null && status.isDirectory()) {
- return DirectoryStatus.EXISTS_AND_IS_DIRECTORY_ON_METADATASTORE;
- }
- }
- status = s3GetFileStatus(path, key, null);
- if (status.isDirectory()) {
- return DirectoryStatus.EXISTS_AND_IS_DIRECTORY_ON_S3_ONLY;
- }
- } catch (FileNotFoundException e) {
- return DirectoryStatus.DOES_NOT_EXIST;
- }
- return DirectoryStatus.EXISTS_AND_IS_FILE;
- }
-
/**
*
* Make the given path and all non-existent parents into
@@ -1670,70 +1639,64 @@ public class S3AFileSystem extends FileSystem {
* See {@link #mkdirs(Path, FsPermission)}
* @param p path to create
* @param permission to apply to f
- * @return true if a directory was created or already existed
+ * @return true if a directory was created
* @throws FileAlreadyExistsException there is a file at the path specified
* @throws IOException other IO problems
* @throws AmazonClientException on failures inside the AWS SDK
*/
private boolean innerMkdirs(Path p, FsPermission permission)
throws IOException, FileAlreadyExistsException, AmazonClientException {
- boolean createOnS3 = false;
Path f = qualify(p);
LOG.debug("Making directory: {}", f);
incrementStatistic(INVOCATION_MKDIRS);
+ FileStatus fileStatus;
List<Path> metadataStoreDirs = null;
if (hasMetadataStore()) {
metadataStoreDirs = new ArrayList<>();
}
- DirectoryStatus status = checkPathForDirectory(f);
- if (status == DirectoryStatus.DOES_NOT_EXIST) {
- createOnS3 = true;
- if (metadataStoreDirs != null) {
- metadataStoreDirs.add(f);
+ try {
+ fileStatus = getFileStatus(f);
+
+ if (fileStatus.isDirectory()) {
+ return true;
+ } else {
+ throw new FileAlreadyExistsException("Path is a file: " + f);
}
- } else if (status == DirectoryStatus.EXISTS_AND_IS_DIRECTORY_ON_S3_ONLY) {
+ } catch (FileNotFoundException e) {
+ // Walk path to root, ensuring closest ancestor is a directory, not file
+ Path fPart = f.getParent();
if (metadataStoreDirs != null) {
metadataStoreDirs.add(f);
}
- } else if (status == DirectoryStatus
- .EXISTS_AND_IS_DIRECTORY_ON_METADATASTORE) {
- return true;
- } else if (status == DirectoryStatus.EXISTS_AND_IS_FILE) {
- throw new FileAlreadyExistsException("Path is a file: " + f);
- }
-
- // Walk path to root, ensuring closest ancestor is a directory, not file
- Path fPart = f.getParent();
- do {
- status = checkPathForDirectory(fPart);
- // The fake directory on S3 may not be visible immediately, but
- // only the leaf node has a fake directory on S3 so we can treat both
- // cases the same and just create a metadata store entry.
- if (status == DirectoryStatus.DOES_NOT_EXIST || status ==
- DirectoryStatus.EXISTS_AND_IS_DIRECTORY_ON_S3_ONLY) {
- if (metadataStoreDirs != null) {
- metadataStoreDirs.add(fPart);
+ do {
+ try {
+ fileStatus = getFileStatus(fPart);
+ if (fileStatus.isDirectory()) {
+ break;
+ }
+ if (fileStatus.isFile()) {
+ throw new FileAlreadyExistsException(String.format(
+ "Can't make directory for path '%s' since it is a file.",
+ fPart));
+ }
+ } catch (FileNotFoundException fnfe) {
+ instrumentation.errorIgnored();
+ // We create all missing directories in MetadataStore; it does not
+ // infer directories exist by prefix like S3.
+ if (metadataStoreDirs != null) {
+ metadataStoreDirs.add(fPart);
+ }
}
- } else if (status == DirectoryStatus
- .EXISTS_AND_IS_DIRECTORY_ON_METADATASTORE) {
- // do nothing - just break out of the loop, make whatever child
- // directories are needed on the metadata store, and return the
- // result.
- break;
- } else if (status == DirectoryStatus.EXISTS_AND_IS_FILE) {
- throw new FileAlreadyExistsException("Path is a file: " + f);
- }
- fPart = fPart.getParent();
- } while (fPart != null);
+ fPart = fPart.getParent();
+ } while (fPart != null);
- if (createOnS3) {
String key = pathToKey(f);
createFakeDirectory(key);
+ S3Guard.makeDirsOrdered(metadataStore, metadataStoreDirs, username);
+ deleteUnnecessaryFakeDirectories(f.getParent());
+ return true;
}
- S3Guard.makeDirsOrdered(metadataStore, metadataStoreDirs, username);
- deleteUnnecessaryFakeDirectories(f.getParent());
- return true;
}
/**
@@ -1766,12 +1729,8 @@ public class S3AFileSystem extends FileSystem {
// Check MetadataStore, if any.
PathMetadata pm = metadataStore.get(path, needEmptyDirectoryFlag);
- Set<Path> tombstones = Collections.EMPTY_SET;
if (pm != null) {
- if (pm.isDeleted()) {
- throw new FileNotFoundException("Path " + f + " is recorded as " +
- "deleted by S3Guard");
- }
+ // HADOOP-13760: handle deleted files, i.e. PathMetadata#isDeleted() here
FileStatus msStatus = pm.getFileStatus();
if (needEmptyDirectoryFlag && msStatus.isDirectory()) {
@@ -1779,29 +1738,15 @@ public class S3AFileSystem extends FileSystem {
// We have a definitive true / false from MetadataStore, we are done.
return S3AFileStatus.fromFileStatus(msStatus, pm.isEmptyDirectory());
} else {
- DirListingMetadata children = metadataStore.listChildren(path);
- if (children != null) {
- tombstones = children.listTombstones();
- }
LOG.debug("MetadataStore doesn't know if dir is empty, using S3.");
}
} else {
// Either this is not a directory, or we don't care if it is empty
return S3AFileStatus.fromFileStatus(msStatus, pm.isEmptyDirectory());
}
-
- // If the metadata store has no children for it and it's not listed in
- // S3 yet, we'll assume the empty directory is true;
- S3AFileStatus s3FileStatus;
- try {
- s3FileStatus = s3GetFileStatus(path, key, tombstones);
- } catch (FileNotFoundException e) {
- return S3AFileStatus.fromFileStatus(msStatus, Tristate.TRUE);
- }
- return S3Guard.putAndReturn(metadataStore, s3FileStatus, instrumentation);
}
- return S3Guard.putAndReturn(metadataStore,
- s3GetFileStatus(path, key, tombstones), instrumentation);
+ return S3Guard.putAndReturn(metadataStore, s3GetFileStatus(path, key),
+ instrumentation);
}
/**
@@ -1812,8 +1757,8 @@ public class S3AFileSystem extends FileSystem {
* @return Status
* @throws IOException
*/
- private S3AFileStatus s3GetFileStatus(final Path path, String key,
- Set<Path> tombstones) throws IOException {
+ private S3AFileStatus s3GetFileStatus(final Path path, String key)
+ throws IOException {
if (!key.isEmpty()) {
try {
ObjectMetadata meta = getObjectMetadata(key);
@@ -1876,18 +1821,17 @@ public class S3AFileSystem extends FileSystem {
ObjectListing objects = listObjects(request);
- Collection<String> prefixes = objects.getCommonPrefixes();
- Collection<S3ObjectSummary> summaries = objects.getObjectSummaries();
- if (!isEmptyOfKeys(prefixes, tombstones) ||
- !isEmptyOfObjects(summaries, tombstones)) {
+ if (!objects.getCommonPrefixes().isEmpty()
+ || !objects.getObjectSummaries().isEmpty()) {
if (LOG.isDebugEnabled()) {
LOG.debug("Found path as directory (with /): {}/{}",
- prefixes.size(), summaries.size());
+ objects.getCommonPrefixes().size() ,
+ objects.getObjectSummaries().size());
- for (S3ObjectSummary summary : summaries) {
+ for (S3ObjectSummary summary : objects.getObjectSummaries()) {
LOG.debug("Summary: {} {}", summary.getKey(), summary.getSize());
}
- for (String prefix : prefixes) {
+ for (String prefix : objects.getCommonPrefixes()) {
LOG.debug("Prefix: {}", prefix);
}
}
@@ -1909,48 +1853,6 @@ public class S3AFileSystem extends FileSystem {
throw new FileNotFoundException("No such file or directory: " + path);
}
- /** Helper function to determine if a collection of paths is empty
- * after accounting for tombstone markers (if provided).
- * @param keys Collection of path (prefixes / directories or keys).
- * @param tombstones Set of tombstone markers, or null if not applicable.
- * @return false if summaries contains objects not accounted for by
- * tombstones.
- */
- private boolean isEmptyOfKeys(Collection<String> keys, Set<Path>
- tombstones) {
- if (tombstones == null) {
- return keys.isEmpty();
- }
- for (String key : keys) {
- Path qualified = keyToQualifiedPath(key);
- if (!tombstones.contains(qualified)) {
- return false;
- }
- }
- return true;
- }
-
- /** Helper function to determine if a collection of object summaries is empty
- * after accounting for tombstone markers (if provided).
- * @param summaries Collection of objects as returned by listObjects.
- * @param tombstones Set of tombstone markers, or null if not applicable.
- * @return false if summaries contains objects not accounted for by
- * tombstones.
- */
- private boolean isEmptyOfObjects(Collection<S3ObjectSummary> summaries,
- Set<Path> tombstones) {
- if (tombstones == null) {
- return summaries.isEmpty();
- }
- Collection<String> stringCollection = new ArrayList<>(summaries.size());
- for(S3ObjectSummary summary : summaries) {
- stringCollection.add(summary.getKey());
- }
- return isEmptyOfKeys(stringCollection, tombstones);
- }
-
-
-
/**
* Raw version of {@link FileSystem#exists(Path)} which uses S3 only:
* S3Guard MetadataStore, if any, will be skipped.
@@ -1960,7 +1862,7 @@ public class S3AFileSystem extends FileSystem {
Path path = qualify(f);
String key = pathToKey(path);
try {
- return s3GetFileStatus(path, key, null) != null;
+ return s3GetFileStatus(path, key) != null;
} catch (FileNotFoundException e) {
return false;
}
@@ -2518,10 +2420,10 @@ public class S3AFileSystem extends FileSystem {
new Listing.AcceptFilesOnly(qualify(f)));
}
- public RemoteIterator<LocatedFileStatus> listFilesAndEmptyDirectories(Path f,
+ public RemoteIterator<LocatedFileStatus> listFilesAndDirectories(Path f,
boolean recursive) throws IOException {
return innerListFiles(f, recursive,
- new Listing.AcceptAllButS3nDirs());
+ new Listing.AcceptAllButSelfAndS3nDirs(qualify(f)));
}
private RemoteIterator<LocatedFileStatus> innerListFiles(Path f, boolean
@@ -2544,37 +2446,23 @@ public class S3AFileSystem extends FileSystem {
LOG.debug("Requesting all entries under {} with delimiter '{}'",
key, delimiter);
final RemoteIterator<FileStatus> cachedFilesIterator;
- final Set<Path> tombstones;
if (recursive) {
final PathMetadata pm = metadataStore.get(path, true);
- // shouldn't need to check pm.isDeleted() because that will have
- // been caught by getFileStatus above.
- MetadataStoreListFilesIterator metadataStoreListFilesIterator =
- new MetadataStoreListFilesIterator(metadataStore, pm,
- allowAuthoritative);
- tombstones = metadataStoreListFilesIterator.listTombstones();
- cachedFilesIterator = metadataStoreListFilesIterator;
+ cachedFilesIterator = new DescendantsIterator(metadataStore, pm);
} else {
- DirListingMetadata meta = metadataStore.listChildren(path);
- if (meta != null) {
- tombstones = meta.listTombstones();
- } else {
- tombstones = null;
- }
+ final DirListingMetadata meta = metadataStore.listChildren(path);
cachedFilesIterator = listing.createProvidedFileStatusIterator(
S3Guard.dirMetaToStatuses(meta), ACCEPT_ALL, acceptor);
if (allowAuthoritative && meta != null && meta.isAuthoritative()) {
return listing.createLocatedFileStatusIterator(cachedFilesIterator);
}
}
- return listing.createTombstoneReconcilingIterator(
- listing.createLocatedFileStatusIterator(
- listing.createFileStatusListingIterator(path,
- createListObjectsRequest(key, delimiter),
- ACCEPT_ALL,
- acceptor,
- cachedFilesIterator)),
- tombstones);
+ return listing.createLocatedFileStatusIterator(
+ listing.createFileStatusListingIterator(path,
+ createListObjectsRequest(key, delimiter),
+ ACCEPT_ALL,
+ acceptor,
+ cachedFilesIterator));
}
} catch (AmazonClientException e) {
// TODO s3guard:
@@ -2626,7 +2514,7 @@ public class S3AFileSystem extends FileSystem {
final String key = maybeAddTrailingSlash(pathToKey(path));
final Listing.FileStatusAcceptor acceptor =
new Listing.AcceptAllButSelfAndS3nDirs(path);
- DirListingMetadata meta = metadataStore.listChildren(path);
+ final DirListingMetadata meta = metadataStore.listChildren(path);
final RemoteIterator<FileStatus> cachedFileStatusIterator =
listing.createProvidedFileStatusIterator(
S3Guard.dirMetaToStatuses(meta), filter, acceptor);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c1775960/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DescendantsIterator.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DescendantsIterator.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DescendantsIterator.java
index 262a6fa..d008972 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DescendantsIterator.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DescendantsIterator.java
@@ -19,7 +19,6 @@
package org.apache.hadoop.fs.s3a.s3guard;
import java.io.IOException;
-import java.util.Collection;
import java.util.LinkedList;
import java.util.NoSuchElementException;
import java.util.Queue;
@@ -104,9 +103,8 @@ public class DescendantsIterator implements RemoteIterator<FileStatus> {
if (meta != null) {
final Path path = meta.getFileStatus().getPath();
if (path.isRoot()) {
- DirListingMetadata rootListing = ms.listChildren(path);
+ final DirListingMetadata rootListing = ms.listChildren(path);
if (rootListing != null) {
- rootListing = rootListing.withoutTombstones();
queue.addAll(rootListing.getListing());
}
} else {
@@ -125,17 +123,11 @@ public class DescendantsIterator implements RemoteIterator<FileStatus> {
if (!hasNext()) {
throw new NoSuchElementException("No more descendants.");
}
- PathMetadata next;
+ final PathMetadata next;
next = queue.poll();
if (next.getFileStatus().isDirectory()) {
final Path path = next.getFileStatus().getPath();
- DirListingMetadata meta = metadataStore.listChildren(path);
- if (meta != null) {
- Collection<PathMetadata> more = meta.withoutTombstones().getListing();
- if (!more.isEmpty()) {
- queue.addAll(more);
- }
- }
+ queue.addAll(metadataStore.listChildren(path).getListing());
}
return next.getFileStatus();
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c1775960/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DirListingMetadata.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DirListingMetadata.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DirListingMetadata.java
index 320fa8d..f13b447 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DirListingMetadata.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DirListingMetadata.java
@@ -25,12 +25,9 @@ import org.apache.hadoop.fs.Path;
import java.net.URI;
import java.net.URISyntaxException;
-import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
-import java.util.HashSet;
import java.util.Map;
-import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import com.google.common.base.Preconditions;
@@ -107,26 +104,6 @@ public class DirListingMetadata {
return Collections.unmodifiableCollection(listMap.values());
}
- public Set<Path> listTombstones() {
- Set<Path> tombstones = new HashSet<>();
- for (PathMetadata meta : listMap.values()) {
- if (meta.isDeleted()) {
- tombstones.add(meta.getFileStatus().getPath());
- }
- }
- return tombstones;
- }
-
- public DirListingMetadata withoutTombstones() {
- Collection<PathMetadata> filteredList = new ArrayList<>();
- for (PathMetadata meta : listMap.values()) {
- if (!meta.isDeleted()) {
- filteredList.add(meta);
- }
- }
- return new DirListingMetadata(path, filteredList, isAuthoritative);
- }
-
/**
* @return number of entries tracked. This is not the same as the number
* of entries in the actual directory unless {@link #isAuthoritative()} is
@@ -189,15 +166,6 @@ public class DirListingMetadata {
}
/**
- * Replace an entry with a tombstone.
- * @param childPath path of entry to replace.
- */
- public void markDeleted(Path childPath) {
- checkChildPath(childPath);
- listMap.put(childPath, PathMetadata.tombstone(childPath));
- }
-
- /**
* Remove entry from this directory.
*
* @param childPath path of entry to remove.
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c1775960/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
index 784b815..302541c 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
@@ -185,9 +185,6 @@ public class DynamoDBMetadataStore implements MetadataStore {
* DynamoDB. Value is {@value} msec. */
public static final long MIN_RETRY_SLEEP_MSEC = 100;
- private static ValueMap deleteTrackingValueMap =
- new ValueMap().withBoolean(":false", false);
-
private DynamoDB dynamoDB;
private String region;
private Table table;
@@ -289,16 +286,6 @@ public class DynamoDBMetadataStore implements MetadataStore {
@Override
public void delete(Path path) throws IOException {
- innerDelete(path, true);
- }
-
- @Override
- public void forgetMetadata(Path path) throws IOException {
- innerDelete(path, false);
- }
-
- private void innerDelete(Path path, boolean tombstone)
- throws IOException {
path = checkPath(path);
LOG.debug("Deleting from table {} in region {}: {}",
tableName, region, path);
@@ -310,13 +297,7 @@ public class DynamoDBMetadataStore implements MetadataStore {
}
try {
- if (tombstone) {
- Item item = PathMetadataDynamoDBTranslation.pathMetadataToItem(
- PathMetadata.tombstone(path));
- table.putItem(item);
- } else {
- table.deleteItem(pathToKey(path));
- }
+ table.deleteItem(pathToKey(path));
} catch (AmazonClientException e) {
throw translateException("delete", path, e);
}
@@ -329,24 +310,17 @@ public class DynamoDBMetadataStore implements MetadataStore {
tableName, region, path);
final PathMetadata meta = get(path);
- if (meta == null || meta.isDeleted()) {
+ if (meta == null) {
LOG.debug("Subtree path {} does not exist; this will be a no-op", path);
return;
}
for (DescendantsIterator desc = new DescendantsIterator(this, meta);
desc.hasNext();) {
- innerDelete(desc.next().getPath(), true);
+ delete(desc.next().getPath());
}
}
- private Item getConsistentItem(PrimaryKey key) {
- final GetItemSpec spec = new GetItemSpec()
- .withPrimaryKey(key)
- .withConsistentRead(true); // strictly consistent read
- return table.getItem(spec);
- }
-
@Override
public PathMetadata get(Path path) throws IOException {
return get(path, false);
@@ -364,7 +338,10 @@ public class DynamoDBMetadataStore implements MetadataStore {
// Root does not persist in the table
meta = new PathMetadata(makeDirStatus(username, path));
} else {
- final Item item = getConsistentItem(pathToKey(path));
+ final GetItemSpec spec = new GetItemSpec()
+ .withPrimaryKey(pathToKey(path))
+ .withConsistentRead(true); // strictly consistent read
+ final Item item = table.getItem(spec);
meta = itemToPathMetadata(item, username);
LOG.debug("Get from table {} in region {} returning for {}: {}",
tableName, region, path, meta);
@@ -377,8 +354,7 @@ public class DynamoDBMetadataStore implements MetadataStore {
final QuerySpec spec = new QuerySpec()
.withHashKey(pathToParentKeyAttribute(path))
.withConsistentRead(true)
- .withFilterExpression(IS_DELETED + " = :false")
- .withValueMap(deleteTrackingValueMap);
+ .withMaxResultSize(1); // limit 1
final ItemCollection<QueryOutcome> items = table.query(spec);
boolean hasChildren = items.iterator().hasNext();
// When this class has support for authoritative
@@ -422,8 +398,7 @@ public class DynamoDBMetadataStore implements MetadataStore {
final List<PathMetadata> metas = new ArrayList<>();
for (Item item : items) {
- PathMetadata meta = itemToPathMetadata(item, username);
- metas.add(meta);
+ metas.add(itemToPathMetadata(item, username));
}
LOG.trace("Listing table {} in region {} for {} returning {}",
tableName, region, path, metas);
@@ -455,9 +430,9 @@ public class DynamoDBMetadataStore implements MetadataStore {
// Following code is to maintain this invariant by putting all ancestor
// directories of the paths to create.
// ancestor paths that are not explicitly added to paths to create
- Collection<PathMetadata> newItems = new ArrayList<>();
+ Collection<PathMetadata> inferredPathsToCreate = null;
if (pathsToCreate != null) {
- newItems.addAll(pathsToCreate);
+ inferredPathsToCreate = new ArrayList<>(pathsToCreate);
// help set for fast look up; we should avoid putting duplicate paths
final Collection<Path> fullPathsToCreate = new HashSet<>();
for (PathMetadata meta : pathsToCreate) {
@@ -473,20 +448,16 @@ public class DynamoDBMetadataStore implements MetadataStore {
LOG.debug("move: auto-create ancestor path {} for child path {}",
parent, meta.getFileStatus().getPath());
final FileStatus status = makeDirStatus(parent, username);
- newItems.add(new PathMetadata(status, Tristate.FALSE, false));
+ inferredPathsToCreate.add(new PathMetadata(status, Tristate.FALSE));
fullPathsToCreate.add(parent);
parent = parent.getParent();
}
}
}
- if (pathsToDelete != null) {
- for (Path meta : pathsToDelete) {
- newItems.add(PathMetadata.tombstone(meta));
- }
- }
try {
- processBatchWriteRequest(null, pathMetadataToItem(newItems));
+ processBatchWriteRequest(pathToKey(pathsToDelete),
+ pathMetadataToItem(inferredPathsToCreate));
} catch (AmazonClientException e) {
throw translateException("move", (String) null, e);
}
@@ -594,10 +565,13 @@ public class DynamoDBMetadataStore implements MetadataStore {
// first existent ancestor
Path path = meta.getFileStatus().getPath().getParent();
while (path != null && !path.isRoot()) {
- final Item item = getConsistentItem(pathToKey(path));
- if (!itemExists(item)) {
+ final GetItemSpec spec = new GetItemSpec()
+ .withPrimaryKey(pathToKey(path))
+ .withConsistentRead(true); // strictly consistent read
+ final Item item = table.getItem(spec);
+ if (item == null) {
final FileStatus status = makeDirStatus(path, username);
- metasToPut.add(new PathMetadata(status, Tristate.FALSE, false));
+ metasToPut.add(new PathMetadata(status, Tristate.FALSE));
path = path.getParent();
} else {
break;
@@ -606,17 +580,6 @@ public class DynamoDBMetadataStore implements MetadataStore {
return metasToPut;
}
- private boolean itemExists(Item item) {
- if (item == null) {
- return false;
- }
- if (item.hasAttribute(IS_DELETED) &&
- item.getBoolean(IS_DELETED)) {
- return false;
- }
- return true;
- }
-
/** Create a directory FileStatus using current system time as mod time. */
static FileStatus makeDirStatus(Path f, String owner) {
return new FileStatus(0, true, 1, 0, System.currentTimeMillis(), 0,
@@ -628,8 +591,9 @@ public class DynamoDBMetadataStore implements MetadataStore {
LOG.debug("Saving to table {} in region {}: {}", tableName, region, meta);
// directory path
- PathMetadata p = new PathMetadata(makeDirStatus(meta.getPath(), username),
- meta.isEmpty(), false);
+ PathMetadata p = new PathMetadata(
+ makeDirStatus(meta.getPath(), username),
+ meta.isEmpty());
// First add any missing ancestors...
final Collection<PathMetadata> metasToPut = fullPathsToPut(p);
@@ -706,13 +670,13 @@ public class DynamoDBMetadataStore implements MetadataStore {
itemCount++;
if (deletionBatch.size() == S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT) {
Thread.sleep(delay);
- processBatchWriteRequest(pathToKey(deletionBatch), null);
+ processBatchWriteRequest(pathToKey(deletionBatch), new Item[0]);
deletionBatch.clear();
}
}
if (deletionBatch.size() > 0) {
Thread.sleep(delay);
- processBatchWriteRequest(pathToKey(deletionBatch), null);
+ processBatchWriteRequest(pathToKey(deletionBatch), new Item[0]);
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c1775960/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java
index d7e256d..52e5b2a 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java
@@ -101,33 +101,29 @@ public class LocalMetadataStore implements MetadataStore {
}
@Override
- public void delete(Path p) throws IOException {
- doDelete(p, false, true);
- }
-
- @Override
- public void forgetMetadata(Path p) throws IOException {
- doDelete(p, false, false);
+ public void delete(Path path) throws IOException {
+ doDelete(path, false);
}
@Override
public void deleteSubtree(Path path) throws IOException {
- doDelete(path, true, true);
+ doDelete(path, true);
}
- private synchronized void doDelete(Path p, boolean recursive, boolean
- tombstone) {
+ private synchronized void doDelete(Path p, boolean recursive) {
Path path = standardize(p);
+ // We could implement positive hit for 'deleted' files. For now we
+ // do not track them.
// Delete entry from file cache, then from cached parent directory, if any
- deleteHashEntries(path, tombstone);
+ removeHashEntries(path);
if (recursive) {
// Remove all entries that have this dir as path prefix.
- deleteHashByAncestor(path, dirHash, tombstone);
- deleteHashByAncestor(path, fileHash, tombstone);
+ clearHashByAncestor(path, dirHash);
+ clearHashByAncestor(path, fileHash);
}
}
@@ -161,7 +157,7 @@ public class LocalMetadataStore implements MetadataStore {
*/
private Tristate isEmptyDirectory(Path p) {
DirListingMetadata dirMeta = dirHash.get(p);
- return dirMeta.withoutTombstones().isEmpty();
+ return dirMeta.isEmpty();
}
@Override
@@ -190,9 +186,9 @@ public class LocalMetadataStore implements MetadataStore {
synchronized (this) {
// 1. Delete pathsToDelete
- for (Path meta : pathsToDelete) {
- LOG.debug("move: deleting metadata {}", meta);
- delete(meta);
+ for (Path p : pathsToDelete) {
+ LOG.debug("move: deleting metadata {}", p);
+ delete(p);
}
// 2. Create new destination path metadata
@@ -327,25 +323,13 @@ public class LocalMetadataStore implements MetadataStore {
}
@VisibleForTesting
- static <T> void deleteHashByAncestor(Path ancestor, Map<Path, T> hash,
- boolean tombstone) {
+ static <T> void clearHashByAncestor(Path ancestor, Map<Path, T> hash) {
for (Iterator<Map.Entry<Path, T>> it = hash.entrySet().iterator();
it.hasNext();) {
Map.Entry<Path, T> entry = it.next();
Path f = entry.getKey();
- T meta = entry.getValue();
if (isAncestorOf(ancestor, f)) {
- if (tombstone) {
- if (meta instanceof PathMetadata) {
- entry.setValue((T) PathMetadata.tombstone(f));
- } else if (meta instanceof DirListingMetadata) {
- it.remove();
- } else {
- throw new IllegalStateException("Unknown type in hash");
- }
- } else {
- it.remove();
- }
+ it.remove();
}
}
}
@@ -367,21 +351,16 @@ public class LocalMetadataStore implements MetadataStore {
* Update fileHash and dirHash to reflect deletion of file 'f'. Call with
* lock held.
*/
- private void deleteHashEntries(Path path, boolean tombstone) {
+ private void removeHashEntries(Path path) {
// Remove target file/dir
LOG.debug("delete file entry for {}", path);
- if (tombstone) {
- fileHash.put(path, PathMetadata.tombstone(path));
- } else {
- fileHash.remove(path);
- }
+ fileHash.remove(path);
// Update this and parent dir listing, if any
/* If this path is a dir, remove its listing */
LOG.debug("removing listing of {}", path);
-
dirHash.remove(path);
/* Remove this path from parent's dir listing */
@@ -390,11 +369,7 @@ public class LocalMetadataStore implements MetadataStore {
DirListingMetadata dir = dirHash.get(parent);
if (dir != null) {
LOG.debug("removing parent's entry for {} ", path);
- if (tombstone) {
- dir.markDeleted(path);
- } else {
- dir.remove(path);
- }
+ dir.remove(path);
}
}
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c1775960/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java
index 9502a8a..5511532 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java
@@ -22,7 +22,6 @@ import java.io.Closeable;
import java.io.IOException;
import java.util.Collection;
-import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
@@ -56,8 +55,7 @@ public interface MetadataStore extends Closeable {
void initialize(Configuration conf) throws IOException;
/**
- * Deletes exactly one path, leaving a tombstone to prevent lingering,
- * inconsistent copies of it from being listed.
+ * Deletes exactly one path.
*
* @param path the path to delete
* @throws IOException if there is an error
@@ -65,20 +63,7 @@ public interface MetadataStore extends Closeable {
void delete(Path path) throws IOException;
/**
- * Removes the record of exactly one path. Does not leave a tombstone (see
- * {@link MetadataStore#delete(Path)}. It is currently intended for testing
- * only, and a need to use it as part of normal FileSystem usage is not
- * anticipated.
- *
- * @param path the path to delete
- * @throws IOException if there is an error
- */
- @VisibleForTesting
- void forgetMetadata(Path path) throws IOException;
-
- /**
- * Deletes the entire sub-tree rooted at the given path, leaving tombstones
- * to prevent lingering, inconsistent copies of it from being listed.
+ * Deletes the entire sub-tree rooted at the given path.
*
* In addition to affecting future calls to {@link #get(Path)},
* implementations must also update any stored {@code DirListingMetadata}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c1775960/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreListFilesIterator.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreListFilesIterator.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreListFilesIterator.java
deleted file mode 100644
index 272b1f4..0000000
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreListFilesIterator.java
+++ /dev/null
@@ -1,168 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.fs.s3a.s3guard;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.Queue;
-import java.util.Set;
-
-import com.google.common.base.Preconditions;
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.classification.InterfaceStability;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.RemoteIterator;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * {@code MetadataStoreListFilesIterator} is a {@link RemoteIterator} that
- * is similar to {@code DescendantsIterator} but does not return directories
- * that have (or may have) children, and will also provide access to the set of
- * tombstones to allow recently deleted S3 objects to be filtered out from a
- * corresponding request. In other words, it returns tombstones and the same
- * set of objects that should exist in S3: empty directories, and files, and not
- * other directories whose existence is inferred therefrom.
- *
- * For example, assume the consistent store contains metadata representing this
- * file system structure:
- *
- * <pre>
- * /dir1
- * |-- dir2
- * | |-- file1
- * | `-- file2
- * `-- dir3
- * |-- dir4
- * | `-- file3
- * |-- dir5
- * | `-- file4
- * `-- dir6
- * </pre>
- *
- * Consider this code sample:
- * <pre>
- * final PathMetadata dir1 = get(new Path("/dir1"));
- * for (MetadataStoreListFilesIterator files =
- * new MetadataStoreListFilesIterator(dir1); files.hasNext(); ) {
- * final FileStatus status = files.next().getFileStatus();
- * System.out.printf("%s %s%n", status.isDirectory() ? 'D' : 'F',
- * status.getPath());
- * }
- * </pre>
- *
- * The output is:
- * <pre>
- * F /dir1/dir2/file1
- * F /dir1/dir2/file2
- * F /dir1/dir3/dir4/file3
- * F /dir1/dir3/dir5/file4
- * D /dir1/dir3/dir6
- * </pre>
- */
-@InterfaceAudience.Private
-@InterfaceStability.Evolving
-public class MetadataStoreListFilesIterator implements
- RemoteIterator<FileStatus> {
- public static final Logger LOG = LoggerFactory.getLogger(
- MetadataStoreListFilesIterator.class);
-
- private final boolean allowAuthoritative;
- private final MetadataStore metadataStore;
- private final Set<Path> tombstones = new HashSet<>();
- private Iterator<FileStatus> leafNodesIterator = null;
-
- public MetadataStoreListFilesIterator(MetadataStore ms, PathMetadata meta,
- boolean allowAuthoritative) throws IOException {
- Preconditions.checkNotNull(ms);
- this.metadataStore = ms;
- this.allowAuthoritative = allowAuthoritative;
- prefetch(meta);
- }
-
- private void prefetch(PathMetadata meta) throws IOException {
- final Queue<PathMetadata> queue = new LinkedList<>();
- final Collection<FileStatus> leafNodes = new ArrayList<>();
-
- if (meta != null) {
- final Path path = meta.getFileStatus().getPath();
- if (path.isRoot()) {
- DirListingMetadata rootListing = metadataStore.listChildren(path);
- if (rootListing != null) {
- tombstones.addAll(rootListing.listTombstones());
- queue.addAll(rootListing.withoutTombstones().getListing());
- }
- } else {
- queue.add(meta);
- }
- }
-
- while(!queue.isEmpty()) {
- PathMetadata nextMetadata = queue.poll();
- FileStatus nextStatus = nextMetadata.getFileStatus();
- if (nextStatus.isFile()) {
- // All files are leaf nodes by definition
- leafNodes.add(nextStatus);
- continue;
- }
- if (nextStatus.isDirectory()) {
- final Path path = nextStatus.getPath();
- DirListingMetadata children = metadataStore.listChildren(path);
- if (children != null) {
- tombstones.addAll(children.listTombstones());
- Collection<PathMetadata> liveChildren =
- children.withoutTombstones().getListing();
- if (!liveChildren.isEmpty()) {
- // If it's a directory, has children, not all deleted, then we
- // add the children to the queue and move on to the next node
- queue.addAll(liveChildren);
- continue;
- } else if (allowAuthoritative && children.isAuthoritative()) {
- leafNodes.add(nextStatus);
- }
- }
- }
- // Directories that *might* be empty are ignored for now, since we
- // cannot confirm that they are empty without incurring other costs.
- // Users of this class can still discover empty directories via S3's
- // fake directories, subject to the same consistency semantics as before.
- // The only other possibility is a symlink, which is unsupported on S3A.
- }
- leafNodesIterator = leafNodes.iterator();
- }
-
- @Override
- public boolean hasNext() {
- return leafNodesIterator.hasNext();
- }
-
- @Override
- public FileStatus next() {
- return leafNodesIterator.next();
- }
-
- public Set<Path> listTombstones() {
- return tombstones;
- }
-}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c1775960/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java
index 65019eb..3869d13 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java
@@ -52,11 +52,6 @@ public class NullMetadataStore implements MetadataStore {
}
@Override
- public void forgetMetadata(Path path) throws IOException {
- return;
- }
-
- @Override
public void deleteSubtree(Path path) throws IOException {
return;
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c1775960/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadata.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadata.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadata.java
index d799e16..b5d4f04 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadata.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadata.java
@@ -22,7 +22,6 @@ import com.google.common.base.Preconditions;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.Tristate;
/**
@@ -35,13 +34,6 @@ public class PathMetadata {
private final FileStatus fileStatus;
private Tristate isEmptyDirectory;
- private boolean isDeleted;
-
- public static PathMetadata tombstone(Path path) {
- long now = System.currentTimeMillis();
- FileStatus status = new FileStatus(0, false, 0, 0, now, path);
- return new PathMetadata(status, Tristate.UNKNOWN, true);
- }
/**
* Creates a new {@code PathMetadata} containing given {@code FileStatus}.
@@ -52,11 +44,6 @@ public class PathMetadata {
}
public PathMetadata(FileStatus fileStatus, Tristate isEmptyDir) {
- this(fileStatus, isEmptyDir, false);
- }
-
- public PathMetadata(FileStatus fileStatus, Tristate isEmptyDir, boolean
- isDeleted) {
Preconditions.checkNotNull(fileStatus, "fileStatus must be non-null");
Preconditions.checkNotNull(fileStatus.getPath(), "fileStatus path must be" +
" non-null");
@@ -64,7 +51,6 @@ public class PathMetadata {
" be absolute");
this.fileStatus = fileStatus;
this.isEmptyDirectory = isEmptyDir;
- this.isDeleted = isDeleted;
}
/**
@@ -87,14 +73,6 @@ public class PathMetadata {
this.isEmptyDirectory = isEmptyDirectory;
}
- public boolean isDeleted() {
- return isDeleted;
- }
-
- void setIsDeleted(boolean isDeleted) {
- this.isDeleted = isDeleted;
- }
-
@Override
public boolean equals(Object o) {
if (!(o instanceof PathMetadata)) {
@@ -113,7 +91,6 @@ public class PathMetadata {
return "PathMetadata{" +
"fileStatus=" + fileStatus +
"; isEmptyDirectory=" + isEmptyDirectory +
- "; isDeleted=" + isDeleted +
'}';
}
@@ -122,10 +99,10 @@ public class PathMetadata {
* @param sb target StringBuilder
*/
public void prettyPrint(StringBuilder sb) {
- sb.append(String.format("%-5s %-20s %-7d %-8s %-6s",
+ sb.append(String.format("%-5s %-20s %-7d %s",
fileStatus.isDirectory() ? "dir" : "file",
fileStatus.getPath().toString(), fileStatus.getLen(),
- isEmptyDirectory.name(), isDeleted));
+ isEmptyDirectory.name()));
sb.append(fileStatus);
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c1775960/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java
index c206a00..1c04016 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java
@@ -39,7 +39,6 @@ import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.Constants;
-import org.apache.hadoop.fs.s3a.Tristate;
/**
* Defines methods for translating between domain model objects and their
@@ -63,7 +62,6 @@ final class PathMetadataDynamoDBTranslation {
static final String FILE_LENGTH = "file_length";
@VisibleForTesting
static final String BLOCK_SIZE = "block_size";
- static final String IS_DELETED = "is_deleted";
/** Table version field {@value} in version marker item. */
@VisibleForTesting
@@ -135,10 +133,8 @@ final class PathMetadataDynamoDBTranslation {
fileStatus = new FileStatus(len, false, 1, block, modTime, 0, null,
username, username, path);
}
- boolean isDeleted =
- item.hasAttribute(IS_DELETED) ? item.getBoolean(IS_DELETED) : false;
- return new PathMetadata(fileStatus, Tristate.UNKNOWN, isDeleted);
+ return new PathMetadata(fileStatus);
}
/**
@@ -158,7 +154,6 @@ final class PathMetadataDynamoDBTranslation {
.withLong(MOD_TIME, status.getModificationTime())
.withLong(BLOCK_SIZE, status.getBlockSize());
}
- item.withBoolean(IS_DELETED, meta.isDeleted());
return item;
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c1775960/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java
index 19f0e50..53dc991 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java
@@ -39,7 +39,6 @@ import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
-import java.util.Set;
import static org.apache.hadoop.fs.s3a.Constants.*;
import static org.apache.hadoop.fs.s3a.Statistic.*;
@@ -144,8 +143,7 @@ public final class S3Guard {
/**
* Convert the data of a directory listing to an array of {@link FileStatus}
- * entries. Tombstones are filtered out at this point. If the listing is null
- * an empty array is returned.
+ * entries. If the listing is null an empty array is returned.
* @param dirMeta directory listing -may be null
* @return a possibly-empty array of file status entries
*/
@@ -155,15 +153,15 @@ public final class S3Guard {
}
Collection<PathMetadata> listing = dirMeta.getListing();
- List<FileStatus> statuses = new ArrayList<>();
+ FileStatus[] statuses = new FileStatus[listing.size()];
+ // HADOOP-13760: filter out deleted entries here
+ int i = 0;
for (PathMetadata pm : listing) {
- if (!pm.isDeleted()) {
- statuses.add(pm.getFileStatus());
- }
+ statuses[i++] = pm.getFileStatus();
}
- return statuses.toArray(new FileStatus[0]);
+ return statuses;
}
/**
@@ -198,8 +196,6 @@ public final class S3Guard {
false);
}
- Set<Path> deleted = dirMeta.listTombstones();
-
// Since we treat the MetadataStore as a "fresher" or "consistent" view
// of metadata, we always use its metadata first.
@@ -207,11 +203,10 @@ public final class S3Guard {
// we will basically start with the set of directory entries in the
// DirListingMetadata, and add any that only exist in the backingStatuses.
+ // HADOOP-13760: filter out deleted files via PathMetadata#isDeleted() here
+
boolean changed = false;
for (FileStatus s : backingStatuses) {
- if (deleted.contains(s.getPath())) {
- continue;
- }
// Minor race condition here. Multiple threads could add to this
// mutable DirListingMetadata. Since it is backed by a
@@ -250,7 +245,7 @@ public final class S3Guard {
* list will contain [/a/b/c/d, /a/b/c, /a/b]. /a/b/c/d is
* an empty, dir, and the other dirs only contain their child
* dir.
- * @param owner Hadoop user name.
+ * @param owner Hadoop user name
*/
public static void makeDirsOrdered(MetadataStore ms, List<Path> dirs,
String owner) {
@@ -285,8 +280,7 @@ public final class S3Guard {
children = new ArrayList<>(1);
children.add(new PathMetadata(prevStatus));
}
- DirListingMetadata dirMeta =
- new DirListingMetadata(f, children, true);
+ DirListingMetadata dirMeta = new DirListingMetadata(f, children, true);
try {
ms.put(dirMeta);
ms.put(new PathMetadata(status));
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c1775960/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
index 9bd0cb8..3d4f11d 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
@@ -446,7 +446,7 @@ public abstract class S3GuardTool extends Configured implements Tool {
private long importDir(FileStatus status) throws IOException {
Preconditions.checkArgument(status.isDirectory());
RemoteIterator<LocatedFileStatus> it =
- s3a.listFilesAndEmptyDirectories(status.getPath(), true);
+ s3a.listFilesAndDirectories(status.getPath(), true);
long items = 0;
while (it.hasNext()) {
@@ -686,8 +686,7 @@ public abstract class S3GuardTool extends Configured implements Tool {
} catch (FileNotFoundException e) {
}
PathMetadata meta = ms.get(qualified);
- FileStatus msStatus = (meta != null && !meta.isDeleted()) ?
- meta.getFileStatus() : null;
+ FileStatus msStatus = meta != null ? meta.getFileStatus() : null;
compareDir(msStatus, s3Status, out);
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c1775960/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardEmptyDirs.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardEmptyDirs.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardEmptyDirs.java
index fb6e370..55e310b 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardEmptyDirs.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardEmptyDirs.java
@@ -39,47 +39,45 @@ public class ITestS3GuardEmptyDirs extends AbstractS3ATestBase {
@Test
public void testEmptyDirs() throws Exception {
+
S3AFileSystem fs = getFileSystem();
Assume.assumeTrue(fs.hasMetadataStore());
MetadataStore configuredMs = fs.getMetadataStore();
+
+ // 1. Simulate files already existing in the bucket before we started our
+ // cluster. Temporarily disable the MetadataStore so it doesn't witness
+ // us creating these files.
+
+ fs.setMetadataStore(new NullMetadataStore());
+
Path existingDir = path("existing-dir");
+ assertTrue(fs.mkdirs(existingDir));
Path existingFile = path("existing-dir/existing-file");
- try {
- // 1. Simulate files already existing in the bucket before we started our
- // cluster. Temporarily disable the MetadataStore so it doesn't witness
- // us creating these files.
-
- fs.setMetadataStore(new NullMetadataStore());
- assertTrue(fs.mkdirs(existingDir));
- touch(fs, existingFile);
+ touch(fs, existingFile);
- // 2. Simulate (from MetadataStore's perspective) starting our cluster and
- // creating a file in an existing directory.
- fs.setMetadataStore(configuredMs); // "start cluster"
- Path newFile = path("existing-dir/new-file");
- touch(fs, newFile);
+ // 2. Simulate (from MetadataStore's perspective) starting our cluster and
+ // creating a file in an existing directory.
+ fs.setMetadataStore(configuredMs); // "start cluster"
+ Path newFile = path("existing-dir/new-file");
+ touch(fs, newFile);
- S3AFileStatus status = fs.innerGetFileStatus(existingDir, true);
- assertEquals("Should not be empty dir", Tristate.FALSE,
- status.isEmptyDirectory());
+ S3AFileStatus status = fs.innerGetFileStatus(existingDir, true);
+ assertEquals("Should not be empty dir", Tristate.FALSE,
+ status.isEmptyDirectory());
- // 3. Assert that removing the only file the MetadataStore witnessed
- // being created doesn't cause it to think the directory is now empty.
- fs.delete(newFile, false);
- status = fs.innerGetFileStatus(existingDir, true);
- assertEquals("Should not be empty dir", Tristate.FALSE,
- status.isEmptyDirectory());
+ // 3. Assert that removing the only file the MetadataStore witnessed
+ // being created doesn't cause it to think the directory is now empty.
+ fs.delete(newFile, false);
+ status = fs.innerGetFileStatus(existingDir, true);
+ assertEquals("Should not be empty dir", Tristate.FALSE,
+ status.isEmptyDirectory());
- // 4. Assert that removing the final file, that existed "before"
- // MetadataStore started, *does* cause the directory to be marked empty.
- fs.delete(existingFile, false);
- status = fs.innerGetFileStatus(existingDir, true);
- assertEquals("Should be empty dir now", Tristate.TRUE,
- status.isEmptyDirectory());
- } finally {
- configuredMs.forgetMetadata(existingFile);
- configuredMs.forgetMetadata(existingDir);
- }
+ // 4. Assert that removing the final file, that existed "before"
+ // MetadataStore started, *does* cause the directory to be marked empty.
+ fs.delete(existingFile, false);
+ status = fs.innerGetFileStatus(existingDir, true);
+ assertEquals("Should be empty dir now", Tristate.TRUE,
+ status.isEmptyDirectory());
}
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c1775960/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java
index 8771fd2..5e83906 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java
@@ -56,185 +56,8 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
return new S3AContract(conf);
}
- /**
- * Helper function for other test cases: does a single rename operation and
- * validates the aftermath.
- * @param mkdirs Directories to create
- * @param srcdirs Source paths for rename operation
- * @param dstdirs Destination paths for rename operation
- * @param yesdirs Files that must exist post-rename (e.g. srcdirs children)
- * @param nodirs Files that must not exist post-rename (e.g. dstdirs children)
- * @throws Exception
- */
- private void doTestRenameSequence(Path[] mkdirs, Path[] srcdirs,
- Path[] dstdirs, Path[] yesdirs, Path[] nodirs) throws Exception {
- S3AFileSystem fs = getFileSystem();
- Assume.assumeTrue(fs.hasMetadataStore());
-
- if (mkdirs != null) {
- for (Path mkdir : mkdirs) {
- assertTrue(fs.mkdirs(mkdir));
- }
- Thread.sleep(InconsistentAmazonS3Client.DELAY_KEY_MILLIS);
- }
-
- assertTrue("srcdirs and dstdirs must have equal length",
- srcdirs.length == dstdirs.length);
- for (int i = 0; i < srcdirs.length; i++) {
- assertTrue("Rename returned false: " + srcdirs[i] + " -> " + dstdirs[i],
- fs.rename(srcdirs[i], dstdirs[i]));
- }
-
- for (Path yesdir : yesdirs) {
- assertTrue("Path was supposed to exist: " + yesdir, fs.exists(yesdir));
- }
- for (Path nodir : nodirs) {
- assertFalse("Path is not supposed to exist: " + nodir, fs.exists(nodir));
- }
- }
-
- /**
- * Tests that after renaming a directory, the original directory and its
- * contents are indeed missing and the corresponding new paths are visible.
- * @throws Exception
- */
- @Test
- public void testConsistentListAfterRename() throws Exception {
- Path[] mkdirs = {
- path("d1/f"),
- path("d1/f" + InconsistentAmazonS3Client.DELAY_KEY_SUBSTRING)
- };
- Path[] srcdirs = {path("d1")};
- Path[] dstdirs = {path("d2")};
- Path[] yesdirs = {path("d2"), path("d2/f"),
- path("d2/f" + InconsistentAmazonS3Client.DELAY_KEY_SUBSTRING)};
- Path[] nodirs = {path("d1"), path("d1/f"),
- path("d1/f" + InconsistentAmazonS3Client.DELAY_KEY_SUBSTRING)};
- doTestRenameSequence(mkdirs, srcdirs, dstdirs, yesdirs, nodirs);
- getFileSystem().delete(path("d1"), true);
- getFileSystem().delete(path("d2"), true);
- }
-
- /**
- * Tests a circular sequence of renames to verify that overwriting recently
- * deleted files and reading recently created files from rename operations
- * works as expected.
- * @throws Exception
- */
- @Test
- public void testRollingRenames() throws Exception {
- Path[] dir0 = {path("rolling/1")};
- Path[] dir1 = {path("rolling/2")};
- Path[] dir2 = {path("rolling/3")};
- // These sets have to be in reverse order compared to the movement
- Path[] setA = {dir1[0], dir0[0]};
- Path[] setB = {dir2[0], dir1[0]};
- Path[] setC = {dir0[0], dir2[0]};
-
- for(int i = 0; i < 2; i++) {
- Path[] firstSet = i == 0 ? setA : null;
- doTestRenameSequence(firstSet, setA, setB, setB, dir0);
- doTestRenameSequence(null, setB, setC, setC, dir1);
- doTestRenameSequence(null, setC, setA, setA, dir2);
- }
-
- S3AFileSystem fs = getFileSystem();
- assertFalse("Renaming deleted file should have failed",
- fs.rename(dir2[0], dir1[0]));
- assertTrue("Renaming over existing file should have succeeded",
- fs.rename(dir1[0], dir0[0]));
- }
-
- /**
- * Tests that deleted files immediately stop manifesting in list operations
- * even when the effect in S3 is delayed.
- * @throws Exception
- */
- @Test
- public void testConsistentListAfterDelete() throws Exception {
- S3AFileSystem fs = getFileSystem();
- // test will fail if NullMetadataStore (the default) is configured: skip it.
- Assume.assumeTrue(fs.hasMetadataStore());
-
- // Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed
- // in listObjects() results via InconsistentS3Client
- Path inconsistentPath =
- path("a/b/dir3-" + InconsistentAmazonS3Client.DELAY_KEY_SUBSTRING);
-
- Path[] testDirs = {path("a/b/dir1"),
- path("a/b/dir2"),
- inconsistentPath};
-
- for (Path path : testDirs) {
- assertTrue(fs.mkdirs(path));
- }
- Thread.sleep(2 * InconsistentAmazonS3Client.DELAY_KEY_MILLIS);
- for (Path path : testDirs) {
- assertTrue(fs.delete(path, false));
- }
-
- FileStatus[] paths = fs.listStatus(path("a/b/"));
- List<Path> list = new ArrayList<>();
- for (FileStatus fileState : paths) {
- list.add(fileState.getPath());
- }
- assertFalse(list.contains(path("a/b/dir1")));
- assertFalse(list.contains(path("a/b/dir2")));
- // This should fail without S3Guard, and succeed with it.
- assertFalse(list.contains(inconsistentPath));
- }
-
- /**
- * Tests that rename immediately after files in the source directory are
- * deleted results in exactly the correct set of destination files and none
- * of the source files.
- * @throws Exception
- */
@Test
- public void testConsistentRenameAfterDelete() throws Exception {
- S3AFileSystem fs = getFileSystem();
- // test will fail if NullMetadataStore (the default) is configured: skip it.
- Assume.assumeTrue(fs.hasMetadataStore());
-
- // Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed
- // in listObjects() results via InconsistentS3Client
- Path inconsistentPath =
- path("a/b/dir3-" + InconsistentAmazonS3Client.DELAY_KEY_SUBSTRING);
-
- Path[] testDirs = {path("a/b/dir1"),
- path("a/b/dir2"),
- inconsistentPath};
-
- for (Path path : testDirs) {
- assertTrue(fs.mkdirs(path));
- }
- Thread.sleep(2 * InconsistentAmazonS3Client.DELAY_KEY_MILLIS);
- assertTrue(fs.delete(testDirs[1], false));
- assertTrue(fs.delete(testDirs[2], false));
-
- fs.rename(path("a"), path("a3"));
- FileStatus[] paths = fs.listStatus(path("a3/b"));
- List<Path> list = new ArrayList<>();
- for (FileStatus fileState : paths) {
- list.add(fileState.getPath());
- }
- assertTrue(list.contains(path("a3/b/dir1")));
- assertFalse(list.contains(path("a3/b/dir2")));
- // This should fail without S3Guard, and succeed with it.
- assertFalse(list.contains(
- path("a3/b/dir3-" + InconsistentAmazonS3Client.DELAY_KEY_SUBSTRING)));
-
- try {
- RemoteIterator<LocatedFileStatus> old = fs.listFilesAndEmptyDirectories(
- path("a"), true);
- fail("Recently renamed dir should not be visible");
- } catch(FileNotFoundException e) {
- // expected
- }
- }
-
- @Test
- public void testConsistentListStatusAfterPut() throws Exception {
+ public void testConsistentListStatus() throws Exception {
S3AFileSystem fs = getFileSystem();
@@ -267,25 +90,23 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
}
/**
- * Similar to {@link #testConsistentListStatusAfterPut()}, this tests that the
- * FS listLocatedStatus() call will return consistent list.
+ * Similar to {@link #testConsistentListStatus()}, this tests that the FS
+ * listLocatedStatus() call will return consistent list.
*/
@Test
- public void testConsistentListLocatedStatusAfterPut() throws Exception {
+ public void testConsistentListLocatedStatus() throws Exception {
final S3AFileSystem fs = getFileSystem();
// This test will fail if NullMetadataStore (the default) is configured:
// skip it.
Assume.assumeTrue(fs.hasMetadataStore());
- String rootDir = "doTestConsistentListLocatedStatusAfterPut";
- fs.mkdirs(path(rootDir));
+ fs.mkdirs(path("doTestConsistentListLocatedStatus"));
final int[] numOfPaths = {0, 1, 10};
for (int normalPathNum : numOfPaths) {
for (int delayedPathNum : numOfPaths) {
LOG.info("Testing with normalPathNum={}, delayedPathNum={}",
normalPathNum, delayedPathNum);
- doTestConsistentListLocatedStatusAfterPut(fs, rootDir, normalPathNum,
- delayedPathNum);
+ doTestConsistentListLocatedStatus(fs, normalPathNum, delayedPathNum);
}
}
}
@@ -297,18 +118,18 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
* @param delayedPathNum number paths listed with delaying
* @throws Exception
*/
- private void doTestConsistentListLocatedStatusAfterPut(S3AFileSystem fs,
- String rootDir, int normalPathNum, int delayedPathNum) throws Exception {
+ private void doTestConsistentListLocatedStatus(S3AFileSystem fs,
+ int normalPathNum, int delayedPathNum) throws Exception {
final List<Path> testDirs = new ArrayList<>(normalPathNum + delayedPathNum);
int index = 0;
for (; index < normalPathNum; index++) {
- testDirs.add(path(rootDir + "/dir-" +
- index));
+ testDirs.add(path("doTestConsistentListLocatedStatus/dir-" + index));
}
for (; index < normalPathNum + delayedPathNum; index++) {
// Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed
// in listObjects() results via InconsistentS3Client
- testDirs.add(path(rootDir + "/dir-" + index + DELAY_KEY_SUBSTRING));
+ testDirs.add(path("doTestConsistentListLocatedStatus/dir-" + index
+ + DELAY_KEY_SUBSTRING));
}
for (Path path : testDirs) {
@@ -320,7 +141,7 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
// this should return the union data from S3 and MetadataStore
final RemoteIterator<LocatedFileStatus> statusIterator =
- fs.listLocatedStatus(path(rootDir + "/"));
+ fs.listLocatedStatus(path("doTestConsistentListLocatedStatus/"));
List<Path> list = new ArrayList<>();
for (; statusIterator.hasNext();) {
list.add(statusIterator.next().getPath());
@@ -402,13 +223,10 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
fileNames.add("file-" + index + "-" + DELAY_KEY_SUBSTRING);
}
- int filesAndEmptyDirectories = 0;
-
// create files under each test directory
for (Path dir : testDirs) {
for (String fileName : fileNames) {
writeTextFile(fs, new Path(dir, fileName), "I, " + fileName, false);
- filesAndEmptyDirectories++;
}
}
@@ -433,7 +251,7 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
verifyFileIsListed(listedFiles, baseTestDir, fileNames);
} else {
assertEquals("Unexpected number of files returned by listFiles() call",
- filesAndEmptyDirectories,
+ testDirs.size() * (normalFileNum + delayedFileNum),
listedFiles.size());
for (Path dir : testDirs) {
verifyFileIsListed(listedFiles, dir, fileNames);
@@ -545,11 +363,6 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
assertEquals("Unexpected number of results from metadata store. "
+ "Should have /OnS3 and /OnS3AndMS: " + mdResults,
2, mdResults.numEntries());
-
- // If we don't clean this up, the next test run will fail because it will
- // have recorded /OnS3 being deleted even after it's written to noS3Guard.
- getFileSystem().getMetadataStore().forgetMetadata(
- new Path(directory, "OnS3"));
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org