You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by st...@apache.org on 2020/04/15 16:05:42 UTC
[hadoop] branch branch-3.3 updated: HADOOP-16465
listLocatedStatus() optimisation (#1943)
This is an automated email from the ASF dual-hosted git repository.
stevel pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.3 by this push:
new 94da630 HADOOP-16465 listLocatedStatus() optimisation (#1943)
94da630 is described below
commit 94da630cd2d1e056393f83acdac2747cf4b12bad
Author: Mukund Thakur <mu...@users.noreply.github.com>
AuthorDate: Tue Apr 14 21:49:51 2020 +0530
HADOOP-16465 listLocatedStatus() optimisation (#1943)
Contributed by Mukund Thakur
Optimize S3AFileSystem.listLocatedStatus() to perform list
operations directly and then fallback to head checks for files
Change-Id: Ia2c0fa6fcc5967c49b914b92f41135d07dab0464
---
.../org/apache/hadoop/fs/s3a/S3AFileSystem.java | 87 ++++++++++++++--------
.../hadoop/fs/s3a/ITestS3AFileOperationCost.java | 57 ++++++++++++++
2 files changed, 114 insertions(+), 30 deletions(-)
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index 9bd33a4..9630a9e 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -4283,42 +4283,69 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
RemoteIterator<? extends LocatedFileStatus> iterator =
once("listLocatedStatus", path.toString(),
() -> {
- // lookup dir triggers existence check
- final S3AFileStatus fileStatus =
- (S3AFileStatus) getFileStatus(path);
- if (fileStatus.isFile()) {
- // simple case: File
- LOG.debug("Path is a file");
- return new Listing.SingleStatusRemoteIterator(
- filter.accept(path) ? toLocatedFileStatus(fileStatus) : null);
- } else {
- // directory: trigger a lookup
- final String key = maybeAddTrailingSlash(pathToKey(path));
- final Listing.FileStatusAcceptor acceptor =
- new Listing.AcceptAllButSelfAndS3nDirs(path);
- boolean allowAuthoritative = allowAuthoritative(f);
- DirListingMetadata meta =
- S3Guard.listChildrenWithTtl(metadataStore, path,
- ttlTimeProvider, allowAuthoritative);
- final RemoteIterator<S3AFileStatus> cachedFileStatusIterator =
- listing.createProvidedFileStatusIterator(
- S3Guard.dirMetaToStatuses(meta), filter, acceptor);
- return (allowAuthoritative && meta != null
- && meta.isAuthoritative())
- ? listing.createLocatedFileStatusIterator(
- cachedFileStatusIterator)
- : listing.createLocatedFileStatusIterator(
- listing.createFileStatusListingIterator(path,
- createListObjectsRequest(key, "/"),
- filter,
- acceptor,
- cachedFileStatusIterator));
+ // Assuming the path to be a directory,
+ // trigger a list call directly.
+ final RemoteIterator<S3ALocatedFileStatus>
+ locatedFileStatusIteratorForDir =
+ getLocatedFileStatusIteratorForDir(path, filter);
+
+ // If no listing is present then path might be a file.
+ if (!locatedFileStatusIteratorForDir.hasNext()) {
+ final S3AFileStatus fileStatus =
+ (S3AFileStatus) getFileStatus(path);
+ if (fileStatus.isFile()) {
+ // simple case: File
+ LOG.debug("Path is a file");
+ return new Listing.SingleStatusRemoteIterator(
+ filter.accept(path)
+ ? toLocatedFileStatus(fileStatus)
+ : null);
+ }
}
+ // Either empty or non-empty directory.
+ return locatedFileStatusIteratorForDir;
});
return toLocatedFileStatusIterator(iterator);
}
/**
+ * Generate list located status for a directory.
+ * Also performing tombstone reconciliation for guarded directories.
+ * @param dir directory to check.
+ * @param filter a path filter.
+ * @return an iterator that traverses statuses of the given dir.
+ * @throws IOException in case of failure.
+ */
+ private RemoteIterator<S3ALocatedFileStatus> getLocatedFileStatusIteratorForDir(
+ Path dir, PathFilter filter) throws IOException {
+ final String key = maybeAddTrailingSlash(pathToKey(dir));
+ final Listing.FileStatusAcceptor acceptor =
+ new Listing.AcceptAllButSelfAndS3nDirs(dir);
+ boolean allowAuthoritative = allowAuthoritative(dir);
+ DirListingMetadata meta =
+ S3Guard.listChildrenWithTtl(metadataStore, dir,
+ ttlTimeProvider, allowAuthoritative);
+ Set<Path> tombstones = meta != null
+ ? meta.listTombstones()
+ : null;
+ final RemoteIterator<S3AFileStatus> cachedFileStatusIterator =
+ listing.createProvidedFileStatusIterator(
+ S3Guard.dirMetaToStatuses(meta), filter, acceptor);
+ return (allowAuthoritative && meta != null
+ && meta.isAuthoritative())
+ ? listing.createLocatedFileStatusIterator(
+ cachedFileStatusIterator)
+ : listing.createTombstoneReconcilingIterator(
+ listing.createLocatedFileStatusIterator(
+ listing.createFileStatusListingIterator(dir,
+ createListObjectsRequest(key, "/"),
+ filter,
+ acceptor,
+ cachedFileStatusIterator)),
+ tombstones);
+ }
+
+ /**
* Build a {@link S3ALocatedFileStatus} from a {@link FileStatus} instance.
* @param status file status
* @return a located status with block locations set up from this FS.
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java
index e2f7fea..b2b983c 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java
@@ -112,6 +112,63 @@ public class ITestS3AFileOperationCost extends AbstractS3ATestBase {
}
@Test
+ public void testCostOfLocatedFileStatusOnFile() throws Throwable {
+ describe("performing listLocatedStatus on a file");
+ Path file = path(getMethodName() + ".txt");
+ S3AFileSystem fs = getFileSystem();
+ touch(fs, file);
+ resetMetricDiffs();
+ fs.listLocatedStatus(file);
+ if (!fs.hasMetadataStore()) {
+ // Unguarded FS.
+ metadataRequests.assertDiffEquals(1);
+ }
+ listRequests.assertDiffEquals(1);
+ }
+
+ @Test
+ public void testCostOfListLocatedStatusOnEmptyDir() throws Throwable {
+ describe("performing listLocatedStatus on an empty dir");
+ Path dir = path(getMethodName());
+ S3AFileSystem fs = getFileSystem();
+ fs.mkdirs(dir);
+ resetMetricDiffs();
+ fs.listLocatedStatus(dir);
+ if (!fs.hasMetadataStore()) {
+ // Unguarded FS.
+ verifyOperationCount(2, 1);
+ } else {
+ if (fs.allowAuthoritative(dir)) {
+ verifyOperationCount(0, 0);
+ } else {
+ verifyOperationCount(0, 1);
+ }
+ }
+ }
+
+ @Test
+ public void testCostOfListLocatedStatusOnNonEmptyDir() throws Throwable {
+ describe("performing listLocatedStatus on a non empty dir");
+ Path dir = path(getMethodName() + "dir");
+ S3AFileSystem fs = getFileSystem();
+ fs.mkdirs(dir);
+ Path file = new Path(dir, "file.txt");
+ touch(fs, file);
+ resetMetricDiffs();
+ fs.listLocatedStatus(dir);
+ if (!fs.hasMetadataStore()) {
+ // Unguarded FS.
+ verifyOperationCount(0, 1);
+ } else {
+ if(fs.allowAuthoritative(dir)) {
+ verifyOperationCount(0, 0);
+ } else {
+ verifyOperationCount(0, 1);
+ }
+ }
+ }
+
+ @Test
public void testCostOfGetFileStatusOnFile() throws Throwable {
describe("performing getFileStatus on a file");
Path simpleFile = path("simple.txt");
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org