You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by st...@apache.org on 2019/10/14 16:07:54 UTC
[hadoop] branch trunk updated: HADOOP-16635. S3A "directories only"
scan still does a HEAD.
This is an automated email from the ASF dual-hosted git repository.
stevel pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
new 74e5018 HADOOP-16635. S3A "directories only" scan still does a HEAD.
74e5018 is described below
commit 74e5018d871bdf712b3ad0706150a37cb8efee5c
Author: Steve Loughran <st...@apache.org>
AuthorDate: Mon Oct 14 17:05:52 2019 +0100
HADOOP-16635. S3A "directories only" scan still does a HEAD.
Contributed by Steve Loughran.
Change-Id: I5e41d7f721364c392e1f4344db83dfa8c5aa06ce
---
.../org/apache/hadoop/fs/s3a/S3AFileSystem.java | 91 ++++++++++----
.../apache/hadoop/fs/s3a/impl/StatusProbeEnum.java | 16 +++
.../hadoop/fs/s3a/ITestAuthoritativePath.java | 8 +-
.../hadoop/fs/s3a/ITestS3AFileOperationCost.java | 140 ++++++++++++++++++++-
.../org/apache/hadoop/fs/s3a/ITestS3GuardTtl.java | 6 +
5 files changed, 229 insertions(+), 32 deletions(-)
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index 26f16a7..ff2ba14 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -2610,7 +2610,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
* @param f The path we want information from
* @param needEmptyDirectoryFlag if true, implementation will calculate
* a TRUE or FALSE value for {@link S3AFileStatus#isEmptyDirectory()}
- * @param probes probes to make
+ * @param probes probes to make.
* @return a S3AFileStatus object
* @throws FileNotFoundException when the path does not exist
* @throws IOException on other problems.
@@ -2711,7 +2711,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
// there was no entry in S3Guard
// retrieve the data and update the metadata store in the process.
return S3Guard.putAndReturn(metadataStore,
- s3GetFileStatus(path, key, StatusProbeEnum.ALL, tombstones),
+ s3GetFileStatus(path, key, probes, tombstones),
instrumentation,
ttlTimeProvider);
}
@@ -2719,31 +2719,70 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
/**
* Raw {@code getFileStatus} that talks direct to S3.
- * Used to implement {@link #innerGetFileStatus(Path, boolean)},
+ * Used to implement {@link #innerGetFileStatus(Path, boolean, Set)},
* and for direct management of empty directory blobs.
+ *
+ * Checks made, in order:
+ * <ol>
+ * <li>
+ * Head: look for an object at the given key, provided that
+ * the key doesn't end in "/"
+ * </li>
+ * <li>
+ * DirMarker: look for the directory marker -the key with a trailing /
+ * if not passed in.
+ * If an object was found with size 0 bytes, a directory status entry
+ * is returned which declares that the directory is empty.
+ * </li>
+ * <li>
+ * List: issue a LIST on the key (with / if needed), require one
+ * entry to be found for the path to be considered a non-empty directory.
+ * </li>
+ * </ol>
+ *
+ * Notes:
+ * <ul>
+ * <li>
+ * Objects ending in / which are not 0-bytes long are not treated as
+ * directory markers, but instead as files.
+ * </li>
+ * <li>
+ * There's ongoing discussions about whether a dir marker
+ * should be interpreted as an empty dir.
+ * </li>
+ * <li>
+ * The HEAD requests require the permissions to read an object,
+ * including (we believe) the ability to decrypt the file.
+ * At the very least, for SSE-C markers, you need the same key on
+ * the client for the HEAD to work.
+ * </li>
+ * <li>
+ * The List probe needs list permission; it is also more prone to
+ * inconsistency, even on newly created files.
+ * </li>
+ * </ul>
+ *
* Retry policy: retry translated.
* @param path Qualified path
* @param key Key string for the path
* @param probes probes to make
* @param tombstones tombstones to filter
* @return Status
- * @throws FileNotFoundException when the path does not exist
+ * @throws FileNotFoundException the supplied probes failed.
* @throws IOException on other problems.
*/
+ @VisibleForTesting
@Retries.RetryTranslated
- private S3AFileStatus s3GetFileStatus(final Path path,
- String key,
+ S3AFileStatus s3GetFileStatus(final Path path,
+ final String key,
final Set<StatusProbeEnum> probes,
final Set<Path> tombstones) throws IOException {
- if (!key.isEmpty() && probes.contains(StatusProbeEnum.Head)) {
- try {
- ObjectMetadata meta = getObjectMetadata(key);
-
- if (objectRepresentsDirectory(key, meta.getContentLength())) {
- LOG.debug("Found exact file: fake directory");
- return new S3AFileStatus(Tristate.TRUE, path, username);
- } else {
- LOG.debug("Found exact file: normal file");
+ if (!key.isEmpty()) {
+ if (probes.contains(StatusProbeEnum.Head) && !key.endsWith("/")) {
+ try {
+ // look for the simple file
+ ObjectMetadata meta = getObjectMetadata(key);
+ LOG.debug("Found exact file: normal file {}", key);
return new S3AFileStatus(meta.getContentLength(),
dateToLong(meta.getLastModified()),
path,
@@ -2751,18 +2790,22 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
username,
meta.getETag(),
meta.getVersionId());
- }
- } catch (AmazonServiceException e) {
- if (e.getStatusCode() != SC_404) {
+ } catch (AmazonServiceException e) {
+ // if the response is a 404 error, it just means that there is
+ // no file at that path...the remaining checks will be needed.
+ if (e.getStatusCode() != SC_404) {
+ throw translateException("getFileStatus", path, e);
+ }
+ } catch (AmazonClientException e) {
throw translateException("getFileStatus", path, e);
}
- } catch (AmazonClientException e) {
- throw translateException("getFileStatus", path, e);
}
+ // Either a normal file was not found or the probe was skipped.
+ // because the key ended in "/" or it was not in the set of probes.
// Look for the dir marker
- if (!key.endsWith("/") && probes.contains(StatusProbeEnum.DirMarker)) {
- String newKey = key + "/";
+ if (probes.contains(StatusProbeEnum.DirMarker)) {
+ String newKey = maybeAddTrailingSlash(key);
try {
ObjectMetadata meta = getObjectMetadata(newKey);
@@ -2794,8 +2837,8 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
// execute the list
if (probes.contains(StatusProbeEnum.List)) {
try {
- key = maybeAddTrailingSlash(key);
- S3ListRequest request = createListObjectsRequest(key, "/", 1);
+ String dirKey = maybeAddTrailingSlash(key);
+ S3ListRequest request = createListObjectsRequest(dirKey, "/", 1);
S3ListResult objects = listObjects(request);
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StatusProbeEnum.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StatusProbeEnum.java
index ca2875c..f843b20 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StatusProbeEnum.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StatusProbeEnum.java
@@ -41,4 +41,20 @@ public enum StatusProbeEnum {
public static final Set<StatusProbeEnum> DIRECTORIES =
EnumSet.of(DirMarker, List);
+ /** We only want the HEAD or dir marker. */
+ public static final Set<StatusProbeEnum> HEAD_OR_DIR_MARKER =
+ EnumSet.of(Head, DirMarker);
+
+ /** We only want the HEAD. */
+ public static final Set<StatusProbeEnum> HEAD_ONLY =
+ EnumSet.of(Head);
+
+ /** We only want the dir marker. */
+ public static final Set<StatusProbeEnum> DIR_MARKER_ONLY =
+ EnumSet.of(DirMarker);
+
+ /** We only want the dir marker. */
+ public static final Set<StatusProbeEnum> LIST_ONLY =
+ EnumSet.of(List);
+
}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestAuthoritativePath.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestAuthoritativePath.java
index c35a585..eb54c0e 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestAuthoritativePath.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestAuthoritativePath.java
@@ -84,14 +84,18 @@ public class ITestAuthoritativePath extends AbstractS3ATestBase {
private void cleanUpFS(S3AFileSystem fs) {
// detach from the (shared) metadata store.
- fs.setMetadataStore(new NullMetadataStore());
+ if (fs != null) {
+ fs.setMetadataStore(new NullMetadataStore());
+ }
IOUtils.cleanupWithLogger(LOG, fs);
}
@Override
public void teardown() throws Exception {
- fullyAuthFS.delete(testRoot, true);
+ if (fullyAuthFS != null) {
+ fullyAuthFS.delete(testRoot, true);
+ }
cleanUpFS(fullyAuthFS);
cleanUpFS(rawFS);
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java
index c62176b..e2f7fea 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java
@@ -18,23 +18,32 @@
package org.apache.hadoop.fs.s3a;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.apache.hadoop.fs.s3a.impl.StatusProbeEnum;
+import org.assertj.core.api.Assertions;
import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileNotFoundException;
import java.net.URI;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.EnumSet;
import java.util.UUID;
import java.util.concurrent.Callable;
import static org.apache.hadoop.fs.contract.ContractTestUtils.*;
+import static org.apache.hadoop.fs.s3a.Constants.S3_METADATA_STORE_IMPL;
import static org.apache.hadoop.fs.s3a.Statistic.*;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
import static org.apache.hadoop.test.GenericTestUtils.getTestDir;
@@ -43,7 +52,9 @@ import static org.apache.hadoop.test.LambdaTestUtils.intercept;
/**
* Use metrics to assert about the cost of file status queries.
* {@link S3AFileSystem#getFileStatus(Path)}.
+ * Parameterized on guarded vs raw.
*/
+@RunWith(Parameterized.class)
public class ITestS3AFileOperationCost extends AbstractS3ATestBase {
private MetricDiff metadataRequests;
@@ -52,9 +63,48 @@ public class ITestS3AFileOperationCost extends AbstractS3ATestBase {
private static final Logger LOG =
LoggerFactory.getLogger(ITestS3AFileOperationCost.class);
+ /**
+ * Parameterization.
+ */
+ @Parameterized.Parameters(name = "{0}")
+ public static Collection<Object[]> params() {
+ return Arrays.asList(new Object[][]{
+ {"raw", false},
+ {"guarded", true}
+ });
+ }
+
+ private final String name;
+
+ private final boolean s3guard;
+
+ public ITestS3AFileOperationCost(final String name, final boolean s3guard) {
+ this.name = name;
+ this.s3guard = s3guard;
+ }
+
+ @Override
+ public Configuration createConfiguration() {
+ Configuration conf = super.createConfiguration();
+ String bucketName = getTestBucketName(conf);
+ removeBucketOverrides(bucketName, conf,
+ S3_METADATA_STORE_IMPL);
+ if (!s3guard) {
+ // in a raw run remove all s3guard settings
+ removeBaseAndBucketOverrides(bucketName, conf,
+ S3_METADATA_STORE_IMPL);
+ }
+ disableFilesystemCaching(conf);
+ return conf;
+ }
@Override
public void setup() throws Exception {
super.setup();
+ if (s3guard) {
+ // s3guard is required for those test runs where any of the
+ // guard options are set
+ assumeS3GuardState(true, getConfiguration());
+ }
S3AFileSystem fs = getFileSystem();
metadataRequests = new MetricDiff(fs, OBJECT_METADATA_REQUESTS);
listRequests = new MetricDiff(fs, OBJECT_LIST_REQUESTS);
@@ -80,6 +130,19 @@ public class ITestS3AFileOperationCost extends AbstractS3ATestBase {
reset(metadataRequests, listRequests);
}
+ /**
+ * Verify that the head and list calls match expectations,
+ * then reset the counters ready for the next operation.
+ * @param head expected HEAD count
+ * @param list expected LIST count
+ */
+ private void verifyOperationCount(int head, int list) {
+ metadataRequests.assertDiffEquals(head);
+ listRequests.assertDiffEquals(list);
+ metadataRequests.reset();
+ listRequests.reset();
+ }
+
@Test
public void testCostOfGetFileStatusOnEmptyDir() throws Throwable {
describe("performing getFileStatus on an empty directory");
@@ -205,12 +268,6 @@ public class ITestS3AFileOperationCost extends AbstractS3ATestBase {
+ "clean up activity");
S3AFileSystem fs = getFileSystem();
- // As this test uses the s3 metrics to count the number of fake directory
- // operations, it depends on side effects happening internally. With
- // metadata store enabled, it is brittle to change. We disable this test
- // before the internal behavior w/ or w/o metadata store.
-// assumeFalse(fs.hasMetadataStore());
-
Path srcBaseDir = path("src");
mkdirs(srcBaseDir);
MetricDiff deleteRequests =
@@ -389,4 +446,75 @@ public class ITestS3AFileOperationCost extends AbstractS3ATestBase {
fs.delete(dest, false);
}
}
+
+ @Test
+ public void testDirProbes() throws Throwable {
+ describe("Test directory probe cost -raw only");
+ S3AFileSystem fs = getFileSystem();
+ assume("Unguarded FS only", !fs.hasMetadataStore());
+ String dir = "testEmptyDirHeadProbe";
+ Path emptydir = path(dir);
+ // Create the empty directory.
+ fs.mkdirs(emptydir);
+
+ // metrics and assertions.
+ resetMetricDiffs();
+
+ intercept(FileNotFoundException.class, () ->
+ fs.innerGetFileStatus(emptydir, false,
+ StatusProbeEnum.HEAD_ONLY));
+ verifyOperationCount(1, 0);
+
+ // a LIST will find it -but it doesn't consider it an empty dir.
+ S3AFileStatus status = fs.innerGetFileStatus(emptydir, true,
+ StatusProbeEnum.LIST_ONLY);
+ verifyOperationCount(0, 1);
+ Assertions.assertThat(status)
+ .describedAs("LIST output is not considered empty")
+ .matches(s -> !s.isEmptyDirectory().equals(Tristate.TRUE), "is empty");
+
+ // finally, skip all probes and expect no operations toThere are
+ // take place
+ intercept(FileNotFoundException.class, () ->
+ fs.innerGetFileStatus(emptydir, false,
+ EnumSet.noneOf(StatusProbeEnum.class)));
+ verifyOperationCount(0, 0);
+
+ // now add a trailing slash to the key and use the
+ // deep internal s3GetFileStatus method call.
+ String emptyDirTrailingSlash = fs.pathToKey(emptydir.getParent())
+ + "/" + dir + "/";
+ // A HEAD request does not probe for keys with a trailing /
+ intercept(FileNotFoundException.class, () ->
+ fs.s3GetFileStatus(emptydir, emptyDirTrailingSlash,
+ StatusProbeEnum.HEAD_ONLY, null));
+ verifyOperationCount(0, 0);
+
+ // but ask for a directory marker and you get the entry
+ status = fs.s3GetFileStatus(emptydir,
+ emptyDirTrailingSlash,
+ StatusProbeEnum.DIR_MARKER_ONLY, null);
+ verifyOperationCount(1, 0);
+ assertEquals(emptydir, status.getPath());
+ }
+
+ @Test
+ public void testCreateCost() throws Throwable {
+ describe("Test file creation cost -raw only");
+ S3AFileSystem fs = getFileSystem();
+ assume("Unguarded FS only", !fs.hasMetadataStore());
+ resetMetricDiffs();
+ Path testFile = path("testCreateCost");
+
+ // when overwrite is false, the path is checked for existence.
+ try (FSDataOutputStream out = fs.create(testFile, false)) {
+ verifyOperationCount(2, 1);
+ }
+
+ // but when true: only the directory checks take place.
+ try (FSDataOutputStream out = fs.create(testFile, true)) {
+ verifyOperationCount(1, 1);
+ }
+
+ }
}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardTtl.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardTtl.java
index fb539a8..b068f3d 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardTtl.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardTtl.java
@@ -96,6 +96,12 @@ public class ITestS3GuardTtl extends AbstractS3ATestBase {
return configuration;
}
+ @Override
+ public void setup() throws Exception {
+ super.setup();
+ Assume.assumeTrue(getFileSystem().hasMetadataStore());
+ }
+
@Test
public void testDirectoryListingAuthoritativeTtl() throws Exception {
LOG.info("Authoritative mode: {}", authoritative);
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org