You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by st...@apache.org on 2020/10/08 14:38:54 UTC

[hadoop] branch branch-3.3 updated: HADOOP-17293. S3A to always probe S3 in S3A getFileStatus on non-auth paths

This is an automated email from the ASF dual-hosted git repository.

stevel pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new 963793d  HADOOP-17293. S3A to always probe S3 in S3A getFileStatus on non-auth paths
963793d is described below

commit 963793dd484af7e9518c035783fe1c29b23c3199
Author: Steve Loughran <st...@cloudera.com>
AuthorDate: Thu Oct 8 15:35:57 2020 +0100

    HADOOP-17293. S3A to always probe S3 in S3A getFileStatus on non-auth paths
    
    This reverts changes in HADOOP-13230 to use S3Guard TTL in choosing when
    to issue a HEAD request; fixing tests to compensate.
    
    New org.apache.hadoop.fs.s3a.performance.OperationCost cost,
    S3GUARD_NONAUTH_FILE_STATUS_PROBE for use in cost tests.
    
    Contributed by Steve Loughran.
    
    Change-Id: I418d55d2d2562a48b2a14ec7dee369db49b4e29e
---
 .../org/apache/hadoop/fs/s3a/S3AFileSystem.java    | 74 ++++++++--------------
 .../hadoop/fs/s3a/ITestS3AEncryptionSSEC.java      | 14 +++-
 .../hadoop/fs/s3a/ITestS3AFileOperationCost.java   |  6 +-
 .../hadoop/fs/s3a/ITestS3ARemoteFileChanged.java   |  2 +-
 .../fs/s3a/auth/ITestRestrictedReadAccess.java     | 12 ++--
 .../hadoop/fs/s3a/performance/OperationCost.java   |  7 ++
 6 files changed, 55 insertions(+), 60 deletions(-)

diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index d7b576a..1731863 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -2980,55 +2980,31 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
         // a file has been found in a non-auth path and the caller has not said
         // they only care about directories
         LOG.debug("Metadata for {} found in the non-auth metastore.", path);
-        // If the timestamp of the pm is close to "now", we don't need to
-        // bother with a check of S3. that means:
-        // one of : status modtime is close to now,
-        //  or pm.getLastUpdated() == now
-
-        // get the time in which a status modtime is considered valid
-        // in a non-auth metastore
-        long validTime =
-            ttlTimeProvider.getNow() - ttlTimeProvider.getMetadataTtl();
-        final long msModTime = msStatus.getModificationTime();
-
-        if (msModTime < validTime) {
-          LOG.debug("Metastore entry of {} is out of date, probing S3", path);
-          try {
-            S3AFileStatus s3AFileStatus = s3GetFileStatus(path,
-                key,
-                probes,
-                tombstones,
-                needEmptyDirectoryFlag);
-            // if the new status is more current than that in the metastore,
-            // it means S3 has changed and the store needs updating
-            final long s3ModTime = s3AFileStatus.getModificationTime();
-
-            if (s3ModTime > msModTime) {
-              // there's new data in S3
-              LOG.debug("S3Guard metadata for {} is outdated;"
-                      + " s3modtime={}; msModTime={} updating metastore",
-                  path, s3ModTime, msModTime);
-              // add to S3Guard
-              S3Guard.putAndReturn(metadataStore, s3AFileStatus,
-                  ttlTimeProvider);
-            } else {
-              // the modtime of the data is the same as/older than the s3guard
-              // value either an old object has been found, or the existing one
-              // was retrieved in both cases -refresh the S3Guard entry so the
-              // record's TTL is updated.
-              S3Guard.refreshEntry(metadataStore, pm, s3AFileStatus,
-                  ttlTimeProvider);
-            }
-            // return the value
-            // note that the checks for empty dir status below can be skipped
-            // because the call to s3GetFileStatus include the checks there
-            return s3AFileStatus;
-          } catch (FileNotFoundException fne) {
-            // the attempt to refresh the record failed because there was
-            // no entry. Either it is a new file not visible, or it
-            // has been deleted (and therefore S3Guard is out of sync with S3)
-            LOG.warn("Failed to find file {}. Either it is not yet visible, or "
-                + "it has been deleted.", path);
+        final long msModTime = pm.getFileStatus().getModificationTime();
+
+        S3AFileStatus s3AFileStatus;
+        try {
+          s3AFileStatus = s3GetFileStatus(path,
+              key,
+              probes,
+              tombstones,
+              needEmptyDirectoryFlag);
+        } catch (FileNotFoundException fne) {
+          LOG.trace("File Not Found from probes for {}", key, fne);
+          s3AFileStatus = null;
+        }
+        if (s3AFileStatus == null) {
+          LOG.warn("Failed to find file {}. Either it is not yet visible, or "
+              + "it has been deleted.", path);
+        } else {
+          final long s3ModTime = s3AFileStatus.getModificationTime();
+
+          if(s3ModTime > msModTime) {
+            LOG.debug("S3Guard metadata for {} is outdated;"
+                + " s3modtime={}; msModTime={} updating metastore",
+                path, s3ModTime, msModTime);
+            return S3Guard.putAndReturn(metadataStore, s3AFileStatus,
+                ttlTimeProvider);
           }
         }
       }
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEC.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEC.java
index 1c395b2..852b49a 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEC.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEC.java
@@ -314,7 +314,7 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
     fsKeyB = createNewFileSystemWithSSECKey(KEY_4);
 
     //Until this point, no exception is thrown about access
-    if (!fsKeyB.hasMetadataStore()) {
+    if (statusProbesCheckS3(fsKeyB, fileToStat)) {
       intercept(AccessDeniedException.class,
           SERVICE_AMAZON_S3_STATUS_CODE_403,
           () -> fsKeyB.listStatus(fileToStat));
@@ -324,6 +324,16 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
   }
 
   /**
+   * Do file status probes check S3?
+   * @param fs filesystem
+   * @param path file path
+   * @return true if check for a path being a file will issue a HEAD request.
+   */
+  private boolean statusProbesCheckS3(S3AFileSystem fs, Path path) {
+    return !fs.hasMetadataStore() || !fs.allowAuthoritative(path);
+  }
+
+  /**
    * It is possible to delete directories without the proper encryption key and
    * the hierarchy above it.
    *
@@ -340,7 +350,7 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
     Path fileToDelete = new Path(pathABC, "filetobedeleted.txt");
     writeThenReadFile(fileToDelete, TEST_FILE_LEN);
     fsKeyB = createNewFileSystemWithSSECKey(KEY_4);
-    if (!fsKeyB.hasMetadataStore()) {
+    if (statusProbesCheckS3(fsKeyB, fileToDelete)) {
       intercept(AccessDeniedException.class,
           SERVICE_AMAZON_S3_STATUS_CODE_403,
           () -> fsKeyB.delete(fileToDelete, false));
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java
index 941e701..6461ecd 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java
@@ -92,7 +92,8 @@ public class ITestS3AFileOperationCost extends AbstractS3ACostTest {
         whenRaw(FILE_STATUS_FILE_PROBE
             .plus(LIST_LOCATED_STATUS_LIST_OP)),
         whenAuthoritative(LIST_LOCATED_STATUS_LIST_OP),
-        whenNonauth(LIST_LOCATED_STATUS_LIST_OP));
+        whenNonauth(LIST_LOCATED_STATUS_LIST_OP
+            .plus(S3GUARD_NONAUTH_FILE_STATUS_PROBE)));
   }
 
   @Test
@@ -187,7 +188,8 @@ public class ITestS3AFileOperationCost extends AbstractS3ACostTest {
             whenRaw(LIST_STATUS_LIST_OP
                     .plus(GET_FILE_STATUS_ON_FILE)),
             whenAuthoritative(LIST_STATUS_LIST_OP),
-            whenNonauth(LIST_STATUS_LIST_OP));
+            whenNonauth(LIST_STATUS_LIST_OP
+                .plus(S3GUARD_NONAUTH_FILE_STATUS_PROBE)));
   }
 
   @Test
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java
index 01a14ef..3fd70be 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java
@@ -326,7 +326,7 @@ public class ITestS3ARemoteFileChanged extends AbstractS3ATestBase {
    * @return a number >= 0.
    */
   private int getFileStatusHeadCount() {
-    return authMode ? 0 : 0;
+    return authMode ? 0 : 1;
   }
 
   /**
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java
index 2848fb7..1555217 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java
@@ -422,7 +422,7 @@ public class ITestRestrictedReadAccess extends AbstractS3ATestBase {
 
     readonlyFS.getFileStatus(emptyDir);
     // now look at a file; the outcome depends on the mode.
-    accessDeniedIf(!s3guard, () ->
+    accessDeniedIf(!guardedInAuthMode, () ->
         readonlyFS.getFileStatus(subdirFile));
 
     // irrespective of mode, the attempt to read the data will fail.
@@ -437,7 +437,7 @@ public class ITestRestrictedReadAccess extends AbstractS3ATestBase {
     // This means that permissions on the file do not get checked.
     // See: HADOOP-16464.
     Optional<FSDataInputStream> optIn = accessDeniedIf(
-        !s3guard, () -> readonlyFS.open(emptyFile));
+        !guardedInAuthMode, () -> readonlyFS.open(emptyFile));
     if (optIn.isPresent()) {
       try (FSDataInputStream is = optIn.get()) {
         Assertions.assertThat(is.read())
@@ -455,8 +455,8 @@ public class ITestRestrictedReadAccess extends AbstractS3ATestBase {
     describe("Glob Status operations");
     // baseline: the real filesystem on a subdir
     globFS(getFileSystem(), subdirFile, null, false, 1);
-    // a file fails if not guarded
-    globFS(readonlyFS, subdirFile, null, !s3guard, 1);
+    // a file fails if not in auth mode
+    globFS(readonlyFS, subdirFile, null, !guardedInAuthMode, 1);
     // empty directories don't fail.
     FileStatus[] st = globFS(readonlyFS, emptyDir, null, false, 1);
     if (s3guard) {
@@ -554,7 +554,7 @@ public class ITestRestrictedReadAccess extends AbstractS3ATestBase {
         true,
         TEXT_FILE,
         true);
-    accessDeniedIf(!s3guard,
+    accessDeniedIf(!guardedInAuthMode,
         () -> fetcher.getFileStatuses())
         .ifPresent(stats -> {
           Assertions.assertThat(stats)
@@ -619,7 +619,7 @@ public class ITestRestrictedReadAccess extends AbstractS3ATestBase {
   public void checkDeleteOperations() throws Throwable {
     describe("Testing delete operations");
     readonlyFS.delete(emptyDir, true);
-    if (!s3guard) {
+    if (!authMode) {
       // to fail on HEAD
       accessDenied(() -> readonlyFS.delete(emptyFile, true));
     } else {
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/OperationCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/OperationCost.java
index 54b6866..0a1438d 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/OperationCost.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/OperationCost.java
@@ -154,6 +154,13 @@ public final class OperationCost {
   public static final OperationCost CREATE_FILE_NO_OVERWRITE =
       FILE_STATUS_ALL_PROBES;
 
+  /**
+   * S3Guard in non-auth mode always attempts a single file
+   * status call.
+   */
+  public static final OperationCost S3GUARD_NONAUTH_FILE_STATUS_PROBE =
+      FILE_STATUS_FILE_PROBE;
+
   /** Expected HEAD count. */
   private final int head;
 


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org