You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by fa...@apache.org on 2017/06/08 18:09:43 UTC
hadoop git commit: HADOOP-14476 make InconsistentAmazonS3Client
usable in downstream tests (Aaron Fabbri)
Repository: hadoop
Updated Branches:
refs/heads/HADOOP-13345 6f6a61bb5 -> 30baa089d
HADOOP-14476 make InconsistentAmazonS3Client usable in downstream tests (Aaron Fabbri)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/30baa089
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/30baa089
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/30baa089
Branch: refs/heads/HADOOP-13345
Commit: 30baa089d833dbd5dc981de06b0e9a17ead038e8
Parents: 6f6a61b
Author: Aaron Fabbri <fa...@apache.org>
Authored: Wed Jun 7 13:11:15 2017 -0700
Committer: Aaron Fabbri <fa...@apache.org>
Committed: Thu Jun 8 11:09:11 2017 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/fs/s3a/Constants.java | 16 ++
.../hadoop/fs/s3a/DefaultS3ClientFactory.java | 2 +-
.../fs/s3a/InconsistentAmazonS3Client.java | 54 ++++++-
.../fs/s3a/InconsistentS3ClientFactory.java | 40 +++++
.../site/markdown/tools/hadoop-aws/testing.md | 127 +++++++++++++++-
.../hadoop/fs/s3a/ITestS3AInconsistency.java | 100 +++++++++++++
.../fs/s3a/ITestS3GuardListConsistency.java | 146 +++----------------
.../hadoop/fs/s3a/ITestS3GuardWriteBack.java | 141 ++++++++++++++++++
.../fs/s3a/InconsistentS3ClientFactory.java | 35 -----
9 files changed, 495 insertions(+), 166 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/30baa089/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
index 0fb1197..1a464d0 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
@@ -435,4 +435,20 @@ public final class Constants {
@InterfaceStability.Unstable
public static final String S3GUARD_METASTORE_DYNAMO
= "org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore";
+
+ /**
+ * Inconsistency (visibility delay) injection settings.
+ */
+ @InterfaceStability.Unstable
+ public static final String FAIL_INJECT_INCONSISTENCY_KEY =
+ "fs.s3a.failinject.inconsistency.key.substring";
+
+ @InterfaceStability.Unstable
+ public static final String FAIL_INJECT_INCONSISTENCY_MSEC =
+ "fs.s3a.failinject.inconsistency.msec";
+
+ @InterfaceStability.Unstable
+ public static final String FAIL_INJECT_INCONSISTENCY_PROBABILITY =
+ "fs.s3a.failinject.inconsistency.probability";
+
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/30baa089/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java
index a329580..f33b25e 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java
@@ -43,7 +43,7 @@ import static org.apache.hadoop.fs.s3a.S3AUtils.intOption;
public class DefaultS3ClientFactory extends Configured implements
S3ClientFactory {
- private static final Logger LOG = S3AFileSystem.LOG;
+ protected static final Logger LOG = S3AFileSystem.LOG;
@Override
public AmazonS3 createS3Client(URI name) throws IOException {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/30baa089/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java
index 98ea16a..5b62c66 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java
@@ -29,9 +29,14 @@ import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.services.s3.model.PutObjectResult;
import com.amazonaws.services.s3.model.S3ObjectSummary;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import static org.apache.hadoop.fs.s3a.Constants.*;
+
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
@@ -43,22 +48,38 @@ import java.util.Map;
* inconsistency and/or errors. Used for testing S3Guard.
* Currently only delays listing visibility, not affecting GET.
*/
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
public class InconsistentAmazonS3Client extends AmazonS3Client {
/**
* Keys containing this substring will be subject to delayed visibility.
*/
- public static final String DELAY_KEY_SUBSTRING = "DELAY_LISTING_ME";
+ public static final String DEFAULT_DELAY_KEY_SUBSTRING = "DELAY_LISTING_ME";
/**
* How many seconds affected keys will be delayed from appearing in listing.
* This should probably be a config value.
*/
- public static final long DELAY_KEY_MILLIS = 5 * 1000;
+ public static final long DEFAULT_DELAY_KEY_MSEC = 5 * 1000;
+
+ public static final float DEFAULT_DELAY_KEY_PROBABILITY = 1.0f;
+
+ /** Special config value since we can't store empty strings in XML. */
+ public static final String MATCH_ALL_KEYS = "*";
private static final Logger LOG =
LoggerFactory.getLogger(InconsistentAmazonS3Client.class);
+ /** Empty string matches all keys. */
+ private String delayKeySubstring;
+
+ /** Probability to delay visibility of a matching key. */
+ private float delayKeyProbability;
+
+ /** Time in milliseconds to delay visibility of newly modified object. */
+ private long delayKeyMsec;
+
/**
* Composite of data we need to track about recently deleted objects:
* when it was deleted (same was with recently put objects) and the object
@@ -91,8 +112,25 @@ public class InconsistentAmazonS3Client extends AmazonS3Client {
private Map<String, Long> delayedPutKeys = new HashMap<>();
public InconsistentAmazonS3Client(AWSCredentialsProvider credentials,
- ClientConfiguration clientConfiguration) {
+ ClientConfiguration clientConfiguration, Configuration conf) {
super(credentials, clientConfiguration);
+ setupConfig(conf);
+ }
+
+ protected void setupConfig(Configuration conf) {
+
+ delayKeySubstring = conf.get(FAIL_INJECT_INCONSISTENCY_KEY,
+ DEFAULT_DELAY_KEY_SUBSTRING);
+ // "" is a substring of all strings, use it to match all keys.
+ if (delayKeySubstring.equals(MATCH_ALL_KEYS)) {
+ delayKeySubstring = "";
+ }
+ delayKeyProbability = conf.getFloat(FAIL_INJECT_INCONSISTENCY_PROBABILITY,
+ DEFAULT_DELAY_KEY_PROBABILITY);
+ delayKeyMsec = conf.getLong(FAIL_INJECT_INCONSISTENCY_MSEC,
+ DEFAULT_DELAY_KEY_MSEC);
+ LOG.info("Enabled with {} msec delay, substring {}, probability {}",
+ delayKeyMsec, delayKeySubstring, delayKeyProbability);
}
@Override
@@ -191,7 +229,7 @@ public class InconsistentAmazonS3Client extends AmazonS3Client {
return false;
}
long currentTime = System.currentTimeMillis();
- long deadline = enqueueTime + DELAY_KEY_MILLIS;
+ long deadline = enqueueTime + delayKeyMsec;
if (currentTime >= deadline) {
delayedDeletes.remove(key);
LOG.debug("no longer delaying {}", key);
@@ -231,11 +269,17 @@ public class InconsistentAmazonS3Client extends AmazonS3Client {
* @return true if we should delay
*/
private boolean shouldDelay(String key) {
- boolean delay = key.contains(DELAY_KEY_SUBSTRING);
+ boolean delay = key.contains(delayKeySubstring);
+ delay = delay && trueWithProbability(delayKeyProbability);
LOG.debug("{} -> {}", key, delay);
return delay;
}
+
+ private boolean trueWithProbability(float p) {
+ return Math.random() < p;
+ }
+
/**
* Record this key as something that should not become visible in
* listObject replies for a while, to simulate eventual list consistency.
http://git-wip-us.apache.org/repos/asf/hadoop/blob/30baa089/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java
new file mode 100644
index 0000000..17d268b
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import com.amazonaws.ClientConfiguration;
+import com.amazonaws.auth.AWSCredentialsProvider;
+import com.amazonaws.services.s3.AmazonS3;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+/**
+ * S3 Client factory used for testing with eventual consistency fault injection.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+public class InconsistentS3ClientFactory extends DefaultS3ClientFactory {
+
+ @Override
+ protected AmazonS3 newAmazonS3Client(AWSCredentialsProvider credentials,
+ ClientConfiguration awsConf) {
+ LOG.warn("** FAILURE INJECTION ENABLED. Do not run in production! **");
+ return new InconsistentAmazonS3Client(credentials, awsConf, getConf());
+ }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/30baa089/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md
index 3b83f1f..0bf2261 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md
@@ -691,7 +691,7 @@ use requires the presence of secret credentials, where tests may be slow,
and where finding out why something failed from nothing but the test output
is critical.
-#### Subclasses Existing Shared Base Blasses
+#### Subclasses Existing Shared Base Classes
Extend `AbstractS3ATestBase` or `AbstractSTestS3AHugeFiles` unless justifiable.
These set things up for testing against the object stores, provide good threadnames,
@@ -798,7 +798,7 @@ We really appreciate this — you will too.
### How to keep your credentials really safe
-Although the `auth-keys.xml` file is marged as ignored in git and subversion,
+Although the `auth-keys.xml` file is marked as ignored in git and subversion,
it is still in your source tree, and there's always that risk that it may
creep out.
@@ -813,3 +813,126 @@ using an absolute XInclude reference to it.
</configuration>
```
+
+# Failure Injection
+
+**Warning do not enable any type of failure injection in production. The
+following settings are for test development only.**
+
+## Inconsistency Injection
+
+One of the challenges with S3A integration tests is the fact that S3 is an
+eventually-consistent storage system. In practice, we rarely see delays in
+visibility of recently created objects both in listings (listStatus()) and
+when getting a single file's metadata (getFileStatus()). Since this behavior
+is rare and non-deterministic, thorough integration testing is challenging.
+
+To address this, we developed a shim layer on top of the `AmazonS3Client`
+class which artificially delays certain paths from appearing in listings.
+This is implemented in the class `InconsistentAmazonS3Client`.
+
+### Enabling the InconsistentAmazonS3CClient
+
+There are two ways of enabling the `InconsistentAmazonS3Client`: at
+config-time, or programmatically. For an example of programmatic test usage,
+see `ITestS3GuardListConsistency`.
+
+To enable the inconsistency injecting client via configuration, set the
+following class name for the client factory configuration:
+
+```xml
+<property>
+ <name>fs.s3a.s3.client.factory.impl</name>
+ <value>org.apache.hadoop.fs.s3a.InconsistentS3ClientFactory</value>
+</property>
+```
+
+The inconsistent client works by:
+
+1. Choosing which objects will be "inconsistent" at the time the object is
+created or deleted.
+2. When listObjects is called, any keys that we have marked as
+inconsistent above will not be returned in the results (until the
+configured delay has elapsed). Similarly, deleted items may be *added* to
+missing results to delay the visibility of the delete.
+
+There are two ways of choosing which keys (filenames) will be affected: By
+substring, and by random probability.
+
+```xml
+<property>
+ <name>fs.s3a.failinject.inconsistency.key.substring</name>
+ <value>DELAY_LISTING_ME</value>
+</property>
+
+<property>
+ <name>fs.s3a.failinject.inconsistency.probability</name>
+ <value>1.0</value>
+</property>
+```
+
+By default, any object which has the substring "DELAY_LISTING_ME" in its key
+will subject to delayed visibility. For example, the path
+`s3a://my-bucket/test/DELAY_LISTING_ME/file.txt` would match this condition.
+To match all keys use the value "\*" (a single asterisk). This is a special
+value: *We don't support arbitrary wildcards.*
+
+The default probability of delaying an object is 1.0. This means that *all*
+keys that match the substring will get delayed visibility. Note that we take
+the logical *and* of the two conditions (substring matches *and* probability
+random chance occurs). Here are some example configurations:
+
+```
+| substring | probability | behavior |
+|-----------|-------------|--------------------------------------------|
+| | 0.001 | An empty <value> tag in .xml config will |
+| | | be interpreted as unset and revert to the |
+| | | default value, "DELAY_LISTING_ME" |
+| | | |
+| * | 0.001 | 1/1000 chance of *any* key being delayed. |
+| | | |
+| delay | 0.01 | 1/100 chance of any key containing "delay" |
+| | | |
+| delay | 1.0 | All keys containing substring "delay" .. |
+```
+
+You can also configure how long you want the delay in visibility to last.
+The default is 5000 milliseconds (five seconds).
+
+```xml
+<property>
+ <name>fs.s3a.failinject.inconsistency.msec</name>
+ <value>5000</value>
+</property>
+```
+
+#### Limitations of Inconsistency Injection
+
+Although we can delay visibility of an object or parent directory va the
+`InconsistentAmazonS3Client` we do not keep the key of that object from
+appearing in all prefix searches. For example, if we create the following
+object with the default configuration above, in an otherwise empty bucket:
+
+```
+ s3a://bucket/a/b/c/DELAY_LISTING_ME
+```
+
+Then the following paths will still be visible as directories:
+
+```
+ s3a://bucket/a
+ s3a://bucket/a/b
+```
+
+Whereas getFileStatus() on the following *will* be subject to delayed
+visibility (FileNotFoundException until delay has elapsed):
+
+```
+ s3a://bucket/a/b/c
+ s3a://bucket/a/b/c/DELAY_LISTING_ME
+```
+
+ In real-life S3 inconsistency, however, we expect that all the above paths
+ (including `a` and `b`) will be subject to delayed visiblity.
+
+
http://git-wip-us.apache.org/repos/asf/hadoop/blob/30baa089/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AInconsistency.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AInconsistency.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AInconsistency.java
new file mode 100644
index 0000000..eb4f70b
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AInconsistency.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.AbstractFSContract;
+import org.apache.hadoop.fs.contract.s3a.S3AContract;
+import org.apache.hadoop.test.LambdaTestUtils;
+import org.junit.Test;
+
+import java.io.FileNotFoundException;
+import java.util.concurrent.Callable;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.touch;
+import static org.apache.hadoop.fs.s3a.Constants.*;
+import static org.apache.hadoop.fs.s3a.InconsistentAmazonS3Client.*;
+
+/**
+ * Tests S3A behavior under forced inconsistency via {@link
+ * InconsistentAmazonS3Client}.
+ *
+ * These tests are for validating expected behavior *without* S3Guard, but
+ * may also run with S3Guard enabled. For tests that validate S3Guard's
+ * consistency features, see {@link ITestS3GuardListConsistency}.
+ */
+public class ITestS3AInconsistency extends AbstractS3ATestBase {
+
+ @Override
+ protected AbstractFSContract createContract(Configuration conf) {
+ conf.setClass(S3_CLIENT_FACTORY_IMPL, InconsistentS3ClientFactory.class,
+ S3ClientFactory.class);
+ conf.set(FAIL_INJECT_INCONSISTENCY_KEY, DEFAULT_DELAY_KEY_SUBSTRING);
+ conf.setFloat(FAIL_INJECT_INCONSISTENCY_PROBABILITY, 1.0f);
+ conf.setLong(FAIL_INJECT_INCONSISTENCY_MSEC, DEFAULT_DELAY_KEY_MSEC);
+ return new S3AContract(conf);
+ }
+
+ @Test
+ public void testGetFileStatus() throws Exception {
+ S3AFileSystem fs = getFileSystem();
+
+ // 1. Make sure no ancestor dirs exist
+ Path dir = path("ancestor");
+ fs.delete(dir, true);
+ waitUntilDeleted(dir);
+
+ // 2. Create a descendant file, which implicitly creates ancestors
+ // This file has delayed visibility.
+ touch(getFileSystem(),
+ path("ancestor/file-" + DEFAULT_DELAY_KEY_SUBSTRING));
+
+ // 3. Assert expected behavior. If S3Guard is enabled, we should be able
+ // to get status for ancestor. If S3Guard is *not* enabled, S3A will
+ // fail to infer the existence of the ancestor since visibility of the
+ // child file is delayed, and its key prefix search will return nothing.
+ try {
+ FileStatus status = fs.getFileStatus(dir);
+ if (fs.hasMetadataStore()) {
+ assertTrue("Ancestor is dir", status.isDirectory());
+ } else {
+ fail("getFileStatus should fail due to delayed visibility.");
+ }
+ } catch (FileNotFoundException e) {
+ if (fs.hasMetadataStore()) {
+ fail("S3Guard failed to list parent of inconsistent child.");
+ }
+ LOG.info("File not found, as expected.");
+ }
+ }
+
+ private void waitUntilDeleted(final Path p) throws Exception {
+ LambdaTestUtils.eventually(30 * 1000, 1000,
+ new Callable<Void>() {
+ @Override
+ public Void call() throws Exception {
+ assertPathDoesNotExist("Dir should be deleted", p);
+ return null;
+ }
+ }
+ );
+ }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/30baa089/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java
index 8771fd2..e06afd0 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java
@@ -20,32 +20,31 @@ package org.apache.hadoop.fs.s3a;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.contract.AbstractFSContract;
import org.apache.hadoop.fs.contract.s3a.S3AContract;
-import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata;
import org.junit.Assume;
import org.junit.Test;
import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.net.URI;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import static org.apache.hadoop.fs.contract.ContractTestUtils.writeTextFile;
import static org.apache.hadoop.fs.s3a.Constants.*;
-import static org.apache.hadoop.fs.s3a.InconsistentAmazonS3Client.DELAY_KEY_SUBSTRING;
+import static org.apache.hadoop.fs.s3a.InconsistentAmazonS3Client.*;
/**
* Test S3Guard list consistency feature by injecting delayed listObjects()
* visibility via {@link InconsistentAmazonS3Client}.
+ *
+ * Tests here generally:
+ * 1. Use the inconsistency injection mentioned above.
+ * 2. Only run when S3Guard is enabled.
*/
public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
@@ -53,6 +52,10 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
protected AbstractFSContract createContract(Configuration conf) {
conf.setClass(S3_CLIENT_FACTORY_IMPL, InconsistentS3ClientFactory.class,
S3ClientFactory.class);
+ // Other configs would break test assumptions
+ conf.set(FAIL_INJECT_INCONSISTENCY_KEY, DEFAULT_DELAY_KEY_SUBSTRING);
+ conf.setFloat(FAIL_INJECT_INCONSISTENCY_PROBABILITY, 1.0f);
+ conf.setLong(FAIL_INJECT_INCONSISTENCY_MSEC, DEFAULT_DELAY_KEY_MSEC);
return new S3AContract(conf);
}
@@ -75,7 +78,7 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
for (Path mkdir : mkdirs) {
assertTrue(fs.mkdirs(mkdir));
}
- Thread.sleep(InconsistentAmazonS3Client.DELAY_KEY_MILLIS);
+ Thread.sleep(DEFAULT_DELAY_KEY_MSEC);
}
assertTrue("srcdirs and dstdirs must have equal length",
@@ -102,14 +105,14 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
public void testConsistentListAfterRename() throws Exception {
Path[] mkdirs = {
path("d1/f"),
- path("d1/f" + InconsistentAmazonS3Client.DELAY_KEY_SUBSTRING)
+ path("d1/f" + DEFAULT_DELAY_KEY_SUBSTRING)
};
Path[] srcdirs = {path("d1")};
Path[] dstdirs = {path("d2")};
Path[] yesdirs = {path("d2"), path("d2/f"),
- path("d2/f" + InconsistentAmazonS3Client.DELAY_KEY_SUBSTRING)};
+ path("d2/f" + DEFAULT_DELAY_KEY_SUBSTRING)};
Path[] nodirs = {path("d1"), path("d1/f"),
- path("d1/f" + InconsistentAmazonS3Client.DELAY_KEY_SUBSTRING)};
+ path("d1/f" + DEFAULT_DELAY_KEY_SUBSTRING)};
doTestRenameSequence(mkdirs, srcdirs, dstdirs, yesdirs, nodirs);
getFileSystem().delete(path("d1"), true);
getFileSystem().delete(path("d2"), true);
@@ -159,7 +162,7 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
// Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed
// in listObjects() results via InconsistentS3Client
Path inconsistentPath =
- path("a/b/dir3-" + InconsistentAmazonS3Client.DELAY_KEY_SUBSTRING);
+ path("a/b/dir3-" + DEFAULT_DELAY_KEY_SUBSTRING);
Path[] testDirs = {path("a/b/dir1"),
path("a/b/dir2"),
@@ -168,7 +171,7 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
for (Path path : testDirs) {
assertTrue(fs.mkdirs(path));
}
- Thread.sleep(2 * InconsistentAmazonS3Client.DELAY_KEY_MILLIS);
+ Thread.sleep(2 * DEFAULT_DELAY_KEY_MSEC);
for (Path path : testDirs) {
assertTrue(fs.delete(path, false));
}
@@ -199,7 +202,7 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
// Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed
// in listObjects() results via InconsistentS3Client
Path inconsistentPath =
- path("a/b/dir3-" + InconsistentAmazonS3Client.DELAY_KEY_SUBSTRING);
+ path("a/b/dir3-" + DEFAULT_DELAY_KEY_SUBSTRING);
Path[] testDirs = {path("a/b/dir1"),
path("a/b/dir2"),
@@ -208,7 +211,7 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
for (Path path : testDirs) {
assertTrue(fs.mkdirs(path));
}
- Thread.sleep(2 * InconsistentAmazonS3Client.DELAY_KEY_MILLIS);
+ Thread.sleep(2 * DEFAULT_DELAY_KEY_MSEC);
assertTrue(fs.delete(testDirs[1], false));
assertTrue(fs.delete(testDirs[2], false));
@@ -222,7 +225,7 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
assertFalse(list.contains(path("a3/b/dir2")));
// This should fail without S3Guard, and succeed with it.
assertFalse(list.contains(
- path("a3/b/dir3-" + InconsistentAmazonS3Client.DELAY_KEY_SUBSTRING)));
+ path("a3/b/dir3-" + DEFAULT_DELAY_KEY_SUBSTRING)));
try {
RemoteIterator<LocatedFileStatus> old = fs.listFilesAndEmptyDirectories(
@@ -245,7 +248,7 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
// Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed
// in listObjects() results via InconsistentS3Client
Path inconsistentPath =
- path("a/b/dir3-" + DELAY_KEY_SUBSTRING);
+ path("a/b/dir3-" + DEFAULT_DELAY_KEY_SUBSTRING);
Path[] testDirs = {path("a/b/dir1"),
path("a/b/dir2"),
@@ -308,7 +311,8 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
for (; index < normalPathNum + delayedPathNum; index++) {
// Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed
// in listObjects() results via InconsistentS3Client
- testDirs.add(path(rootDir + "/dir-" + index + DELAY_KEY_SUBSTRING));
+ testDirs.add(path(rootDir + "/dir-" + index +
+ DEFAULT_DELAY_KEY_SUBSTRING));
}
for (Path path : testDirs) {
@@ -334,8 +338,7 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
}
/**
- * Similar to {@link #testConsistentListStatus()}, this tests that the S3AFS
- * listFiles() call will return consistent file list.
+ * Tests that the S3AFS listFiles() call will return consistent file list.
*/
@Test
public void testConsistentListFiles() throws Exception {
@@ -399,7 +402,7 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
for (; index < normalFileNum + delayedFileNum; index++) {
// Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed
// in listObjects() results via InconsistentS3Client
- fileNames.add("file-" + index + "-" + DELAY_KEY_SUBSTRING);
+ fileNames.add("file-" + index + "-" + DEFAULT_DELAY_KEY_SUBSTRING);
}
int filesAndEmptyDirectories = 0;
@@ -449,107 +452,4 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
}
}
- private static S3AFileSystem asS3AFS(FileSystem fs) {
- assertTrue("Not a S3AFileSystem: " + fs, fs instanceof S3AFileSystem);
- return (S3AFileSystem)fs;
- }
-
- /** Create a separate S3AFileSystem instance for testing. */
- private S3AFileSystem createTestFS(URI fsURI, boolean disableS3Guard,
- boolean authoritativeMeta)
- throws IOException {
- Configuration conf;
-
- // Create a FileSystem that is S3-backed only
- conf = createConfiguration();
- S3ATestUtils.disableFilesystemCaching(conf);
- if (disableS3Guard) {
- conf.set(Constants.S3_METADATA_STORE_IMPL,
- Constants.S3GUARD_METASTORE_NULL);
- } else {
- S3ATestUtils.maybeEnableS3Guard(conf);
- conf.setBoolean(Constants.METADATASTORE_AUTHORITATIVE, authoritativeMeta);
- }
- FileSystem fs = FileSystem.get(fsURI, conf);
- return asS3AFS(fs);
- }
-
- private static void assertPathDoesntExist(FileSystem fs, Path p)
- throws IOException {
- try {
- FileStatus s = fs.getFileStatus(p);
- } catch (FileNotFoundException e) {
- return;
- }
- fail("Path should not exist: " + p);
- }
-
- /**
- * In listStatus(), when S3Guard is enabled, the full listing for a
- * directory is "written back" to the MetadataStore before the listing is
- * returned. Currently this "write back" behavior occurs when
- * fs.s3a.metadatastore.authoritative is true. This test validates this
- * behavior.
- * @throws Exception on failure
- */
- @Test
- public void testListStatusWriteBack() throws Exception {
- Assume.assumeTrue(getFileSystem().hasMetadataStore());
-
- Configuration conf;
- Path directory = path("ListStatusWriteBack");
-
- // "raw" S3AFileSystem without S3Guard
- S3AFileSystem noS3Guard = createTestFS(directory.toUri(), true, false);
-
- // Another with S3Guard and write-back disabled
- S3AFileSystem noWriteBack = createTestFS(directory.toUri(), false, false);
-
- // Another S3Guard and write-back enabled
- S3AFileSystem yesWriteBack = createTestFS(directory.toUri(), false, true);
-
- // delete the existing directory (in case of last test failure)
- noS3Guard.delete(directory, true);
- // Create a directory on S3 only
- noS3Guard.mkdirs(new Path(directory, "OnS3"));
- // Create a directory on both S3 and metadata store
- Path p = new Path(directory, "OnS3AndMS");
- assertPathDoesntExist(noWriteBack, p);
- noWriteBack.mkdirs(p);
-
- FileStatus[] fsResults;
- DirListingMetadata mdResults;
-
- // FS should return both even though S3Guard is not writing back to MS
- fsResults = noWriteBack.listStatus(directory);
- assertEquals("Filesystem enabled S3Guard without write back should have "
- + "both /OnS3 and /OnS3AndMS: " + Arrays.toString(fsResults),
- 2, fsResults.length);
-
- // Metadata store without write-back should still only contain /OnS3AndMS,
- // because newly discovered /OnS3 is not written back to metadata store
- mdResults = noWriteBack.getMetadataStore().listChildren(directory);
- assertEquals("Metadata store without write back should still only know "
- + "about /OnS3AndMS, but it has: " + mdResults,
- 1, mdResults.numEntries());
-
- // FS should return both (and will write it back)
- fsResults = yesWriteBack.listStatus(directory);
- assertEquals("Filesystem enabled S3Guard with write back should have "
- + " both /OnS3 and /OnS3AndMS: " + Arrays.toString(fsResults),
- 2, fsResults.length);
-
- // Metadata store with write-back should contain both because the newly
- // discovered /OnS3 should have been written back to metadata store
- mdResults = yesWriteBack.getMetadataStore().listChildren(directory);
- assertEquals("Unexpected number of results from metadata store. "
- + "Should have /OnS3 and /OnS3AndMS: " + mdResults,
- 2, mdResults.numEntries());
-
- // If we don't clean this up, the next test run will fail because it will
- // have recorded /OnS3 being deleted even after it's written to noS3Guard.
- getFileSystem().getMetadataStore().forgetMetadata(
- new Path(directory, "OnS3"));
- }
-
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/30baa089/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardWriteBack.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardWriteBack.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardWriteBack.java
new file mode 100644
index 0000000..a63b696
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardWriteBack.java
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata;
+import org.junit.Assume;
+import org.junit.Test;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URI;
+import java.util.Arrays;
+
+/**
+ * Test cases that validate S3Guard's behavior for writing things like
+ * directory listings back to the MetadataStore.
+ */
+public class ITestS3GuardWriteBack extends AbstractS3ATestBase {
+
+ /**
+ * In listStatus(), when S3Guard is enabled, the full listing for a
+ * directory is "written back" to the MetadataStore before the listing is
+ * returned. Currently this "write back" behavior occurs when
+ * fs.s3a.metadatastore.authoritative is true. This test validates this
+ * behavior.
+ * @throws Exception on failure
+ */
+ @Test
+ public void testListStatusWriteBack() throws Exception {
+ Assume.assumeTrue(getFileSystem().hasMetadataStore());
+
+ Path directory = path("ListStatusWriteBack");
+
+ // "raw" S3AFileSystem without S3Guard
+ S3AFileSystem noS3Guard = createTestFS(directory.toUri(), true, false);
+
+ // Another with S3Guard and write-back disabled
+ S3AFileSystem noWriteBack = createTestFS(directory.toUri(), false, false);
+
+ // Another S3Guard and write-back enabled
+ S3AFileSystem yesWriteBack = createTestFS(directory.toUri(), false, true);
+
+ // delete the existing directory (in case of last test failure)
+ noS3Guard.delete(directory, true);
+ // Create a directory on S3 only
+ noS3Guard.mkdirs(new Path(directory, "OnS3"));
+ // Create a directory on both S3 and metadata store
+ Path p = new Path(directory, "OnS3AndMS");
+ assertPathDoesntExist(noWriteBack, p);
+ noWriteBack.mkdirs(p);
+
+ FileStatus[] fsResults;
+ DirListingMetadata mdResults;
+
+ // FS should return both even though S3Guard is not writing back to MS
+ fsResults = noWriteBack.listStatus(directory);
+ assertEquals("Filesystem enabled S3Guard without write back should have "
+ + "both /OnS3 and /OnS3AndMS: " + Arrays.toString(fsResults),
+ 2, fsResults.length);
+
+ // Metadata store without write-back should still only contain /OnS3AndMS,
+ // because newly discovered /OnS3 is not written back to metadata store
+ mdResults = noWriteBack.getMetadataStore().listChildren(directory);
+ assertEquals("Metadata store without write back should still only know "
+ + "about /OnS3AndMS, but it has: " + mdResults,
+ 1, mdResults.numEntries());
+
+ // FS should return both (and will write it back)
+ fsResults = yesWriteBack.listStatus(directory);
+ assertEquals("Filesystem enabled S3Guard with write back should have "
+ + " both /OnS3 and /OnS3AndMS: " + Arrays.toString(fsResults),
+ 2, fsResults.length);
+
+ // Metadata store with write-back should contain both because the newly
+ // discovered /OnS3 should have been written back to metadata store
+ mdResults = yesWriteBack.getMetadataStore().listChildren(directory);
+ assertEquals("Unexpected number of results from metadata store. "
+ + "Should have /OnS3 and /OnS3AndMS: " + mdResults,
+ 2, mdResults.numEntries());
+
+ // If we don't clean this up, the next test run will fail because it will
+ // have recorded /OnS3 being deleted even after it's written to noS3Guard.
+ getFileSystem().getMetadataStore().forgetMetadata(
+ new Path(directory, "OnS3"));
+ }
+
+ /** Create a separate S3AFileSystem instance for testing. */
+ private S3AFileSystem createTestFS(URI fsURI, boolean disableS3Guard,
+ boolean authoritativeMeta) throws IOException {
+ Configuration conf;
+
+ // Create a FileSystem that is S3-backed only
+ conf = createConfiguration();
+ S3ATestUtils.disableFilesystemCaching(conf);
+ if (disableS3Guard) {
+ conf.set(Constants.S3_METADATA_STORE_IMPL,
+ Constants.S3GUARD_METASTORE_NULL);
+ } else {
+ S3ATestUtils.maybeEnableS3Guard(conf);
+ conf.setBoolean(Constants.METADATASTORE_AUTHORITATIVE, authoritativeMeta);
+ }
+ FileSystem fs = FileSystem.get(fsURI, conf);
+ return asS3AFS(fs);
+ }
+
+ private static S3AFileSystem asS3AFS(FileSystem fs) {
+ assertTrue("Not a S3AFileSystem: " + fs, fs instanceof S3AFileSystem);
+ return (S3AFileSystem)fs;
+ }
+
+ private static void assertPathDoesntExist(FileSystem fs, Path p)
+ throws IOException {
+ try {
+ FileStatus s = fs.getFileStatus(p);
+ } catch (FileNotFoundException e) {
+ return;
+ }
+ fail("Path should not exist: " + p);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/30baa089/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java
deleted file mode 100644
index 88a9c78..0000000
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.fs.s3a;
-
-import com.amazonaws.ClientConfiguration;
-import com.amazonaws.auth.AWSCredentialsProvider;
-import com.amazonaws.services.s3.AmazonS3;
-
-/**
- * S3 Client factory used for testing with eventual consistency fault injection.
- */
-public class InconsistentS3ClientFactory extends DefaultS3ClientFactory {
-
- @Override
- protected AmazonS3 newAmazonS3Client(AWSCredentialsProvider credentials,
- ClientConfiguration awsConf) {
- return new InconsistentAmazonS3Client(credentials, awsConf);
- }
-}
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org