You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by st...@apache.org on 2017/08/12 21:00:03 UTC
[3/3] hadoop git commit: HADOOP-14749. review s3guard docs & code
prior to merge. Contributed by Steve Loughran
HADOOP-14749. review s3guard docs & code prior to merge.
Contributed by Steve Loughran
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/e531ae25
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/e531ae25
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/e531ae25
Branch: refs/heads/HADOOP-13345
Commit: e531ae251fac73f727f457039f3e16fb2a10069a
Parents: b114f24
Author: Steve Loughran <st...@apache.org>
Authored: Sat Aug 12 21:59:11 2017 +0100
Committer: Steve Loughran <st...@apache.org>
Committed: Sat Aug 12 21:59:11 2017 +0100
----------------------------------------------------------------------
hadoop-tools/hadoop-aws/pom.xml | 16 +-
.../java/org/apache/hadoop/fs/s3a/Listing.java | 6 +-
.../org/apache/hadoop/fs/s3a/S3AFileStatus.java | 11 -
.../org/apache/hadoop/fs/s3a/S3AFileSystem.java | 72 +--
.../hadoop/fs/s3a/S3AInstrumentation.java | 2 +-
.../org/apache/hadoop/fs/s3a/Statistic.java | 2 +-
.../org/apache/hadoop/fs/s3a/UploadInfo.java | 4 +-
.../fs/s3a/s3guard/DirListingMetadata.java | 21 +-
.../fs/s3a/s3guard/DynamoDBClientFactory.java | 3 +-
.../fs/s3a/s3guard/DynamoDBMetadataStore.java | 94 ++--
.../fs/s3a/s3guard/LocalMetadataStore.java | 13 +-
.../hadoop/fs/s3a/s3guard/MetadataStore.java | 5 +-
.../s3guard/MetadataStoreListFilesIterator.java | 5 +-
.../fs/s3a/s3guard/NullMetadataStore.java | 9 -
.../hadoop/fs/s3a/s3guard/PathMetadata.java | 6 +
.../PathMetadataDynamoDBTranslation.java | 6 +-
.../apache/hadoop/fs/s3a/s3guard/S3Guard.java | 87 ++--
.../hadoop/fs/s3a/s3guard/S3GuardTool.java | 76 +--
.../hadoop/fs/s3a/s3guard/package-info.java | 2 +-
.../src/site/markdown/tools/hadoop-aws/index.md | 2 +-
.../site/markdown/tools/hadoop-aws/s3guard.md | 485 +++++++++++--------
.../site/markdown/tools/hadoop-aws/testing.md | 213 ++++++--
.../apache/hadoop/fs/s3a/S3ATestConstants.java | 2 +-
.../org/apache/hadoop/fs/s3a/S3ATestUtils.java | 47 +-
.../fs/s3a/s3guard/AbstractMSContract.java | 4 +-
.../s3guard/AbstractS3GuardToolTestBase.java | 161 ++++++
.../s3a/s3guard/DynamoDBLocalClientFactory.java | 16 +-
.../s3a/s3guard/ITestS3GuardConcurrentOps.java | 19 +-
.../s3a/s3guard/ITestS3GuardToolDynamoDB.java | 79 +--
.../fs/s3a/s3guard/ITestS3GuardToolLocal.java | 68 +--
.../fs/s3a/s3guard/MetadataStoreTestBase.java | 51 +-
.../fs/s3a/s3guard/S3GuardToolTestBase.java | 159 ------
.../fs/s3a/s3guard/TestDirListingMetadata.java | 12 +-
.../s3a/s3guard/TestDynamoDBMetadataStore.java | 71 ++-
.../fs/s3a/s3guard/TestLocalMetadataStore.java | 12 +-
.../fs/s3a/s3guard/TestNullMetadataStore.java | 9 +-
.../TestPathMetadataDynamoDBTranslation.java | 20 +-
.../hadoop/fs/s3a/s3guard/TestS3Guard.java | 9 +-
.../src/test/resources/log4j.properties | 2 +-
39 files changed, 1114 insertions(+), 767 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/pom.xml
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/pom.xml b/hadoop-tools/hadoop-aws/pom.xml
index 62371c3..4161a50 100644
--- a/hadoop-tools/hadoop-aws/pom.xml
+++ b/hadoop-tools/hadoop-aws/pom.xml
@@ -170,7 +170,7 @@
<fs.s3a.scale.test.huge.filesize>${fs.s3a.scale.test.huge.filesize}</fs.s3a.scale.test.huge.filesize>
<fs.s3a.scale.test.huge.huge.partitionsize>${fs.s3a.scale.test.huge.partitionsize}</fs.s3a.scale.test.huge.huge.partitionsize>
<fs.s3a.scale.test.timeout>${fs.s3a.scale.test.timeout}</fs.s3a.scale.test.timeout>
- <!-- s3guard -->
+ <!-- S3Guard -->
<fs.s3a.s3guard.test.enabled>${fs.s3a.s3guard.test.enabled}</fs.s3a.s3guard.test.enabled>
<fs.s3a.s3guard.test.authoritative>${fs.s3a.s3guard.test.authoritative}</fs.s3a.s3guard.test.authoritative>
<fs.s3a.s3guard.test.implementation>${fs.s3a.s3guard.test.implementation}</fs.s3a.s3guard.test.implementation>
@@ -216,7 +216,7 @@
<fs.s3a.scale.test.huge.filesize>${fs.s3a.scale.test.huge.filesize}</fs.s3a.scale.test.huge.filesize>
<fs.s3a.scale.test.huge.huge.partitionsize>${fs.s3a.scale.test.huge.partitionsize}</fs.s3a.scale.test.huge.huge.partitionsize>
<fs.s3a.scale.test.timeout>${fs.s3a.scale.test.timeout}</fs.s3a.scale.test.timeout>
- <!-- s3guard -->
+ <!-- S3Guard -->
<fs.s3a.s3guard.test.enabled>${fs.s3a.s3guard.test.enabled}</fs.s3a.s3guard.test.enabled>
<fs.s3a.s3guard.test.implementation>${fs.s3a.s3guard.test.implementation}</fs.s3a.s3guard.test.implementation>
<fs.s3a.s3guard.test.authoritative>${fs.s3a.s3guard.test.authoritative}</fs.s3a.s3guard.test.authoritative>
@@ -262,7 +262,7 @@
<fs.s3a.scale.test.enabled>${fs.s3a.scale.test.enabled}</fs.s3a.scale.test.enabled>
<fs.s3a.scale.test.huge.filesize>${fs.s3a.scale.test.huge.filesize}</fs.s3a.scale.test.huge.filesize>
<fs.s3a.scale.test.timeout>${fs.s3a.scale.test.timeout}</fs.s3a.scale.test.timeout>
- <!-- s3guard -->
+ <!-- S3Guard -->
<fs.s3a.s3guard.test.enabled>${fs.s3a.s3guard.test.enabled}</fs.s3a.s3guard.test.enabled>
<fs.s3a.s3guard.test.implementation>${fs.s3a.s3guard.test.implementation}</fs.s3a.s3guard.test.implementation>
<fs.s3a.s3guard.test.authoritative>${fs.s3a.s3guard.test.authoritative}</fs.s3a.s3guard.test.authoritative>
@@ -289,7 +289,7 @@
</properties>
</profile>
- <!-- Turn on s3guard tests-->
+ <!-- Turn on S3Guard tests-->
<profile>
<id>s3guard</id>
<activation>
@@ -302,7 +302,7 @@
</properties>
</profile>
- <!-- Switch to dynamo DB for s3guard. Has no effect unless s3guard is enabled -->
+ <!-- Switch to DynamoDB for S3Guard. Has no effect unless S3Guard is enabled -->
<profile>
<id>dynamo</id>
<activation>
@@ -315,7 +315,7 @@
</properties>
</profile>
- <!-- Switch to DynamoDBLocal for s3guard. Has no effect unless s3guard is enabled -->
+ <!-- Switch to DynamoDBLocal for S3Guard. Has no effect unless S3Guard is enabled -->
<profile>
<id>dynamodblocal</id>
<activation>
@@ -328,8 +328,8 @@
</properties>
</profile>
- <!-- Switch s3guard from Authoritative=false to true
- Has no effect unless s3guard is enabled -->
+ <!-- Switch S3Guard from Authoritative=false to true
+ Has no effect unless S3Guard is enabled -->
<profile>
<id>non-auth</id>
<activation>
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java
index 5299c6c..8efa218 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java
@@ -70,7 +70,8 @@ public class Listing {
* @return the file status iterator
*/
ProvidedFileStatusIterator createProvidedFileStatusIterator(
- FileStatus[] fileStatuses, PathFilter filter,
+ FileStatus[] fileStatuses,
+ PathFilter filter,
FileStatusAcceptor acceptor) {
return new ProvidedFileStatusIterator(fileStatuses, filter, acceptor);
}
@@ -356,7 +357,8 @@ public class Listing {
* If there is data in the local filtered list, return true.
* Else: request more data util that condition is met, or there
* is no more remote listing data.
- * Lastly, return true if the provided file status has left items.
+ * Lastly, return true if the {@code providedStatusIterator}
+ * has left items.
* @return true if a call to {@link #next()} will succeed.
* @throws IOException
*/
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileStatus.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileStatus.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileStatus.java
index eee3c13..be08afe 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileStatus.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileStatus.java
@@ -105,17 +105,6 @@ public class S3AFileStatus extends FileStatus {
return isEmptyDirectory;
}
- /**
- * Should not be called by clients. Only used so {@link org.apache.hadoop
- * .fs.s3a.s3guard.MetadataStore} can maintain this flag when caching
- * FileStatuses on behalf of s3a.
- * @param value for directories: TRUE / FALSE if known empty/not-empty,
- * UNKNOWN otherwise
- */
- public void setIsEmptyDirectory(Tristate value) {
- isEmptyDirectory = value;
- }
-
/** Compare if this object is equal to another object.
* @param o the object to be compared.
* @return true if two file status has the same path name; false if not.
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index 95fe94f..c22383a 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -295,6 +295,7 @@ public class S3AFileSystem extends FileSystem {
} catch (AmazonClientException e) {
throw translateException("initializing ", new Path(name), e);
}
+
}
/**
@@ -838,8 +839,8 @@ public class S3AFileSystem extends FileSystem {
srcPaths = new HashSet<>(); // srcPaths need fast look up before put
dstMetas = new ArrayList<>();
}
- // HADOOP-13761 s3guard: retries when source paths are not visible yet
- // TODO s3guard: performance: mark destination dirs as authoritative
+ // TODO S3Guard HADOOP-13761: retries when source paths are not visible yet
+ // TODO S3Guard: performance: mark destination dirs as authoritative
// Ok! Time to start
if (srcStatus.isFile()) {
@@ -904,6 +905,8 @@ public class S3AFileSystem extends FileSystem {
copyFile(key, newDstKey, length);
if (hasMetadataStore()) {
+ // with a metadata store, the object entries need to be updated,
+ // including, potentially, the ancestors
Path childSrc = keyToQualifiedPath(key);
Path childDst = keyToQualifiedPath(newDstKey);
if (objectRepresentsDirectory(key, length)) {
@@ -960,12 +963,18 @@ public class S3AFileSystem extends FileSystem {
/**
* Does this Filesystem have a metadata store?
- * @return true if the FS has been instantiated with a metadata store
+ * @return true iff the FS has been instantiated with a metadata store
*/
public boolean hasMetadataStore() {
return !S3Guard.isNullMetadataStore(metadataStore);
}
+ /**
+ * Get the metadata store.
+ * This will always be non-null, but may be bound to the
+ * {@code NullMetadataStore}.
+ * @return the metadata store of this FS instance
+ */
@VisibleForTesting
MetadataStore getMetadataStore() {
return metadataStore;
@@ -1448,7 +1457,7 @@ public class S3AFileSystem extends FileSystem {
if (status.isEmptyDirectory() == Tristate.TRUE) {
LOG.debug("Deleting fake empty directory {}", key);
- // HADOOP-13761 s3guard: retries here
+ // HADOOP-13761 S3Guard: retries here
deleteObject(key);
metadataStore.delete(f);
instrumentation.directoryDeleted();
@@ -1466,7 +1475,7 @@ public class S3AFileSystem extends FileSystem {
LOG.debug("Got object to delete {}", summary.getKey());
if (keys.size() == MAX_ENTRIES_TO_DELETE) {
- // HADOOP-13761 s3guard: retries
+ // TODO: HADOOP-13761 S3Guard: retries
removeKeys(keys, true, false);
}
}
@@ -1475,7 +1484,7 @@ public class S3AFileSystem extends FileSystem {
objects = continueListObjects(objects);
} else {
if (!keys.isEmpty()) {
- // HADOOP-13761 s3guard: retries
+ // TODO: HADOOP-13761 S3Guard: retries
removeKeys(keys, false, false);
}
break;
@@ -1742,7 +1751,7 @@ public class S3AFileSystem extends FileSystem {
* Return a file status object that represents the path.
* @param f The path we want information from
* @return a FileStatus object
- * @throws java.io.FileNotFoundException when the path does not exist;
+ * @throws FileNotFoundException when the path does not exist
* @throws IOException on other problems.
*/
public FileStatus getFileStatus(final Path f) throws IOException {
@@ -1750,12 +1759,12 @@ public class S3AFileSystem extends FileSystem {
}
/**
- * Internal version of getFileStatus().
+ * Internal version of {@link #getFileStatus(Path)}.
* @param f The path we want information from
* @param needEmptyDirectoryFlag if true, implementation will calculate
* a TRUE or FALSE value for {@link S3AFileStatus#isEmptyDirectory()}
* @return a S3AFileStatus object
- * @throws java.io.FileNotFoundException when the path does not exist;
+ * @throws FileNotFoundException when the path does not exist
* @throws IOException on other problems.
*/
@VisibleForTesting
@@ -1800,19 +1809,25 @@ public class S3AFileSystem extends FileSystem {
} catch (FileNotFoundException e) {
return S3AFileStatus.fromFileStatus(msStatus, Tristate.TRUE);
}
+ // entry was found, save in S3Guard
return S3Guard.putAndReturn(metadataStore, s3FileStatus, instrumentation);
+ } else {
+ // there was no entry in S3Guard
+ // retrieve the data and update the metadata store in the process.
+ return S3Guard.putAndReturn(metadataStore,
+ s3GetFileStatus(path, key, tombstones), instrumentation);
}
- return S3Guard.putAndReturn(metadataStore,
- s3GetFileStatus(path, key, tombstones), instrumentation);
}
/**
- * Raw get file status that only uses S3. Used to implement
- * innerGetFileStatus, and for direct management of empty directory blobs.
+ * Raw {@code getFileStatus} that talks direct to S3.
+ * Used to implement {@link #innerGetFileStatus(Path, boolean)},
+ * and for direct management of empty directory blobs.
* @param path Qualified path
* @param key Key string for the path
* @return Status
- * @throws IOException
+ * @throws FileNotFoundException when the path does not exist
+ * @throws IOException on other problems.
*/
private S3AFileStatus s3GetFileStatus(final Path path, String key,
Set<Path> tombstones) throws IOException {
@@ -1826,10 +1841,10 @@ public class S3AFileSystem extends FileSystem {
} else {
LOG.debug("Found exact file: normal file");
return new S3AFileStatus(meta.getContentLength(),
- dateToLong(meta.getLastModified()),
- path,
- getDefaultBlockSize(path),
- username);
+ dateToLong(meta.getLastModified()),
+ path,
+ getDefaultBlockSize(path),
+ username);
}
} catch (AmazonServiceException e) {
if (e.getStatusCode() != 404) {
@@ -1911,7 +1926,8 @@ public class S3AFileSystem extends FileSystem {
throw new FileNotFoundException("No such file or directory: " + path);
}
- /** Helper function to determine if a collection of paths is empty
+ /**
+ * Helper function to determine if a collection of paths is empty
* after accounting for tombstone markers (if provided).
* @param keys Collection of path (prefixes / directories or keys).
* @param tombstones Set of tombstone markers, or null if not applicable.
@@ -1932,7 +1948,8 @@ public class S3AFileSystem extends FileSystem {
return true;
}
- /** Helper function to determine if a collection of object summaries is empty
+ /**
+ * Helper function to determine if a collection of object summaries is empty
* after accounting for tombstone markers (if provided).
* @param summaries Collection of objects as returned by listObjects.
* @param tombstones Set of tombstone markers, or null if not applicable.
@@ -1945,14 +1962,12 @@ public class S3AFileSystem extends FileSystem {
return summaries.isEmpty();
}
Collection<String> stringCollection = new ArrayList<>(summaries.size());
- for(S3ObjectSummary summary : summaries) {
+ for (S3ObjectSummary summary : summaries) {
stringCollection.add(summary.getKey());
}
return isEmptyOfKeys(stringCollection, tombstones);
}
-
-
/**
* Raw version of {@link FileSystem#exists(Path)} which uses S3 only:
* S3Guard MetadataStore, if any, will be skipped.
@@ -1962,7 +1977,8 @@ public class S3AFileSystem extends FileSystem {
Path path = qualify(f);
String key = pathToKey(path);
try {
- return s3GetFileStatus(path, key, null) != null;
+ s3GetFileStatus(path, key, null);
+ return true;
} catch (FileNotFoundException e) {
return false;
}
@@ -2247,7 +2263,7 @@ public class S3AFileSystem extends FileSystem {
deleteUnnecessaryFakeDirectories(p.getParent());
Preconditions.checkArgument(length >= 0, "content length is negative");
- // See note about failure semantics in s3guard.md doc.
+ // See note about failure semantics in S3Guard documentation
try {
if (hasMetadataStore()) {
S3Guard.addAncestors(metadataStore, p, username);
@@ -2257,7 +2273,7 @@ public class S3AFileSystem extends FileSystem {
S3Guard.putAndReturn(metadataStore, status, instrumentation);
}
} catch (IOException e) {
- LOG.error("s3guard: Error updating MetadataStore for write to {}:",
+ LOG.error("S3Guard: Error updating MetadataStore for write to {}:",
key, e);
instrumentation.errorIgnored();
}
@@ -2593,6 +2609,7 @@ public class S3AFileSystem extends FileSystem {
cachedFilesIterator = listing.createProvidedFileStatusIterator(
S3Guard.dirMetaToStatuses(meta), ACCEPT_ALL, acceptor);
if (allowAuthoritative && meta != null && meta.isAuthoritative()) {
+ // metadata listing is authoritative, so return it directly
return listing.createLocatedFileStatusIterator(cachedFilesIterator);
}
}
@@ -2606,8 +2623,7 @@ public class S3AFileSystem extends FileSystem {
tombstones);
}
} catch (AmazonClientException e) {
- // TODO s3guard:
- // 1. retry on file not found exception
+ // TODO S3Guard: retry on file not found exception
throw translateException("listFiles", path, e);
}
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java
index 77804fe..c4b4866 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java
@@ -91,7 +91,7 @@ public class S3AInstrumentation {
private final Map<String, MutableCounterLong> streamMetrics =
new HashMap<>(30);
- /** Instantiate this without caring whether or not s3guard is enabled. */
+ /** Instantiate this without caring whether or not S3Guard is enabled. */
private final S3GuardInstrumentation s3GuardInstrumentation
= new S3GuardInstrumentation();
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
index bfc3d35..777c161 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
@@ -142,7 +142,7 @@ public enum Statistic {
STREAM_WRITE_QUEUE_DURATION("stream_write_queue_duration",
"Total queue duration of all block uploads"),
- // S3guard stats
+ // S3Guard stats
S3GUARD_METADATASTORE_PUT_PATH_REQUEST(
"s3guard_metadatastore_put_path_request",
"s3guard metadata store put one metadata path request"),
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UploadInfo.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UploadInfo.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UploadInfo.java
index b6d31af0..238cd97 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UploadInfo.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UploadInfo.java
@@ -24,8 +24,8 @@ import com.amazonaws.services.s3.transfer.Upload;
* Simple struct that contains information about a S3 upload.
*/
public class UploadInfo {
- private Upload upload;
- private long length;
+ private final Upload upload;
+ private final long length;
public UploadInfo(Upload upload, long length) {
this.upload = upload;
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DirListingMetadata.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DirListingMetadata.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DirListingMetadata.java
index fe4f9a1..bcbdced 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DirListingMetadata.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DirListingMetadata.java
@@ -18,11 +18,6 @@
package org.apache.hadoop.fs.s3a.s3guard;
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.classification.InterfaceStability;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.Path;
-
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
@@ -34,6 +29,11 @@ import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import com.google.common.base.Preconditions;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.Tristate;
/**
@@ -61,7 +61,8 @@ public class DirListingMetadata {
* Create a directory listing metadata container.
*
* @param path Path of the directory. If this path has a host component, then
- * all paths added later via put() must also have the same host.
+ * all paths added later via {@link #put(FileStatus)} must also have
+ * the same host.
* @param listing Entries in the directory.
* @param isAuthoritative true iff listing is the full contents of the
* directory, and the calling client reports that this may be cached as
@@ -257,6 +258,7 @@ public class DirListingMetadata {
prettyPrint(sb);
return sb.toString();
}
+
/**
* Checks that child path is valid.
* @param childPath path to check.
@@ -284,12 +286,15 @@ public class DirListingMetadata {
}
/**
- * For Path's that are handed in directly, we assert they are in consistent
+ * For Paths that are handed in directly, we assert they are in consistent
* format with checkPath(). For paths that are supplied embedded in
- * FileStatus', we attempt to fill in missing scheme and host, when this
+ * FileStatus, we attempt to fill in missing scheme and host, when this
* DirListingMetadata is associated with one.
*
* @return Path suitable for consistent hashtable lookups
+ * @throws NullPointerException null status argument
+ * @throws IllegalArgumentException bad status values or failure to
+ * create a URI.
*/
private Path childStatusToPathKey(FileStatus status) {
Path p = status.getPath();
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBClientFactory.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBClientFactory.java
index 0c88230..5005bc0 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBClientFactory.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBClientFactory.java
@@ -71,7 +71,8 @@ public interface DynamoDBClientFactory extends Configurable {
@Override
public AmazonDynamoDB createDynamoDBClient(String defaultRegion)
throws IOException {
- assert getConf() != null : "Should have been configured before usage";
+ Preconditions.checkNotNull(getConf(),
+ "Should have been configured before usage");
final Configuration conf = getConf();
final AWSCredentialsProvider credentials =
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
index 97c9135..322361c 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.fs.s3a.s3guard;
+import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.net.URI;
@@ -50,34 +51,32 @@ import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughput;
import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputDescription;
import com.amazonaws.services.dynamodbv2.model.ResourceInUseException;
import com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException;
-
import com.amazonaws.services.dynamodbv2.model.WriteRequest;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
-
-import org.apache.commons.lang.StringUtils;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.s3a.Constants;
-import org.apache.hadoop.fs.s3a.S3AInstrumentation;
-import org.apache.hadoop.fs.s3a.Tristate;
-import org.apache.hadoop.io.retry.RetryPolicies;
-import org.apache.hadoop.io.retry.RetryPolicy;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.Constants;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.fs.s3a.S3AInstrumentation;
+import org.apache.hadoop.fs.s3a.Tristate;
+import org.apache.hadoop.io.retry.RetryPolicies;
+import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ReflectionUtils;
-import static org.apache.hadoop.fs.s3a.s3guard.S3Guard.*;
import static org.apache.hadoop.fs.s3a.Constants.*;
-import static org.apache.hadoop.fs.s3a.S3AUtils.*;
+import static org.apache.hadoop.fs.s3a.S3AUtils.translateException;
import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.*;
+import static org.apache.hadoop.fs.s3a.s3guard.S3Guard.*;
/**
* DynamoDBMetadataStore is a {@link MetadataStore} that persists
@@ -159,7 +158,7 @@ import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.*
* the S3 bucket that hosts the S3A instance. During initialization, it checks
* the location of the S3 bucket and creates a DynamoDB client connected to the
* same region. The region may also be set explicitly by setting the config
- * parameter fs.s3a.s3guard.ddb.endpoint with the corresponding endpoint.
+ * parameter {@code fs.s3a.s3guard.ddb.region} to the corresponding region.
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
@@ -203,6 +202,7 @@ public class DynamoDBMetadataStore implements MetadataStore {
* @param conf the file system configuration
* @param s3Region region of the associated S3 bucket (if any).
* @return DynamoDB instance.
+ * @throws IOException I/O error.
*/
private static DynamoDB createDynamoDB(Configuration conf, String s3Region)
throws IOException {
@@ -255,11 +255,18 @@ public class DynamoDBMetadataStore implements MetadataStore {
* not explicitly relate to any S3 bucket, which be nonexistent.
*
* This is used to operate the metadata store directly beyond the scope of the
- * S3AFileSystem integration, e.g. command line tools. Generally you should
- * use {@link #initialize(FileSystem)} if given an initialized S3 file system.
+ * S3AFileSystem integration, e.g. command line tools.
+ * Generally, callers should use {@link #initialize(FileSystem)}
+ * with an initialized {@code S3AFileSystem} instance.
+ *
+ * Without a filesystem to act as a reference point, the configuration itself
+ * must declare the table name and region in the
+ * {@link Constants#S3GUARD_DDB_TABLE_NAME_KEY} and
+ * {@link Constants#S3GUARD_DDB_REGION_KEY} respectively.
*
* @see #initialize(FileSystem)
* @throws IOException if there is an error
+ * @throws IllegalArgumentException if the configuration is incomplete
*/
@Override
public void initialize(Configuration config) throws IOException {
@@ -267,10 +274,10 @@ public class DynamoDBMetadataStore implements MetadataStore {
// use the bucket as the DynamoDB table name if not specified in config
tableName = conf.getTrimmed(S3GUARD_DDB_TABLE_NAME_KEY);
Preconditions.checkArgument(!StringUtils.isEmpty(tableName),
- "No DynamoDB table name configured!");
+ "No DynamoDB table name configured");
region = conf.getTrimmed(S3GUARD_DDB_REGION_KEY);
Preconditions.checkArgument(!StringUtils.isEmpty(region),
- "No DynamoDB region configured!");
+ "No DynamoDB region configured");
dynamoDB = createDynamoDB(conf, region);
username = UserGroupInformation.getCurrentUser().getShortUserName();
@@ -279,6 +286,12 @@ public class DynamoDBMetadataStore implements MetadataStore {
initTable();
}
+ /**
+ * Set retry policy. This is driven by the value of
+ * {@link Constants#S3GUARD_DDB_MAX_RETRIES} with an exponential backoff
+ * between each attempt of {@link #MIN_RETRY_SLEEP_MSEC} milliseconds.
+ * @param config
+ */
private void setMaxRetries(Configuration config) {
int maxRetries = config.getInt(S3GUARD_DDB_MAX_RETRIES,
S3GUARD_DDB_MAX_RETRIES_DEFAULT);
@@ -297,6 +310,14 @@ public class DynamoDBMetadataStore implements MetadataStore {
innerDelete(path, false);
}
+ /**
+ * Inner delete option, action based on the {@code tombstone} flag.
+ * No tombstone: delete the entry. Tombstone: create a tombstone entry.
+ * There is no check as to whether the entry exists in the table first.
+ * @param path path to delete
+ * @param tombstone flag to create a tombstone marker
+ * @throws IOException I/O error.
+ */
private void innerDelete(Path path, boolean tombstone)
throws IOException {
path = checkPath(path);
@@ -414,6 +435,7 @@ public class DynamoDBMetadataStore implements MetadataStore {
path = checkPath(path);
LOG.debug("Listing table {} in region {}: {}", tableName, region, path);
+ // find the children in the table
try {
final QuerySpec spec = new QuerySpec()
.withHashKey(pathToParentKeyAttribute(path))
@@ -432,10 +454,12 @@ public class DynamoDBMetadataStore implements MetadataStore {
? null
: new DirListingMetadata(path, metas, false);
} catch (AmazonClientException e) {
+ // failure, including the path not being present
throw translateException("listChildren", path, e);
}
}
+ // build the list of all parent entries.
Collection<PathMetadata> completeAncestry(
Collection<PathMetadata> pathsToCreate) {
// Key on path to allow fast lookup
@@ -677,7 +701,7 @@ public class DynamoDBMetadataStore implements MetadataStore {
return;
}
LOG.info("Deleting DynamoDB table {} in region {}", tableName, region);
- Preconditions.checkNotNull(dynamoDB, "Not connected to Dynamo");
+ Preconditions.checkNotNull(dynamoDB, "Not connected to DynamoDB");
try {
table.delete();
table.waitForDelete();
@@ -766,22 +790,23 @@ public class DynamoDBMetadataStore implements MetadataStore {
LOG.debug("Binding to table {}", tableName);
final String status = table.describe().getTableStatus();
switch (status) {
- case "CREATING":
- case "UPDATING":
- LOG.debug("Table {} in region {} is being created/updated. This may "
- + "indicate that the table is being operated by another "
- + "concurrent thread or process. Waiting for active...",
- tableName, region);
- waitForTableActive(table);
- break;
- case "DELETING":
- throw new IOException("DynamoDB table '" + tableName + "' is being "
- + "deleted in region " + region);
- case "ACTIVE":
- break;
- default:
- throw new IOException("Unknown DynamoDB table status " + status
- + ": tableName='" + tableName + "', region=" + region);
+ case "CREATING":
+ case "UPDATING":
+ LOG.debug("Table {} in region {} is being created/updated. This may"
+ + " indicate that the table is being operated by another "
+ + "concurrent thread or process. Waiting for active...",
+ tableName, region);
+ waitForTableActive(table);
+ break;
+ case "DELETING":
+ throw new FileNotFoundException("DynamoDB table "
+ + "'" + tableName + "' is being "
+ + "deleted in region " + region);
+ case "ACTIVE":
+ break;
+ default:
+ throw new IOException("Unknown DynamoDB table status " + status
+ + ": tableName='" + tableName + "', region=" + region);
}
final Item versionMarker = getVersionMarkerItem();
@@ -799,7 +824,8 @@ public class DynamoDBMetadataStore implements MetadataStore {
createTable(capacity);
} else {
- throw new IOException("DynamoDB table '" + tableName + "' does not "
+ throw new FileNotFoundException("DynamoDB table "
+ + "'" + tableName + "' does not "
+ "exist in region " + region + "; auto-creation is turned off");
}
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java
index bd1f8df..a6492f9 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java
@@ -37,12 +37,13 @@ import java.util.Map;
/**
* This is a local, in-memory, implementation of MetadataStore.
- * This is *not* a coherent cache across processes. It is only
+ * This is <i>not</i> a coherent cache across processes. It is only
* locally-coherent.
*
- * The purpose of this is for unit testing. It could also be used to accelerate
- * local-only operations where only one process is operating on a given object
- * store, or multiple processes are accessing a read-only storage bucket.
+ * The purpose of this is for unit and integration testing.
+ * It could also be used to accelerate local-only operations where only one
+ * process is operating on a given object store, or multiple processes are
+ * accessing a read-only storage bucket.
*
* This MetadataStore does not enforce filesystem rules such as disallowing
* non-recursive removal of non-empty directories. It is assumed the caller
@@ -53,6 +54,10 @@ public class LocalMetadataStore implements MetadataStore {
public static final Logger LOG = LoggerFactory.getLogger(MetadataStore.class);
// TODO HADOOP-13649: use time instead of capacity for eviction.
public static final int DEFAULT_MAX_RECORDS = 128;
+
+ /**
+ * Maximum number of records.
+ */
public static final String CONF_MAX_RECORDS =
"fs.metadatastore.local.max_records";
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java
index 76d5cdd..dd8077b 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java
@@ -48,7 +48,6 @@ public interface MetadataStore extends Closeable {
/**
* Performs one-time initialization of the metadata store via configuration.
- *
* @see #initialize(FileSystem)
* @param conf Configuration.
* @throws IOException if there is an error
@@ -149,8 +148,8 @@ public interface MetadataStore extends Closeable {
* ().
* @throws IOException if there is an error
*/
- void move(Collection<Path> pathsToDelete, Collection<PathMetadata>
- pathsToCreate) throws IOException;
+ void move(Collection<Path> pathsToDelete,
+ Collection<PathMetadata> pathsToCreate) throws IOException;
/**
* Saves metadata for exactly one path.
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreListFilesIterator.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreListFilesIterator.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreListFilesIterator.java
index 272b1f4..679f767 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreListFilesIterator.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreListFilesIterator.java
@@ -28,13 +28,14 @@ import java.util.Queue;
import java.util.Set;
import com.google.common.base.Preconditions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
/**
* {@code MetadataStoreListFilesIterator} is a {@link RemoteIterator} that
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java
index c6a4507..41dd61b 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java
@@ -43,22 +43,18 @@ public class NullMetadataStore implements MetadataStore {
@Override
public void close() throws IOException {
- return;
}
@Override
public void delete(Path path) throws IOException {
- return;
}
@Override
public void forgetMetadata(Path path) throws IOException {
- return;
}
@Override
public void deleteSubtree(Path path) throws IOException {
- return;
}
@Override
@@ -80,22 +76,18 @@ public class NullMetadataStore implements MetadataStore {
@Override
public void move(Collection<Path> pathsToDelete,
Collection<PathMetadata> pathsToCreate) throws IOException {
- return;
}
@Override
public void put(PathMetadata meta) throws IOException {
- return;
}
@Override
public void put(Collection<PathMetadata> meta) throws IOException {
- return;
}
@Override
public void put(DirListingMetadata meta) throws IOException {
- return;
}
@Override
@@ -104,7 +96,6 @@ public class NullMetadataStore implements MetadataStore {
@Override
public void prune(long modTime) {
- return;
}
@Override
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadata.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadata.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadata.java
index d799e16..d8cb447 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadata.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadata.java
@@ -37,6 +37,11 @@ public class PathMetadata {
private Tristate isEmptyDirectory;
private boolean isDeleted;
+ /**
+ * Create a tombstone from the current time.
+ * @param path path to tombstone
+ * @return the entry.
+ */
public static PathMetadata tombstone(Path path) {
long now = System.currentTimeMillis();
FileStatus status = new FileStatus(0, false, 0, 0, now, path);
@@ -75,6 +80,7 @@ public class PathMetadata {
}
/**
+ * Query if a directory is empty.
* @return Tristate.TRUE if this is known to be an empty directory,
* Tristate.FALSE if known to not be empty, and Tristate.UNKNOWN if the
* MetadataStore does have enough information to determine either way.
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java
index c206a00..8515bfb 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java
@@ -136,7 +136,7 @@ final class PathMetadataDynamoDBTranslation {
username, username, path);
}
boolean isDeleted =
- item.hasAttribute(IS_DELETED) ? item.getBoolean(IS_DELETED) : false;
+ item.hasAttribute(IS_DELETED) && item.getBoolean(IS_DELETED);
return new PathMetadata(fileStatus, Tristate.UNKNOWN, isDeleted);
}
@@ -243,8 +243,8 @@ final class PathMetadataDynamoDBTranslation {
}
/**
- * e.g. pathToParentKey(s3a://bucket/path/a) -> /bucket/path/a
- * @param path
+ * e.g. {@code pathToParentKey(s3a://bucket/path/a) -> /bucket/path/a}
+ * @param path path to convert
* @return string for parent key
*/
static String pathToParentKey(Path path) {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java
index d8ae3fb..edd3830 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java
@@ -18,8 +18,17 @@
package org.apache.hadoop.fs.s3a.s3guard;
+import java.io.IOException;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Set;
+
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@@ -29,21 +38,13 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.S3AInstrumentation;
-import org.apache.hadoop.fs.s3a.S3AUtils;
import org.apache.hadoop.fs.s3a.Tristate;
import org.apache.hadoop.util.ReflectionUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import java.io.IOException;
-import java.net.URI;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-import java.util.Set;
-
-import static org.apache.hadoop.fs.s3a.Constants.*;
-import static org.apache.hadoop.fs.s3a.Statistic.*;
+import static org.apache.hadoop.fs.s3a.Constants.S3_METADATA_STORE_IMPL;
+import static org.apache.hadoop.fs.s3a.Statistic.S3GUARD_METADATASTORE_PUT_PATH_LATENCY;
+import static org.apache.hadoop.fs.s3a.Statistic.S3GUARD_METADATASTORE_PUT_PATH_REQUEST;
+import static org.apache.hadoop.fs.s3a.S3AUtils.createUploadFileStatus;
/**
* Logic for integrating MetadataStore with S3A.
@@ -76,10 +77,6 @@ public final class S3Guard {
* returning it. Callers must clean up by calling
* {@link MetadataStore#close()} when done using the MetadataStore.
*
- * If this fails to instantiate the class specified in config (fs.s3a
- * .metadatastore.impl), or there is an error initializing it, this will log
- * an error and return an instance of {@link NullMetadataStore} instead.
- *
* @param fs FileSystem whose Configuration specifies which
* implementation to use.
* @return Reference to new MetadataStore.
@@ -124,9 +121,9 @@ public final class S3Guard {
/**
- * Helper function which puts given S3AFileStatus in the MetadataStore and
- * returns the same S3AFileStatus.
- * @param ms MetadataStore to put() into.
+ * Helper function which puts a given S3AFileStatus into the MetadataStore and
+ * returns the same S3AFileStatus. Instrumentation monitors the put operation.
+ * @param ms MetadataStore to {@code put()} into.
* @param status status to store
* @param instrumentation instrumentation of the s3a file system
* @return The same status as passed in
@@ -168,7 +165,7 @@ public final class S3Guard {
}
/**
- * Given directory listing metadata from both the backing store, and the
+ * Given directory listing metadata from both the backing store and the
* MetadataStore, merge the two sources of truth to create a consistent
* view of the current directory contents, which can be returned to clients.
*
@@ -187,7 +184,7 @@ public final class S3Guard {
boolean isAuthoritative) throws IOException {
// Fast-path for NullMetadataStore
- if (ms instanceof NullMetadataStore) {
+ if (isNullMetadataStore(ms)) {
return backingStatuses.toArray(new FileStatus[backingStatuses.size()]);
}
@@ -216,7 +213,7 @@ public final class S3Guard {
// Minor race condition here. Multiple threads could add to this
// mutable DirListingMetadata. Since it is backed by a
- // ConcurrentHashMap, the last put() wins. I think this is ok.
+ // ConcurrentHashMap, the last put() wins.
// More concerning is two threads racing on listStatus() and delete().
// Any FileSystem has similar race conditions, but we could persist
// a stale entry longer. We could expose an atomic
@@ -245,6 +242,9 @@ public final class S3Guard {
/**
* Update MetadataStore to reflect creation of the given directories.
+ *
+ * If an IOException is raised while trying to update the entry, this
+ * operation catches the exception and returns.
* @param ms MetadataStore to update.
* @param dirs null, or an ordered list of directories from leaf to root.
* E.g. if /a/ exists, and mkdirs(/a/b/c/d) is called, this
@@ -285,7 +285,7 @@ public final class S3Guard {
Path f = dirs.get(i);
assertQualified(f);
FileStatus status =
- S3AUtils.createUploadFileStatus(f, true, 0, 0, owner);
+ createUploadFileStatus(f, true, 0, 0, owner);
// We only need to put a DirListingMetadata if we are setting
// authoritative bit
@@ -315,15 +315,16 @@ public final class S3Guard {
}
/**
- * Helper function that records the move of directory path, adding
- * resulting metadata to the supplied lists. Does not store in MetadataStore.
+ * Helper function that records the move of directory paths, adding
+ * resulting metadata to the supplied lists.
+ * Does not store in MetadataStore.
* @param ms MetadataStore, used to make this a no-op, when it is
* NullMetadataStore.
* @param srcPaths stores the source path here
* @param dstMetas stores destination metadata here
* @param srcPath source path to store
* @param dstPath destination path to store
- * @param owner Hadoop user name
+ * @param owner file owner to use in created records
*/
public static void addMoveDir(MetadataStore ms, Collection<Path> srcPaths,
Collection<PathMetadata> dstMetas, Path srcPath, Path dstPath,
@@ -331,10 +332,9 @@ public final class S3Guard {
if (isNullMetadataStore(ms)) {
return;
}
- assertQualified(srcPath);
- assertQualified(dstPath);
- FileStatus dstStatus = S3AUtils.createUploadFileStatus(dstPath, true, 0,
- 0, owner);
+ assertQualified(srcPath, dstPath);
+
+ FileStatus dstStatus = createUploadFileStatus(dstPath, true, 0, 0, owner);
addMoveStatus(srcPaths, dstMetas, srcPath, dstStatus);
}
@@ -349,7 +349,7 @@ public final class S3Guard {
* @param dstPath destination path to store
* @param size length of file moved
* @param blockSize blocksize to associate with destination file
- * @param owner Hadoop user name
+ * @param owner file owner to use in created records
*/
public static void addMoveFile(MetadataStore ms, Collection<Path> srcPaths,
Collection<PathMetadata> dstMetas, Path srcPath, Path dstPath,
@@ -357,9 +357,8 @@ public final class S3Guard {
if (isNullMetadataStore(ms)) {
return;
}
- assertQualified(srcPath);
- assertQualified(dstPath);
- FileStatus dstStatus = S3AUtils.createUploadFileStatus(dstPath, false,
+ assertQualified(srcPath, dstPath);
+ FileStatus dstStatus = createUploadFileStatus(dstPath, false,
size, blockSize, owner);
addMoveStatus(srcPaths, dstMetas, srcPath, dstStatus);
}
@@ -390,9 +389,7 @@ public final class S3Guard {
return;
}
- assertQualified(srcRoot);
- assertQualified(srcPath);
- assertQualified(dstPath);
+ assertQualified(srcRoot, srcPath, dstPath);
if (srcPath.equals(srcRoot)) {
LOG.debug("Skip moving ancestors of source root directory {}", srcRoot);
@@ -438,6 +435,11 @@ public final class S3Guard {
dstMetas.add(new PathMetadata(dstStatus));
}
+ /**
+ * Assert that the path is qualified with a host and scheme.
+ * @param p path to check
+ * @throws NullPointerException if the path's host or scheme is null
+ */
public static void assertQualified(Path p) {
URI uri = p.toUri();
// Paths must include bucket in case MetadataStore is shared between
@@ -447,4 +449,15 @@ public final class S3Guard {
// I believe this should never fail, since there is a host?
Preconditions.checkNotNull(uri.getScheme(), "Null scheme in " + uri);
}
+
+ /**
+ * Assert that all paths are valid.
+ * @param paths paths to check
+ * @throws NullPointerException if any path is not qualified
+ */
+ public static void assertQualified(Path...paths) {
+ for (Path path : paths) {
+ assertQualified(path);
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
index 9bd0cb8..ea34734 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
@@ -18,8 +18,23 @@
package org.apache.hadoop.fs.s3a.s3guard;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
@@ -34,25 +49,11 @@ import org.apache.hadoop.fs.shell.CommandFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.TimeUnit;
import static org.apache.hadoop.fs.s3a.Constants.*;
/**
- * Manage S3Guard Metadata Store.
+ * CLI to manage S3Guard Metadata Store.
*/
public abstract class S3GuardTool extends Configured implements Tool {
private static final Logger LOG = LoggerFactory.getLogger(S3GuardTool.class);
@@ -74,6 +75,8 @@ public abstract class S3GuardTool extends Configured implements Tool {
"\t" + Import.NAME + " - " + Import.PURPOSE + "\n" +
"\t" + Diff.NAME + " - " + Diff.PURPOSE + "\n" +
"\t" + Prune.NAME + " - " + Prune.PURPOSE + "\n";
+ private static final String DATA_IN_S3_IS_PRESERVED
+ = "(all data in S3 is preserved)";
abstract public String getUsage();
@@ -100,7 +103,7 @@ public abstract class S3GuardTool extends Configured implements Tool {
* Constructor a S3Guard tool with HDFS configuration.
* @param conf Configuration.
*/
- public S3GuardTool(Configuration conf) {
+ protected S3GuardTool(Configuration conf) {
super(conf);
commandFormat = new CommandFormat(0, Integer.MAX_VALUE);
@@ -253,8 +256,8 @@ public abstract class S3GuardTool extends Configured implements Tool {
}
/**
- * Parse CLI arguments and returns the position arguments. The options are
- * stored in {@link #commandFormat}
+ * Parse CLI arguments and returns the position arguments.
+ * The options are stored in {@link #commandFormat}
*
* @param args command line arguments.
* @return the position arguments from CLI.
@@ -332,7 +335,8 @@ public abstract class S3GuardTool extends Configured implements Tool {
*/
static class Destroy extends S3GuardTool {
private static final String NAME = "destroy";
- public static final String PURPOSE = "destroy metadata repository";
+ public static final String PURPOSE = "destroy Metadata Store data "
+ + DATA_IN_S3_IS_PRESERVED;
private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
"\t" + PURPOSE + "\n\n" +
"Common options:\n" +
@@ -367,8 +371,16 @@ public abstract class S3GuardTool extends Configured implements Tool {
return INVALID_ARGUMENT;
}
- initMetadataStore(false);
- Preconditions.checkState(ms != null, "Metadata store is not initialized");
+ try {
+ initMetadataStore(false);
+ } catch (FileNotFoundException e) {
+ // indication that the table was not found
+ LOG.debug("Failed to bind to store to be destroyed", e);
+ LOG.info("Metadata Store does not exist.");
+ return SUCCESS;
+ }
+
+ Preconditions.checkState(ms != null, "Metadata Store is not initialized");
ms.destroy();
LOG.info("Metadata store is deleted.");
@@ -440,8 +452,9 @@ public abstract class S3GuardTool extends Configured implements Tool {
}
/**
- * Recursively import every path under path
+ * Recursively import every path under path.
* @return number of items inserted into MetadataStore
+ * @throws IOException on I/O errors.
*/
private long importDir(FileStatus status) throws IOException {
Preconditions.checkArgument(status.isDirectory());
@@ -625,7 +638,7 @@ public abstract class S3GuardTool extends Configured implements Tool {
* @param msDir the directory FileStatus obtained from the metadata store.
* @param s3Dir the directory FileStatus obtained from S3.
* @param out the output stream to generate diff results.
- * @throws IOException
+ * @throws IOException on I/O errors.
*/
private void compareDir(FileStatus msDir, FileStatus s3Dir,
PrintStream out) throws IOException {
@@ -676,7 +689,7 @@ public abstract class S3GuardTool extends Configured implements Tool {
*
* @param path the path to be compared.
* @param out the output stream to display results.
- * @throws IOException
+ * @throws IOException on I/O errors.
*/
private void compareRoot(Path path, PrintStream out) throws IOException {
Path qualified = s3a.qualify(path);
@@ -732,7 +745,8 @@ public abstract class S3GuardTool extends Configured implements Tool {
static class Prune extends S3GuardTool {
private static final String NAME = "prune";
public static final String PURPOSE = "truncate older metadata from " +
- "repository";
+ "repository "
+ + DATA_IN_S3_IS_PRESERVED;
private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
"\t" + PURPOSE + "\n\n" +
"Common options:\n" +
@@ -830,8 +844,8 @@ public abstract class S3GuardTool extends Configured implements Tool {
private static void printHelp() {
if (cmd == null) {
System.err.println("Usage: hadoop " + USAGE);
- System.err.println("\tperform metadata store " +
- "administrative commands for s3a filesystem.");
+ System.err.println("\tperform S3Guard metadata store " +
+ "administrative commands.");
} else {
System.err.println("Usage: hadoop " + cmd.getUsage());
}
@@ -881,7 +895,11 @@ public abstract class S3GuardTool extends Configured implements Tool {
return ToolRunner.run(conf, cmd, otherArgs);
}
- public static void main(String[] args) throws Exception {
+ /**
+ * Main entry point. Calls {@code System.exit()} on all execution paths.
+ * @param args argument list
+ */
+ public static void main(String[] args) {
try {
int ret = run(args, new Configuration());
System.exit(ret);
@@ -889,7 +907,7 @@ public abstract class S3GuardTool extends Configured implements Tool {
System.err.println(e.getMessage());
printHelp();
System.exit(INVALID_ARGUMENT);
- } catch (Exception e) {
+ } catch (Throwable e) {
e.printStackTrace(System.err);
System.exit(ERROR);
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/package-info.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/package-info.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/package-info.java
index 34f36b8..d430315 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/package-info.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/package-info.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
index 0347b2a..b8d37c6 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
@@ -1553,7 +1553,7 @@ for `fs.s3a.server-side-encryption-algorithm` is `AES256`.
SSE-KMS is where the user specifies a Customer Master Key(CMK) that is used to
encrypt the objects. The user may specify a specific CMK or leave the
-`fs.s3a.server-side-encryption-key` empty to use the default auto-generated key
+`fs.s3a.server-side-encryption.key` empty to use the default auto-generated key
in AWS IAM. Each CMK configured in AWS IAM is region specific, and cannot be
used in an S3 bucket in a different region. There can also be policies
assigned to the CMK that prohibit or restrict its use for users causing S3A
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org