Posted to common-commits@hadoop.apache.org by st...@apache.org on 2017/09/29 10:29:42 UTC
[5/8] hadoop git commit: HADOOP-13345 S3Guard: Improved Consistency
for S3A. Contributed by: Chris Nauroth, Aaron Fabbri, Mingliang Liu,
Lei (Eddy) Xu, Sean Mackrory, Steve Loughran and others.
http://git-wip-us.apache.org/repos/asf/hadoop/blob/a1afc6aa/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java
new file mode 100644
index 0000000..8515bfb
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java
@@ -0,0 +1,304 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import java.io.IOException;
+import java.net.URI;
+import java.util.Arrays;
+import java.util.Collection;
+
+import com.amazonaws.services.dynamodbv2.document.Item;
+import com.amazonaws.services.dynamodbv2.document.KeyAttribute;
+import com.amazonaws.services.dynamodbv2.document.PrimaryKey;
+import com.amazonaws.services.dynamodbv2.model.AttributeDefinition;
+import com.amazonaws.services.dynamodbv2.model.KeySchemaElement;
+import com.amazonaws.services.dynamodbv2.model.KeyType;
+import com.amazonaws.services.dynamodbv2.model.ScalarAttributeType;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.Constants;
+import org.apache.hadoop.fs.s3a.Tristate;
+
+/**
+ * Defines methods for translating between domain model objects and their
+ * representations in the DynamoDB schema.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+final class PathMetadataDynamoDBTranslation {
+
+ /** The HASH key name of each item. */
+ @VisibleForTesting
+ static final String PARENT = "parent";
+ /** The RANGE key name of each item. */
+ @VisibleForTesting
+ static final String CHILD = "child";
+ @VisibleForTesting
+ static final String IS_DIR = "is_dir";
+ @VisibleForTesting
+ static final String MOD_TIME = "mod_time";
+ @VisibleForTesting
+ static final String FILE_LENGTH = "file_length";
+ @VisibleForTesting
+ static final String BLOCK_SIZE = "block_size";
+ static final String IS_DELETED = "is_deleted";
+
+ /** Table version field {@value} in version marker item. */
+ @VisibleForTesting
+ static final String TABLE_VERSION = "table_version";
+
+ /** Table creation timestamp field {@value} in version marker item. */
+ @VisibleForTesting
+ static final String TABLE_CREATED = "table_created";
+
+ /** The version marker field is invalid. */
+ static final String E_NOT_VERSION_MARKER = "Not a version marker: ";
+
+ /**
+ * Returns the key schema for the DynamoDB table.
+ *
+ * @return DynamoDB key schema
+ */
+ static Collection<KeySchemaElement> keySchema() {
+ return Arrays.asList(
+ new KeySchemaElement(PARENT, KeyType.HASH),
+ new KeySchemaElement(CHILD, KeyType.RANGE));
+ }
+
+ /**
+ * Returns the attribute definitions for the DynamoDB table.
+ *
+ * @return DynamoDB attribute definitions
+ */
+ static Collection<AttributeDefinition> attributeDefinitions() {
+ return Arrays.asList(
+ new AttributeDefinition(PARENT, ScalarAttributeType.S),
+ new AttributeDefinition(CHILD, ScalarAttributeType.S));
+ }
+
+ /**
+ * Converts a DynamoDB item to a {@link PathMetadata}.
+ *
+ * @param item DynamoDB item to convert
+ * @return {@code item} converted to a {@link PathMetadata}
+ */
+ static PathMetadata itemToPathMetadata(Item item, String username)
+ throws IOException {
+ if (item == null) {
+ return null;
+ }
+
+ String parentStr = item.getString(PARENT);
+ Preconditions.checkNotNull(parentStr, "No parent entry in item %s", item);
+ String childStr = item.getString(CHILD);
+ Preconditions.checkNotNull(childStr, "No child entry in item %s", item);
+
+ // Skip table version markers, which are the only non-absolute paths stored.
+ Path rawPath = new Path(parentStr, childStr);
+ if (!rawPath.isAbsoluteAndSchemeAuthorityNull()) {
+ return null;
+ }
+
+ Path parent = new Path(Constants.FS_S3A + ":/" + parentStr + "/");
+ Path path = new Path(parent, childStr);
+
+ boolean isDir = item.hasAttribute(IS_DIR) && item.getBoolean(IS_DIR);
+ final FileStatus fileStatus;
+ if (isDir) {
+ fileStatus = DynamoDBMetadataStore.makeDirStatus(path, username);
+ } else {
+ long len = item.hasAttribute(FILE_LENGTH) ? item.getLong(FILE_LENGTH) : 0;
+ long modTime = item.hasAttribute(MOD_TIME) ? item.getLong(MOD_TIME) : 0;
+ long block = item.hasAttribute(BLOCK_SIZE) ? item.getLong(BLOCK_SIZE) : 0;
+ fileStatus = new FileStatus(len, false, 1, block, modTime, 0, null,
+ username, username, path);
+ }
+ boolean isDeleted =
+ item.hasAttribute(IS_DELETED) && item.getBoolean(IS_DELETED);
+
+ return new PathMetadata(fileStatus, Tristate.UNKNOWN, isDeleted);
+ }
+
+ /**
+ * Converts a {@link PathMetadata} to a DynamoDB item.
+ *
+ * @param meta {@link PathMetadata} to convert
+ * @return {@code meta} converted to DynamoDB item
+ */
+ static Item pathMetadataToItem(PathMetadata meta) {
+ Preconditions.checkNotNull(meta);
+ final FileStatus status = meta.getFileStatus();
+ final Item item = new Item().withPrimaryKey(pathToKey(status.getPath()));
+ if (status.isDirectory()) {
+ item.withBoolean(IS_DIR, true);
+ } else {
+ item.withLong(FILE_LENGTH, status.getLen())
+ .withLong(MOD_TIME, status.getModificationTime())
+ .withLong(BLOCK_SIZE, status.getBlockSize());
+ }
+ item.withBoolean(IS_DELETED, meta.isDeleted());
+ return item;
+ }
+
+ /**
+ * The version marker has a primary key whose PARENT is {@code name};
+ * this MUST NOT be a value which represents an absolute path.
+ * @param name name of the version marker
+ * @param version version number
+ * @param timestamp creation timestamp
+ * @return an item representing a version marker.
+ */
+ static Item createVersionMarker(String name, int version, long timestamp) {
+ return new Item().withPrimaryKey(createVersionMarkerPrimaryKey(name))
+ .withInt(TABLE_VERSION, version)
+ .withLong(TABLE_CREATED, timestamp);
+ }
+
+ /**
+ * Create the primary key of the version marker.
+ * @param name key name
+ * @return the key to use when registering or resolving version markers
+ */
+ static PrimaryKey createVersionMarkerPrimaryKey(String name) {
+ return new PrimaryKey(PARENT, name, CHILD, name);
+ }
+
+ /**
+ * Extract the version from a version marker item.
+ * @param marker version marker item
+ * @return the extracted version field
+ * @throws IOException if the item is not a version marker
+ */
+ static int extractVersionFromMarker(Item marker) throws IOException {
+ if (marker.hasAttribute(TABLE_VERSION)) {
+ return marker.getInt(TABLE_VERSION);
+ } else {
+ throw new IOException(E_NOT_VERSION_MARKER + marker);
+ }
+ }
+
+ /**
+ * Extract the creation time, if present.
+ * @param marker version marker item
+ * @return the creation time, or null
+ * @throws IOException if the item is not a version marker
+ */
+ static Long extractCreationTimeFromMarker(Item marker) throws IOException {
+ if (marker.hasAttribute(TABLE_CREATED)) {
+ return marker.getLong(TABLE_CREATED);
+ } else {
+ return null;
+ }
+ }
+
+ /**
+ * Converts a collection of {@link PathMetadata} to a collection of DynamoDB items.
+ *
+ * @see #pathMetadataToItem(PathMetadata)
+ */
+ static Item[] pathMetadataToItem(Collection<PathMetadata> metas) {
+ if (metas == null) {
+ return null;
+ }
+
+ final Item[] items = new Item[metas.size()];
+ int i = 0;
+ for (PathMetadata meta : metas) {
+ items[i++] = pathMetadataToItem(meta);
+ }
+ return items;
+ }
+
+ /**
+ * Converts a {@link Path} to a DynamoDB equality condition on that path as
+ * parent, suitable for querying all direct children of the path.
+ *
+ * @param path the path; must not be null
+ * @return DynamoDB equality condition on {@code path} as parent
+ */
+ static KeyAttribute pathToParentKeyAttribute(Path path) {
+ return new KeyAttribute(PARENT, pathToParentKey(path));
+ }
+
+ /**
+ * e.g. {@code pathToParentKey(s3a://bucket/path/a) -> /bucket/path/a}
+ * @param path path to convert
+ * @return string for parent key
+ */
+ static String pathToParentKey(Path path) {
+ Preconditions.checkNotNull(path);
+ Preconditions.checkArgument(path.isUriPathAbsolute(), "Path not absolute");
+ URI uri = path.toUri();
+ String bucket = uri.getHost();
+ Preconditions.checkArgument(!StringUtils.isEmpty(bucket),
+ "Path missing bucket");
+ String pKey = "/" + bucket + uri.getPath();
+
+ // Strip trailing slash
+ if (pKey.endsWith("/")) {
+ pKey = pKey.substring(0, pKey.length() - 1);
+ }
+ return pKey;
+ }
+
+ /**
+ * Converts a {@link Path} to a DynamoDB key, suitable for getting the item
+ * matching the path.
+ *
+ * @param path the path; must not be null
+ * @return DynamoDB key for item matching {@code path}
+ */
+ static PrimaryKey pathToKey(Path path) {
+ Preconditions.checkArgument(!path.isRoot(),
+ "Root path is not mapped to any PrimaryKey");
+ return new PrimaryKey(PARENT, pathToParentKey(path.getParent()), CHILD,
+ path.getName());
+ }
+
+ /**
+ * Converts a collection of {@link Path} to a collection of DynamoDB keys.
+ *
+ * @see #pathToKey(Path)
+ */
+ static PrimaryKey[] pathToKey(Collection<Path> paths) {
+ if (paths == null) {
+ return null;
+ }
+
+ final PrimaryKey[] keys = new PrimaryKey[paths.size()];
+ int i = 0;
+ for (Path p : paths) {
+ keys[i++] = pathToKey(p);
+ }
+ return keys;
+ }
+
+ /**
+ * There is no need to instantiate this class.
+ */
+ private PathMetadataDynamoDBTranslation() {
+ }
+
+}
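
As a usage sketch (not part of the patch; the helpers are package-private, so
this assumes a caller in org.apache.hadoop.fs.s3a.s3guard, and the bucket,
path, and username below are illustrative), the translation round-trips
like so:

    FileStatus status = new FileStatus(1024, false, 1, 512,
        System.currentTimeMillis(), new Path("s3a://example-bucket/dir/file"));
    // Serialize to a DynamoDB item keyed on (parent, child)...
    Item item = PathMetadataDynamoDBTranslation.pathMetadataToItem(
        new PathMetadata(status));
    // ...and reconstruct; the username is passed in because it is not stored.
    PathMetadata restored =
        PathMetadataDynamoDBTranslation.itemToPathMetadata(item, "hadoop");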
http://git-wip-us.apache.org/repos/asf/hadoop/blob/a1afc6aa/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java
new file mode 100644
index 0000000..7e4aec1
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java
@@ -0,0 +1,463 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import java.io.IOException;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Set;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.S3AFileStatus;
+import org.apache.hadoop.fs.s3a.S3AInstrumentation;
+import org.apache.hadoop.fs.s3a.Tristate;
+import org.apache.hadoop.util.ReflectionUtils;
+
+import static org.apache.hadoop.fs.s3a.Constants.S3_METADATA_STORE_IMPL;
+import static org.apache.hadoop.fs.s3a.Statistic.S3GUARD_METADATASTORE_PUT_PATH_LATENCY;
+import static org.apache.hadoop.fs.s3a.Statistic.S3GUARD_METADATASTORE_PUT_PATH_REQUEST;
+import static org.apache.hadoop.fs.s3a.S3AUtils.createUploadFileStatus;
+
+/**
+ * Logic for integrating MetadataStore with S3A.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+public final class S3Guard {
+ private static final Logger LOG = LoggerFactory.getLogger(S3Guard.class);
+
+ @InterfaceAudience.Private
+ @InterfaceStability.Unstable
+ @VisibleForTesting
+ public static final String S3GUARD_DDB_CLIENT_FACTORY_IMPL =
+ "fs.s3a.s3guard.ddb.client.factory.impl";
+
+ static final Class<? extends DynamoDBClientFactory>
+ S3GUARD_DDB_CLIENT_FACTORY_IMPL_DEFAULT =
+ DynamoDBClientFactory.DefaultDynamoDBClientFactory.class;
+ private static final FileStatus[] EMPTY_LISTING = new FileStatus[0];
+
+ // Utility class. All static functions.
+ private S3Guard() { }
+
+ /* Utility functions. */
+
+ /**
+ * Create a new instance of the configured MetadataStore.
+ * The returned MetadataStore will have been initialized via
+ * {@link MetadataStore#initialize(FileSystem)} by this function before
+ * returning it. Callers must clean up by calling
+ * {@link MetadataStore#close()} when done using the MetadataStore.
+ *
+ * @param fs FileSystem whose Configuration specifies which
+ * implementation to use.
+ * @return Reference to new MetadataStore.
+ * @throws IOException if the metadata store cannot be instantiated
+ */
+ public static MetadataStore getMetadataStore(FileSystem fs)
+ throws IOException {
+ Preconditions.checkNotNull(fs);
+ Configuration conf = fs.getConf();
+ Preconditions.checkNotNull(conf);
+ MetadataStore msInstance;
+ try {
+ Class<? extends MetadataStore> msClass = getMetadataStoreClass(conf);
+ msInstance = ReflectionUtils.newInstance(msClass, conf);
+ LOG.debug("Using {} metadata store for {} filesystem",
+ msClass.getSimpleName(), fs.getScheme());
+ msInstance.initialize(fs);
+ return msInstance;
+ } catch (RuntimeException | IOException e) {
+ String message = "Failed to instantiate metadata store " +
+ conf.get(S3_METADATA_STORE_IMPL)
+ + " defined in " + S3_METADATA_STORE_IMPL
+ + ": " + e;
+ LOG.error(message, e);
+ if (e instanceof IOException) {
+ throw e;
+ } else {
+ throw new IOException(message, e);
+ }
+ }
+ }
+
+ private static Class<? extends MetadataStore> getMetadataStoreClass(
+ Configuration conf) {
+ if (conf == null) {
+ return NullMetadataStore.class;
+ }
+
+ return conf.getClass(S3_METADATA_STORE_IMPL, NullMetadataStore.class,
+ MetadataStore.class);
+ }
+
+
+ /**
+ * Helper function which puts a given S3AFileStatus into the MetadataStore and
+ * returns the same S3AFileStatus. Instrumentation monitors the put operation.
+ * @param ms MetadataStore to {@code put()} into.
+ * @param status status to store
+ * @param instrumentation instrumentation of the s3a file system
+ * @return The same status as passed in
+ * @throws IOException if metadata store update failed
+ */
+ public static S3AFileStatus putAndReturn(MetadataStore ms,
+ S3AFileStatus status,
+ S3AInstrumentation instrumentation) throws IOException {
+ long startTimeNano = System.nanoTime();
+ ms.put(new PathMetadata(status));
+ instrumentation.addValueToQuantiles(S3GUARD_METADATASTORE_PUT_PATH_LATENCY,
+ (System.nanoTime() - startTimeNano));
+ instrumentation.incrementCounter(S3GUARD_METADATASTORE_PUT_PATH_REQUEST, 1);
+ return status;
+ }
+
+ /**
+ * Convert the data of a directory listing to an array of {@link FileStatus}
+ * entries. Tombstones are filtered out at this point. If the listing is null
+ * an empty array is returned.
+ * @param dirMeta directory listing - may be null
+ * @return a possibly-empty array of file status entries
+ */
+ public static FileStatus[] dirMetaToStatuses(DirListingMetadata dirMeta) {
+ if (dirMeta == null) {
+ return EMPTY_LISTING;
+ }
+
+ Collection<PathMetadata> listing = dirMeta.getListing();
+ List<FileStatus> statuses = new ArrayList<>();
+
+ for (PathMetadata pm : listing) {
+ if (!pm.isDeleted()) {
+ statuses.add(pm.getFileStatus());
+ }
+ }
+
+ return statuses.toArray(new FileStatus[0]);
+ }
+
+ /**
+ * Given directory listing metadata from both the backing store and the
+ * MetadataStore, merge the two sources of truth to create a consistent
+ * view of the current directory contents, which can be returned to clients.
+ *
+ * Also update the MetadataStore to reflect the resulting directory listing.
+ *
+ * @param ms MetadataStore to use.
+ * @param path path to directory
+ * @param backingStatuses Directory listing from the backing store.
+ * @param dirMeta Directory listing from MetadataStore. May be null.
+ * @param isAuthoritative State of authoritative mode
+ * @return Final result of directory listing.
+ * @throws IOException if metadata store update failed
+ */
+ public static FileStatus[] dirListingUnion(MetadataStore ms, Path path,
+ List<FileStatus> backingStatuses, DirListingMetadata dirMeta,
+ boolean isAuthoritative) throws IOException {
+
+ // Fast-path for NullMetadataStore
+ if (isNullMetadataStore(ms)) {
+ return backingStatuses.toArray(new FileStatus[backingStatuses.size()]);
+ }
+
+ assertQualified(path);
+
+ if (dirMeta == null) {
+ // The metadataStore had zero state for this directory
+ dirMeta = new DirListingMetadata(path, DirListingMetadata.EMPTY_DIR,
+ false);
+ }
+
+ Set<Path> deleted = dirMeta.listTombstones();
+
+ // Since we treat the MetadataStore as a "fresher" or "consistent" view
+ // of metadata, we always use its metadata first.
+
+ // Since the authoritative case is already handled outside this function,
+ // we will basically start with the set of directory entries in the
+ // DirListingMetadata, and add any that only exist in the backingStatuses.
+
+ boolean changed = false;
+ for (FileStatus s : backingStatuses) {
+ if (deleted.contains(s.getPath())) {
+ continue;
+ }
+
+ // Minor race condition here. Multiple threads could add to this
+ // mutable DirListingMetadata. Since it is backed by a
+ // ConcurrentHashMap, the last put() wins.
+ // More concerning is two threads racing on listStatus() and delete().
+ // Any FileSystem has similar race conditions, but we could persist
+ // a stale entry longer. We could expose an atomic
+ // DirListingMetadata#putIfNotPresent()
+ boolean updated = dirMeta.put(s);
+ changed = changed || updated;
+ }
+
+ if (changed && isAuthoritative) {
+ dirMeta.setAuthoritative(true); // This is the full directory contents
+ ms.put(dirMeta);
+ }
+
+ return dirMetaToStatuses(dirMeta);
+ }
+
+ /**
+ * Although NullMetadataStore does nothing, callers may wish to avoid work
+ * (fast path) when the NullMetadataStore is in use.
+ * @param ms The MetadataStore to test
+ * @return true iff the MetadataStore is the null, or no-op, implementation.
+ */
+ public static boolean isNullMetadataStore(MetadataStore ms) {
+ return (ms instanceof NullMetadataStore);
+ }
+
+ /**
+ * Update MetadataStore to reflect creation of the given directories.
+ *
+ * If an IOException is raised while trying to update the entry, this
+ * operation catches the exception and returns.
+ * @param ms MetadataStore to update.
+ * @param dirs null, or an ordered list of directories from leaf to root.
+ * E.g. if /a/ exists, and mkdirs(/a/b/c/d) is called, this
+ * list will contain [/a/b/c/d, /a/b/c, /a/b]. /a/b/c/d is
+ * an empty dir, and the other dirs only contain their child
+ * dir.
+ * @param owner Hadoop user name.
+ * @param authoritative Whether to mark new directories as authoritative.
+ */
+ public static void makeDirsOrdered(MetadataStore ms, List<Path> dirs,
+ String owner, boolean authoritative) {
+ if (dirs == null) {
+ return;
+ }
+
+ /* We discussed atomicity of this implementation.
+ * The concern is that multiple clients could race to write different
+ * cached directories to the MetadataStore. Two solutions are proposed:
+ * 1. Move mkdirs() into MetadataStore interface and let implementations
+ * ensure they are atomic.
+ * 2. Specify that the semantics of MetadataStore#putListStatus() are
+ * always additive. That is, if MetadataStore has listStatus() state
+ * for /a/b that contains [/a/b/file0, /a/b/file1], and we then call
+ * putListStatus(/a/b -> [/a/b/file2, /a/b/file3], isAuthoritative=true),
+ * then we will end up with final state of
+ * [/a/b/file0, /a/b/file1, /a/b/file2, /a/b/file3], isAuthoritative =
+ * true
+ */
+ FileStatus prevStatus = null;
+
+ // Use new batched put to reduce round trips.
+ List<PathMetadata> pathMetas = new ArrayList<>(dirs.size());
+
+ try {
+ // Iterate from leaf to root
+ for (int i = 0; i < dirs.size(); i++) {
+ boolean isLeaf = (prevStatus == null);
+ Path f = dirs.get(i);
+ assertQualified(f);
+ FileStatus status =
+ createUploadFileStatus(f, true, 0, 0, owner);
+
+ // We only need to put a DirListingMetadata if we are setting
+ // authoritative bit
+ DirListingMetadata dirMeta = null;
+ if (authoritative) {
+ Collection<PathMetadata> children;
+ if (isLeaf) {
+ children = DirListingMetadata.EMPTY_DIR;
+ } else {
+ children = new ArrayList<>(1);
+ children.add(new PathMetadata(prevStatus));
+ }
+ dirMeta = new DirListingMetadata(f, children, authoritative);
+ ms.put(dirMeta);
+ }
+
+ pathMetas.add(new PathMetadata(status));
+ prevStatus = status;
+ }
+
+ // Batched put
+ ms.put(pathMetas);
+ } catch (IOException ioe) {
+ LOG.error("MetadataStore#put() failure:", ioe);
+ }
+ }
+
+ /**
+ * Helper function that records the move of directory paths, adding
+ * resulting metadata to the supplied lists.
+ * Does not store in MetadataStore.
+ * @param ms MetadataStore, used to make this a no-op, when it is
+ * NullMetadataStore.
+ * @param srcPaths stores the source path here
+ * @param dstMetas stores destination metadata here
+ * @param srcPath source path to store
+ * @param dstPath destination path to store
+ * @param owner file owner to use in created records
+ */
+ public static void addMoveDir(MetadataStore ms, Collection<Path> srcPaths,
+ Collection<PathMetadata> dstMetas, Path srcPath, Path dstPath,
+ String owner) {
+ if (isNullMetadataStore(ms)) {
+ return;
+ }
+ assertQualified(srcPath, dstPath);
+
+ FileStatus dstStatus = createUploadFileStatus(dstPath, true, 0, 0, owner);
+ addMoveStatus(srcPaths, dstMetas, srcPath, dstStatus);
+ }
+
+ /**
+ * Like {@link #addMoveDir(MetadataStore, Collection, Collection, Path,
+ * Path, String)}, but for files.
+ * @param ms MetadataStore, used to make this a no-op, when it is
+ * NullMetadataStore.
+ * @param srcPaths stores the source path here
+ * @param dstMetas stores destination metadata here
+ * @param srcPath source path to store
+ * @param dstPath destination path to store
+ * @param size length of file moved
+ * @param blockSize blocksize to associate with destination file
+ * @param owner file owner to use in created records
+ */
+ public static void addMoveFile(MetadataStore ms, Collection<Path> srcPaths,
+ Collection<PathMetadata> dstMetas, Path srcPath, Path dstPath,
+ long size, long blockSize, String owner) {
+ if (isNullMetadataStore(ms)) {
+ return;
+ }
+ assertQualified(srcPath, dstPath);
+ FileStatus dstStatus = createUploadFileStatus(dstPath, false,
+ size, blockSize, owner);
+ addMoveStatus(srcPaths, dstMetas, srcPath, dstStatus);
+ }
+
+ /**
+ * Helper method that records the move of all ancestors of a path.
+ *
+ * In S3A, an optimization deletes unnecessary fake directory objects once a
+ * directory becomes non-empty. In that case, when a nested child moves, S3A
+ * does not list, and thus does not move, all of its ancestors (up to the
+ * source root), so we take care of those inferred directories explicitly.
+ *
+ * Like {@link #addMoveFile} and {@link #addMoveDir}, this method adds the
+ * resulting metadata to the supplied lists. It does not store in MetadataStore.
+ *
+ * @param ms MetadataStore, no-op if it is NullMetadataStore
+ * @param srcPaths stores the source path here
+ * @param dstMetas stores destination metadata here
+ * @param srcRoot source root up to which (exclusive) ancestors are added
+ * @param srcPath source path of the child whose ancestors are added
+ * @param dstPath destination path of the child whose ancestors are added
+ * @param owner Hadoop user name
+ */
+ public static void addMoveAncestors(MetadataStore ms,
+ Collection<Path> srcPaths, Collection<PathMetadata> dstMetas,
+ Path srcRoot, Path srcPath, Path dstPath, String owner) {
+ if (isNullMetadataStore(ms)) {
+ return;
+ }
+
+ assertQualified(srcRoot, srcPath, dstPath);
+
+ if (srcPath.equals(srcRoot)) {
+ LOG.debug("Skip moving ancestors of source root directory {}", srcRoot);
+ return;
+ }
+
+ Path parentSrc = srcPath.getParent();
+ Path parentDst = dstPath.getParent();
+ while (parentSrc != null
+ && !parentSrc.isRoot()
+ && !parentSrc.equals(srcRoot)
+ && !srcPaths.contains(parentSrc)) {
+ LOG.debug("Renaming non-listed parent {} to {}", parentSrc, parentDst);
+ S3Guard.addMoveDir(ms, srcPaths, dstMetas, parentSrc, parentDst, owner);
+ parentSrc = parentSrc.getParent();
+ parentDst = parentDst.getParent();
+ }
+ }
+
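+ /**
+ * Add ancestor directory entries of the given path to the store,
+ * walking up until an existing (non-deleted) entry is found.
+ * @param metadataStore store to update
+ * @param qualifiedPath fully qualified path whose ancestors are added
+ * @param username owner to use in created directory entries
+ * @throws IOException failure to update the store
+ */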
+ public static void addAncestors(MetadataStore metadataStore,
+ Path qualifiedPath, String username) throws IOException {
+ Collection<PathMetadata> newDirs = new ArrayList<>();
+ Path parent = qualifiedPath.getParent();
+ while (!parent.isRoot()) {
+ PathMetadata directory = metadataStore.get(parent);
+ if (directory == null || directory.isDeleted()) {
+ FileStatus status = new FileStatus(0, true, 1, 0, 0, 0, null, username,
+ null, parent);
+ PathMetadata meta = new PathMetadata(status, Tristate.FALSE, false);
+ newDirs.add(meta);
+ } else {
+ break;
+ }
+ parent = parent.getParent();
+ }
+ metadataStore.put(newDirs);
+ }
+
+ private static void addMoveStatus(Collection<Path> srcPaths,
+ Collection<PathMetadata> dstMetas,
+ Path srcPath,
+ FileStatus dstStatus) {
+ srcPaths.add(srcPath);
+ dstMetas.add(new PathMetadata(dstStatus));
+ }
+
+ /**
+ * Assert that the path is qualified with a host and scheme.
+ * @param p path to check
+ * @throws NullPointerException if the path is missing a host or scheme
+ */
+ public static void assertQualified(Path p) {
+ URI uri = p.toUri();
+ // Paths must include bucket in case MetadataStore is shared between
+ // multiple S3AFileSystem instances
+ Preconditions.checkNotNull(uri.getHost(), "Null host in " + uri);
+
+ // This should never fail, but is retained for completeness.
+ Preconditions.checkNotNull(uri.getScheme(), "Null scheme in " + uri);
+ }
+
+ /**
+ * Assert that all paths are valid.
+ * @param paths paths to check
+ * @throws NullPointerException if any path is missing a host or scheme
+ */
+ public static void assertQualified(Path...paths) {
+ for (Path path : paths) {
+ assertQualified(path);
+ }
+ }
+}
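
A minimal sketch of wiring this up (not part of the patch; the bucket is
illustrative, credentials are assumed to be configured, and LocalMetadataStore
is chosen only to keep the example self-contained):

    Configuration conf = new Configuration();
    conf.setClass(S3_METADATA_STORE_IMPL,
        LocalMetadataStore.class, MetadataStore.class);
    FileSystem fs = FileSystem.get(URI.create("s3a://example-bucket/"), conf);

    MetadataStore ms = S3Guard.getMetadataStore(fs); // initialized on return
    try {
      // Tombstones are filtered out; a null listing yields an empty array.
      FileStatus[] listing = S3Guard.dirMetaToStatuses(
          ms.listChildren(new Path("s3a://example-bucket/data")));
    } finally {
      ms.close(); // callers must clean up, per the javadoc
    }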
http://git-wip-us.apache.org/repos/asf/hadoop/blob/a1afc6aa/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
new file mode 100644
index 0000000..be271ae
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
@@ -0,0 +1,924 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.Constants;
+import org.apache.hadoop.fs.s3a.S3AFileStatus;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.fs.shell.CommandFormat;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import static org.apache.hadoop.fs.s3a.Constants.*;
+
+/**
+ * CLI to manage S3Guard Metadata Store.
+ */
+public abstract class S3GuardTool extends Configured implements Tool {
+ private static final Logger LOG = LoggerFactory.getLogger(S3GuardTool.class);
+
+ private static final String NAME = "s3guard";
+ private static final String COMMON_USAGE =
+ "When possible and not overridden by more specific options, metadata\n" +
+ "repository information will be inferred from the S3A URL (if provided)" +
+ "\n\n" +
+ "Generic options supported are:\n" +
+ " -conf <config file> - specify an application configuration file\n" +
+ " -D <property=value> - define a value for a given property\n";
+
+ private static final String USAGE = NAME +
+ " [command] [OPTIONS] [s3a://BUCKET]\n\n" +
+ "Commands: \n" +
+ "\t" + Init.NAME + " - " + Init.PURPOSE + "\n" +
+ "\t" + Destroy.NAME + " - " + Destroy.PURPOSE + "\n" +
+ "\t" + Import.NAME + " - " + Import.PURPOSE + "\n" +
+ "\t" + Diff.NAME + " - " + Diff.PURPOSE + "\n" +
+ "\t" + Prune.NAME + " - " + Prune.PURPOSE + "\n";
+ private static final String DATA_IN_S3_IS_PRESERVED
+ = "(all data in S3 is preserved";
+
+ public abstract String getUsage();
+
+ // Exit codes
+ static final int SUCCESS = 0;
+ static final int INVALID_ARGUMENT = 1;
+ static final int ERROR = 99;
+
+ private S3AFileSystem filesystem;
+ private MetadataStore store;
+ private final CommandFormat commandFormat;
+
+ private static final String META_FLAG = "meta";
+ private static final String DAYS_FLAG = "days";
+ private static final String HOURS_FLAG = "hours";
+ private static final String MINUTES_FLAG = "minutes";
+ private static final String SECONDS_FLAG = "seconds";
+
+ private static final String REGION_FLAG = "region";
+ private static final String READ_FLAG = "read";
+ private static final String WRITE_FLAG = "write";
+
+ /**
+ * Construct an S3Guard tool with the given Hadoop configuration.
+ * @param conf Configuration.
+ */
+ protected S3GuardTool(Configuration conf) {
+ super(conf);
+
+ commandFormat = new CommandFormat(0, Integer.MAX_VALUE);
+ // For metadata store URI
+ commandFormat.addOptionWithValue(META_FLAG);
+ // DDB region.
+ commandFormat.addOptionWithValue(REGION_FLAG);
+ }
+
+ /**
+ * Return sub-command name.
+ */
+ abstract String getName();
+
+ /**
+ * Parse the DynamoDB region from either the -region option or an S3 path.
+ *
+ * This function should only be called from {@link Init} or
+ * {@link Destroy}.
+ *
+ * @param paths remaining parameters from CLI.
+ * @return false for invalid parameters.
+ * @throws IOException on I/O errors.
+ */
+ boolean parseDynamoDBRegion(List<String> paths) throws IOException {
+ Configuration conf = getConf();
+ String fromCli = getCommandFormat().getOptValue(REGION_FLAG);
+ String fromConf = conf.get(S3GUARD_DDB_REGION_KEY);
+ boolean hasS3Path = !paths.isEmpty();
+
+ if (fromCli != null) {
+ if (fromCli.isEmpty()) {
+ System.err.println("No region provided with -" + REGION_FLAG + " flag");
+ return false;
+ }
+ if (hasS3Path) {
+ System.err.println("Providing both an S3 path and the -" + REGION_FLAG
+ + " flag is not supported. If you need to specify a different "
+ + "region than the S3 bucket, configure " + S3GUARD_DDB_REGION_KEY);
+ return false;
+ }
+ conf.set(S3GUARD_DDB_REGION_KEY, fromCli);
+ return true;
+ }
+
+ if (fromConf != null) {
+ if (fromConf.isEmpty()) {
+ System.err.printf("No region provided with config %s, %n",
+ S3GUARD_DDB_REGION_KEY);
+ return false;
+ }
+ return true;
+ }
+
+ if (hasS3Path) {
+ String s3Path = paths.get(0);
+ initS3AFileSystem(s3Path);
+ return true;
+ }
+
+ System.err.println("No region found from -" + REGION_FLAG + " flag, " +
+ "config, or S3 bucket");
+ return false;
+ }
+
+ /**
+ * Create and initialize the metadata store specified by the command line
+ * option or the configuration.
+ *
+ * @param forceCreate override the auto-creation setting to true.
+ * @return an initialized metadata store.
+ */
+ MetadataStore initMetadataStore(boolean forceCreate) throws IOException {
+ if (getStore() != null) {
+ return getStore();
+ }
+ Configuration conf;
+ if (filesystem == null) {
+ conf = getConf();
+ } else {
+ conf = filesystem.getConf();
+ }
+ String metaURI = getCommandFormat().getOptValue(META_FLAG);
+ if (metaURI != null && !metaURI.isEmpty()) {
+ URI uri = URI.create(metaURI);
+ LOG.info("create metadata store: {}", uri + " scheme: "
+ + uri.getScheme());
+ switch (uri.getScheme().toLowerCase(Locale.ENGLISH)) {
+ case "local":
+ setStore(new LocalMetadataStore());
+ break;
+ case "dynamodb":
+ setStore(new DynamoDBMetadataStore());
+ conf.set(S3GUARD_DDB_TABLE_NAME_KEY, uri.getAuthority());
+ if (forceCreate) {
+ conf.setBoolean(S3GUARD_DDB_TABLE_CREATE_KEY, true);
+ }
+ break;
+ default:
+ throw new IOException(
+ String.format("Metadata store %s is not supported", uri));
+ }
+ } else {
+ // No metadata store URI was given on the CLI; fall back to the default
+ // store, DynamoDB.
+ setStore(new DynamoDBMetadataStore());
+ if (forceCreate) {
+ conf.setBoolean(S3GUARD_DDB_TABLE_CREATE_KEY, true);
+ }
+ }
+
+ if (filesystem == null) {
+ getStore().initialize(conf);
+ } else {
+ getStore().initialize(filesystem);
+ }
+ LOG.info("Metadata store {} is initialized.", getStore());
+ return getStore();
+ }
+
+ /**
+ * Initialize S3A FileSystem instance.
+ *
+ * @param path s3a URI
+ * @throws IOException if the URI is invalid or is not an S3A filesystem
+ */
+ void initS3AFileSystem(String path) throws IOException {
+ URI uri;
+ try {
+ uri = new URI(path);
+ } catch (URISyntaxException e) {
+ throw new IOException(e);
+ }
+ // Make sure that S3AFileSystem does not hold an actual MetadataStore
+ // implementation.
+ Configuration conf = getConf();
+ conf.setClass(S3_METADATA_STORE_IMPL, NullMetadataStore.class,
+ MetadataStore.class);
+ FileSystem fs = FileSystem.get(uri, getConf());
+ if (!(fs instanceof S3AFileSystem)) {
+ throw new IOException(
+ String.format("URI %s is not a S3A file system: %s", uri,
+ fs.getClass().getName()));
+ }
+ filesystem = (S3AFileSystem) fs;
+ }
+
+ /**
+ * Parse CLI arguments and return the positional arguments.
+ * The options are stored in {@link #commandFormat}.
+ *
+ * @param args command line arguments.
+ * @return the positional arguments from the CLI.
+ */
+ List<String> parseArgs(String[] args) {
+ return getCommandFormat().parse(args, 1);
+ }
+
+ protected S3AFileSystem getFilesystem() {
+ return filesystem;
+ }
+
+ protected void setFilesystem(S3AFileSystem filesystem) {
+ this.filesystem = filesystem;
+ }
+
+ @VisibleForTesting
+ public MetadataStore getStore() {
+ return store;
+ }
+
+ @VisibleForTesting
+ protected void setStore(MetadataStore store) {
+ Preconditions.checkNotNull(store);
+ this.store = store;
+ }
+
+ protected CommandFormat getCommandFormat() {
+ return commandFormat;
+ }
+
+ /**
+ * Create the metadata store.
+ */
+ static class Init extends S3GuardTool {
+ private static final String NAME = "init";
+ public static final String PURPOSE = "initialize metadata repository";
+ private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
+ "\t" + PURPOSE + "\n\n" +
+ "Common options:\n" +
+ " -" + META_FLAG + " URL - Metadata repository details " +
+ "(implementation-specific)\n" +
+ "\n" +
+ "Amazon DynamoDB-specific options:\n" +
+ " -" + REGION_FLAG + " REGION - Service region for connections\n" +
+ " -" + READ_FLAG + " UNIT - Provisioned read throughput units\n" +
+ " -" + WRITE_FLAG + " UNIT - Provisioned write through put units\n" +
+ "\n" +
+ " URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" +
+ " Specifying both the -" + REGION_FLAG + " option and an S3A path\n" +
+ " is not supported.";
+
+ Init(Configuration conf) {
+ super(conf);
+ // read capacity.
+ getCommandFormat().addOptionWithValue(READ_FLAG);
+ // write capacity.
+ getCommandFormat().addOptionWithValue(WRITE_FLAG);
+ }
+
+ @Override
+ String getName() {
+ return NAME;
+ }
+
+ @Override
+ public String getUsage() {
+ return USAGE;
+ }
+
+ @Override
+ public int run(String[] args) throws IOException {
+ List<String> paths = parseArgs(args);
+
+ String readCap = getCommandFormat().getOptValue(READ_FLAG);
+ if (readCap != null && !readCap.isEmpty()) {
+ int readCapacity = Integer.parseInt(readCap);
+ getConf().setInt(S3GUARD_DDB_TABLE_CAPACITY_READ_KEY, readCapacity);
+ }
+ String writeCap = getCommandFormat().getOptValue(WRITE_FLAG);
+ if (writeCap != null && !writeCap.isEmpty()) {
+ int writeCapacity = Integer.parseInt(writeCap);
+ getConf().setInt(S3GUARD_DDB_TABLE_CAPACITY_WRITE_KEY, writeCapacity);
+ }
+
+ // Validate parameters.
+ if (!parseDynamoDBRegion(paths)) {
+ System.err.println(USAGE);
+ return INVALID_ARGUMENT;
+ }
+ initMetadataStore(true);
+ return SUCCESS;
+ }
+ }
+
+ /**
+ * Destroy a metadata store.
+ */
+ static class Destroy extends S3GuardTool {
+ private static final String NAME = "destroy";
+ public static final String PURPOSE = "destroy Metadata Store data "
+ + DATA_IN_S3_IS_PRESERVED;
+ private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
+ "\t" + PURPOSE + "\n\n" +
+ "Common options:\n" +
+ " -" + META_FLAG + " URL - Metadata repository details " +
+ "(implementation-specific)\n" +
+ "\n" +
+ "Amazon DynamoDB-specific options:\n" +
+ " -" + REGION_FLAG + " REGION - Service region for connections\n" +
+ "\n" +
+ " URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" +
+ " Specifying both the -" + REGION_FLAG + " option and an S3A path\n" +
+ " is not supported.";
+
+ Destroy(Configuration conf) {
+ super(conf);
+ }
+
+ @Override
+ String getName() {
+ return NAME;
+ }
+
+ @Override
+ public String getUsage() {
+ return USAGE;
+ }
+
+ public int run(String[] args) throws IOException {
+ List<String> paths = parseArgs(args);
+ if (!parseDynamoDBRegion(paths)) {
+ System.err.println(USAGE);
+ return INVALID_ARGUMENT;
+ }
+
+ try {
+ initMetadataStore(false);
+ } catch (FileNotFoundException e) {
+ // indication that the table was not found
+ LOG.debug("Failed to bind to store to be destroyed", e);
+ LOG.info("Metadata Store does not exist.");
+ return SUCCESS;
+ }
+
+ Preconditions.checkState(getStore() != null,
+ "Metadata Store is not initialized");
+
+ getStore().destroy();
+ LOG.info("Metadata store is deleted.");
+ return SUCCESS;
+ }
+ }
+
+ /**
+ * Import s3 metadata to the metadata store.
+ */
+ static class Import extends S3GuardTool {
+ private static final String NAME = "import";
+ public static final String PURPOSE = "import metadata from existing S3 " +
+ "data";
+ private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
+ "\t" + PURPOSE + "\n\n" +
+ "Common options:\n" +
+ " -" + META_FLAG + " URL - Metadata repository details " +
+ "(implementation-specific)\n" +
+ "\n" +
+ "Amazon DynamoDB-specific options:\n" +
+ " -" + REGION_FLAG + " REGION - Service region for connections\n" +
+ "\n" +
+ " URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" +
+ " Specifying both the -" + REGION_FLAG + " option and an S3A path\n" +
+ " is not supported.";
+
+ private final Set<Path> dirCache = new HashSet<>();
+
+ Import(Configuration conf) {
+ super(conf);
+ }
+
+ @Override
+ String getName() {
+ return NAME;
+ }
+
+ @Override
+ public String getUsage() {
+ return USAGE;
+ }
+
+ /**
+ * Put parents into the MetadataStore and the dir cache, if not already present.
+ *
+ * @param f the file or an empty directory.
+ * @throws IOException on I/O errors.
+ */
+ private void putParentsIfNotPresent(FileStatus f) throws IOException {
+ Preconditions.checkNotNull(f);
+ Path parent = f.getPath().getParent();
+ while (parent != null) {
+ if (dirCache.contains(parent)) {
+ return;
+ }
+ FileStatus dir = DynamoDBMetadataStore.makeDirStatus(parent,
+ f.getOwner());
+ getStore().put(new PathMetadata(dir));
+ dirCache.add(parent);
+ parent = parent.getParent();
+ }
+ }
+
+ /**
+ * Recursively import every path under path.
+ * @return number of items inserted into MetadataStore
+ * @throws IOException on I/O errors.
+ */
+ private long importDir(FileStatus status) throws IOException {
+ Preconditions.checkArgument(status.isDirectory());
+ RemoteIterator<LocatedFileStatus> it = getFilesystem()
+ .listFilesAndEmptyDirectories(status.getPath(), true);
+ long items = 0;
+
+ while (it.hasNext()) {
+ LocatedFileStatus located = it.next();
+ FileStatus child;
+ if (located.isDirectory()) {
+ child = DynamoDBMetadataStore.makeDirStatus(located.getPath(),
+ located.getOwner());
+ dirCache.add(child.getPath());
+ } else {
+ child = new S3AFileStatus(located.getLen(),
+ located.getModificationTime(),
+ located.getPath(),
+ located.getBlockSize(),
+ located.getOwner());
+ }
+ putParentsIfNotPresent(child);
+ getStore().put(new PathMetadata(child));
+ items++;
+ }
+ return items;
+ }
+
+ @Override
+ public int run(String[] args) throws IOException {
+ List<String> paths = parseArgs(args);
+ if (paths.isEmpty()) {
+ System.err.println(getUsage());
+ return INVALID_ARGUMENT;
+ }
+ String s3Path = paths.get(0);
+ initS3AFileSystem(s3Path);
+
+ URI uri;
+ try {
+ uri = new URI(s3Path);
+ } catch (URISyntaxException e) {
+ throw new IOException(e);
+ }
+ String filePath = uri.getPath();
+ if (filePath.isEmpty()) {
+ // If they specify a naked S3 URI (e.g. s3a://bucket), we'll consider
+ // root to be the path
+ filePath = "/";
+ }
+ Path path = new Path(filePath);
+ FileStatus status = getFilesystem().getFileStatus(path);
+
+ initMetadataStore(false);
+
+ long items = 1;
+ if (status.isFile()) {
+ PathMetadata meta = new PathMetadata(status);
+ getStore().put(meta);
+ } else {
+ items = importDir(status);
+ }
+
+ System.out.printf("Inserted %d items into Metadata Store%n", items);
+
+ return SUCCESS;
+ }
+ }
+
+ /**
+ * Show diffs between the s3 and metadata store.
+ */
+ static class Diff extends S3GuardTool {
+ private static final String NAME = "diff";
+ public static final String PURPOSE = "report on delta between S3 and " +
+ "repository";
+ private static final String USAGE = NAME + " [OPTIONS] s3a://BUCKET\n" +
+ "\t" + PURPOSE + "\n\n" +
+ "Common options:\n" +
+ " -" + META_FLAG + " URL - Metadata repository details " +
+ "(implementation-specific)\n" +
+ "\n" +
+ "Amazon DynamoDB-specific options:\n" +
+ " -" + REGION_FLAG + " REGION - Service region for connections\n" +
+ "\n" +
+ " URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" +
+ " Specifying both the -" + REGION_FLAG + " option and an S3A path\n" +
+ " is not supported.";
+
+ private static final String SEP = "\t";
+ static final String S3_PREFIX = "S3";
+ static final String MS_PREFIX = "MS";
+
+ Diff(Configuration conf) {
+ super(conf);
+ }
+
+ @Override
+ String getName() {
+ return NAME;
+ }
+
+ @Override
+ public String getUsage() {
+ return USAGE;
+ }
+
+ /**
+ * Format a FileStatus for output by the S3Guard diff tool.
+ * @param status the status to print.
+ * @return the string of output.
+ */
+ private static String formatFileStatus(FileStatus status) {
+ return String.format("%s%s%d%s%s",
+ status.isDirectory() ? "D" : "F",
+ SEP,
+ status.getLen(),
+ SEP,
+ status.getPath().toString());
+ }
+
+ /**
+ * Compare the metadata of two FileStatus instances to see if they differ.
+ * @param thisOne first status to compare
+ * @param thatOne second status to compare
+ * @return true if the metadata is not identical
+ */
+ private static boolean differ(FileStatus thisOne, FileStatus thatOne) {
+ Preconditions.checkArgument(!(thisOne == null && thatOne == null));
+ return (thisOne == null || thatOne == null) ||
+ (thisOne.getLen() != thatOne.getLen()) ||
+ (thisOne.isDirectory() != thatOne.isDirectory()) ||
+ (!thisOne.isDirectory() &&
+ thisOne.getModificationTime() != thatOne.getModificationTime());
+ }
+
+ /**
+ * Print difference, if any, between two file statuses to the output stream.
+ *
+ * @param msStatus file status from metadata store.
+ * @param s3Status file status from S3.
+ * @param out output stream.
+ */
+ private static void printDiff(FileStatus msStatus,
+ FileStatus s3Status,
+ PrintStream out) {
+ Preconditions.checkArgument(!(msStatus == null && s3Status == null));
+ if (msStatus != null && s3Status != null) {
+ Preconditions.checkArgument(
+ msStatus.getPath().equals(s3Status.getPath()),
+ String.format("The path from metadata store and s3 are different:" +
+ " ms=%s s3=%s", msStatus.getPath(), s3Status.getPath()));
+ }
+
+ if (differ(msStatus, s3Status)) {
+ if (s3Status != null) {
+ out.printf("%s%s%s%n", S3_PREFIX, SEP, formatFileStatus(s3Status));
+ }
+ if (msStatus != null) {
+ out.printf("%s%s%s%n", MS_PREFIX, SEP, formatFileStatus(msStatus));
+ }
+ }
+ }
+
+ /**
+ * Compare the metadata of the directory with the same path, on S3 and
+ * the metadata store, respectively. If one of them is null, the metadata
+ * of the directory and all of its subdirectories is considered missing
+ * from that source.
+ *
+ * The FileStatus obtained from S3 and the metadata store are passed in to
+ * avoid a round trip fetching the same metadata twice, because they
+ * have already been obtained from listStatus() / listChildren operations.
+ *
+ * @param msDir the directory FileStatus obtained from the metadata store.
+ * @param s3Dir the directory FileStatus obtained from S3.
+ * @param out the output stream to generate diff results.
+ * @throws IOException on I/O errors.
+ */
+ private void compareDir(FileStatus msDir, FileStatus s3Dir,
+ PrintStream out) throws IOException {
+ Preconditions.checkArgument(!(msDir == null && s3Dir == null));
+ if (msDir != null && s3Dir != null) {
+ Preconditions.checkArgument(msDir.getPath().equals(s3Dir.getPath()),
+ String.format("The path from metadata store and s3 are different:" +
+ " ms=%s s3=%s", msDir.getPath(), s3Dir.getPath()));
+ }
+
+ Map<Path, FileStatus> s3Children = new HashMap<>();
+ if (s3Dir != null && s3Dir.isDirectory()) {
+ for (FileStatus status : getFilesystem().listStatus(s3Dir.getPath())) {
+ s3Children.put(status.getPath(), status);
+ }
+ }
+
+ Map<Path, FileStatus> msChildren = new HashMap<>();
+ if (msDir != null && msDir.isDirectory()) {
+ DirListingMetadata dirMeta =
+ getStore().listChildren(msDir.getPath());
+
+ if (dirMeta != null) {
+ for (PathMetadata meta : dirMeta.getListing()) {
+ FileStatus status = meta.getFileStatus();
+ msChildren.put(status.getPath(), status);
+ }
+ }
+ }
+
+ Set<Path> allPaths = new HashSet<>(s3Children.keySet());
+ allPaths.addAll(msChildren.keySet());
+
+ for (Path path : allPaths) {
+ FileStatus s3Status = s3Children.get(path);
+ FileStatus msStatus = msChildren.get(path);
+ printDiff(msStatus, s3Status, out);
+ if ((s3Status != null && s3Status.isDirectory()) ||
+ (msStatus != null && msStatus.isDirectory())) {
+ compareDir(msStatus, s3Status, out);
+ }
+ }
+ out.flush();
+ }
+
+ /**
+ * Compare both metadata store and S3 on the same path.
+ *
+ * @param path the path to be compared.
+ * @param out the output stream to display results.
+ * @throws IOException on I/O errors.
+ */
+ private void compareRoot(Path path, PrintStream out) throws IOException {
+ Path qualified = getFilesystem().qualify(path);
+ FileStatus s3Status = null;
+ try {
+ s3Status = getFilesystem().getFileStatus(qualified);
+ } catch (FileNotFoundException e) {
+ // ignored: the path may exist only in the metadata store
+ }
+ PathMetadata meta = getStore().get(qualified);
+ FileStatus msStatus = (meta != null && !meta.isDeleted()) ?
+ meta.getFileStatus() : null;
+ compareDir(msStatus, s3Status, out);
+ }
+
+ @VisibleForTesting
+ public int run(String[] args, PrintStream out) throws IOException {
+ List<String> paths = parseArgs(args);
+ if (paths.isEmpty()) {
+ out.println(USAGE);
+ return INVALID_ARGUMENT;
+ }
+ String s3Path = paths.get(0);
+ initS3AFileSystem(s3Path);
+ initMetadataStore(true);
+
+ URI uri;
+ try {
+ uri = new URI(s3Path);
+ } catch (URISyntaxException e) {
+ throw new IOException(e);
+ }
+ Path root;
+ if (uri.getPath().isEmpty()) {
+ root = new Path("/");
+ } else {
+ root = new Path(uri.getPath());
+ }
+ root = getFilesystem().qualify(root);
+ compareRoot(root, out);
+ out.flush();
+ return SUCCESS;
+ }
+
+ @Override
+ public int run(String[] args) throws IOException {
+ return run(args, System.out);
+ }
+ }
+
+ /**
+ * Prune metadata that has not been modified recently.
+ */
+ static class Prune extends S3GuardTool {
+ private static final String NAME = "prune";
+ public static final String PURPOSE = "truncate older metadata from " +
+ "repository "
+ + DATA_IN_S3_IS_PRESERVED;
+ private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
+ "\t" + PURPOSE + "\n\n" +
+ "Common options:\n" +
+ " -" + META_FLAG + " URL - Metadata repository details " +
+ "(implementation-specific)\n" +
+ "\n" +
+ "Amazon DynamoDB-specific options:\n" +
+ " -" + REGION_FLAG + " REGION - Service region for connections\n" +
+ "\n" +
+ " URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" +
+ " Specifying both the -" + REGION_FLAG + " option and an S3A path\n" +
+ " is not supported.";
+
+ Prune(Configuration conf) {
+ super(conf);
+
+ CommandFormat format = getCommandFormat();
+ format.addOptionWithValue(DAYS_FLAG);
+ format.addOptionWithValue(HOURS_FLAG);
+ format.addOptionWithValue(MINUTES_FLAG);
+ format.addOptionWithValue(SECONDS_FLAG);
+ }
+
+ @VisibleForTesting
+ void setMetadataStore(MetadataStore ms) {
+ Preconditions.checkNotNull(ms);
+ this.setStore(ms);
+ }
+
+ @Override
+ String getName() {
+ return NAME;
+ }
+
+ @Override
+ public String getUsage() {
+ return USAGE;
+ }
+
+ private long getDeltaComponent(TimeUnit unit, String arg) {
+ String raw = getCommandFormat().getOptValue(arg);
+ if (raw == null || raw.isEmpty()) {
+ return 0;
+ }
+ Long parsed = Long.parseLong(raw);
+ return unit.toMillis(parsed);
+ }
+
+ @VisibleForTesting
+ public int run(String[] args, PrintStream out) throws
+ InterruptedException, IOException {
+ List<String> paths = parseArgs(args);
+ if (!parseDynamoDBRegion(paths)) {
+ System.err.println(USAGE);
+ return INVALID_ARGUMENT;
+ }
+ initMetadataStore(false);
+
+ Configuration conf = getConf();
+ long confDelta = conf.getLong(Constants.S3GUARD_CLI_PRUNE_AGE, 0);
+
+ long cliDelta = 0;
+ cliDelta += getDeltaComponent(TimeUnit.DAYS, DAYS_FLAG);
+ cliDelta += getDeltaComponent(TimeUnit.HOURS, HOURS_FLAG);
+ cliDelta += getDeltaComponent(TimeUnit.MINUTES, MINUTES_FLAG);
+ cliDelta += getDeltaComponent(TimeUnit.SECONDS, SECONDS_FLAG);
+
+ if (confDelta <= 0 && cliDelta <= 0) {
+ System.err.println(
+ "You must specify a positive age for metadata to prune.");
+ return INVALID_ARGUMENT;
+ }
+
+ // A delta provided on the CLI overrides any configured value
+ long delta = confDelta;
+ if (cliDelta > 0) {
+ delta = cliDelta;
+ }
+
+ long now = System.currentTimeMillis();
+ long cutoff = now - delta;
+
+ getStore().prune(cutoff);
+
+ out.flush();
+ return SUCCESS;
+ }
+
+ @Override
+ public int run(String[] args) throws InterruptedException, IOException {
+ return run(args, System.out);
+ }
+ }
+
+ private static S3GuardTool command;
+
+ private static void printHelp() {
+ if (command == null) {
+ System.err.println("Usage: hadoop " + USAGE);
+ System.err.println("\tperform S3Guard metadata store " +
+ "administrative commands.");
+ } else {
+ System.err.println("Usage: hadoop " + command.getUsage());
+ }
+ System.err.println();
+ System.err.println(COMMON_USAGE);
+ }
+
+ /**
+ * Execute the command with the given arguments.
+ *
+ * @param args command specific arguments.
+ * @param conf Hadoop configuration.
+ * @return exit code.
+ * @throws Exception on failure.
+ */
+ public static int run(String[] args, Configuration conf) throws
+ Exception {
+ /* ToolRunner.run does this too, but we must do it before looking at
+ subCommand or instantiating the cmd object below */
+ String[] otherArgs = new GenericOptionsParser(conf, args)
+ .getRemainingArgs();
+ if (otherArgs.length == 0) {
+ printHelp();
+ return INVALID_ARGUMENT;
+ }
+ final String subCommand = otherArgs[0];
+ switch (subCommand) {
+ case Init.NAME:
+ command = new Init(conf);
+ break;
+ case Destroy.NAME:
+ command = new Destroy(conf);
+ break;
+ case Import.NAME:
+ command = new Import(conf);
+ break;
+ case Diff.NAME:
+ command = new Diff(conf);
+ break;
+ case Prune.NAME:
+ command = new Prune(conf);
+ break;
+ default:
+ printHelp();
+ return INVALID_ARGUMENT;
+ }
+ return ToolRunner.run(conf, command, otherArgs);
+ }
+
+ /**
+ * Main entry point. Calls {@code System.exit()} on all execution paths.
+ * @param args argument list
+ */
+ public static void main(String[] args) {
+ try {
+ int ret = run(args, new Configuration());
+ System.exit(ret);
+ } catch (CommandFormat.UnknownOptionException e) {
+ System.err.println(e.getMessage());
+ printHelp();
+ System.exit(INVALID_ARGUMENT);
+ } catch (Throwable e) {
+ e.printStackTrace(System.err);
+ System.exit(ERROR);
+ }
+ }
+}
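
Illustrative invocations of the static run() entry point above (not part of
the patch; table, region, and bucket names are made up, and the equivalent
shell form is "hadoop s3guard init ..." etc.):

    Configuration conf = new Configuration();
    // Create a DynamoDB table for the metadata store.
    int ret = S3GuardTool.run(new String[] {
        "init", "-meta", "dynamodb://example-table", "-region", "us-west-2"},
        conf);
    // Prune entries older than seven days, inferring the region from the bucket.
    ret = S3GuardTool.run(new String[] {
        "prune", "-days", "7", "s3a://example-bucket"}, conf);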
http://git-wip-us.apache.org/repos/asf/hadoop/blob/a1afc6aa/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/package-info.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/package-info.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/package-info.java
new file mode 100644
index 0000000..d430315
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/package-info.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * This package contains classes related to S3Guard: a feature of S3A to mask
+ * the eventual consistency behavior of S3 and optimize access patterns by
+ * coordinating with a strongly consistent external store for file system
+ * metadata.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
http://git-wip-us.apache.org/repos/asf/hadoop/blob/a1afc6aa/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3xLoginHelper.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3xLoginHelper.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3xLoginHelper.java
index 862ce6b..ce79284 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3xLoginHelper.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3xLoginHelper.java
@@ -105,6 +105,10 @@ public final class S3xLoginHelper {
* @return a login tuple, possibly empty.
*/
public static Login extractLoginDetails(URI name) {
+ if (name == null) {
+ return Login.EMPTY;
+ }
+
try {
String authority = name.getAuthority();
if (authority == null) {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/a1afc6aa/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
index 160b148..9318ed1 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
@@ -41,6 +41,7 @@ See also:
* [Testing](testing.html)
* [Troubleshooting S3a](troubleshooting_s3a.html)
+* [S3Guard](s3guard.html)
### Warning #1: Object Stores are not filesystems
@@ -1595,7 +1596,7 @@ for `fs.s3a.server-side-encryption-algorithm` is `AES256`.
SSE-KMS is where the user specifies a Customer Master Key (CMK) that is used to
encrypt the objects. The user may specify a specific CMK or leave the
-`fs.s3a.server-side-encryption-key` empty to use the default auto-generated key
+`fs.s3a.server-side-encryption.key` empty to use the default auto-generated key
in AWS IAM. Each CMK configured in AWS IAM is region specific, and cannot be
used with an S3 bucket in a different region. There can also be policies
assigned to the CMK that prohibit or restrict its use for users causing S3A
http://git-wip-us.apache.org/repos/asf/hadoop/blob/a1afc6aa/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md
new file mode 100644
index 0000000..fe67d69
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md
@@ -0,0 +1,610 @@
+<!---
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. See accompanying LICENSE file.
+-->
+
+# S3Guard: Consistency and Metadata Caching for S3A
+
+**Experimental Feature**
+
+<!-- MACRO{toc|fromDepth=0|toDepth=5} -->
+
+## Overview
+
+*S3Guard* is an experimental feature for the S3A client of the S3 object store,
+which can use a (consistent) database as the store of metadata about objects
+in an S3 bucket.
+
+S3Guard
+
+1. May improve performance on directory listing/scanning operations,
+including those which take place during the partitioning period of query
+execution, the process where files are listed and the work divided up amongst
+processes.
+
+1. Permits a consistent view of the object store. Without this, changes in
+objects may not be immediately visible, especially in listing operations.
+
+1. Offers a platform for future performance improvements for running Hadoop
+workloads on top of object stores.
+
+The basic idea is that, for each operation in the Hadoop S3 client (s3a) that
+reads or modifies metadata, a shadow copy of that metadata is stored in a
+separate MetadataStore implementation. Each MetadataStore implementation
+offers HDFS-like consistency for the metadata, and may also provide faster
+lookups for things like file status or directory listings.
+
+For links to early design documents and related patches, see
+[HADOOP-13345](https://issues.apache.org/jira/browse/HADOOP-13345).
+
+*Important*
+
+* S3Guard is experimental and should be considered unstable.
+
+* While all underlying data is persisted in S3, if, for some reason,
+the S3Guard-cached metadata becomes inconsistent with that in S3,
+queries on the data may become incorrect.
+For example, new datasets may be omitted, objects may be overwritten,
+or clients may not be aware that some data has been deleted.
+It is essential for all clients writing to an S3Guard-enabled
+S3 repository to use the feature. Clients reading the data may work directly
+with the S3A data, in which case the normal S3 consistency guarantees apply.
+
+
+## Setting up S3Guard
+
+The latest configuration parameters are defined in `core-default.xml`. You
+should consult that file for full information, but a summary is provided here.
+
+
+### 1. Choose the Database
+
+A core concept of S3Guard is that the directory listing data of the object
+store, *the metadata*, is replicated in a higher-performance, consistent
+database. In S3Guard, this database is called *the Metadata Store*.
+
+By default, S3Guard is not enabled.
+
+The Metadata Store used in production is backed by Amazon's DynamoDB
+database service. The following setting enables this Metadata Store:
+
+```xml
+<property>
+ <name>fs.s3a.metadatastore.impl</name>
+ <value>org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore</value>
+</property>
+```
+
+Note that the `NullMetadataStore` can be explicitly requested if desired.
+This offers no metadata storage, and effectively disables S3Guard.
+
+```xml
+<property>
+ <name>fs.s3a.metadatastore.impl</name>
+ <value>org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore</value>
+</property>
+```
+
+### 2. Configure S3Guard Settings
+
+More settings may be added in the future.
+Currently the only Metadata Store-independent setting, besides the
+implementation class above, is the *allow authoritative* flag.
+
+It is recommended that you leave the default setting here:
+
+```xml
+<property>
+ <name>fs.s3a.metadatastore.authoritative</name>
+ <value>false</value>
+</property>
+```
+
+Setting this to `true` is currently an experimental feature. When true, the
+S3A client will avoid round-trips to S3 when getting directory listings, if
+there is a fully-cached version of the directory stored in the Metadata Store.
+
+Note that if this is set to true, it may exacerbate or persist existing race
+conditions around multiple concurrent modifications and listings of a given
+directory tree.
+
+In particular: **If the Metadata Store is declared as authoritative,
+all interactions with the S3 bucket(s) must be through S3A clients sharing
+the same Metadata Store**.
+
+
+### 3. Configure the Metadata Store
+
+Here are the `DynamoDBMetadataStore` settings. Other Metadata Store
+implementations will have their own configuration parameters.
+
+
+### 4. Name Your Table
+
+First, choose the name of the table you wish to use for the S3Guard metadata
+storage in your DynamoDB instance. If you leave it unset/empty, a
+separate table will be created for each S3 bucket you access, and that
+bucket's name will be used for the name of the DynamoDB table. For example,
+this sets the table name to `my-ddb-table-name`:
+
+```xml
+<property>
+ <name>fs.s3a.s3guard.ddb.table</name>
+ <value>my-ddb-table-name</value>
+ <description>
+ The DynamoDB table name to operate on. Without this property, the respective
+ S3 bucket names will be used.
+ </description>
+</property>
+```
+
+Sharing a table across multiple buckets is often a good idea, for
+several reasons.
+
+1. You are billed for the I/O capacity allocated to the table,
+*even when the table is not used*. Sharing capacity can reduce costs.
+
+1. You can share the "provision burden" across the buckets. That is, rather
+than allocating for the peak load on a single bucket, you can allocate for
+the peak load *across all the buckets*, which is likely to be significantly
+lower.
+
+1. It's easier to measure and tune the load requirements and cost of
+S3Guard, because there is only one table to review and configure in the
+AWS management console.
+
+When wouldn't you want to share a table?
+
+1. When you do explicitly want to provision I/O capacity to a specific bucket
+and table, isolated from others.
+
+1. When you are using separate billing for specific buckets allocated
+to specific projects.
+
+1. When different users/roles have different access rights to different buckets.
+As S3Guard requires all users to have R/W access to the table, all users will
+be able to list the metadata in all buckets, even those to which they lack
+read access.
+
+### 5. Locate your Table
+
+You may also wish to specify the region to use for DynamoDB. If a region
+is not configured, S3A will assume that it is in the same region as the S3
+bucket. A list of regions for the DynamoDB service can be found in
+[Amazon's documentation](http://docs.aws.amazon.com/general/latest/gr/rande.html#ddb_region).
+In this example, to use the US West 2 region:
+
+```xml
+<property>
+ <name>fs.s3a.s3guard.ddb.region</name>
+ <value>us-west-2</value>
+</property>
+```
+
+When working with S3Guard-managed buckets from EC2 VMs running in AWS
+infrastructure, using a local DynamoDB region ensures the lowest latency
+and highest reliability, as well as avoiding all long-haul network charges.
+The S3Guard tables, and indeed, the S3 buckets, should all be in the same
+region as the VMs.
+
+### 6. Optional: Create your Table
+
+Next, you can choose whether or not the table will be automatically created
+(if it doesn't already exist). If you want this feature, set the
+`fs.s3a.s3guard.ddb.table.create` option to `true`.
+
+```xml
+<property>
+ <name>fs.s3a.s3guard.ddb.table.create</name>
+ <value>true</value>
+ <description>
+ If true, the S3A client will create the table if it does not already exist.
+ </description>
+</property>
+```
+
+### 7. If creating a table: Set your DynamoDB IO Capacity
+
+Next, you need to set the DynamoDB read and write throughput requirements you
+expect to need for your cluster. Setting higher values will cost you more
+money. *Note* that these settings only affect table creation when
+`fs.s3a.s3guard.ddb.table.create` is enabled. To change the throughput for
+an existing table, use the AWS console or CLI tool.
+
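+A hedged sketch of doing so with the standard AWS CLI (the table name here
+is illustrative):
+
+```bash
+aws dynamodb update-table --table-name my-ddb-table-name \
+  --provisioned-throughput ReadCapacityUnits=20,WriteCapacityUnits=10
+```
+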
+For more details on DynamoDB capacity units, see the AWS page on [Capacity
+Unit Calculations](http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/WorkingWithTables.html#CapacityUnitCalculations).
+
+The charges are incurred per hour for the life of the table, *even when the
+table and the underlying S3 buckets are not being used*.
+
+There are also charges incurred for data storage, and for data IO outside of
+the region of the DynamoDB instance. S3Guard only stores metadata in
+DynamoDB: path names and summary details of objects. The actual data is
+stored in S3, so it is billed at S3 rates.
+
+```xml
+<property>
+ <name>fs.s3a.s3guard.ddb.table.capacity.read</name>
+ <value>500</value>
+ <description>
+ Provisioned throughput requirements for read operations in terms of capacity
+ units for the DynamoDB table. This config value will only be used when
+ creating a new DynamoDB table, though later you can manually provision by
+ increasing or decreasing read capacity as needed for existing tables.
+ See DynamoDB documents for more information.
+ </description>
+</property>
+
+<property>
+ <name>fs.s3a.s3guard.ddb.table.capacity.write</name>
+ <value>100</value>
+ <description>
+ Provisioned throughput requirements for write operations in terms of
+ capacity units for the DynamoDB table. Refer to related config
+ fs.s3a.s3guard.ddb.table.capacity.read before usage.
+ </description>
+</property>
+```
+
+Attempting to perform more IO than the capacity requested simply throttles the
+IO; small capacity numbers are recommended when initially experimenting
+with S3Guard.
+
+## Authenticating with S3Guard
+
+The DynamoDB metadata store takes advantage of the fact that the DynamoDB
+service uses the same authentication mechanisms as S3. S3Guard
+gets all its credentials from the S3A client that is using it.
+
+All existing S3 authentication mechanisms can be used, with one
+exception: for security reasons, credentials placed in URIs are not
+supported by S3Guard.
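+
+For example, if the client is configured with simple static credentials,
+the same key pair is used for both S3 and DynamoDB. A minimal sketch, with
+placeholder values:
+
+```xml
+<property>
+  <name>fs.s3a.access.key</name>
+  <value>YOUR-ACCESS-KEY-ID</value>
+</property>
+
+<property>
+  <name>fs.s3a.secret.key</name>
+  <value>YOUR-SECRET-KEY</value>
+</property>
+```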
+
+## Per-bucket S3Guard configuration
+
+In production, it is likely that only some buckets will have S3Guard
+enabled; those which are read-only may have it disabled, for example.
+Equally importantly, buckets in different regions should have different
+tables, each in the relevant region.
+
+These options can be managed through S3A's [per-bucket configuration
+mechanism](./index.html#Configuring_different_S3_buckets).
+All options set under `fs.s3a.bucket.BUCKETNAME.KEY` are propagated
+to the option `fs.s3a.KEY` *for that bucket only*.
+
+As an example, here is a configuration to use different metadata stores
+and tables for different buckets.
+
+First, we define shortcuts for the metadata store classnames:
+
+
+```xml
+<property>
+ <name>s3guard.null</name>
+ <value>org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore</value>
+</property>
+
+<property>
+ <name>s3guard.dynamo</name>
+ <value>org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore</value>
+</property>
+```
+
+Next, Amazon's public landsat database is configured with no
+metadata store:
+
+```xml
+<property>
+ <name>fs.s3a.bucket.landsat-pds.metadatastore.impl</name>
+ <value>${s3guard.null}</value>
+ <description>The read-only landsat-pds repository isn't
+ managed by S3Guard</description>
+</property>
+```
+
+Next the `ireland-2` and `ireland-offline` buckets are configured with
+DynamoDB as the store, and a shared table `production-table`:
+
+
+```xml
+<property>
+ <name>fs.s3a.bucket.ireland-2.metadatastore.impl</name>
+ <value>${s3guard.dynamo}</value>
+</property>
+
+<property>
+ <name>fs.s3a.bucket.ireland-offline.metadatastore.impl</name>
+ <value>${s3guard.dynamo}</value>
+</property>
+
+<property>
+ <name>fs.s3a.bucket.ireland-2.s3guard.ddb.table</name>
+ <value>production-table</value>
+</property>
+```
+
+The region of this table is automatically set to be that of the buckets,
+here `eu-west-1`; the same table name may actually be used in different
+regions.
+
+Together then, this configuration enables the DynamoDB Metadata Store
+for two buckets with a shared table, while disabling it for the public
+bucket.
+
+
+## S3Guard Command Line Interface (CLI)
+
+Note that in some cases an AWS region or `s3a://` URI can be provided.
+
+Metadata store URIs include a scheme that designates the backing store,
+for example `dynamodb://table_name`. As documented above, the
+AWS region can be inferred if the URI of an existing bucket is provided.
+
+
+The S3A URI must also be provided for per-bucket configuration options
+to be picked up. That is: when an s3a URL is provided on the command line,
+all its "resolved" per-bucket settings are used to connect to, authenticate
+with and configure the S3Guard table. If no such URL is provided, then
+the base settings are picked up.
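+
+For example, the same store can be addressed either through a bucket URI,
+which picks up that bucket's resolved settings, or directly via a metadata
+store URI with an explicit region (all names here are illustrative):
+
+```bash
+# bind to the table through the bucket's per-bucket configuration
+hadoop s3guard diff s3a://ireland-1
+
+# address a table directly, supplying the region explicitly
+hadoop s3guard init -meta dynamodb://ireland-team -region eu-west-1
+```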
+
+
+### Create a table: `s3guard init`
+
+```bash
+hadoop s3guard init -meta URI ( -region REGION | s3a://BUCKET )
+```
+
+Creates and initializes an empty metadata store.
+
+A DynamoDB metadata store can be initialized with additional parameters
+pertaining to [Provisioned Throughput](http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/HowItWorks.ProvisionedThroughput.html):
+
+```bash
+[-write PROVISIONED_WRITES] [-read PROVISIONED_READS]
+```
+
+Example 1
+
+```bash
+hadoop s3guard init -meta dynamodb://ireland-team -write 5 -read 10 s3a://ireland-1
+```
+
+Creates a table "ireland-team" with a capacity of 5 for writes, 10 for reads,
+in the same location as the bucket "ireland-1".
+
+
+Example 2
+
+```bash
+hadoop s3guard init -meta dynamodb://ireland-team -region eu-west-1
+```
+
+Creates a table "ireland-team" in the region "eu-west-1"
+("s3-eu-west-1.amazonaws.com").
+
+
+### Import a bucket: `s3guard import`
+
+```bash
+hadoop s3guard import [-meta URI] s3a://BUCKET
+```
+
+Pre-populates a metadata store according to the current contents of an S3
+bucket. If the `-meta` option is omitted, the binding information is taken
+from the `core-site.xml` configuration.
+
+Example
+
+```bash
+hadoop s3guard import s3a://ireland-1
+```
+
+### Audit a table: `s3guard diff`
+
+```bash
+hadoop s3guard diff [-meta URI] s3a://BUCKET
+```
+
+Lists discrepancies between a metadata store and bucket. Note that depending on
+how S3Guard is used, certain discrepancies are to be expected.
+
+Example
+
+```bash
+hadoop s3guard diff s3a://ireland-1
+```
+
+### Delete a table: `s3guard destroy`
+
+
+Deletes a metadata store. With DynamoDB as the store, this means
+the specific DynamoDB table used to store the metadata.
+
+```bash
+hadoop s3guard destroy [-meta URI] ( -region REGION | s3a://BUCKET )
+```
+
+This *does not* delete the bucket, only the S3Guard table which it is bound
+to.
+
+
+Examples
+
+```bash
+hadoop s3guard destroy s3a://ireland-1
+```
+
+Deletes the table which the bucket ireland-1 is configured to use
+as its MetadataStore.
+
+```bash
+hadoop s3guard destroy -meta dynamodb://ireland-team -region eu-west-1
+```
+
+
+
+### Clean up a table: `s3guard prune`
+
+Deletes all file entries in the MetadataStore table whose object "modification
+time" is older than the specified age.
+
+```bash
+hadoop s3guard prune [-days DAYS] [-hours HOURS] [-minutes MINUTES]
+ [-seconds SECONDS] [-meta URI] ( -region REGION | s3a://BUCKET )
+```
+
+A time value must be supplied.
+
+1. This does not delete the entries in the bucket itself.
+1. The modification time is effectively the creation time of the objects
+in the S3 Bucket.
+1. Even when an S3A URI is supplied, all entries in the table older than
+a specific age are deleted — even those from other buckets.
+
+Example
+
+```bash
+hadoop s3guard prune -days 7 s3a://ireland-1
+```
+
+Deletes all entries in the S3Guard table for files older than seven days from
+the table associated with `s3a://ireland-1`.
+
+```bash
+hadoop s3guard prune -hours 1 -minutes 30 -meta dynamodb://ireland-team -region eu-west-1
+```
+
+Deletes all entries more than 90 minutes old from the table "ireland-team" in
+the region "eu-west-1".
+
+
+
+## Debugging and Error Handling
+
+If you run into network connectivity issues, or have a machine failure in the
+middle of an operation, you may end up with your metadata store having state
+that differs from S3. The S3Guard CLI commands, covered in the CLI section
+above, can be used to diagnose and repair these issues.
+
+There are some logs whose log level can be increased to provide more
+information.
+
+```properties
+# Log S3Guard classes
+log4j.logger.org.apache.hadoop.fs.s3a.s3guard=DEBUG
+
+# Log all S3A classes
+log4j.logger.org.apache.hadoop.fs.s3a=DEBUG
+
+# Enable debug logging of AWS DynamoDB client
+log4j.logger.com.amazonaws.services.dynamodbv2.AmazonDynamoDB=DEBUG
+
+# Log all HTTP requests made; includes S3 interaction. This may
+# include sensitive information such as account IDs in HTTP headers.
+log4j.logger.com.amazonaws.request=DEBUG
+
+```
+
+If all else fails, S3Guard is designed to allow for easy recovery by deleting
+the metadata store data. In DynamoDB, this can be accomplished by simply
+deleting the table, and allowing S3Guard to recreate it from scratch. Note
+that S3Guard tracks recent changes to file metadata to implement consistency.
+Deleting the metadata store table will simply result in a period of eventual
+consistency for any file modifications that were made right before the table
+was deleted.
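+
+A sketch of such a recovery with the CLI commands covered above, assuming
+the bucket `ireland-1` is bound to a table named `ireland-team`:
+
+```bash
+# delete the (possibly inconsistent) metadata store table
+hadoop s3guard destroy s3a://ireland-1
+
+# recreate the table, then repopulate it from the bucket's contents
+hadoop s3guard init -meta dynamodb://ireland-team s3a://ireland-1
+hadoop s3guard import s3a://ireland-1
+```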
+
+### Failure Semantics
+
+Operations which modify metadata will make changes to S3 first. If, and only
+if, those operations succeed, the equivalent changes will be made to the
+Metadata Store.
+
+These changes to S3 and Metadata Store are not fully-transactional: If the S3
+operations succeed, and the subsequent Metadata Store updates fail, the S3
+changes will *not* be rolled back. In this case, an error message will be
+logged.
+
+### Versioning
+
+S3Guard tables are created with a version marker, an entry with the primary
+key and child entry of `../VERSION`; the use of a relative path guarantees
+that it will not be resolved.
+
+#### Versioning policy
+
+1. The version number of an S3Guard table will only be incremented when
+an incompatible change is made to the table structure; that is, when the
+structure has changed so that it is no longer readable by older versions,
+or new mandatory fields have been added which older versions do not create.
+1. The version number of S3Guard tables will only be changed by incrementing
+the value.
+1. Updated versions of S3Guard MAY continue to support older version tables.
+1. If an incompatible change is made such that existing tables are not compatible,
+then a means shall be provided to update existing tables. For example:
+an option in the Command Line Interface, or an option to upgrade tables
+during S3Guard initialization.
+
+*Note*: this policy does not indicate any intent to upgrade table structures
+in an incompatible manner. The version marker in tables exists to support
+such an option if it ever becomes necessary, by ensuring that all S3Guard
+clients can recognise any version mismatch.
+
+### Security
+
+All users of the DynamoDB table must have write access to it. This
+effectively means they must have write access to the entire object store.
+
+There has not been much testing of using an S3Guard Metadata Store
+with a read-only S3 bucket. It *should* work, provided all users
+have write access to the DynamoDB table. And, as updates to the Metadata Store
+are only made after successful file creation, deletion and rename, the
+store is *unlikely* to get out of sync. Even so, this merits more testing
+before it can be considered reliable.
+
+### Troubleshooting
+
+#### Error: `S3Guard table lacks version marker.`
+
+The table which was intended to be used as an S3Guard metadata store
+does not have any version marker indicating that it is an S3Guard table.
+
+It may be that this is not an S3Guard table.
+
+* Make sure that this is the correct table name.
+* Delete the table, so it can be rebuilt.
+
+#### Error: `Database table is from an incompatible S3Guard version`
+
+This indicates that the version of S3Guard which created (or possibly updated)
+the database table is from a different version than that expected by the S3A
+client.
+
+This error will also include the expected and actual version numbers.
+
+If the expected version is lower than the actual version, then the version
+of the S3A client library is too old to interact with this S3Guard-managed
+bucket. Upgrade the application/library.
+
+If the expected version is higher than the actual version, then the table
+itself will need upgrading.
+
+#### Error `"DynamoDB table TABLE does not exist in region REGION; auto-creation is turned off"`
+
+S3Guard could not find the DynamoDB table for the Metadata Store,
+and it was not configured to create it. Either the table was missing,
+or the configuration is preventing S3Guard from finding the table.
+
+1. Verify that the value of `fs.s3a.s3guard.ddb.table` is correct; a
+configuration sketch follows this list.
+1. If the region for an existing table has been set in
+`fs.s3a.s3guard.ddb.region`, verify that the value is correct.
+1. If the region is not set, verify that the table exists in the same
+region as the bucket being used.
+1. Create the table if necessary.
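+
+For reference, a sketch of pinning both the table name and its region
+explicitly (the values here are illustrative):
+
+```xml
+<property>
+  <name>fs.s3a.s3guard.ddb.table</name>
+  <value>my-ddb-table-name</value>
+</property>
+
+<property>
+  <name>fs.s3a.s3guard.ddb.region</name>
+  <value>us-west-2</value>
+</property>
+```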