You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by st...@apache.org on 2020/09/04 13:58:17 UTC
[hadoop] branch trunk updated: HADOOP-17227. S3A Marker Tool tuning
(#2254)
This is an automated email from the ASF dual-hosted git repository.
stevel pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
new 5346cc3 HADOOP-17227. S3A Marker Tool tuning (#2254)
5346cc3 is described below
commit 5346cc3263757d2761f27ebcc4f3587592c5692e
Author: Steve Loughran <st...@cloudera.com>
AuthorDate: Fri Sep 4 14:58:03 2020 +0100
HADOOP-17227. S3A Marker Tool tuning (#2254)
Contributed by Steve Loughran.
---
.../apache/hadoop/fs/s3a/s3guard/S3GuardTool.java | 30 +-
.../org/apache/hadoop/fs/s3a/tools/MarkerTool.java | 423 +++++++++++++++++----
.../markdown/tools/hadoop-aws/directory_markers.md | 7 +-
.../apache/hadoop/fs/s3a/AbstractS3ATestBase.java | 19 +-
.../fs/s3a/tools/AbstractMarkerToolTest.java | 15 +-
.../hadoop/fs/s3a/tools/ITestMarkerTool.java | 9 +-
6 files changed, 394 insertions(+), 109 deletions(-)
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
index f89777f..1aa310d 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
@@ -46,6 +46,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.time.DurationFormatUtils;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
@@ -758,8 +759,8 @@ public abstract class S3GuardTool extends Configured implements Tool,
*/
static class Destroy extends S3GuardTool {
public static final String NAME = "destroy";
- public static final String PURPOSE = "destroy Metadata Store data "
- + DATA_IN_S3_IS_PRESERVED;
+ public static final String PURPOSE = "destroy the Metadata Store including its"
+ + " contents" + DATA_IN_S3_IS_PRESERVED;
private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
"\t" + PURPOSE + "\n\n" +
"Common options:\n" +
@@ -1252,7 +1253,7 @@ public abstract class S3GuardTool extends Configured implements Tool,
@VisibleForTesting
public static final String IS_MARKER_AWARE =
- "The S3A connector is compatible with buckets where"
+ "\tThe S3A connector is compatible with buckets where"
+ " directory markers are not deleted";
public BucketInfo(Configuration conf) {
@@ -1328,8 +1329,9 @@ public abstract class S3GuardTool extends Configured implements Tool,
authMode = conf.getBoolean(METADATASTORE_AUTHORITATIVE, false);
final long ttl = conf.getTimeDuration(METADATASTORE_METADATA_TTL,
DEFAULT_METADATASTORE_METADATA_TTL, TimeUnit.MILLISECONDS);
- println(out, "\tMetadata time to live: %s=%s milliseconds",
- METADATASTORE_METADATA_TTL, ttl);
+ println(out, "\tMetadata time to live: (set in %s) = %s",
+ METADATASTORE_METADATA_TTL,
+ DurationFormatUtils.formatDurationHMS(ttl));
printStoreDiagnostics(out, store);
} else {
println(out, "Filesystem %s is not using S3Guard", fsUri);
@@ -1463,10 +1465,18 @@ public abstract class S3GuardTool extends Configured implements Tool,
private void processMarkerOption(final PrintStream out,
final S3AFileSystem fs,
final String marker) {
+ println(out, "%nSecurity");
DirectoryPolicy markerPolicy = fs.getDirectoryMarkerPolicy();
String desc = markerPolicy.describe();
- println(out, "%nThe directory marker policy is \"%s\"%n", desc);
-
+ println(out, "\tThe directory marker policy is \"%s\"", desc);
+
+ String pols = DirectoryPolicyImpl.availablePolicies()
+ .stream()
+ .map(DirectoryPolicy.MarkerPolicy::getOptionName)
+ .collect(Collectors.joining(", "));
+ println(out, "\tAvailable Policies: %s", pols);
+ printOption(out, "\tAuthoritative paths",
+ AUTHORITATIVE_PATH, "");
DirectoryPolicy.MarkerPolicy mp = markerPolicy.getMarkerPolicy();
String desiredMarker = marker == null
@@ -1478,12 +1488,6 @@ public abstract class S3GuardTool extends Configured implements Tool,
// simple awareness test -provides a way to validate compatibility
// on the command line
println(out, IS_MARKER_AWARE);
- String pols = DirectoryPolicyImpl.availablePolicies()
- .stream()
- .map(DirectoryPolicy.MarkerPolicy::getOptionName)
- .collect(Collectors.joining(", "));
- println(out, "Available Policies: %s", pols);
-
} else {
// compare with current policy
if (!optionName.equalsIgnoreCase(desiredMarker)) {
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java
index 6855c52..f817223 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java
@@ -53,6 +53,7 @@ import org.apache.hadoop.fs.s3a.S3ALocatedFileStatus;
import org.apache.hadoop.fs.s3a.UnknownStoreException;
import org.apache.hadoop.fs.s3a.impl.DirMarkerTracker;
import org.apache.hadoop.fs.s3a.impl.DirectoryPolicy;
+import org.apache.hadoop.fs.s3a.impl.DirectoryPolicyImpl;
import org.apache.hadoop.fs.s3a.impl.StoreContext;
import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool;
import org.apache.hadoop.fs.shell.CommandFormat;
@@ -113,9 +114,14 @@ public final class MarkerTool extends S3GuardTool {
public static final String CLEAN = "-" + OPT_CLEAN;
/**
- * Expected number of markers to find: {@value}.
+ * Min number of markers to find: {@value}.
*/
- public static final String OPT_EXPECTED = "expected";
+ public static final String OPT_MIN = "min";
+
+ /**
+ * Max number of markers to find: {@value}.
+ */
+ public static final String OPT_MAX = "max";
/**
* Name of a file to save the list of markers to: {@value}.
@@ -152,12 +158,20 @@ public final class MarkerTool extends S3GuardTool {
/**
+ * Constant to use when there is no minimum number of
+ * markers: {@value}.
+ */
+ public static final int UNLIMITED_MIN_MARKERS = -1;
+
+
+ /**
* Usage string: {@value}.
*/
private static final String USAGE = MARKERS
+ " (-" + OPT_AUDIT
+ " | -" + OPT_CLEAN + ")"
- + " [-" + OPT_EXPECTED + " <count>]"
+ + " [-" + OPT_MIN + " <count>]"
+ + " [-" + OPT_MAX + " <count>]"
+ " [-" + OPT_OUT + " <filename>]"
+ " [-" + OPT_LIMIT + " <limit>]"
+ " [-" + OPT_NONAUTH + "]"
@@ -195,7 +209,8 @@ public final class MarkerTool extends S3GuardTool {
VERBOSE,
OPT_NONAUTH);
CommandFormat format = getCommandFormat();
- format.addOptionWithValue(OPT_EXPECTED);
+ format.addOptionWithValue(OPT_MIN);
+ format.addOptionWithValue(OPT_MAX);
format.addOptionWithValue(OPT_LIMIT);
format.addOptionWithValue(OPT_OUT);
}
@@ -231,8 +246,7 @@ public final class MarkerTool extends S3GuardTool {
if (parsedArgs.size() != 1) {
errorln(getUsage());
println(out, "Supplied arguments: ["
- + parsedArgs.stream()
- .collect(Collectors.joining(", "))
+ + String.join(", ", parsedArgs)
+ "]");
throw new ExitUtil.ExitException(EXIT_USAGE,
String.format(E_ARGUMENTS, parsedArgs.size()));
@@ -241,12 +255,11 @@ public final class MarkerTool extends S3GuardTool {
CommandFormat command = getCommandFormat();
verbose = command.getOpt(VERBOSE);
- // How many markers are expected?
- int expected = 0;
- String value = command.getOptValue(OPT_EXPECTED);
- if (value != null && !value.isEmpty()) {
- expected = Integer.parseInt(value);
- }
+ // minimum number of markers expected
+ int expectedMin = getOptValue(OPT_MIN, 0);
+ // max number of markers allowed
+ int expectedMax = getOptValue(OPT_MAX, 0);
+
// determine the action
boolean audit = command.getOpt(OPT_AUDIT);
@@ -258,11 +271,7 @@ public final class MarkerTool extends S3GuardTool {
throw new ExitUtil.ExitException(EXIT_USAGE,
"Exactly one of " + AUDIT + " and " + CLEAN);
}
- int limit = UNLIMITED_LISTING;
- value = command.getOptValue(OPT_LIMIT);
- if (value != null && !value.isEmpty()) {
- limit = Integer.parseInt(value);
- }
+ int limit = getOptValue(OPT_LIMIT, UNLIMITED_LISTING);
final String dir = parsedArgs.get(0);
Path path = new Path(dir);
URI uri = path.toUri();
@@ -271,13 +280,17 @@ public final class MarkerTool extends S3GuardTool {
path = new Path(path, "/");
}
FileSystem fs = path.getFileSystem(getConf());
+ boolean nonAuth = command.getOpt(OPT_NONAUTH);
ScanResult result = execute(
- fs,
- path,
- clean,
- expected,
- limit,
- command.getOpt(OPT_NONAUTH));
+ new ScanArgsBuilder()
+ .withSourceFS(fs)
+ .withPath(path)
+ .withDoPurge(clean)
+ .withMinMarkerCount(expectedMin)
+ .withMaxMarkerCount(expectedMax)
+ .withLimit(limit)
+ .withNonAuth(nonAuth)
+ .build());
if (verbose) {
dumpFileSystemStatistics(out);
}
@@ -300,30 +313,43 @@ public final class MarkerTool extends S3GuardTool {
IOUtils.writeLines(surplus, "\n", writer);
}
}
- return result.exitCode;
+
+ return result.finish();
+ }
+
+ /**
+ * Get the value of an option, or the default if the option
+ * is unset/empty.
+ * @param option option key
+ * @param defVal default
+ * @return the value to use
+ */
+ private int getOptValue(String option, int defVal) {
+ CommandFormat command = getCommandFormat();
+ String value = command.getOptValue(option);
+ if (value != null && !value.isEmpty()) {
+ try {
+ return Integer.parseInt(value);
+ } catch (NumberFormatException e) {
+ throw new ExitUtil.ExitException(EXIT_USAGE,
+ String.format("Argument for %s is not a number: %s",
+ option, value));
+ }
+ } else {
+ return defVal;
+ }
}
/**
* Execute the scan/purge.
- * @param sourceFS source FS; must be or wrap an S3A FS.
- * @param path path to scan.
- * @param doPurge purge?
- * @param expectedMarkerCount expected marker count
- * @param limit limit of files to scan; -1 for 'unlimited'
- * @param nonAuth consider only markers in nonauth paths as errors
- * @return scan+purge result.
+ *
+ * @param scanArgs@return scan+purge result.
* @throws IOException failure
*/
@VisibleForTesting
- ScanResult execute(
- final FileSystem sourceFS,
- final Path path,
- final boolean doPurge,
- final int expectedMarkerCount,
- final int limit,
- final boolean nonAuth)
+ ScanResult execute(final ScanArgs scanArgs)
throws IOException {
- S3AFileSystem fs = bindFilesystem(sourceFS);
+ S3AFileSystem fs = bindFilesystem(scanArgs.getSourceFS());
// extract the callbacks needed for the rest of the work
storeContext = fs.createStoreContext();
@@ -344,6 +370,7 @@ public final class MarkerTool extends S3GuardTool {
println(out, "Authoritative path list is \"%s\"", authPath);
}
// qualify the path
+ Path path = scanArgs.getPath();
Path target = path.makeQualified(fs.getUri(), new Path("/"));
// initial safety check: does the path exist?
try {
@@ -360,10 +387,19 @@ public final class MarkerTool extends S3GuardTool {
}
// the default filter policy is that all entries should be deleted
- DirectoryPolicy filterPolicy = nonAuth
- ? activePolicy
- : null;
- ScanResult result = scan(target, doPurge, expectedMarkerCount, limit,
+ DirectoryPolicy filterPolicy;
+ if (scanArgs.isNonAuth()) {
+ filterPolicy = new DirectoryPolicyImpl(
+ DirectoryPolicy.MarkerPolicy.Authoritative,
+ fs::allowAuthoritative);
+ } else {
+ filterPolicy = null;
+ }
+ ScanResult result = scan(target,
+ scanArgs.isDoPurge(),
+ scanArgs.getMaxMarkerCount(),
+ scanArgs.getMinMarkerCount(),
+ scanArgs.getLimit(),
filterPolicy);
return result;
}
@@ -379,6 +415,22 @@ public final class MarkerTool extends S3GuardTool {
private int exitCode;
/**
+ * Text to include if raising an exception.
+ */
+ private String exitText = "";
+
+ /**
+ * Count of all markers found.
+ */
+ private int totalMarkerCount;
+
+ /**
+ * Count of all markers found after excluding
+ * any from a [-nonauth] qualification.
+ */
+ private int filteredMarkerCount;
+
+ /**
* The tracker.
*/
private DirMarkerTracker tracker;
@@ -395,6 +447,9 @@ public final class MarkerTool extends S3GuardTool {
public String toString() {
return "ScanResult{" +
"exitCode=" + exitCode +
+ ", exitText=" + exitText +
+ ", totalMarkerCount=" + totalMarkerCount +
+ ", filteredMarkerCount=" + filteredMarkerCount +
", tracker=" + tracker +
", purgeSummary=" + purgeSummary +
'}';
@@ -414,13 +469,34 @@ public final class MarkerTool extends S3GuardTool {
public MarkerPurgeSummary getPurgeSummary() {
return purgeSummary;
}
+
+ public int getTotalMarkerCount() {
+ return totalMarkerCount;
+ }
+
+ public int getFilteredMarkerCount() {
+ return filteredMarkerCount;
+ }
+
+ /**
+ * Throw an exception if the exit code is non-zero.
+ * @return 0 if everything is good.
+ * @throws ExitUtil.ExitException if code != 0
+ */
+ public int finish() throws ExitUtil.ExitException {
+ if (exitCode != 0) {
+ throw new ExitUtil.ExitException(exitCode, exitText);
+ }
+ return 0;
+ }
}
/**
* Do the scan/purge.
* @param path path to scan.
- * @param clean purge?
- * @param expectedMarkerCount expected marker count
+ * @param doPurge purge rather than just scan/audit?
+ * @param minMarkerCount min marker count (ignored on purge)
+ * @param maxMarkerCount max marker count (ignored on purge)
* @param limit limit of files to scan; 0 for 'unlimited'
* @param filterPolicy filter policy on a nonauth scan; may be null
* @return result.
@@ -430,8 +506,9 @@ public final class MarkerTool extends S3GuardTool {
@Retries.RetryTranslated
private ScanResult scan(
final Path path,
- final boolean clean,
- final int expectedMarkerCount,
+ final boolean doPurge,
+ final int minMarkerCount,
+ final int maxMarkerCount,
final int limit,
final DirectoryPolicy filterPolicy)
throws IOException, ExitUtil.ExitException {
@@ -458,13 +535,16 @@ public final class MarkerTool extends S3GuardTool {
= tracker.getSurplusMarkers();
Map<Path, DirMarkerTracker.Marker> leafMarkers
= tracker.getLeafMarkers();
- int surplus = surplusMarkers.size();
- if (surplus == 0) {
+ // determine marker count
+ int markerCount = surplusMarkers.size();
+ result.totalMarkerCount = markerCount;
+ result.filteredMarkerCount = markerCount;
+ if (markerCount == 0) {
println(out, "No surplus directory markers were found under %s", path);
} else {
println(out, "Found %d surplus directory marker%s under %s",
- surplus,
- suffix(surplus),
+ markerCount,
+ suffix(markerCount),
path);
for (Path markers : surplusMarkers.keySet()) {
@@ -482,9 +562,9 @@ public final class MarkerTool extends S3GuardTool {
println(out, "These are required to indicate empty directories");
}
- if (clean) {
+ if (doPurge) {
// clean: remove the markers, do not worry about their
- // presence when reporting success/failiure
+ // presence when reporting success/failure
int deletePageSize = storeContext.getConfiguration()
.getInt(BULK_DELETE_PAGE_SIZE,
BULK_DELETE_PAGE_SIZE_DEFAULT);
@@ -503,29 +583,46 @@ public final class MarkerTool extends S3GuardTool {
allowed.forEach(p -> println(out, p.toString()));
}
// recalculate the marker size
- surplus = surplusMarkers.size();
+ markerCount = surplusMarkers.size();
+ result.filteredMarkerCount = markerCount;
}
- if (surplus > expectedMarkerCount) {
+ if (markerCount < minMarkerCount || markerCount > maxMarkerCount) {
// failure
- if (expectedMarkerCount > 0) {
- println(out, "Expected %d marker%s", expectedMarkerCount,
- suffix(surplus));
- }
- println(out, "Surplus markers were found -failing audit");
-
- result.exitCode = EXIT_NOT_ACCEPTABLE;
+ return failScan(result, EXIT_NOT_ACCEPTABLE,
+ "Marker count %d out of range "
+ + "[%d - %d]",
+ markerCount, minMarkerCount, maxMarkerCount);
}
}
// now one little check for whether a limit was reached.
if (!completed) {
- println(out, "Listing limit reached before completing the scan");
- result.exitCode = EXIT_INTERRUPTED;
+ failScan(result, EXIT_INTERRUPTED,
+ "Listing limit (%d) reached before completing the scan", limit);
}
return result;
}
/**
+ * Fail the scan; print the formatted error and update the result.
+ * @param result result to update
+ * @param code Exit code
+ * @param message Error message
+ * @param args arguments for the error message
+ * @return scan result
+ */
+ private ScanResult failScan(
+ ScanResult result,
+ int code,
+ String message,
+ Object...args) {
+ String text = String.format(message, args);
+ result.exitCode = code;
+ result.exitText = text;
+ return result;
+ }
+
+ /**
* Suffix for plurals.
* @param size size to generate a suffix for
* @return "" or "s", depending on size
@@ -587,7 +684,7 @@ public final class MarkerTool extends S3GuardTool {
* Result of a call of {@link #purgeMarkers(DirMarkerTracker, int)};
* included in {@link ScanResult} so must share visibility.
*/
- static final class MarkerPurgeSummary {
+ public static final class MarkerPurgeSummary {
/** Number of markers deleted. */
private int markersDeleted;
@@ -613,14 +710,26 @@ public final class MarkerTool extends S3GuardTool {
}
+ /**
+ * Count of markers deleted.
+ * @return a number, zero when prune==false.
+ */
int getMarkersDeleted() {
return markersDeleted;
}
+ /**
+ * Count of bulk delete requests issued.
+ * @return count of calls made to S3.
+ */
int getDeleteRequests() {
return deleteRequests;
}
+ /**
+ * Total duration of delete requests.
+ * @return a time interval in millis.
+ */
long getTotalDeleteRequestDuration() {
return totalDeleteRequestDuration;
}
@@ -699,25 +808,181 @@ public final class MarkerTool extends S3GuardTool {
/**
* Execute the marker tool, with no checks on return codes.
*
- * @param sourceFS filesystem to use
- * @param path path to scan
- * @param doPurge should markers be purged
- * @param expectedMarkers number of markers expected
- * @param limit limit of files to scan; -1 for 'unlimited'
- * @param nonAuth only use nonauth path count for failure rules
+ * @param scanArgs set of args for the scanner.
* @return the result
*/
@SuppressWarnings("IOResourceOpenedButNotSafelyClosed")
public static MarkerTool.ScanResult execMarkerTool(
- final FileSystem sourceFS,
- final Path path,
- final boolean doPurge,
- final int expectedMarkers,
- final int limit, boolean nonAuth) throws IOException {
- MarkerTool tool = new MarkerTool(sourceFS.getConf());
+ ScanArgs scanArgs) throws IOException {
+ MarkerTool tool = new MarkerTool(scanArgs.getSourceFS().getConf());
tool.setVerbose(LOG.isDebugEnabled());
- return tool.execute(sourceFS, path, doPurge,
- expectedMarkers, limit, nonAuth);
+ return tool.execute(scanArgs);
+ }
+
+ /**
+ * Arguments for the scan.
+ * <p></p>
+ * Uses a builder/argument object because too many arguments were
+ * being created and it was making maintenance harder.
+ */
+ public static final class ScanArgs {
+
+ /** Source FS; must be or wrap an S3A FS. */
+ private final FileSystem sourceFS;
+
+ /** Path to scan. */
+ private final Path path;
+
+ /** Purge? */
+ private final boolean doPurge;
+
+ /** Min marker count (ignored on purge). */
+ private final int minMarkerCount;
+
+ /** Max marker count (ignored on purge). */
+ private final int maxMarkerCount;
+
+ /** Limit of files to scan; 0 for 'unlimited'. */
+ private final int limit;
+
+ /** Consider only markers in nonauth paths as errors. */
+ private final boolean nonAuth;
+
+ /**
+ * @param sourceFS source FS; must be or wrap an S3A FS.
+ * @param path path to scan.
+ * @param doPurge purge?
+ * @param minMarkerCount min marker count (ignored on purge)
+ * @param maxMarkerCount max marker count (ignored on purge)
+ * @param limit limit of files to scan; 0 for 'unlimited'
+ * @param nonAuth consider only markers in nonauth paths as errors
+ */
+ private ScanArgs(final FileSystem sourceFS,
+ final Path path,
+ final boolean doPurge,
+ final int minMarkerCount,
+ final int maxMarkerCount,
+ final int limit,
+ final boolean nonAuth) {
+ this.sourceFS = sourceFS;
+ this.path = path;
+ this.doPurge = doPurge;
+ this.minMarkerCount = minMarkerCount;
+ this.maxMarkerCount = maxMarkerCount;
+ this.limit = limit;
+ this.nonAuth = nonAuth;
+ }
+
+ FileSystem getSourceFS() {
+ return sourceFS;
+ }
+
+ Path getPath() {
+ return path;
+ }
+
+ boolean isDoPurge() {
+ return doPurge;
+ }
+
+ int getMinMarkerCount() {
+ return minMarkerCount;
+ }
+
+ int getMaxMarkerCount() {
+ return maxMarkerCount;
+ }
+
+ int getLimit() {
+ return limit;
+ }
+
+ boolean isNonAuth() {
+ return nonAuth;
+ }
+ }
+
+ /**
+ * Builder of the scan arguments.
+ */
+ public static final class ScanArgsBuilder {
+
+ /** Source FS; must be or wrap an S3A FS. */
+ private FileSystem sourceFS;
+
+ /** Path to scan. */
+ private Path path;
+
+ /** Purge? */
+ private boolean doPurge = false;
+
+ /** Min marker count (ignored on purge). */
+ private int minMarkerCount = 0;
+
+ /** Max marker count (ignored on purge). */
+ private int maxMarkerCount = 0;
+
+ /** Limit of files to scan; 0 for 'unlimited'. */
+ private int limit = UNLIMITED_LISTING;
+
+ /** Consider only markers in nonauth paths as errors. */
+ private boolean nonAuth = false;
+
+ /** Source FS; must be or wrap an S3A FS. */
+ public ScanArgsBuilder withSourceFS(final FileSystem source) {
+ this.sourceFS = source;
+ return this;
+ }
+
+ /** Path to scan. */
+ public ScanArgsBuilder withPath(final Path p) {
+ this.path = p;
+ return this;
+ }
+
+ /** Purge? */
+ public ScanArgsBuilder withDoPurge(final boolean d) {
+ this.doPurge = d;
+ return this;
+ }
+
+ /** Min marker count (ignored on purge). */
+ public ScanArgsBuilder withMinMarkerCount(final int min) {
+ this.minMarkerCount = min;
+ return this;
+ }
+
+ /** Max marker count (ignored on purge). */
+ public ScanArgsBuilder withMaxMarkerCount(final int max) {
+ this.maxMarkerCount = max;
+ return this;
+ }
+
+ /** Limit of files to scan; 0 for 'unlimited'. */
+ public ScanArgsBuilder withLimit(final int l) {
+ this.limit = l;
+ return this;
+ }
+
+ /** Consider only markers in nonauth paths as errors. */
+ public ScanArgsBuilder withNonAuth(final boolean b) {
+ this.nonAuth = b;
+ return this;
+ }
+
+ /**
+ * Build the actual argument instance.
+ * @return the arguments to pass in
+ */
+ public ScanArgs build() {
+ return new ScanArgs(sourceFS,
+ path,
+ doPurge,
+ minMarkerCount,
+ maxMarkerCount,
+ limit,
+ nonAuth);
+ }
}
}
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/directory_markers.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/directory_markers.md
index e9622ce..53030d6 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/directory_markers.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/directory_markers.md
@@ -366,7 +366,7 @@ Syntax
```
> hadoop s3guard markers -verbose -nonauth
-markers (-audit | -clean) [-expected <count>] [-out <filename>] [-limit <limit>] [-nonauth] [-verbose] <PATH>
+markers (-audit | -clean) [-min <count>] [-max <count>] [-out <filename>] [-limit <limit>] [-nonauth] [-verbose] <PATH>
View and manipulate S3 directory markers
```
@@ -377,7 +377,8 @@ markers (-audit | -clean) [-expected <count>] [-out <filename>] [-limit <limit>]
|-------------------------|-------------------------|
| `-audit` | Audit the path for surplus markers |
| `-clean` | Clean all surplus markers under a path |
-| `-expected <count>]` | Expected number of markers to find (primarily for testing) |
+| `-min <count>` | Minimum number of markers an audit must find (default: 0) |
+| `-max <count>]` | Minimum number of markers an audit must find (default: 0) |
| `-limit <count>]` | Limit the number of objects to scan |
| `-nonauth` | Only consider markers in non-authoritative paths as errors |
| `-out <filename>` | Save a list of all markers found to the nominated file |
@@ -489,7 +490,7 @@ The `markers clean` command will clean the directory tree of all surplus markers
The `-verbose` option prints more detail on the operation as well as some IO statistics
```
-> hadoop s3guard markers -verbose -clean s3a://london/
+> hadoop s3guard markers -clean -verbose s3a://london/
2020-08-05 18:33:25,303 [main] INFO impl.DirectoryPolicyImpl (DirectoryPolicyImpl.java:getDirectoryPolicy(143)) - Directory markers will be kept on authoritative paths
The directory marker policy of s3a://london is "Authoritative"
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3ATestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3ATestBase.java
index a2ee9ea..da679cd 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3ATestBase.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3ATestBase.java
@@ -90,10 +90,23 @@ public abstract class AbstractS3ATestBase extends AbstractFSContractTestBase
&& !fs.getDirectoryMarkerPolicy()
.keepDirectoryMarkers(methodPath)
&& fs.isDirectory(methodPath)) {
- MarkerTool.ScanResult result = MarkerTool.execMarkerTool(fs,
- methodPath, true, 0, UNLIMITED_LISTING, false);
- assertEquals("Audit of " + methodPath + " failed: " + result,
+ MarkerTool.ScanResult result = MarkerTool.execMarkerTool(
+ new MarkerTool.ScanArgsBuilder()
+ .withSourceFS(fs)
+ .withPath(methodPath)
+ .withDoPurge(true)
+ .withMinMarkerCount(0)
+ .withMaxMarkerCount(0)
+ .withLimit(UNLIMITED_LISTING)
+ .withNonAuth(false)
+ .build());
+ final String resultStr = result.toString();
+ assertEquals("Audit of " + methodPath + " failed: "
+ + resultStr,
0, result.getExitCode());
+ assertEquals("Marker Count under " + methodPath
+ + " non-zero: " + resultStr,
+ 0, result.getFilteredMarkerCount());
}
} catch (FileNotFoundException ignored) {
} catch (Exception e) {
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/AbstractMarkerToolTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/AbstractMarkerToolTest.java
index 00e62d9..797d33c 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/AbstractMarkerToolTest.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/AbstractMarkerToolTest.java
@@ -309,11 +309,15 @@ public class AbstractMarkerToolTest extends AbstractS3ATestBase {
final boolean nonAuth) throws IOException {
MarkerTool.ScanResult result = MarkerTool.execMarkerTool(
- sourceFS,
- path,
- doPurge,
- expectedMarkers,
- limit, nonAuth);
+ new MarkerTool.ScanArgsBuilder()
+ .withSourceFS(sourceFS)
+ .withPath(path)
+ .withDoPurge(doPurge)
+ .withMinMarkerCount(expectedMarkers)
+ .withMaxMarkerCount(expectedMarkers)
+ .withLimit(limit)
+ .withNonAuth(nonAuth)
+ .build());
Assertions.assertThat(result.getExitCode())
.describedAs("Exit code of marker(%s, %s, %d) -> %s",
path, doPurge, expectedMarkers, result)
@@ -330,5 +334,4 @@ public class AbstractMarkerToolTest extends AbstractS3ATestBase {
return "-" + s;
}
-
}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/ITestMarkerTool.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/ITestMarkerTool.java
index 4a81b1a..4c11a33 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/ITestMarkerTool.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/ITestMarkerTool.java
@@ -259,7 +259,8 @@ public class ITestMarkerTool extends AbstractMarkerToolTest {
AUDIT,
m(OPT_LIMIT), 0,
m(OPT_OUT), audit,
- m(OPT_EXPECTED), expectedMarkersWithBaseDir,
+ m(OPT_MIN), expectedMarkersWithBaseDir,
+ m(OPT_MAX), expectedMarkersWithBaseDir,
createdPaths.base);
expectMarkersInOutput(audit, expectedMarkersWithBaseDir);
}
@@ -286,9 +287,6 @@ public class ITestMarkerTool extends AbstractMarkerToolTest {
m(OPT_LIMIT), 2,
CLEAN,
createdPaths.base);
- run(MARKERS, V,
- AUDIT,
- createdPaths.base);
}
/**
@@ -302,7 +300,8 @@ public class ITestMarkerTool extends AbstractMarkerToolTest {
describe("Audit a few thousand landsat objects");
final File audit = tempAuditFile();
- run(MARKERS,
+ runToFailure(EXIT_INTERRUPTED,
+ MARKERS,
AUDIT,
m(OPT_LIMIT), 3000,
m(OPT_OUT), audit,
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org