You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cn...@apache.org on 2016/06/03 16:33:31 UTC
[3/6] hadoop git commit: HADOOP-13171. Add StorageStatistics to S3A;
instrument some more operations. Contributed by Steve Loughran.
http://git-wip-us.apache.org/repos/asf/hadoop/blob/043a0c2e/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
new file mode 100644
index 0000000..d29cb2f
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+/**
+ * Statistics which are collected in S3A.
+ * These statistics are available at a low level in {@link S3AStorageStatistics}
+ * and as metrics in {@link S3AInstrumentation}.
+ * <p>
+ * Every constant pairs a short symbol string with a human-readable
+ * description; {@link #toString()} returns the symbol.
+ */
+public enum Statistic {
+
+  DIRECTORIES_CREATED("directories_created",
+      "Total number of directories created through the object store."),
+  DIRECTORIES_DELETED("directories_deleted",
+      "Total number of directories deleted through the object store."),
+  FILES_COPIED("files_copied",
+      "Total number of files copied within the object store."),
+  FILES_COPIED_BYTES("files_copied_bytes",
+      "Total number of bytes copied within the object store."),
+  FILES_CREATED("files_created",
+      "Total number of files created through the object store."),
+  FILES_DELETED("files_deleted",
+      "Total number of files deleted from the object store."),
+  IGNORED_ERRORS("ignored_errors", "Errors caught and ignored"),
+  INVOCATION_COPY_FROM_LOCAL_FILE("invocations_copyfromlocalfile",
+      "Calls of copyFromLocalFile()"),
+  INVOCATION_EXISTS("invocations_exists",
+      "Calls of exists()"),
+  INVOCATION_GET_FILE_STATUS("invocations_getfilestatus",
+      "Calls of getFileStatus()"),
+  INVOCATION_GLOB_STATUS("invocations_globstatus",
+      "Calls of globStatus()"),
+  INVOCATION_IS_DIRECTORY("invocations_is_directory",
+      "Calls of isDirectory()"),
+  INVOCATION_IS_FILE("invocations_is_file",
+      "Calls of isFile()"),
+  INVOCATION_LIST_FILES("invocations_listfiles",
+      "Calls of listFiles()"),
+  INVOCATION_LIST_LOCATED_STATUS("invocations_listlocatedstatus",
+      "Calls of listLocatedStatus()"),
+  INVOCATION_LIST_STATUS("invocations_liststatus",
+      "Calls of listStatus()"),
+  // symbol previously misspelled as "invocations_mdkirs"
+  INVOCATION_MKDIRS("invocations_mkdirs",
+      "Calls of mkdirs()"),
+  INVOCATION_RENAME("invocations_rename",
+      "Calls of rename()"),
+  OBJECT_COPY_REQUESTS("object_copy_requests", "Object copy requests"),
+  OBJECT_DELETE_REQUESTS("object_delete_requests", "Object delete requests"),
+  OBJECT_LIST_REQUESTS("object_list_requests",
+      "Number of object listings made"),
+  OBJECT_METADATA_REQUESTS("object_metadata_requests",
+      "Number of requests for object metadata"),
+  OBJECT_MULTIPART_UPLOAD_ABORTED("object_multipart_aborted",
+      "Object multipart upload aborted"),
+  OBJECT_PUT_REQUESTS("object_put_requests",
+      "Object put/multipart upload count"),
+  OBJECT_PUT_BYTES("object_put_bytes", "number of bytes uploaded"),
+  STREAM_ABORTED("streamAborted",
+      "Count of times the TCP stream was aborted"),
+  STREAM_BACKWARD_SEEK_OPERATIONS("streamBackwardSeekOperations",
+      "Number of executed seek operations which went backwards in a stream"),
+  STREAM_CLOSED("streamClosed", "Count of times the TCP stream was closed"),
+  STREAM_CLOSE_OPERATIONS("streamCloseOperations",
+      "Total count of times an attempt to close a data stream was made"),
+  STREAM_FORWARD_SEEK_OPERATIONS("streamForwardSeekOperations",
+      "Number of executed seek operations which went forward in a stream"),
+  STREAM_OPENED("streamOpened",
+      "Total count of times an input stream to object store was opened"),
+  // description was previously swapped with that of STREAM_SEEK_OPERATIONS
+  STREAM_READ_EXCEPTIONS("streamReadExceptions",
+      "Number of read exceptions caught and attempted to recover from"),
+  STREAM_READ_FULLY_OPERATIONS("streamReadFullyOperations",
+      "count of readFully() operations in streams"),
+  STREAM_READ_OPERATIONS("streamReadOperations",
+      "Count of read() operations in streams"),
+  STREAM_READ_OPERATIONS_INCOMPLETE("streamReadOperationsIncomplete",
+      "Count of incomplete read() operations in streams"),
+  STREAM_SEEK_BYTES_BACKWARDS("streamBytesBackwardsOnSeek",
+      "Count of bytes moved backwards during seek operations"),
+  STREAM_SEEK_BYTES_READ("streamBytesRead",
+      "Count of bytes read during seek() in stream operations"),
+  STREAM_SEEK_BYTES_SKIPPED("streamBytesSkippedOnSeek",
+      "Count of bytes skipped during forward seek operation"),
+  // description was previously swapped with that of STREAM_READ_EXCEPTIONS
+  STREAM_SEEK_OPERATIONS("streamSeekOperations",
+      "Number of seek operations invoked on input streams");
+
+  /**
+   * Bind a constant to its symbol and description.
+   * @param symbol symbol string of the statistic
+   * @param description human-readable description
+   */
+  Statistic(String symbol, String description) {
+    this.symbol = symbol;
+    this.description = description;
+  }
+
+  private final String symbol;
+  private final String description;
+
+  /**
+   * Get the symbol of this statistic.
+   * @return the symbol string passed to the constructor.
+   */
+  public String getSymbol() {
+    return symbol;
+  }
+
+  /**
+   * Get a statistic from a symbol.
+   * The lookup is a linear scan over all constants using an exact
+   * (case-sensitive) string match.
+   * @param symbol statistic to look up; may be null
+   * @return the matching statistic, or null if the symbol is null or
+   * does not match any constant.
+   */
+  public static Statistic fromSymbol(String symbol) {
+    if (symbol != null) {
+      for (Statistic opType : values()) {
+        if (opType.getSymbol().equals(symbol)) {
+          return opType;
+        }
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Get the description of this statistic.
+   * @return the human-readable description passed to the constructor.
+   */
+  public String getDescription() {
+    return description;
+  }
+
+  /**
+   * The string value is simply the symbol.
+   * This makes this operation very low cost.
+   * @return the symbol of this statistic.
+   */
+  @Override
+  public String toString() {
+    return symbol;
+  }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/043a0c2e/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
index 568ffff..8cd2155 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
@@ -738,9 +738,19 @@ The exact number of operations to perform is configurable in the option
Larger values generate more load, and are recommended when testing locally,
or in batch runs.
-Smaller values should result in faster test runs, especially when the object
+Smaller values result in faster test runs, especially when the object
store is a long way away.
+Operations which work on directories have a separate option: this controls
+the width and depth of tests creating recursive directories. Larger
+values create exponentially more directories, with consequent performance
+impact.
+
+ <property>
+ <name>scale.test.directory.count</name>
+ <value>2</value>
+ </property>
+
DistCp tests targeting S3A support a configurable file size. The default is
10 MB, but the configuration value is expressed in KB so that it can be tuned
smaller to achieve faster test runs.
http://git-wip-us.apache.org/repos/asf/hadoop/blob/043a0c2e/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java
index a4f9b99..04010d6 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java
@@ -21,7 +21,9 @@ package org.apache.hadoop.fs.s3a;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileContext;
+import org.junit.Assert;
import org.junit.internal.AssumptionViolatedException;
+import org.slf4j.Logger;
import java.io.IOException;
import java.net.URI;
@@ -190,4 +192,155 @@ public class S3ATestUtils {
}
}
+  /**
+   * Invoke {@link MetricDiff#reset()} on each of the supplied metric diffs,
+   * in the order given.
+   * @param metrics metrics to reset
+   */
+  public static void reset(S3ATestUtils.MetricDiff... metrics) {
+    final int count = metrics.length;
+    for (int i = 0; i < count; i++) {
+      metrics[i].reset();
+    }
+  }
+
+  /**
+   * Log the string form of each supplied metric diff at info level,
+   * one log entry per metric, in the order given.
+   * @param log log to print the metrics to.
+   * @param metrics metrics to process
+   */
+  public static void print(Logger log, S3ATestUtils.MetricDiff... metrics) {
+    final int count = metrics.length;
+    for (int i = 0; i < count; i++) {
+      final String line = metrics[i].toString();
+      log.info(line);
+    }
+  }
+
+  /**
+   * Log every supplied metric diff, and only once all of them have been
+   * logged, reset every one of them.
+   * @param log log to print the metrics to.
+   * @param metrics metrics to process
+   */
+  public static void printThenReset(Logger log,
+      S3ATestUtils.MetricDiff... metrics) {
+    // log everything first so that the output shows the pre-reset values
+    S3ATestUtils.print(log, metrics);
+    S3ATestUtils.reset(metrics);
+  }
+
+  /**
+   * Helper class to do diffs of metrics.
+   * An instance remembers the value a counter had at the last
+   * {@link #reset()} and reports how far the counter has moved since then.
+   */
+  public static final class MetricDiff {
+    private final S3AFileSystem fs;
+    private final Statistic statistic;
+    private long startingValue;
+
+    /**
+     * Constructor.
+     * Invokes {@link #reset()} so it is immediately capable of measuring the
+     * difference in metric values.
+     *
+     * @param fs the filesystem to monitor
+     * @param statistic the statistic to monitor.
+     */
+    public MetricDiff(S3AFileSystem fs, Statistic statistic) {
+      this.fs = fs;
+      this.statistic = statistic;
+      reset();
+    }
+
+    /**
+     * Reset the starting value to the current value.
+     * Diffs will be against this new value.
+     */
+    public void reset() {
+      startingValue = currentValue();
+    }
+
+    /**
+     * Get the current value of the metric, as read from the
+     * filesystem's instrumentation.
+     * @return the latest value.
+     */
+    public long currentValue() {
+      return fs.getInstrumentation().getCounterValue(statistic);
+    }
+
+    /**
+     * Get the difference between the current value and
+     * {@link #startingValue}.
+     * @return the difference.
+     */
+    public long diff() {
+      return currentValue() - startingValue;
+    }
+
+    /**
+     * Build a string of the statistic's symbol, the starting value,
+     * the current value and the difference between them.
+     * The counter is read once so that the reported values are consistent.
+     * @return a description of this diff for logs and assertion messages.
+     */
+    @Override
+    public String toString() {
+      long c = currentValue();
+      final StringBuilder sb = new StringBuilder(statistic.getSymbol());
+      sb.append(" starting=").append(startingValue);
+      sb.append(" current=").append(c);
+      sb.append(" diff=").append(c - startingValue);
+      return sb.toString();
+    }
+
+    /**
+     * Assert that the value of {@link #diff()} matches that expected.
+     * @param expected expected value.
+     */
+    public void assertDiffEquals(long expected) {
+      Assert.assertEquals("Count of " + this,
+          expected, diff());
+    }
+
+    /**
+     * Assert that the value of {@link #diff()} matches that of another
+     * instance.
+     * @param that the other metric diff instance.
+     */
+    public void assertDiffEquals(MetricDiff that) {
+      Assert.assertEquals(this.toString() + " != " + that,
+          this.diff(), that.diff());
+    }
+
+    /**
+     * Comparator for assertions.
+     * Compares the {@link #diff()} values, not the absolute counter values.
+     * @param that other metric diff
+     * @return true if this diff is {@code ==} the other's
+     */
+    public boolean diffEquals(MetricDiff that) {
+      return this.diff() == that.diff();
+    }
+
+    /**
+     * Comparator for assertions.
+     * Compares the {@link #diff()} values, not the absolute counter values.
+     * @param that other metric diff
+     * @return true if this diff is {@code <} the other's
+     */
+    public boolean diffLessThan(MetricDiff that) {
+      return this.diff() < that.diff();
+    }
+
+    /**
+     * Comparator for assertions.
+     * Compares the {@link #diff()} values, not the absolute counter values.
+     * @param that other metric diff
+     * @return true if this diff is {@code <=} the other's
+     */
+    public boolean diffLessThanOrEquals(MetricDiff that) {
+      return this.diff() <= that.diff();
+    }
+
+    /**
+     * Get the statistic.
+     * @return the statistic
+     */
+    public Statistic getStatistic() {
+      return statistic;
+    }
+
+    /**
+     * Get the starting value; that set in the last {@link #reset()}.
+     * @return the starting value for diffs.
+     */
+    public long getStartingValue() {
+      return startingValue;
+    }
+  }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/043a0c2e/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AFileOperationCost.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AFileOperationCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AFileOperationCost.java
new file mode 100644
index 0000000..0a8dd2d
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AFileOperationCost.java
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.AbstractFSContract;
+import org.apache.hadoop.fs.contract.AbstractFSContractTestBase;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
+import org.apache.hadoop.fs.contract.s3a.S3AContract;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.net.URI;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.*;
+import static org.apache.hadoop.fs.s3a.Statistic.*;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.MetricDiff;
+import static org.apache.hadoop.test.GenericTestUtils.getTestDir;
+
+/**
+ * Use metrics to make assertions about the cost of file operations,
+ * primarily {@link S3AFileSystem#getFileStatus(Path)}.
+ * Each test resets the metric diffs immediately before the operation under
+ * test, then asserts on the number of object metadata and object list
+ * requests which were counted while it ran.
+ */
+public class TestS3AFileOperationCost extends AbstractFSContractTestBase {
+
+  /** Diff of {@code OBJECT_METADATA_REQUESTS}; created in {@link #setup()}. */
+  private MetricDiff metadataRequests;
+  /** Diff of {@code OBJECT_LIST_REQUESTS}; created in {@link #setup()}. */
+  private MetricDiff listRequests;
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(TestS3AFileOperationCost.class);
+
+  @Override
+  protected AbstractFSContract createContract(Configuration conf) {
+    return new S3AContract(conf);
+  }
+
+  /**
+   * Get the filesystem, cast to an S3A filesystem.
+   * @return the filesystem of the superclass as an {@link S3AFileSystem}.
+   */
+  @Override
+  public S3AFileSystem getFileSystem() {
+    return (S3AFileSystem) super.getFileSystem();
+  }
+
+  /**
+   * Run the superclass setup, then create the metric diffs against the
+   * test filesystem.
+   * @throws Exception on any setup failure
+   */
+  @Override
+  public void setup() throws Exception {
+    super.setup();
+    S3AFileSystem fs = getFileSystem();
+    metadataRequests = new MetricDiff(fs, OBJECT_METADATA_REQUESTS);
+    listRequests = new MetricDiff(fs, OBJECT_LIST_REQUESTS);
+  }
+
+  @Test
+  public void testCostOfGetFileStatusOnFile() throws Throwable {
+    describe("performing getFileStatus on a file");
+    Path simpleFile = path("simple.txt");
+    S3AFileSystem fs = getFileSystem();
+    touch(fs, simpleFile);
+    // only count the requests issued by getFileStatus() itself
+    resetMetricDiffs();
+    S3AFileStatus status = fs.getFileStatus(simpleFile);
+    assertTrue("not a file: " + status, status.isFile());
+    metadataRequests.assertDiffEquals(1);
+    listRequests.assertDiffEquals(0);
+  }
+
+  /**
+   * Reset the metadata and list request diffs so that subsequent
+   * assertions only cover operations performed after this call.
+   */
+  private void resetMetricDiffs() {
+    reset(metadataRequests, listRequests);
+  }
+
+  @Test
+  public void testCostOfGetFileStatusOnEmptyDir() throws Throwable {
+    describe("performing getFileStatus on an empty directory");
+    S3AFileSystem fs = getFileSystem();
+    Path dir = path("empty");
+    fs.mkdirs(dir);
+    resetMetricDiffs();
+    S3AFileStatus status = fs.getFileStatus(dir);
+    assertTrue("not empty: " + status, status.isEmptyDirectory());
+    metadataRequests.assertDiffEquals(2);
+    listRequests.assertDiffEquals(0);
+  }
+
+  @Test
+  public void testCostOfGetFileStatusOnMissingFile() throws Throwable {
+    describe("performing getFileStatus on a missing file");
+    S3AFileSystem fs = getFileSystem();
+    Path path = path("missing");
+    resetMetricDiffs();
+    try {
+      S3AFileStatus status = fs.getFileStatus(path);
+      fail("Got a status back from a missing file path " + status);
+    } catch (FileNotFoundException expected) {
+      // expected
+    }
+    metadataRequests.assertDiffEquals(2);
+    listRequests.assertDiffEquals(1);
+  }
+
+  @Test
+  public void testCostOfGetFileStatusOnMissingSubPath() throws Throwable {
+    describe("performing getFileStatus on a missing file"
+        + " under a missing directory");
+    S3AFileSystem fs = getFileSystem();
+    Path path = path("missingdir/missingpath");
+    resetMetricDiffs();
+    try {
+      S3AFileStatus status = fs.getFileStatus(path);
+      fail("Got a status back from a missing file path " + status);
+    } catch (FileNotFoundException expected) {
+      // expected
+    }
+    metadataRequests.assertDiffEquals(2);
+    listRequests.assertDiffEquals(1);
+  }
+
+  @Test
+  public void testCostOfGetFileStatusOnNonEmptyDir() throws Throwable {
+    describe("performing getFileStatus on a non-empty directory");
+    S3AFileSystem fs = getFileSystem();
+    Path dir = path("empty");
+    fs.mkdirs(dir);
+    Path simpleFile = new Path(dir, "simple.txt");
+    touch(fs, simpleFile);
+    resetMetricDiffs();
+    S3AFileStatus status = fs.getFileStatus(dir);
+    if (status.isEmptyDirectory()) {
+      // erroneous state
+      // capture the filesystem string before the listing is performed
+      String fsState = fs.toString();
+      fail("FileStatus says directory is empty: " + status
+          + "\n" + ContractTestUtils.ls(fs, dir)
+          + "\n" + fsState);
+    }
+    metadataRequests.assertDiffEquals(2);
+    listRequests.assertDiffEquals(1);
+  }
+
+  @Test
+  public void testCostOfCopyFromLocalFile() throws Throwable {
+    describe("testCostOfCopyFromLocalFile");
+    File localTestDir = getTestDir("tmp");
+    localTestDir.mkdirs();
+    File tmpFile = File.createTempFile("tests3acost", ".txt",
+        localTestDir);
+    // createTempFile() creates the file; delete it and keep only the name
+    tmpFile.delete();
+    try {
+      URI localFileURI = tmpFile.toURI();
+      FileSystem localFS = FileSystem.get(localFileURI,
+          getFileSystem().getConf());
+      Path localPath = new Path(localFileURI);
+      int len = 10 * 1024;
+      byte[] data = dataset(len, 'A', 'Z');
+      writeDataset(localFS, localPath, data, len, 1024, true);
+      S3AFileSystem s3a = getFileSystem();
+      // the diffs are created after the local write, so they only
+      // cover the upload and the read-back verification
+      MetricDiff copyLocalOps = new MetricDiff(s3a,
+          INVOCATION_COPY_FROM_LOCAL_FILE);
+      MetricDiff putRequests = new MetricDiff(s3a,
+          OBJECT_PUT_REQUESTS);
+      MetricDiff putBytes = new MetricDiff(s3a,
+          OBJECT_PUT_BYTES);
+
+      Path remotePath = path("copied");
+      s3a.copyFromLocalFile(false, true, localPath, remotePath);
+      verifyFileContents(s3a, remotePath, data);
+      copyLocalOps.assertDiffEquals(1);
+      putRequests.assertDiffEquals(1);
+      putBytes.assertDiffEquals(len);
+      // print final stats
+      LOG.info("Filesystem {}", s3a);
+    } finally {
+      tmpFile.delete();
+    }
+  }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/043a0c2e/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java
index d65f693..21639b1 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java
@@ -34,13 +34,11 @@ import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.InputStream;
-import java.util.Locale;
-
-import static org.junit.Assume.assumeTrue;
/**
* Base class for scale tests; here is where the common scale configuration
@@ -51,11 +49,57 @@ public class S3AScaleTestBase extends Assert implements S3ATestConstants {
@Rule
public TestName methodName = new TestName();
+ @Rule
+ public Timeout testTimeout = new Timeout(30 * 60 * 1000);
+
@BeforeClass
public static void nameThread() {
Thread.currentThread().setName("JUnit");
}
+ /**
+ * The number of operations to perform: {@value}.
+ */
+ public static final String KEY_OPERATION_COUNT =
+ SCALE_TEST + "operation.count";
+
+ /**
+ * The number of directory operations to perform: {@value}.
+ */
+ public static final String KEY_DIRECTORY_COUNT =
+ SCALE_TEST + "directory.count";
+
+ /**
+ * The readahead buffer: {@value}.
+ */
+ public static final String KEY_READ_BUFFER_SIZE =
+ S3A_SCALE_TEST + "read.buffer.size";
+
+ public static final int DEFAULT_READ_BUFFER_SIZE = 16384;
+
+ /**
+ * Key for a multi MB test file: {@value}.
+ */
+ public static final String KEY_CSVTEST_FILE =
+ S3A_SCALE_TEST + "csvfile";
+
+ /**
+ * Default path for the multi MB test file: {@value}.
+ */
+ public static final String DEFAULT_CSVTEST_FILE
+ = "s3a://landsat-pds/scene_list.gz";
+
+ /**
+ * The default number of operations to perform: {@value}.
+ */
+ public static final long DEFAULT_OPERATION_COUNT = 2005;
+
+ /**
+ * Default number of directories to create when performing
+ * directory performance/scale tests.
+ */
+ public static final int DEFAULT_DIRECTORY_COUNT = 2;
+
protected S3AFileSystem fs;
protected static final Logger LOG =
@@ -132,108 +176,4 @@ public class S3AScaleTestBase extends Assert implements S3ATestConstants {
}
}
- /**
- * Make times more readable, by adding a "," every three digits.
- * @param nanos nanos or other large number
- * @return a string for logging
- */
- protected static String toHuman(long nanos) {
- return String.format(Locale.ENGLISH, "%,d", nanos);
- }
-
- /**
- * Log the bandwidth of a timer as inferred from the number of
- * bytes processed.
- * @param timer timer
- * @param bytes bytes processed in the time period
- */
- protected void bandwidth(NanoTimer timer, long bytes) {
- LOG.info("Bandwidth = {} MB/S",
- timer.bandwidthDescription(bytes));
- }
-
- /**
- * Work out the bandwidth in MB/s
- * @param bytes bytes
- * @param durationNS duration in nanos
- * @return the number of megabytes/second of the recorded operation
- */
- public static double bandwidthMBs(long bytes, long durationNS) {
- return (bytes * 1000.0 ) / durationNS;
- }
-
- /**
- * A simple class for timing operations in nanoseconds, and for
- * printing some useful results in the process.
- */
- protected static class NanoTimer {
- final long startTime;
- long endTime;
-
- public NanoTimer() {
- startTime = now();
- }
-
- /**
- * End the operation
- * @return the duration of the operation
- */
- public long end() {
- endTime = now();
- return duration();
- }
-
- /**
- * End the operation; log the duration
- * @param format message
- * @param args any arguments
- * @return the duration of the operation
- */
- public long end(String format, Object... args) {
- long d = end();
- LOG.info("Duration of {}: {} nS",
- String.format(format, args), toHuman(d));
- return d;
- }
-
- long now() {
- return System.nanoTime();
- }
-
- long duration() {
- return endTime - startTime;
- }
-
- double bandwidth(long bytes) {
- return S3AScaleTestBase.bandwidthMBs(bytes, duration());
- }
-
- /**
- * Bandwidth as bytes per second
- * @param bytes bytes in
- * @return the number of bytes per second this operation timed.
- */
- double bandwidthBytes(long bytes) {
- return (bytes * 1.0 ) / duration();
- }
-
- /**
- * How many nanoseconds per byte
- * @param bytes bytes processed in this time period
- * @return the nanoseconds it took each byte to be processed
- */
- long nanosPerByte(long bytes) {
- return duration() / bytes;
- }
-
- /**
- * Get a description of the bandwidth, even down to fractions of
- * a MB
- * @param bytes bytes processed
- * @return bandwidth
- */
- String bandwidthDescription(long bytes) {
- return String.format("%,.6f", bandwidth(bytes));
- }
- }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/043a0c2e/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java
index af1883e..5e07dcb 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java
@@ -20,9 +20,7 @@ package org.apache.hadoop.fs.s3a.scale;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.ContractTestUtils;
-import org.junit.Rule;
import org.junit.Test;
-import org.junit.rules.Timeout;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -34,15 +32,13 @@ import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
-import static org.junit.Assert.assertEquals;
-
+/**
+ * Test some scalable operations related to file renaming and deletion.
+ */
public class TestS3ADeleteManyFiles extends S3AScaleTestBase {
private static final Logger LOG =
LoggerFactory.getLogger(TestS3ADeleteManyFiles.class);
- @Rule
- public Timeout testTimeout = new Timeout(30 * 60 * 1000);
-
/**
* CAUTION: If this test starts failing, please make sure that the
* {@link org.apache.hadoop.fs.s3a.Constants#MAX_THREADS} configuration is not
http://git-wip-us.apache.org/repos/asf/hadoop/blob/043a0c2e/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADirectoryPerformance.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADirectoryPerformance.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADirectoryPerformance.java
new file mode 100644
index 0000000..7ece394
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADirectoryPerformance.java
@@ -0,0 +1,189 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.scale;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.Statistic;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+
+import static org.apache.hadoop.fs.s3a.Statistic.*;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
+import static org.apache.hadoop.fs.contract.ContractTestUtils.*;
+
+/**
+ * Test the performance of listing files/directories.
+ * Timings and request counts are logged; the only assertions are that
+ * the different listing mechanisms find the expected number of files and
+ * that the list request diffs agree.
+ */
+public class TestS3ADirectoryPerformance extends S3AScaleTestBase {
+  private static final Logger LOG = LoggerFactory.getLogger(
+      TestS3ADirectoryPerformance.class);
+
+  /**
+   * Create a directory tree, then time and count the requests made when
+   * scanning it through a tree walk, {@code listFiles(path, true)} and
+   * {@code globStatus()}. The tree is deleted at the end of the run, even
+   * if the scan phase fails.
+   * @throws Throwable on any failure
+   */
+  @Test
+  public void testListOperations() throws Throwable {
+    describe("Test recursive list operations");
+    final Path scaleTestDir = getTestPath();
+    final Path listDir = new Path(scaleTestDir, "lists");
+
+    // scale factor.
+    int scale = getConf().getInt(KEY_DIRECTORY_COUNT, DEFAULT_DIRECTORY_COUNT);
+    int width = scale;
+    int depth = scale;
+    int files = scale;
+    MetricDiff metadataRequests = new MetricDiff(fs, OBJECT_METADATA_REQUESTS);
+    MetricDiff listRequests = new MetricDiff(fs, OBJECT_LIST_REQUESTS);
+    MetricDiff listStatusCalls = new MetricDiff(fs, INVOCATION_LIST_FILES);
+    MetricDiff getFileStatusCalls =
+        new MetricDiff(fs, INVOCATION_GET_FILE_STATUS);
+    NanoTimer createTimer = new NanoTimer();
+    TreeScanResults created =
+        createSubdirs(fs, listDir, depth, width, files, 0);
+    // add some empty directories
+    int emptyDepth = 1 * scale;
+    int emptyWidth = 3 * scale;
+
+    created.add(createSubdirs(fs, listDir, emptyDepth, emptyWidth, 0,
+        0, "empty", "f-", ""));
+    createTimer.end("Time to create %s", created);
+    LOG.info("Time per operation: {}",
+        toHuman(createTimer.nanosPerOperation(created.totalCount())));
+    printThenReset(LOG,
+        metadataRequests,
+        listRequests,
+        listStatusCalls,
+        getFileStatusCalls);
+
+    try {
+      // Scan the directory via an explicit tree walk.
+      // This is the baseline for any listing speedups.
+      MetricDiff treewalkMetadataRequests =
+          new MetricDiff(fs, OBJECT_METADATA_REQUESTS);
+      MetricDiff treewalkListRequests = new MetricDiff(fs,
+          OBJECT_LIST_REQUESTS);
+      MetricDiff treewalkListStatusCalls = new MetricDiff(fs,
+          INVOCATION_LIST_FILES);
+      MetricDiff treewalkGetFileStatusCalls =
+          new MetricDiff(fs, INVOCATION_GET_FILE_STATUS);
+      NanoTimer treeWalkTimer = new NanoTimer();
+      TreeScanResults treewalkResults = treeWalk(fs, listDir);
+      treeWalkTimer.end("List status via treewalk");
+
+      print(LOG,
+          treewalkMetadataRequests,
+          treewalkListRequests,
+          treewalkListStatusCalls,
+          treewalkGetFileStatusCalls);
+      // message names the treewalk: this is not the listFiles() check
+      assertEquals("Files found in treewalk " +
+              " created=" + created + " listed=" + treewalkResults,
+          created.getFileCount(), treewalkResults.getFileCount());
+
+
+      // listFiles() does the recursion internally
+      NanoTimer listFilesRecursiveTimer = new NanoTimer();
+
+      TreeScanResults listFilesResults = new TreeScanResults(
+          fs.listFiles(listDir, true));
+
+      listFilesRecursiveTimer.end("listFiles(recursive=true) of %s", created);
+      assertEquals("Files found in listFiles(recursive=true) " +
+              " created=" + created + " listed=" + listFilesResults,
+          created.getFileCount(), listFilesResults.getFileCount());
+
+      treewalkListRequests.assertDiffEquals(listRequests);
+      printThenReset(LOG,
+          metadataRequests, listRequests,
+          listStatusCalls, getFileStatusCalls);
+
+      NanoTimer globStatusTimer = new NanoTimer();
+      FileStatus[] globStatusFiles = fs.globStatus(listDir);
+      // report the path which was globbed, and the glob timer's own duration
+      globStatusTimer.end("Time to globStatus() %s", listDir);
+      LOG.info("Time for glob status {} entries: {}",
+          globStatusFiles.length,
+          toHuman(globStatusTimer.duration()));
+      printThenReset(LOG,
+          metadataRequests,
+          listRequests,
+          listStatusCalls,
+          getFileStatusCalls);
+
+    } finally {
+      // deletion at the end of the run
+      NanoTimer deleteTimer = new NanoTimer();
+      fs.delete(listDir, true);
+      deleteTimer.end("Deleting directory tree");
+      printThenReset(LOG,
+          metadataRequests, listRequests,
+          listStatusCalls, getFileStatusCalls);
+    }
+  }
+
+  @Test
+  public void testTimeToStatEmptyDirectory() throws Throwable {
+    describe("Time to stat an empty directory");
+    Path path = new Path(getTestPath(), "empty");
+    fs.mkdirs(path);
+    timeToStatPath(path);
+  }
+
+  @Test
+  public void testTimeToStatNonEmptyDirectory() throws Throwable {
+    describe("Time to stat a non-empty directory");
+    Path path = new Path(getTestPath(), "dir");
+    fs.mkdirs(path);
+    touch(fs, new Path(path, "file"));
+    timeToStatPath(path);
+  }
+
+  @Test
+  public void testTimeToStatFile() throws Throwable {
+    describe("Time to stat a simple file");
+    Path path = new Path(getTestPath(), "file");
+    touch(fs, path);
+    timeToStatPath(path);
+  }
+
+  @Test
+  public void testTimeToStatRoot() throws Throwable {
+    describe("Time to stat the root path");
+    timeToStatPath(new Path("/"));
+  }
+
+  /**
+   * Call {@code getFileStatus()} on a path repeatedly, then log the total
+   * time, the time per call and the metadata/list request counts, both in
+   * total and per call (integer division).
+   * The number of calls is the value of {@code getOperationCount()}.
+   * @param path path to stat
+   * @throws IOException failure of a {@code getFileStatus()} call
+   */
+  private void timeToStatPath(Path path) throws IOException {
+    describe("Timing getFileStatus(\"%s\")", path);
+    MetricDiff metadataRequests =
+        new MetricDiff(fs, Statistic.OBJECT_METADATA_REQUESTS);
+    MetricDiff listRequests =
+        new MetricDiff(fs, Statistic.OBJECT_LIST_REQUESTS);
+    long attempts = getOperationCount();
+    NanoTimer timer = new NanoTimer();
+    for (long l = 0; l < attempts; l++) {
+      fs.getFileStatus(path);
+    }
+    timer.end("Time to execute %d getFileStatusCalls", attempts);
+    LOG.info("Time per call: {}", toHuman(timer.nanosPerOperation(attempts)));
+    LOG.info("metadata: {}", metadataRequests);
+    LOG.info("metadata per operation {}", metadataRequests.diff() / attempts);
+    LOG.info("listObjects: {}", listRequests);
+    LOG.info("listObjects: per operation {}", listRequests.diff() / attempts);
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/043a0c2e/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3AInputStreamPerformance.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3AInputStreamPerformance.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3AInputStreamPerformance.java
index 0c8b273..5222a4e 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3AInputStreamPerformance.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3AInputStreamPerformance.java
@@ -36,8 +36,10 @@ import org.slf4j.LoggerFactory;
import java.io.IOException;
+import static org.apache.hadoop.fs.contract.ContractTestUtils.*;
+
/**
- * Look at the performance of S3a operations
+ * Look at the performance of S3a operations.
*/
public class TestS3AInputStreamPerformance extends S3AScaleTestBase {
private static final Logger LOG = LoggerFactory.getLogger(
@@ -151,7 +153,7 @@ public class TestS3AInputStreamPerformance extends S3AScaleTestBase {
readTimer.end("Time to read %d bytes", len);
bandwidth(readTimer, count);
assertEquals("Not enough bytes were read)", len, count);
- long nanosPerByte = readTimer.nanosPerByte(count);
+ long nanosPerByte = readTimer.nanosPerOperation(count);
LOG.info("An open() call has the equivalent duration of reading {} bytes",
toHuman( timeOpen.duration() / nanosPerByte));
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/043a0c2e/hadoop-tools/hadoop-aws/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/resources/log4j.properties b/hadoop-tools/hadoop-aws/src/test/resources/log4j.properties
index bc85425..1330ed1 100644
--- a/hadoop-tools/hadoop-aws/src/test/resources/log4j.properties
+++ b/hadoop-tools/hadoop-aws/src/test/resources/log4j.properties
@@ -15,7 +15,9 @@ log4j.rootLogger=info,stdout
log4j.threshold=ALL
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
-log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} (%F:%M(%L)) - %m%n
+
+log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR
# for debugging low level S3a operations, uncomment this line
# log4j.logger.org.apache.hadoop.fs.s3a=DEBUG
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org