You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by da...@apache.org on 2019/10/09 23:52:43 UTC
[hadoop] branch branch-2 updated: HADOOP-16630 : Backport of
Hadoop-16548 : Disable Flush() over config
This is an automated email from the ASF dual-hosted git repository.
dazhou pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-2 by this push:
new 27d7d09 HADOOP-16630 : Backport of Hadoop-16548 : Disable Flush() over config
27d7d09 is described below
commit 27d7d09fa48afe4cccd766a5567464b772a46300
Author: Sneha Vijayarajan <sn...@gmail.com>
AuthorDate: Wed Oct 9 16:50:06 2019 -0700
HADOOP-16630 : Backport of Hadoop-16548 : Disable Flush() over config
---
.../hadoop/fs/azurebfs/AbfsConfiguration.java | 14 +++++++++++
.../fs/azurebfs/AzureBlobFileSystemStore.java | 6 +++--
.../fs/azurebfs/constants/ConfigurationKeys.java | 8 +++++++
.../constants/FileSystemConfigurations.java | 1 +
.../fs/azurebfs/services/AbfsOutputStream.java | 7 ++++--
.../hadoop-azure/src/site/markdown/abfs.md | 18 +++++++++++++++
.../fs/azurebfs/ITestAzureBlobFileSystemFlush.java | 27 +++++++++++-----------
7 files changed, 64 insertions(+), 17 deletions(-)
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java
index 67055c5..56ff622 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java
@@ -146,6 +146,10 @@ public class AbfsConfiguration{
DefaultValue = DEFAULT_ENABLE_FLUSH)
private boolean enableFlush;
+ @BooleanConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_DISABLE_OUTPUTSTREAM_FLUSH,
+ DefaultValue = DEFAULT_DISABLE_OUTPUTSTREAM_FLUSH)
+ private boolean disableOutputStreamFlush;
+
@BooleanConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_ENABLE_AUTOTHROTTLING,
DefaultValue = DEFAULT_ENABLE_AUTOTHROTTLING)
private boolean enableAutoThrottling;
@@ -427,6 +431,10 @@ public class AbfsConfiguration{
return this.enableFlush;
}
+ public boolean isOutputStreamFlushDisabled() {
+ return this.disableOutputStreamFlush;
+ }
+
public boolean isAutoThrottlingEnabled() {
return this.enableAutoThrottling;
}
@@ -635,4 +643,10 @@ public class AbfsConfiguration{
void setEnableFlush(boolean enableFlush) {
this.enableFlush = enableFlush;
}
+
+ @VisibleForTesting
+ void setDisableOutputStreamFlush(boolean disableOutputStreamFlush) {
+ this.disableOutputStreamFlush = disableOutputStreamFlush;
+ }
+
}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java
index 2694565..6710275 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java
@@ -354,7 +354,8 @@ public class AzureBlobFileSystemStore {
AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path),
0,
abfsConfiguration.getWriteBufferSize(),
- abfsConfiguration.isFlushEnabled());
+ abfsConfiguration.isFlushEnabled(),
+ abfsConfiguration.isOutputStreamFlushDisabled());
}
public void createDirectory(final Path path, final FsPermission permission, final FsPermission umask)
@@ -426,7 +427,8 @@ public class AzureBlobFileSystemStore {
AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path),
offset,
abfsConfiguration.getWriteBufferSize(),
- abfsConfiguration.isFlushEnabled());
+ abfsConfiguration.isFlushEnabled(),
+ abfsConfiguration.isOutputStreamFlushDisabled());
}
public void rename(final Path source, final Path destination) throws
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java
index 8cd86bf..cd86f18 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java
@@ -51,7 +51,15 @@ public final class ConfigurationKeys {
public static final String FS_AZURE_ALWAYS_USE_HTTPS = "fs.azure.always.use.https";
public static final String FS_AZURE_ATOMIC_RENAME_KEY = "fs.azure.atomic.rename.key";
public static final String FS_AZURE_READ_AHEAD_QUEUE_DEPTH = "fs.azure.readaheadqueue.depth";
+ /** Provides a config control to enable or disable ABFS Flush operations -
+ * HFlush and HSync. Default is true. **/
public static final String FS_AZURE_ENABLE_FLUSH = "fs.azure.enable.flush";
+ /** Provides a config control to disable or enable the OutputStream Flush API
+ * in AbfsOutputStream. When not disabled, Flush() triggers actions that
+ * persist buffered data, at a performance cost, even though the OutputStream
+ * API documentation does not require flushed data to be persisted.
+ * Default value of this config is true, i.e. Flush() is disabled. **/
+ public static final String FS_AZURE_DISABLE_OUTPUTSTREAM_FLUSH = "fs.azure.disable.outputstream.flush";
public static final String FS_AZURE_USER_AGENT_PREFIX_KEY = "fs.azure.user.agent.prefix";
public static final String FS_AZURE_SSL_CHANNEL_MODE_KEY = "fs.azure.ssl.channel.mode";
public static final String FS_AZURE_USE_UPN = "fs.azure.use.upn";
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java
index a2a0064..29367eb 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java
@@ -57,6 +57,7 @@ public final class FileSystemConfigurations {
public static final int DEFAULT_READ_AHEAD_QUEUE_DEPTH = -1;
public static final boolean DEFAULT_ENABLE_FLUSH = true;
+ public static final boolean DEFAULT_DISABLE_OUTPUTSTREAM_FLUSH = true;
public static final boolean DEFAULT_ENABLE_AUTOTHROTTLING = true;
public static final SSLSocketFactoryEx.SSLChannelMode DEFAULT_FS_AZURE_SSL_CHANNEL_MODE
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java
index 679f22e..be2ab67 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java
@@ -52,6 +52,7 @@ public class AbfsOutputStream extends OutputStream implements Syncable, StreamCa
private long position;
private boolean closed;
private boolean supportFlush;
+ private boolean disableOutputStreamFlush;
private volatile IOException lastError;
private long lastFlushOffset;
@@ -80,12 +81,14 @@ public class AbfsOutputStream extends OutputStream implements Syncable, StreamCa
final String path,
final long position,
final int bufferSize,
- final boolean supportFlush) {
+ final boolean supportFlush,
+ final boolean disableOutputStreamFlush) {
this.client = client;
this.path = path;
this.position = position;
this.closed = false;
this.supportFlush = supportFlush;
+ this.disableOutputStreamFlush = disableOutputStreamFlush;
this.lastError = null;
this.lastFlushOffset = 0;
this.bufferSize = bufferSize;
@@ -199,7 +202,7 @@ public class AbfsOutputStream extends OutputStream implements Syncable, StreamCa
*/
@Override
public void flush() throws IOException {
- if (supportFlush) {
+ if (!disableOutputStreamFlush) {
flushInternalAsync();
}
}
diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md
index db55e67..d8452a2 100644
--- a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md
+++ b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md
@@ -72,6 +72,24 @@ or you can configure an identity to be used only for a specific storage account
Note that it doesn't make sense to do this with some properties, like shared keys that are inherently account-specific.
+### <a name="flushconfigoptions"></a> Flush Options
+
+#### <a name="abfsflushconfigoptions"></a> 1. Azure Blob File System Flush Options
+Config `fs.azure.enable.flush` controls the ABFS flush APIs, HFlush() and
+HSync(). Setting it to false renders both APIs no-ops. By default, this
+config is set to true.
+
+Both the APIs will ensure that data is persisted.
+
+#### <a name="outputstreamflushconfigoptions"></a> 2. OutputStream Flush Options
+Config `fs.azure.disable.outputstream.flush` provides an option to render
+OutputStream Flush() API to be a no-op in AbfsOutputStream. By default, this
+config will be set to true.
+
+HFlush() is the only documented API that can provide persistent data
+transfer. Having Flush() also attempt to persist buffered data leads to
+performance issues, which is why Flush() is a no-op by default.
+
## Testing ABFS
See the relevant section in [Testing Azure](testing_azure.html).
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFlush.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFlush.java
index d60cae8..60f7f7d 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFlush.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFlush.java
@@ -208,43 +208,44 @@ public class ITestAzureBlobFileSystemFlush extends AbstractAbfsScaleTest {
}
@Test
- public void testFlushWithFlushEnabled() throws Exception {
- testFlush(true);
+ public void testFlushWithOutputStreamFlushEnabled() throws Exception {
+ testFlush(false);
}
@Test
- public void testFlushWithFlushDisabled() throws Exception {
- testFlush(false);
+ public void testFlushWithOutputStreamFlushDisabled() throws Exception {
+ testFlush(true);
}
- private void testFlush(boolean flushEnabled) throws Exception {
+ private void testFlush(boolean disableOutputStreamFlush) throws Exception {
final AzureBlobFileSystem fs = (AzureBlobFileSystem) getFileSystem();
- // Simulate setting "fs.azure.enable.flush" to true or false
- fs.getAbfsStore().getAbfsConfiguration().setEnableFlush(flushEnabled);
+ // Simulate setting "fs.azure.disable.outputstream.flush" to true or false
+ fs.getAbfsStore().getAbfsConfiguration()
+ .setDisableOutputStreamFlush(disableOutputStreamFlush);
final Path testFilePath = path(methodName.getMethodName());
byte[] buffer = getRandomBytesArray();
// The test case must write "fs.azure.write.request.size" bytes
// to the stream in order for the data to be uploaded to storage.
- assertEquals(
- fs.getAbfsStore().getAbfsConfiguration().getWriteBufferSize(),
+ assertEquals(fs.getAbfsStore().getAbfsConfiguration().getWriteBufferSize(),
buffer.length);
try (FSDataOutputStream stream = fs.create(testFilePath)) {
stream.write(buffer);
// Write asynchronously uploads data, so we must wait for completion
- AbfsOutputStream abfsStream = (AbfsOutputStream) stream.getWrappedStream();
+ AbfsOutputStream abfsStream = (AbfsOutputStream) stream
+ .getWrappedStream();
abfsStream.waitForPendingUploads();
// Flush commits the data so it can be read.
stream.flush();
- // Verify that the data can be read if flushEnabled is true; and otherwise
- // cannot be read.
- validate(fs.open(testFilePath), buffer, flushEnabled);
+ // Verify that the data can be read if disableOutputStreamFlush is
+ // false; and otherwise cannot be read.
+ validate(fs.open(testFilePath), buffer, !disableOutputStreamFlush);
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org