You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by da...@apache.org on 2019/10/09 23:52:43 UTC

[hadoop] branch branch-2 updated: HADOOP-16630 : Backport of Hadoop-16548 : Disable Flush() over config

This is an automated email from the ASF dual-hosted git repository.

dazhou pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-2 by this push:
     new 27d7d09  HADOOP-16630 : Backport of Hadoop-16548 : Disable Flush() over config
27d7d09 is described below

commit 27d7d09fa48afe4cccd766a5567464b772a46300
Author: Sneha Vijayarajan <sn...@gmail.com>
AuthorDate: Wed Oct 9 16:50:06 2019 -0700

    HADOOP-16630 : Backport of Hadoop-16548 : Disable Flush() over config
---
 .../hadoop/fs/azurebfs/AbfsConfiguration.java      | 14 +++++++++++
 .../fs/azurebfs/AzureBlobFileSystemStore.java      |  6 +++--
 .../fs/azurebfs/constants/ConfigurationKeys.java   |  8 +++++++
 .../constants/FileSystemConfigurations.java        |  1 +
 .../fs/azurebfs/services/AbfsOutputStream.java     |  7 ++++--
 .../hadoop-azure/src/site/markdown/abfs.md         | 18 +++++++++++++++
 .../fs/azurebfs/ITestAzureBlobFileSystemFlush.java | 27 +++++++++++-----------
 7 files changed, 64 insertions(+), 17 deletions(-)

diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java
index 67055c5..56ff622 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java
@@ -146,6 +146,10 @@ public class AbfsConfiguration{
       DefaultValue = DEFAULT_ENABLE_FLUSH)
   private boolean enableFlush;
 
+  @BooleanConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_DISABLE_OUTPUTSTREAM_FLUSH,
+      DefaultValue = DEFAULT_DISABLE_OUTPUTSTREAM_FLUSH)
+  private boolean disableOutputStreamFlush;
+
   @BooleanConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_ENABLE_AUTOTHROTTLING,
       DefaultValue = DEFAULT_ENABLE_AUTOTHROTTLING)
   private boolean enableAutoThrottling;
@@ -427,6 +431,10 @@ public class AbfsConfiguration{
     return this.enableFlush;
   }
 
+  public boolean isOutputStreamFlushDisabled() {
+    return this.disableOutputStreamFlush;
+  }
+
   public boolean isAutoThrottlingEnabled() {
     return this.enableAutoThrottling;
   }
@@ -635,4 +643,10 @@ public class AbfsConfiguration{
   void setEnableFlush(boolean enableFlush) {
     this.enableFlush = enableFlush;
   }
+
+  @VisibleForTesting
+  void setDisableOutputStreamFlush(boolean disableOutputStreamFlush) {
+    this.disableOutputStreamFlush = disableOutputStreamFlush;
+  }
+
 }
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java
index 2694565..6710275 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java
@@ -354,7 +354,8 @@ public class AzureBlobFileSystemStore {
         AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path),
         0,
         abfsConfiguration.getWriteBufferSize(),
-        abfsConfiguration.isFlushEnabled());
+        abfsConfiguration.isFlushEnabled(),
+        abfsConfiguration.isOutputStreamFlushDisabled());
   }
 
   public void createDirectory(final Path path, final FsPermission permission, final FsPermission umask)
@@ -426,7 +427,8 @@ public class AzureBlobFileSystemStore {
         AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path),
         offset,
         abfsConfiguration.getWriteBufferSize(),
-        abfsConfiguration.isFlushEnabled());
+        abfsConfiguration.isFlushEnabled(),
+        abfsConfiguration.isOutputStreamFlushDisabled());
   }
 
   public void rename(final Path source, final Path destination) throws
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java
index 8cd86bf..cd86f18 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java
@@ -51,7 +51,15 @@ public final class ConfigurationKeys {
   public static final String FS_AZURE_ALWAYS_USE_HTTPS = "fs.azure.always.use.https";
   public static final String FS_AZURE_ATOMIC_RENAME_KEY = "fs.azure.atomic.rename.key";
   public static final String FS_AZURE_READ_AHEAD_QUEUE_DEPTH = "fs.azure.readaheadqueue.depth";
+  /** Provides a config control to enable or disable ABFS Flush operations -
+   *  HFlush and HSync. Default is true. **/
   public static final String FS_AZURE_ENABLE_FLUSH = "fs.azure.enable.flush";
+  /** Provides a config control to disable or enable OutputStream Flush API
+   *  operations in AbfsOutputStream. Flush() will trigger actions that
+   *  guarantee that buffered data is persistent with a perf cost while the API
+   *  documentation does not have such expectations of data being persisted.
+   *  Default value of this config is true. **/
+  public static final String FS_AZURE_DISABLE_OUTPUTSTREAM_FLUSH = "fs.azure.disable.outputstream.flush";
   public static final String FS_AZURE_USER_AGENT_PREFIX_KEY = "fs.azure.user.agent.prefix";
   public static final String FS_AZURE_SSL_CHANNEL_MODE_KEY = "fs.azure.ssl.channel.mode";
   public static final String FS_AZURE_USE_UPN = "fs.azure.use.upn";
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java
index a2a0064..29367eb 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java
@@ -57,6 +57,7 @@ public final class FileSystemConfigurations {
 
   public static final int DEFAULT_READ_AHEAD_QUEUE_DEPTH = -1;
   public static final boolean DEFAULT_ENABLE_FLUSH = true;
+  public static final boolean DEFAULT_DISABLE_OUTPUTSTREAM_FLUSH = true;
   public static final boolean DEFAULT_ENABLE_AUTOTHROTTLING = true;
 
   public static final SSLSocketFactoryEx.SSLChannelMode DEFAULT_FS_AZURE_SSL_CHANNEL_MODE
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java
index 679f22e..be2ab67 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java
@@ -52,6 +52,7 @@ public class AbfsOutputStream extends OutputStream implements Syncable, StreamCa
   private long position;
   private boolean closed;
   private boolean supportFlush;
+  private boolean disableOutputStreamFlush;
   private volatile IOException lastError;
 
   private long lastFlushOffset;
@@ -80,12 +81,14 @@ public class AbfsOutputStream extends OutputStream implements Syncable, StreamCa
       final String path,
       final long position,
       final int bufferSize,
-      final boolean supportFlush) {
+      final boolean supportFlush,
+      final boolean disableOutputStreamFlush) {
     this.client = client;
     this.path = path;
     this.position = position;
     this.closed = false;
     this.supportFlush = supportFlush;
+    this.disableOutputStreamFlush = disableOutputStreamFlush;
     this.lastError = null;
     this.lastFlushOffset = 0;
     this.bufferSize = bufferSize;
@@ -199,7 +202,7 @@ public class AbfsOutputStream extends OutputStream implements Syncable, StreamCa
    */
   @Override
   public void flush() throws IOException {
-    if (supportFlush) {
+    if (!disableOutputStreamFlush) {
       flushInternalAsync();
     }
   }
diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md
index db55e67..d8452a2 100644
--- a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md
+++ b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md
@@ -72,6 +72,24 @@ or you can configure an identity to be used only for a specific storage account
 
 Note that it doesn't make sense to do this with some properties, like shared keys that are inherently account-specific.
 
+### <a name="flushconfigoptions"></a> Flush Options
+
+#### <a name="abfsflushconfigoptions"></a> 1. Azure Blob File System Flush Options
+Config `fs.azure.enable.flush` provides an option to render ABFS flush APIs -
+ HFlush() and HSync() to be no-op. By default, this
+config will be set to true.
+
+Both the APIs will ensure that data is persisted.
+
+#### <a name="outputstreamflushconfigoptions"></a> 2. OutputStream Flush Options
+Config `fs.azure.disable.outputstream.flush` provides an option to render
+OutputStream Flush() API to be a no-op in AbfsOutputStream. By default, this
+config will be set to true.
+
+Hflush() being the only documented API that can provide persistent data
+transfer, Flush() also attempting to persist buffered data will lead to
+performance issues.
+
 ## Testing ABFS
 
 See the relevant section in [Testing Azure](testing_azure.html).
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFlush.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFlush.java
index d60cae8..60f7f7d 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFlush.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFlush.java
@@ -208,43 +208,44 @@ public class ITestAzureBlobFileSystemFlush extends AbstractAbfsScaleTest {
   }
 
   @Test
-  public void testFlushWithFlushEnabled() throws Exception {
-    testFlush(true);
+  public void testFlushWithOutputStreamFlushEnabled() throws Exception {
+    testFlush(false);
   }
 
   @Test
-  public void testFlushWithFlushDisabled() throws Exception {
-    testFlush(false);
+  public void testFlushWithOutputStreamFlushDisabled() throws Exception {
+    testFlush(true);
   }
 
-  private void testFlush(boolean flushEnabled) throws Exception {
+  private void testFlush(boolean disableOutputStreamFlush) throws Exception {
     final AzureBlobFileSystem fs = (AzureBlobFileSystem) getFileSystem();
 
-    // Simulate setting "fs.azure.enable.flush" to true or false
-    fs.getAbfsStore().getAbfsConfiguration().setEnableFlush(flushEnabled);
+    // Simulate setting "fs.azure.disable.outputstream.flush" to true or false
+    fs.getAbfsStore().getAbfsConfiguration()
+        .setDisableOutputStreamFlush(disableOutputStreamFlush);
 
     final Path testFilePath = path(methodName.getMethodName());
     byte[] buffer = getRandomBytesArray();
 
     // The test case must write "fs.azure.write.request.size" bytes
     // to the stream in order for the data to be uploaded to storage.
-    assertEquals(
-        fs.getAbfsStore().getAbfsConfiguration().getWriteBufferSize(),
+    assertEquals(fs.getAbfsStore().getAbfsConfiguration().getWriteBufferSize(),
         buffer.length);
 
     try (FSDataOutputStream stream = fs.create(testFilePath)) {
       stream.write(buffer);
 
       // Write asynchronously uploads data, so we must wait for completion
-      AbfsOutputStream abfsStream = (AbfsOutputStream) stream.getWrappedStream();
+      AbfsOutputStream abfsStream = (AbfsOutputStream) stream
+          .getWrappedStream();
       abfsStream.waitForPendingUploads();
 
       // Flush commits the data so it can be read.
       stream.flush();
 
-      // Verify that the data can be read if flushEnabled is true; and otherwise
-      // cannot be read.
-      validate(fs.open(testFilePath), buffer, flushEnabled);
+      // Verify that the data can be read if disableOutputStreamFlush is
+      // false; and otherwise cannot be read.
+      validate(fs.open(testFilePath), buffer, !disableOutputStreamFlush);
     }
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org