You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ae...@apache.org on 2017/08/22 01:58:41 UTC

[38/50] [abbrv] hadoop git commit: HADOOP-14398. Modify documents for the FileSystem Builder API. (Lei (Eddy) Xu)

HADOOP-14398. Modify documents for the FileSystem Builder API. (Lei (Eddy) Xu)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/99e558b1
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/99e558b1
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/99e558b1

Branch: refs/heads/HDFS-7240
Commit: 99e558b13ba4d5832aea97374e1d07b4e78e5e39
Parents: 4230872
Author: Lei Xu <le...@apache.org>
Authored: Thu Aug 17 18:06:23 2017 -0700
Committer: Lei Xu <le...@apache.org>
Committed: Thu Aug 17 18:06:23 2017 -0700

----------------------------------------------------------------------
 .../hadoop/fs/FSDataOutputStreamBuilder.java    |  74 ++++++--
 .../src/site/markdown/filesystem/filesystem.md  |  33 +++-
 .../filesystem/fsdataoutputstreambuilder.md     | 182 +++++++++++++++++++
 .../src/site/markdown/filesystem/index.md       |   1 +
 4 files changed, 272 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/99e558b1/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataOutputStreamBuilder.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataOutputStreamBuilder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataOutputStreamBuilder.java
index 1f668eb..86c284a 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataOutputStreamBuilder.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataOutputStreamBuilder.java
@@ -54,16 +54,29 @@ import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_
  * options accordingly, for example:
  *
  * <code>
- * FSDataOutputStreamBuilder builder = fs.createFile(path);
- * builder.permission(perm)
+ *
+ * // Don't
+ * if (fs instanceof FooFileSystem) {
+ *   FooFileSystem foofs = (FooFileSystem) fs;
+ *   OutputStream out = foofs.createFile(path)
+ *     .optionA()
+ *     .optionB("value")
+ *     .cache()
+ *     .build();
+ * } else if (fs instanceof BarFileSystem) {
+ *   ...
+ * }
+ *
+ * // Do
+ * OutputStream out = fs.createFile(path)
+ *   .permission(perm)
  *   .bufferSize(bufSize)
- *   .opt("dfs.outputstream.builder.lazy-persist", true)
- *   .opt("dfs.outputstream.builder.ec.policy-name", "rs-3-2-64k")
- *   .opt("fs.local.o-direct", true)
- *   .must("fs.s3a.fast-upload", true)
- *   .must("fs.azure.buffer-size", 256 * 1024 * 1024);
- * FSDataOutputStream out = builder.build();
- * ...
+ *   .opt("foofs:option.a", true)
+ *   .opt("foofs:option.b", "value")
+ *   .opt("barfs:cache", true)
+ *   .must("foofs:cache", true)
+ *   .must("barfs:cache-size", 256 * 1024 * 1024)
+ *   .build();
  * </code>
  *
  * If the option is not related to the file system, the option will be ignored.
@@ -263,6 +276,8 @@ public abstract class FSDataOutputStreamBuilder
 
   /**
    * Set optional boolean parameter for the Builder.
+   *
+   * @see #opt(String, String)
    */
   public B opt(@Nonnull final String key, boolean value) {
     mandatoryKeys.remove(key);
@@ -272,6 +287,8 @@ public abstract class FSDataOutputStreamBuilder
 
   /**
    * Set optional int parameter for the Builder.
+   *
+   * @see #opt(String, String)
    */
   public B opt(@Nonnull final String key, int value) {
     mandatoryKeys.remove(key);
@@ -281,6 +298,8 @@ public abstract class FSDataOutputStreamBuilder
 
   /**
    * Set optional float parameter for the Builder.
+   *
+   * @see #opt(String, String)
    */
   public B opt(@Nonnull final String key, float value) {
     mandatoryKeys.remove(key);
@@ -290,6 +309,8 @@ public abstract class FSDataOutputStreamBuilder
 
   /**
    * Set optional double parameter for the Builder.
+   *
+   * @see #opt(String, String)
    */
   public B opt(@Nonnull final String key, double value) {
     mandatoryKeys.remove(key);
@@ -299,6 +320,8 @@ public abstract class FSDataOutputStreamBuilder
 
   /**
    * Set an array of string values as optional parameter for the Builder.
+   *
+   * @see #opt(String, String)
    */
   public B opt(@Nonnull final String key, @Nonnull final String... values) {
     mandatoryKeys.remove(key);
@@ -310,8 +333,7 @@ public abstract class FSDataOutputStreamBuilder
    * Set mandatory option to the Builder.
    *
    * If the option is not supported or unavailable on the {@link FileSystem},
-   * the client should expect {@link #build()} throws
-   * {@link IllegalArgumentException}.
+   * the client should expect {@link #build()} throws IllegalArgumentException.
    */
   public B must(@Nonnull final String key, @Nonnull final String value) {
     mandatoryKeys.add(key);
@@ -319,35 +341,55 @@ public abstract class FSDataOutputStreamBuilder
     return getThisBuilder();
   }
 
-  /** Set mandatory boolean option. */
+  /**
+   * Set mandatory boolean option.
+   *
+   * @see #must(String, String)
+   */
   public B must(@Nonnull final String key, boolean value) {
     mandatoryKeys.add(key);
     options.setBoolean(key, value);
     return getThisBuilder();
   }
 
-  /** Set mandatory int option. */
+  /**
+   * Set mandatory int option.
+   *
+   * @see #must(String, String)
+   */
   public B must(@Nonnull final String key, int value) {
     mandatoryKeys.add(key);
     options.setInt(key, value);
     return getThisBuilder();
   }
 
-  /** Set mandatory float option. */
+  /**
+   * Set mandatory float option.
+   *
+   * @see #must(String, String)
+   */
   public B must(@Nonnull final String key, float value) {
     mandatoryKeys.add(key);
     options.setFloat(key, value);
     return getThisBuilder();
   }
 
-  /** Set mandatory double option. */
+  /**
+   * Set mandatory double option.
+   *
+   * @see #must(String, String)
+   */
   public B must(@Nonnull final String key, double value) {
     mandatoryKeys.add(key);
     options.setDouble(key, value);
     return getThisBuilder();
   }
 
-  /** Set a string array as mandatory option. */
+  /**
+   * Set a string array as mandatory option.
+   *
+   * @see #must(String, String)
+   */
   public B must(@Nonnull final String key, @Nonnull final String... values) {
     mandatoryKeys.add(key);
     options.setStrings(key, values);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/99e558b1/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md
index d7e57ce..1e522c7 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md
@@ -553,7 +553,7 @@ on a path that exists and is a file. Instead the operation returns false.
        FS' = FS
        result = False
 
-### `FSDataOutputStream create(Path, ...)`
+### <a name='FileSystem.create'></a> `FSDataOutputStream create(Path, ...)`
 
 
     FSDataOutputStream create(Path p,
@@ -616,7 +616,24 @@ this precondition fails.
 
 * Not covered: symlinks. The resolved path of the symlink is used as the final path argument to the `create()` operation
 
-### `FSDataOutputStream append(Path p, int bufferSize, Progressable progress)`
+### `FSDataOutputStreamBuilder createFile(Path p)`
+
+Make a `FSDataOutputStreamBuilder` to specify the parameters to create a file.
+
+#### Implementation Notes
+
+`createFile(p)` returns a `FSDataOutputStreamBuilder` only and does not make
+changes to the filesystem immediately. When `build()` is invoked on the `FSDataOutputStreamBuilder`,
+the builder parameters are verified and [`create(Path p)`](#FileSystem.create)
+is invoked on the underlying filesystem. `build()` has the same preconditions
+and postconditions as [`create(Path p)`](#FileSystem.create).
+
+* Similar to [`create(Path p)`](#FileSystem.create), files are overwritten
+by default, unless `builder.overwrite(false)` is specified.
+* Unlike [`create(Path p)`](#FileSystem.create), missing parent directories are
+not created by default, unless `builder.recursive()` is specified.
+
+### <a name='FileSystem.append'></a> `FSDataOutputStream append(Path p, int bufferSize, Progressable progress)`
 
 Implementations without a compliant call SHOULD throw `UnsupportedOperationException`.
 
@@ -634,6 +651,18 @@ Implementations without a compliant call SHOULD throw `UnsupportedOperationExcep
 Return: `FSDataOutputStream`, which can update the entry `FS.Files[p]`
 by appending data to the existing list.
 
+### `FSDataOutputStreamBuilder appendFile(Path p)`
+
+Make a `FSDataOutputStreamBuilder` to specify the parameters to append to an
+existing file.
+
+#### Implementation Notes
+
+`appendFile(p)` returns a `FSDataOutputStreamBuilder` only and does not make
+changes to the filesystem immediately. When `build()` is invoked on the `FSDataOutputStreamBuilder`,
+the builder parameters are verified and [`append()`](#FileSystem.append) is
+invoked on the underlying filesystem. `build()` has the same preconditions and
+postconditions as [`append()`](#FileSystem.append).
 
 ### `FSDataInputStream open(Path f, int bufferSize)`
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/99e558b1/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md
new file mode 100644
index 0000000..4ea1fd1
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md
@@ -0,0 +1,182 @@
+<!---
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+<!--  ============================================================= -->
+<!--  CLASS: FSDataOutputStreamBuilder -->
+<!--  ============================================================= -->
+
+# class `org.apache.hadoop.fs.FSDataOutputStreamBuilder`
+
+<!-- MACRO{toc|fromDepth=1|toDepth=2} -->
+
+Builder pattern for `FSDataOutputStream` and its subclasses. It is used to
+create a new file or open an existing file on `FileSystem` for write.
+
+## Invariants
+
+The `FSDataOutputStreamBuilder` interface neither validates parameters
+nor modifies the state of `FileSystem` until [`build()`](#Builder.build) is
+invoked.
+
+## Implementation-agnostic parameters.
+
+### <a name="Builder.create"></a> `FSDataOutputStreamBuilder create()`
+
+Specify `FSDataOutputStreamBuilder` to create a file on `FileSystem`, equivalent
+to `CreateFlag#CREATE`.
+
+### <a name="Builder.append"></a> `FSDataOutputStreamBuilder append()`
+
+Specify `FSDataOutputStreamBuilder` to append to an existing file on
+`FileSystem`, equivalent to `CreateFlag#APPEND`.
+
+### <a name="Builder.overwrite"></a> `FSDataOutputStreamBuilder overwrite(boolean overwrite)`
+
+Specify whether `FSDataOutputStreamBuilder` overwrites an existing file. If
+`overwrite==true` is given, it truncates an existing file, equivalent to
+`CreateFlag#OVERWRITE`.
+
+### <a name="Builder.permission"></a> `FSDataOutputStreamBuilder permission(FsPermission permission)`
+
+Set permission for the file.
+
+### <a name="Builder.bufferSize"></a> `FSDataOutputStreamBuilder bufferSize(int bufSize)`
+
+Set the size of the buffer to be used.
+
+### <a name="Builder.replication"></a> `FSDataOutputStreamBuilder replication(short replica)`
+
+Set the replication factor.
+
+### <a name="Builder.blockSize"></a> `FSDataOutputStreamBuilder blockSize(long size)`
+
+Set block size in bytes.
+
+### <a name="Builder.recursive"></a> `FSDataOutputStreamBuilder recursive()`
+
+Create parent directories if they do not exist.
+
+### <a name="Builder.progress"></a> `FSDataOutputStreamBuilder progress(Progressable prog)`
+
+Set the facility for reporting progress.
+
+### <a name="Builder.checksumOpt"></a> `FSDataOutputStreamBuilder checksumOpt(ChecksumOpt chksumOpt)`
+
+Set checksum opt.
+
+### Set optional or mandatory parameters
+
+    FSDataOutputStreamBuilder opt(String key, ...)
+    FSDataOutputStreamBuilder must(String key, ...)
+
+Set optional or mandatory parameters to the builder. Using `opt()` or `must()`,
+client can specify FS-specific parameters without inspecting the concrete type
+of `FileSystem`.
+
+    // Don't
+    if (fs instanceof FooFileSystem) {
+        FooFileSystem foofs = (FooFileSystem) fs;
+        out = foofs.createFile(path)
+            .optionA()
+            .optionB("value")
+            .cache()
+            .build();
+    } else if (fs instanceof BarFileSystem) {
+        ...
+    }
+
+    // Do
+    out = fs.createFile(path)
+        .permission(perm)
+        .bufferSize(bufSize)
+        .opt("foofs:option.a", true)
+        .opt("foofs:option.b", "value")
+        .opt("barfs:cache", true)
+        .must("foofs:cache", true)
+        .must("barfs:cache-size", 256 * 1024 * 1024)
+        .build();
+
+#### Implementation Notes
+
+The concrete `FileSystem` and/or `FSDataOutputStreamBuilder` implementation
+MUST verify that implementation-agnostic parameters (e.g., "syncable") or
+implementation-specific parameters (e.g., "foofs:cache")
+are supported. `FileSystem` will satisfy optional parameters (via `opt(key, ...)`)
+on a best-effort basis. If the mandatory parameters (via `must(key, ...)`) cannot be satisfied
+by the `FileSystem`, `IllegalArgumentException` should be thrown in `build()`.
+
+The behavior of resolving the conflicts between the parameters set by
+builder methods (e.g., `bufferSize()`) and `opt()`/`must()` is undefined.
+
+## HDFS-specific parameters.
+
+`HdfsDataOutputStreamBuilder extends FSDataOutputStreamBuilder` provides additional
+HDFS-specific parameters, to further customize file creation / append behavior.
+
+### `FSDataOutputStreamBuilder favoredNodes(InetSocketAddress[] nodes)`
+
+Set favored DataNodes for new blocks.
+
+### `FSDataOutputStreamBuilder syncBlock()`
+
+Force closed blocks to the disk device. See `CreateFlag#SYNC_BLOCK`
+
+### `FSDataOutputStreamBuilder lazyPersist()`
+
+Create the block on transient storage if possible.
+
+### `FSDataOutputStreamBuilder newBlock()`
+
+Append data to a new block instead of the end of the last partial block.
+
+### `FSDataOutputStreamBuilder noLocalWrite()`
+
+Advise that a block replica NOT be written to the local DataNode.
+
+### `FSDataOutputStreamBuilder ecPolicyName(String policyName)`
+
+Enforce the file to be a striped file with erasure coding policy 'policyName',
+no matter what its parent directory's replication or erasure coding policy is.
+
+### `FSDataOutputStreamBuilder replicate()`
+
+Enforce the file to be a replicated file, no matter what its parent directory's
+replication or erasure coding policy is.
+
+## Builder interface
+
+### <a name="Builder.build"></a> `FSDataOutputStream build()`
+
+Create a new file or append to an existing file on the underlying `FileSystem`,
+and return `FSDataOutputStream` for write.
+
+#### Preconditions
+
+The following combinations of parameters are not supported:
+
+    if APPEND|OVERWRITE: raise HadoopIllegalArgumentException
+    if CREATE|APPEND|OVERWRITE: raise HadoopIllegalArgumentException
+
+`FileSystem` may reject the request for other reasons and throw `IOException`,
+see `FileSystem#create(path, ...)` and `FileSystem#append()`.
+
+#### Postconditions
+
+    FS' where :
+       FS'.Files'[p] == []
+       ancestors(p) is-subset-of FS'.Directories'
+
+    result = FSDataOutputStream
+
+The result is `FSDataOutputStream` to be used to write data to filesystem.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/99e558b1/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/index.md
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/index.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/index.md
index 66a7eb3..532b6c7 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/index.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/index.md
@@ -33,5 +33,6 @@ HDFS as these are commonly expected by Hadoop client applications.
 1. [Model](model.html)
 1. [FileSystem class](filesystem.html)
 1. [FSDataInputStream class](fsdatainputstream.html)
+1. [FSDataOutputStreamBuilder class](fsdataoutputstreambuilder.html)
 2. [Testing with the Filesystem specification](testing.html)
 2. [Extending the specification and its tests](extending.html)


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org