You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by vv...@apache.org on 2016/01/28 07:49:03 UTC
[37/50] [abbrv] hadoop git commit: HDFS-9541. Add hdfsStreamBuilder API to libhdfs to support defaultBlockSizes greater than 2 GB. Contributed by Colin Patrick McCabe.
HDFS-9541. Add hdfsStreamBuilder API to libhdfs to support defaultBlockSizes greater than 2 GB. Contributed by Colin Patrick McCabe.
Change-Id: Ifce1b9be534dc8f7e9d2634cd60e423921b9810f
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/cf8af7bb
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/cf8af7bb
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/cf8af7bb
Branch: refs/heads/YARN-3926
Commit: cf8af7bb459b21babaad2d972330a3b4c6bb222d
Parents: d0d7c22
Author: Zhe Zhang <zh...@apache.org>
Authored: Tue Jan 26 11:24:57 2016 -0800
Committer: Zhe Zhang <zh...@apache.org>
Committed: Tue Jan 26 11:24:57 2016 -0800
----------------------------------------------------------------------
.../src/main/native/libhdfs/hdfs.c | 98 +++++++++++++++++++-
.../src/main/native/libhdfs/include/hdfs/hdfs.h | 89 +++++++++++++++++-
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +
3 files changed, 187 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cf8af7bb/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/hdfs.c
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/hdfs.c b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/hdfs.c
index c5aad1d..4618dbb 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/hdfs.c
+++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/hdfs.c
@@ -836,9 +836,95 @@ static jthrowable getDefaultBlockSize(JNIEnv *env, jobject jFS,
return NULL;
}
-hdfsFile hdfsOpenFile(hdfsFS fs, const char *path, int flags,
+hdfsFile hdfsOpenFile(hdfsFS fs, const char *path, int flags,
int bufferSize, short replication, tSize blockSize)
{
+ struct hdfsStreamBuilder *bld = hdfsStreamBuilderAlloc(fs, path, flags);
+ if (bufferSize != 0) {
+ hdfsStreamBuilderSetBufferSize(bld, bufferSize);
+ }
+ if (replication != 0) {
+ hdfsStreamBuilderSetReplication(bld, replication);
+ }
+ if (blockSize != 0) {
+ hdfsStreamBuilderSetDefaultBlockSize(bld, blockSize);
+ }
+ return hdfsStreamBuilderBuild(bld);
+}
+
+struct hdfsStreamBuilder {
+ hdfsFS fs;
+ int flags;
+ int32_t bufferSize;
+ int16_t replication;
+ int64_t defaultBlockSize;
+ char path[1];
+};
+
+struct hdfsStreamBuilder *hdfsStreamBuilderAlloc(hdfsFS fs,
+ const char *path, int flags)
+{
+ int path_len = strlen(path);
+ struct hdfsStreamBuilder *bld;
+
+ // sizeof(hdfsStreamBuilder->path) includes one byte for the string
+ // terminator
+ bld = malloc(sizeof(struct hdfsStreamBuilder) + path_len);
+ if (!bld) {
+ errno = ENOMEM;
+ return NULL;
+ }
+ bld->fs = fs;
+ bld->flags = flags;
+ bld->bufferSize = 0;
+ bld->replication = 0;
+ bld->defaultBlockSize = 0;
+ memcpy(bld->path, path, path_len);
+ bld->path[path_len] = '\0';
+ return bld;
+}
+
+void hdfsStreamBuilderFree(struct hdfsStreamBuilder *bld)
+{
+ free(bld);
+}
+
+int hdfsStreamBuilderSetBufferSize(struct hdfsStreamBuilder *bld,
+ int32_t bufferSize)
+{
+ if ((bld->flags & O_ACCMODE) != O_WRONLY) {
+ errno = EINVAL;
+ return -1;
+ }
+ bld->bufferSize = bufferSize;
+ return 0;
+}
+
+int hdfsStreamBuilderSetReplication(struct hdfsStreamBuilder *bld,
+ int16_t replication)
+{
+ if ((bld->flags & O_ACCMODE) != O_WRONLY) {
+ errno = EINVAL;
+ return -1;
+ }
+ bld->replication = replication;
+ return 0;
+}
+
+int hdfsStreamBuilderSetDefaultBlockSize(struct hdfsStreamBuilder *bld,
+ int64_t defaultBlockSize)
+{
+ if ((bld->flags & O_ACCMODE) != O_WRONLY) {
+ errno = EINVAL;
+ return -1;
+ }
+ bld->defaultBlockSize = defaultBlockSize;
+ return 0;
+}
+
+static hdfsFile hdfsOpenFileImpl(hdfsFS fs, const char *path, int flags,
+ int32_t bufferSize, int16_t replication, int64_t blockSize)
+{
/*
JAVA EQUIVALENT:
File f = new File(path);
@@ -1037,6 +1123,16 @@ done:
return file;
}
+hdfsFile hdfsStreamBuilderBuild(struct hdfsStreamBuilder *bld)
+{
+ hdfsFile file = hdfsOpenFileImpl(bld->fs, bld->path, bld->flags,
+ bld->bufferSize, bld->replication, bld->defaultBlockSize);
+ int prevErrno = errno;
+ hdfsStreamBuilderFree(bld);
+ errno = prevErrno;
+ return file;
+}
+
int hdfsTruncateFile(hdfsFS fs, const char* path, tOffset newlength)
{
jobject jFS = (jobject)fs;
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cf8af7bb/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/include/hdfs/hdfs.h
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/include/hdfs/hdfs.h b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/include/hdfs/hdfs.h
index c1515d7..c856928 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/include/hdfs/hdfs.h
+++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/include/hdfs/hdfs.h
@@ -80,6 +80,7 @@ extern "C" {
kObjectKindFile = 'F',
kObjectKindDirectory = 'D',
} tObjectKind;
+ struct hdfsStreamBuilder;
/**
@@ -376,9 +377,11 @@ extern "C" {
LIBHDFS_EXTERNAL
int hdfsDisconnect(hdfsFS fs);
-
/**
* hdfsOpenFile - Open a hdfs file in given mode.
+ * @deprecated Use the hdfsStreamBuilder functions instead.
+ * This function does not support setting block sizes bigger than 2 GB.
+ *
* @param fs The configured filesystem handle.
* @param path The full path to the file.
* @param flags - an | of bits/fcntl.h file flags - supported flags are O_RDONLY, O_WRONLY (meaning create or overwrite i.e., implies O_TRUNC),
@@ -388,7 +391,9 @@ extern "C" {
* @param replication Block replication - pass 0 if you want to use
* the default configured values.
* @param blocksize Size of block - pass 0 if you want to use the
- * default configured values.
+ * default configured values. Note that if you want a block size bigger
+ * than 2 GB, you must use the hdfsStreamBuilder API rather than this
+ * deprecated function.
* @return Returns the handle to the open file or NULL on error.
*/
LIBHDFS_EXTERNAL
@@ -396,6 +401,86 @@ extern "C" {
int bufferSize, short replication, tSize blocksize);
/**
+ * hdfsStreamBuilderAlloc - Allocate an HDFS stream builder.
+ *
+ * @param fs The configured filesystem handle.
+ * @param path The full path to the file. Will be deep-copied.
+ * @param flags The open flags, as in hdfsOpenFile.
+ * @return Returns the hdfsStreamBuilder, or NULL on error.
+ */
+ LIBHDFS_EXTERNAL
+ struct hdfsStreamBuilder *hdfsStreamBuilderAlloc(hdfsFS fs,
+ const char *path, int flags);
+
+ /**
+ * hdfsStreamBuilderFree - Free an HDFS stream builder.
+ *
+ * It is normally not necessary to call this function since
+ * hdfsStreamBuilderBuild frees the builder.
+ *
+ * @param bld The hdfsStreamBuilder to free.
+ */
+ LIBHDFS_EXTERNAL
+ void hdfsStreamBuilderFree(struct hdfsStreamBuilder *bld);
+
+ /**
+ * hdfsStreamBuilderSetBufferSize - Set the stream buffer size.
+ *
+ * @param bld The hdfs stream builder.
+ * @param bufferSize The buffer size to set.
+ *
+ * @return 0 on success, or -1 on error. Errno will be set on error (EINVAL if the builder's open flags are not O_WRONLY).
+ */
+ LIBHDFS_EXTERNAL
+ int hdfsStreamBuilderSetBufferSize(struct hdfsStreamBuilder *bld,
+ int32_t bufferSize);
+
+ /**
+ * hdfsStreamBuilderSetReplication - Set the replication for the stream.
+ * This is only relevant for output streams, which will create new blocks.
+ *
+ * @param bld The hdfs stream builder.
+ * @param replication The replication to set.
+ *
+ * @return 0 on success, or -1 on error. Errno will be set on error.
+ * If you call this on an input stream builder, you will get
+ * EINVAL, because this configuration is not relevant to input
+ * streams.
+ */
+ LIBHDFS_EXTERNAL
+ int hdfsStreamBuilderSetReplication(struct hdfsStreamBuilder *bld,
+ int16_t replication);
+
+ /**
+ * hdfsStreamBuilderSetDefaultBlockSize - Set the default block size for
+ * the stream. This is only relevant for output streams, which will create
+ * new blocks.
+ *
+ * @param bld The hdfs stream builder.
+ * @param defaultBlockSize The default block size to set.
+ *
+ * @return 0 on success, or -1 on error. Errno will be set on error.
+ * If you call this on an input stream builder, you will get
+ * EINVAL, because this configuration is not relevant to input
+ * streams.
+ */
+ LIBHDFS_EXTERNAL
+ int hdfsStreamBuilderSetDefaultBlockSize(struct hdfsStreamBuilder *bld,
+ int64_t defaultBlockSize);
+
+ /**
+ * hdfsStreamBuilderBuild - Build the stream by calling open or create.
+ *
+ * @param bld The hdfs stream builder. This pointer will be freed, whether
+ * or not the open succeeds.
+ *
+ * @return the stream pointer on success, or NULL on error. Errno will be
+ * set on error.
+ */
+ LIBHDFS_EXTERNAL
+ hdfsFile hdfsStreamBuilderBuild(struct hdfsStreamBuilder *bld);
+
+ /**
* hdfsTruncateFile - Truncate an HDFS file to a given length.
* @param fs The configured filesystem handle.
* @param path The full path to the file.
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cf8af7bb/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 56a85f4..097c051 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -956,6 +956,9 @@ Release 2.9.0 - UNRELEASED
HDFS-9094. Add command line option to ask NameNode reload
configuration. (Xiaobing Zhou via Arpit Agarwal)
+ HDFS-9541. Add hdfsStreamBuilder API to libhdfs to support defaultBlockSizes
+ greater than 2 GB. (cmccabe via zhz)
+
OPTIMIZATIONS
BUG FIXES