You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by at...@apache.org on 2012/12/18 03:43:35 UTC
svn commit: r1423260 - in
/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs: ./
src/main/native/fuse-dfs/ src/main/native/fuse-dfs/test/
Author: atm
Date: Tue Dec 18 02:43:32 2012
New Revision: 1423260
URL: http://svn.apache.org/viewvc?rev=1423260&view=rev
Log:
HDFS-4140. fuse-dfs handles open(O_TRUNC) poorly. Contributed by Colin Patrick McCabe.
Modified:
hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_connect.c
hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_impls_open.c
hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_init.c
hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_init.h
hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/test/fuse_workload.c
Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1423260&r1=1423259&r2=1423260&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Tue Dec 18 02:43:32 2012
@@ -333,6 +333,9 @@ Release 2.0.3-alpha - Unreleased
HDFS-4315. DNs with multiple BPs can have BPOfferServices fail to start
due to unsynchronized map access. (atm)
+ HDFS-4140. fuse-dfs handles open(O_TRUNC) poorly. (Colin Patrick McCabe
+ via atm)
+
BREAKDOWN OF HDFS-3077 SUBTASKS
HDFS-3077. Quorum-based protocol for reading and writing edit logs.
Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_connect.c
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_connect.c?rev=1423260&r1=1423259&r2=1423260&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_connect.c (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_connect.c Tue Dec 18 02:43:32 2012
@@ -131,7 +131,6 @@ static enum authConf discoverAuthConf(vo
int fuseConnectInit(const char *nnUri, int port)
{
- const char *timerPeriod;
int ret;
gTimerPeriod = FUSE_CONN_DEFAULT_TIMER_PERIOD;
Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_impls_open.c
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_impls_open.c?rev=1423260&r1=1423259&r2=1423260&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_impls_open.c (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_impls_open.c Tue Dec 18 02:43:32 2012
@@ -24,12 +24,77 @@
#include <stdio.h>
#include <stdlib.h>
+static int get_hdfs_open_flags_from_info(hdfsFS fs, const char *path,
+ int flags, int *outflags, const hdfsFileInfo *info);
+
+/**
+ * Given a set of FUSE flags, determine the libhdfs flags we need.
+ *
+ * This is complicated by two things:
+ * 1. libhdfs doesn't support O_RDWR at all;
+ * 2. when given O_WRONLY, libhdfs will truncate the file unless O_APPEND is
+ * also given. In other words, there is an implicit O_TRUNC.
+ *
+ * Probably the next iteration of the libhdfs interface should not use the POSIX
+ * flags at all, since, as you can see, they don't really match up very closely
+ * to the POSIX meaning. However, for the time being, this is the API.
+ *
+ * @param fs The libhdfs object
+ * @param path The path we're opening
+ * @param flags The FUSE flags
+ *
+ * @return negative error code on failure; flags otherwise.
+ */
+static int64_t get_hdfs_open_flags(hdfsFS fs, const char *path, int flags)
+{
+ int hasContent;
+ int64_t ret;
+ hdfsFileInfo *info;
+
+ if ((flags & O_ACCMODE) == O_RDONLY) {
+ return O_RDONLY;
+ }
+ if (flags & O_TRUNC) {
+ /* If we're opening for write or read/write, O_TRUNC means we should blow
+ * away the file which is there and create our own file.
+ * */
+ return O_WRONLY;
+ }
+ info = hdfsGetPathInfo(fs, path);
+ if (info) {
+ if (info->mSize == 0) {
+ // If the file has zero length, we shouldn't feel bad about blowing it
+ // away.
+ ret = O_WRONLY;
+ } else if ((flags & O_ACCMODE) == O_RDWR) {
+ // HACK: translate O_RDWR requests into O_RDONLY if the file already
+ // exists and has non-zero length.
+ ret = O_RDONLY;
+ } else { // O_WRONLY
+ // HACK: translate O_WRONLY requests into append if the file already
+ // exists.
+ ret = O_WRONLY | O_APPEND;
+ }
+ } else { // !info
+ if (flags & O_CREAT) {
+ ret = O_WRONLY;
+ } else {
+ ret = -ENOENT;
+ }
+ }
+ if (info) {
+ hdfsFreeFileInfo(info, 1);
+ }
+ return ret;
+}
+
int dfs_open(const char *path, struct fuse_file_info *fi)
{
hdfsFS fs = NULL;
dfs_context *dfs = (dfs_context*)fuse_get_context()->private_data;
dfs_fh *fh = NULL;
- int mutexInit = 0, ret;
+ int mutexInit = 0, ret, flags = 0;
+ int64_t flagRet;
TRACE1("open", path)
@@ -38,10 +103,6 @@ int dfs_open(const char *path, struct fu
assert('/' == *path);
assert(dfs);
- // 0x8000 is always passed in and hadoop doesn't like it, so killing it here
- // bugbug figure out what this flag is and report problem to Hadoop JIRA
- int flags = (fi->flags & 0x7FFF);
-
// retrieve dfs specific data
fh = (dfs_fh*)calloc(1, sizeof (dfs_fh));
if (!fh) {
@@ -57,22 +118,12 @@ int dfs_open(const char *path, struct fu
goto error;
}
fs = hdfsConnGetFs(fh->conn);
-
- if (flags & O_RDWR) {
- hdfsFileInfo *info = hdfsGetPathInfo(fs, path);
- if (info == NULL) {
- // File does not exist (maybe?); interpret it as a O_WRONLY
- // If the actual error was something else, we'll get it again when
- // we try to open the file.
- flags ^= O_RDWR;
- flags |= O_WRONLY;
- } else {
- // File exists; open this as read only.
- flags ^= O_RDWR;
- flags |= O_RDONLY;
- }
+ flagRet = get_hdfs_open_flags(fs, path, fi->flags);
+ if (flagRet < 0) {
+ ret = -flagRet;
+ goto error;
}
-
+ flags = flagRet;
if ((fh->hdfsFH = hdfsOpenFile(fs, path, flags, 0, 0, 0)) == NULL) {
ERROR("Could not open file %s (errno=%d)", path, errno);
if (errno == 0 || errno == EINTERNAL) {
@@ -91,7 +142,7 @@ int dfs_open(const char *path, struct fu
}
mutexInit = 1;
- if (fi->flags & O_WRONLY || fi->flags & O_CREAT) {
+ if ((flags & O_ACCMODE) == O_WRONLY) {
fh->buf = NULL;
} else {
assert(dfs->rdbuffer_size > 0);
Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_init.c
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_init.c?rev=1423260&r1=1423259&r2=1423260&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_init.c (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_init.c Tue Dec 18 02:43:32 2012
@@ -98,7 +98,7 @@ static void dfsPrintOptions(FILE *fp, co
o->attribute_timeout, o->rdbuffer_size, o->direct_io);
}
-void *dfs_init(void)
+void *dfs_init(struct fuse_conn_info *conn)
{
int ret;
@@ -143,6 +143,45 @@ void *dfs_init(void)
exit(EXIT_FAILURE);
}
}
+
+#ifdef FUSE_CAP_ATOMIC_O_TRUNC
+ // If FUSE_CAP_ATOMIC_O_TRUNC is set, open("foo", O_CREAT | O_TRUNC) will
+ // result in dfs_open being called with O_TRUNC.
+ //
+ // If this capability is not present, fuse will try to use multiple
+ // operation to "simulate" open(O_TRUNC). This doesn't work very well with
+ // HDFS.
+ // Unfortunately, this capability is only implemented on Linux 2.6.29 or so.
+ // See HDFS-4140 for details.
+ if (conn->capable & FUSE_CAP_ATOMIC_O_TRUNC) {
+ conn->want |= FUSE_CAP_ATOMIC_O_TRUNC;
+ }
+#endif
+
+#ifdef FUSE_CAP_ASYNC_READ
+ // We're OK with doing reads at the same time as writes.
+ if (conn->capable & FUSE_CAP_ASYNC_READ) {
+ conn->want |= FUSE_CAP_ASYNC_READ;
+ }
+#endif
+
+#ifdef FUSE_CAP_BIG_WRITES
+ // Yes, we can read more than 4kb at a time. In fact, please do!
+ if (conn->capable & FUSE_CAP_BIG_WRITES) {
+ conn->want |= FUSE_CAP_BIG_WRITES;
+ }
+#endif
+
+#ifdef FUSE_CAP_DONT_MASK
+ if ((options.no_permissions) && (conn->capable & FUSE_CAP_DONT_MASK)) {
+ // If we're handing permissions ourselves, we don't want the kernel
+ // applying its own umask. HDFS already implements its own per-user
+ // umasks! Sadly, this only actually does something on kernels 2.6.31 and
+ // later.
+ conn->want |= FUSE_CAP_DONT_MASK;
+ }
+#endif
+
return (void*)dfs;
}
Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_init.h
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_init.h?rev=1423260&r1=1423259&r2=1423260&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_init.h (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_init.h Tue Dec 18 02:43:32 2012
@@ -19,13 +19,15 @@
#ifndef __FUSE_INIT_H__
#define __FUSE_INIT_H__
+struct fuse_conn_info;
+
/**
* These are responsible for initializing connections to dfs and internal
* data structures and then freeing them.
* i.e., what happens on mount and unmount.
*
*/
-void *dfs_init();
+void *dfs_init(struct fuse_conn_info *conn);
void dfs_destroy (void *ptr);
#endif
Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/test/fuse_workload.c
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/test/fuse_workload.c?rev=1423260&r1=1423259&r2=1423260&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/test/fuse_workload.c (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/test/fuse_workload.c Tue Dec 18 02:43:32 2012
@@ -16,6 +16,8 @@
* limitations under the License.
*/
+#define FUSE_USE_VERSION 26
+
#include "fuse-dfs/test/fuse_workload.h"
#include "libhdfs/expect.h"
#include "util/posix_util.h"
@@ -23,6 +25,7 @@
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
+#include <fuse.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
@@ -138,13 +141,89 @@ static int safeRead(int fd, void *buf, i
return amt;
}
+/* Bug: HDFS-2551.
+ * When a program writes a file, closes it, and immediately re-opens it,
+ * it might not appear to have the correct length. This is because FUSE
+ * invokes the release() callback asynchronously.
+ *
+ * To work around this, we keep retrying until the file length is what we
+ * expect.
+ */
+static int closeWorkaroundHdfs2551(int fd, const char *path, off_t expectedSize)
+{
+ int ret, try;
+ struct stat stBuf;
+
+ RETRY_ON_EINTR_GET_ERRNO(ret, close(fd));
+ EXPECT_ZERO(ret);
+ for (try = 0; try < MAX_TRIES; try++) {
+ EXPECT_ZERO(stat(path, &stBuf));
+ EXPECT_NONZERO(S_ISREG(stBuf.st_mode));
+ if (stBuf.st_size == expectedSize) {
+ return 0;
+ }
+ sleepNoSig(1);
+ }
+ fprintf(stderr, "FUSE_WORKLOAD: error: expected file %s to have length "
+ "%lld; instead, it had length %lld\n",
+ path, (long long)expectedSize, (long long)stBuf.st_size);
+ return -EIO;
+}
+
+#ifdef FUSE_CAP_ATOMIC_O_TRUNC
+
+/**
+ * Test that we can create a file, write some contents to it, close that file,
+ * and then successfully re-open with O_TRUNC.
+ */
+static int testOpenTrunc(const char *base)
+{
+ int fd, err;
+ char path[PATH_MAX];
+ const char * const SAMPLE1 = "this is the first file that we wrote.";
+ const char * const SAMPLE2 = "this is the second file that we wrote. "
+ "It's #2!";
+
+ snprintf(path, sizeof(path), "%s/trunc.txt", base);
+ fd = open(path, O_CREAT | O_TRUNC | O_WRONLY, 0644);
+ if (fd < 0) {
+ err = errno;
+ fprintf(stderr, "TEST_ERROR: testOpenTrunc(%s): first open "
+ "failed with error %d\n", path, err);
+ return -err;
+ }
+ EXPECT_ZERO(safeWrite(fd, SAMPLE1, strlen(SAMPLE1)));
+ EXPECT_ZERO(closeWorkaroundHdfs2551(fd, path, strlen(SAMPLE1)));
+ fd = open(path, O_CREAT | O_TRUNC | O_WRONLY, 0644);
+ if (fd < 0) {
+ err = errno;
+ fprintf(stderr, "TEST_ERROR: testOpenTrunc(%s): second open "
+ "failed with error %d\n", path, err);
+ return -err;
+ }
+ EXPECT_ZERO(safeWrite(fd, SAMPLE2, strlen(SAMPLE2)));
+ EXPECT_ZERO(closeWorkaroundHdfs2551(fd, path, strlen(SAMPLE2)));
+ return 0;
+}
+
+#else
+
+static int testOpenTrunc(const char *base)
+{
+ fprintf(stderr, "FUSE_WORKLOAD: We lack FUSE_CAP_ATOMIC_O_TRUNC support. "
+ "Not testing open(O_TRUNC).\n");
+ return 0;
+}
+
+#endif
+
int runFuseWorkloadImpl(const char *root, const char *pcomp,
struct fileCtx *ctx)
{
char base[PATH_MAX], tmp[PATH_MAX], *tmpBuf;
char src[PATH_MAX], dst[PATH_MAX];
struct stat stBuf;
- int ret, i, try;
+ int ret, i;
struct utimbuf tbuf;
struct statvfs stvBuf;
@@ -241,35 +320,10 @@ int runFuseWorkloadImpl(const char *root
EXPECT_ZERO(safeWrite(ctx[i].fd, ctx[i].str, ctx[i].strLen));
}
for (i = 0; i < NUM_FILE_CTX; i++) {
- RETRY_ON_EINTR_GET_ERRNO(ret, close(ctx[i].fd));
- EXPECT_ZERO(ret);
+ EXPECT_ZERO(closeWorkaroundHdfs2551(ctx[i].fd, ctx[i].path, ctx[i].strLen));
ctx[i].fd = -1;
}
for (i = 0; i < NUM_FILE_CTX; i++) {
- /* Bug: HDFS-2551.
- * When a program writes a file, closes it, and immediately re-opens it,
- * it might not appear to have the correct length. This is because FUSE
- * invokes the release() callback asynchronously.
- *
- * To work around this, we keep retrying until the file length is what we
- * expect.
- */
- for (try = 0; try < MAX_TRIES; try++) {
- EXPECT_ZERO(stat(ctx[i].path, &stBuf));
- EXPECT_NONZERO(S_ISREG(stBuf.st_mode));
- if (ctx[i].strLen == stBuf.st_size) {
- break;
- }
- sleepNoSig(1);
- }
- if (try == MAX_TRIES) {
- fprintf(stderr, "FUSE_WORKLOAD: error: expected file %s to have length "
- "%d; instead, it had length %lld\n",
- ctx[i].path, ctx[i].strLen, (long long)stBuf.st_size);
- return -EIO;
- }
- }
- for (i = 0; i < NUM_FILE_CTX; i++) {
ctx[i].fd = open(ctx[i].path, O_RDONLY);
if (ctx[i].fd < 0) {
fprintf(stderr, "FUSE_WORKLOAD: Failed to open file %s for reading!\n",
@@ -308,6 +362,7 @@ int runFuseWorkloadImpl(const char *root
for (i = 0; i < NUM_FILE_CTX; i++) {
free(ctx[i].path);
}
+ EXPECT_ZERO(testOpenTrunc(base));
EXPECT_ZERO(recursiveDelete(base));
return 0;
}