You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by su...@apache.org on 2012/10/24 02:25:03 UTC

svn commit: r1401520 - in /hadoop/common/branches/branch-1-win: ./ src/core/org/apache/hadoop/fs/ src/core/org/apache/hadoop/io/nativeio/ src/native/src/org/apache/hadoop/io/nativeio/ src/test/org/apache/hadoop/fs/ src/winutils/

Author: suresh
Date: Wed Oct 24 00:25:03 2012
New Revision: 1401520

URL: http://svn.apache.org/viewvc?rev=1401520&view=rev
Log:
HADOOP-8872. FileSystem#length returns zero for symlinks on windows+java6. Contributed by Ivan Mitic.

Modified:
    hadoop/common/branches/branch-1-win/CHANGES.branch-1-win.txt
    hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/fs/FileUtil.java
    hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/fs/RawLocalFileSystem.java
    hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/io/nativeio/NativeIO.java
    hadoop/common/branches/branch-1-win/src/native/src/org/apache/hadoop/io/nativeio/NativeIO.c
    hadoop/common/branches/branch-1-win/src/test/org/apache/hadoop/fs/TestFileUtil.java
    hadoop/common/branches/branch-1-win/src/winutils/ls.c

Modified: hadoop/common/branches/branch-1-win/CHANGES.branch-1-win.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/CHANGES.branch-1-win.txt?rev=1401520&r1=1401519&r2=1401520&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/CHANGES.branch-1-win.txt (original)
+++ hadoop/common/branches/branch-1-win/CHANGES.branch-1-win.txt Wed Oct 24 00:25:03 2012
@@ -188,3 +188,9 @@ Branch-hadoop-1-win - unreleased
     HADOOP-4093. Fix a bug that AzureBlockPlacementPolicy#chooseTarget only
     returns one DN when replication factor is greater than 3.  (Jing Zhao via
     szetszwo)
+
+    HADOOP-8907. Provide means to look for zlib1.dll next to hadoop.dll 
+    on Windows. (Ivan Mitic via suresh)
+
+    HADOOP-8872. FileSystem#length returns zero for symlinks on 
+    windows+java6. (Ivan Mitic via suresh)

Modified: hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/fs/FileUtil.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/fs/FileUtil.java?rev=1401520&r1=1401519&r2=1401520&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/fs/FileUtil.java (original)
+++ hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/fs/FileUtil.java Wed Oct 24 00:25:03 2012
@@ -875,4 +875,48 @@ public class FileUtil {
     jarStream.close();
     return jarFile;
   }
+
+  /** Returns the file length. In case of a symbolic link, follows the link
+   *  and returns the target file length. API is used to provide
+   *  transparent behavior between Unix and Windows since on Windows
+   *  {@link File#length()} does not follow symbolic links.
+   * @param file file to extract the length for
+   *
+   * @throws IOException if an error occurred.
+   */
+  public static long getLengthFollowSymlink(File file) {
+    return getLengthFollowSymlink(file, false);
+  }
+
+  /** Package level API used for unit-testing only. */
+  static long getLengthFollowSymlink(File file, boolean disableNativeIO) {
+    if (!Shell.WINDOWS) {
+      return file.length();
+    } else {
+      try {
+        // FIXME: Below logic is not needed On Java 7 under Windows since
+        // File#length returns the correct value.
+        if (!disableNativeIO && NativeIO.isAvailable()) {
+          // Use Windows native API for extracting the file length if NativeIO
+          // is available.
+          return NativeIO.Windows.getLengthFollowSymlink(
+            file.getCanonicalPath());
+        } else {
+          // If NativeIO is not available, we have to provide the fallback
+          // mechanism since downstream Hadoop projects do not want
+          // to worry about adding hadoop.dll to the java library path.
+
+          // Extract the target link size via winutils
+          String output = FileUtil.execCommand(
+            file, new String[] { Shell.WINUTILS, "ls", "-L", "-F" });
+          // Output tokens are separated with a pipe character
+          String[] args = output.split("\\|");
+          return Long.parseLong(args[4]);
+        }
+      } catch (IOException ex) {
+        // In case of an error return zero to be consistent with File#length
+        return 0;
+      }
+    }
+  }
 }

Modified: hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/fs/RawLocalFileSystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/fs/RawLocalFileSystem.java?rev=1401520&r1=1401519&r2=1401520&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/fs/RawLocalFileSystem.java (original)
+++ hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/fs/RawLocalFileSystem.java Wed Oct 24 00:25:03 2012
@@ -436,7 +436,9 @@ public class RawLocalFileSystem extends 
     }
     
     RawLocalFileStatus(File f, long defaultBlockSize, FileSystem fs) {
-      super(f.length(), f.isDirectory(), 1, defaultBlockSize,
+      // Extract File#length thru FileUtil#getLengthFollowSymlink to provide
+      // the same behavior for symbolic links on different platforms. 
+      super(FileUtil.getLengthFollowSymlink(f), f.isDirectory(), 1, defaultBlockSize,
             f.lastModified(), new Path(f.getPath()).makeQualified(fs));
     }
     

Modified: hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/io/nativeio/NativeIO.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/io/nativeio/NativeIO.java?rev=1401520&r1=1401519&r2=1401520&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/io/nativeio/NativeIO.java (original)
+++ hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/io/nativeio/NativeIO.java Wed Oct 24 00:25:03 2012
@@ -177,6 +177,10 @@ public class NativeIO {
     /** Windows only methods used for getOwner() implementation */
     private static native String getOwner(FileDescriptor fd) throws IOException;
 
+    /** Windows only method used for getting the file length */
+    public static native long getLengthFollowSymlink(
+        String path) throws IOException;
+
     static {
       if (NativeCodeLoader.isNativeCodeLoaded()) {
         try {

Modified: hadoop/common/branches/branch-1-win/src/native/src/org/apache/hadoop/io/nativeio/NativeIO.c
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/native/src/org/apache/hadoop/io/nativeio/NativeIO.c?rev=1401520&r1=1401519&r2=1401520&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/native/src/org/apache/hadoop/io/nativeio/NativeIO.c (original)
+++ hadoop/common/branches/branch-1-win/src/native/src/org/apache/hadoop/io/nativeio/NativeIO.c Wed Oct 24 00:25:03 2012
@@ -604,6 +604,42 @@ cleanup:
 #endif
 }
 
+/*
+ * Class:     org_apache_hadoop_io_nativeio_NativeIO_Windows
+ * Method:    getLengthFollowSymlink
+ * Signature: (Ljava/lang/String;)J
+ */
+JNIEXPORT jlong JNICALL Java_org_apache_hadoop_io_nativeio_NativeIO_00024Windows_getLengthFollowSymlink
+  (JNIEnv *env, jclass clazz, jstring j_path)
+{
+#ifdef UNIX
+  THROW(env, "java/io/IOException",
+    "The function getLengthFollowSymlink(String) is not supported on Unix");
+  return 0;
+#endif
+
+#ifdef WINDOWS
+  DWORD dwRtnCode = ERROR_SUCCESS;
+  BY_HANDLE_FILE_INFORMATION fileInfo = { 0 };
+  LARGE_INTEGER fileSize = { 0 };
+
+  const wchar_t *path = (const wchar_t*) (*env)->GetStringChars(env, j_path, NULL);
+  if (path == NULL) return 0; // JVM throws Exception for us
+
+  dwRtnCode = GetFileInformationByName(path, TRUE, &fileInfo);
+  if (dwRtnCode != ERROR_SUCCESS) {
+    throw_ioe(env, dwRtnCode);
+  }
+
+  (*env)->ReleaseStringChars(env, j_path, path);
+
+  fileSize.HighPart = fileInfo.nFileSizeHigh;
+  fileSize.LowPart = fileInfo.nFileSizeLow;
+
+  return (jlong)(fileSize.QuadPart);
+#endif
+}
+
 /**
  * vim: sw=2: ts=2: et:
  */

Modified: hadoop/common/branches/branch-1-win/src/test/org/apache/hadoop/fs/TestFileUtil.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/test/org/apache/hadoop/fs/TestFileUtil.java?rev=1401520&r1=1401519&r2=1401520&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/test/org/apache/hadoop/fs/TestFileUtil.java (original)
+++ hadoop/common/branches/branch-1-win/src/test/org/apache/hadoop/fs/TestFileUtil.java Wed Oct 24 00:25:03 2012
@@ -362,4 +362,53 @@ public class TestFileUtil {
     in.close();
     Assert.assertEquals(data.length, len);
   }
+
+  /**
+   * Test the value of File's length extracted using FileUtil.
+   */
+  @Test
+  public void testGetLengthFollowSymlink() throws Exception {
+    Assert.assertFalse(del.exists());
+    del.mkdirs();
+
+    byte[] data = "testSymLinkData".getBytes();
+
+    File file = new File(del, FILE);
+    File link = new File(del, "_link");
+
+    // write some data to the file
+    FileOutputStream os = new FileOutputStream(file);
+    os.write(data);
+    os.close();
+
+    // ensure that getLengthFollowSymlink returns zero if a file
+    // does not exist
+    Assert.assertEquals(0, FileUtil.getLengthFollowSymlink(link));
+
+    // create the symlink
+    FileUtil.symLink(file.getAbsolutePath(), link.getAbsolutePath());
+
+    // ensure that getLengthFollowSymlink returns the target file and link size
+    Assert.assertEquals(data.length, FileUtil.getLengthFollowSymlink(file));
+    Assert.assertEquals(data.length, FileUtil.getLengthFollowSymlink(link));
+
+    // ensure that getLengthFollowSymlink returns the target file and link
+    // size when NativeIO is not used (tests the fallback functionality
+    // on Windows)
+    Assert.assertEquals(
+      data.length, FileUtil.getLengthFollowSymlink(file, true));
+    Assert.assertEquals(
+      data.length, FileUtil.getLengthFollowSymlink(link, true));
+
+    // Make sure that files can be deleted (no remaining open handles)
+    file.delete();
+    Assert.assertFalse(file.exists());
+
+    // Link size should be zero when it's pointing to nothing
+    Assert.assertEquals(0, FileUtil.getLengthFollowSymlink(link));
+
+    link.delete();
+    Assert.assertFalse(link.exists());
+
+  }
 }

Modified: hadoop/common/branches/branch-1-win/src/winutils/ls.c
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/winutils/ls.c?rev=1401520&r1=1401519&r2=1401520&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/winutils/ls.c (original)
+++ hadoop/common/branches/branch-1-win/src/winutils/ls.c Wed Oct 24 00:25:03 2012
@@ -76,6 +76,8 @@ static BOOL GetMaskString(INT accessMask
 //	None
 //
 // Notes:
+//  if useSeparator is false, separates the output tokens with a space
+//  character, otherwise, with a pipe character
 //
 static BOOL LsPrintLine(
   const INT mask,
@@ -84,7 +86,8 @@ static BOOL LsPrintLine(
   LPCWSTR groupName,
   const FILETIME *lpFileWritetime,
   const LARGE_INTEGER fileSize,
-  LPCWSTR path)
+  LPCWSTR path,
+  BOOL useSeparator)
 {
   // 'd' + 'rwx' for user, group, other
   static const size_t ck_ullMaskLen = 1 + 3 * 3;
@@ -117,10 +120,20 @@ static BOOL LsPrintLine(
     goto LsPrintLineEnd;
   }
 
-  fwprintf(stdout, L"%10s %d %s %s %lld %3s %2d %4d %s\n",
-    maskString, hardlinkCount, ownerName, groupName, fileSize.QuadPart,
-    MONTHS[stFileWriteTime.wMonth], stFileWriteTime.wDay,
-    stFileWriteTime.wYear, path);
+  if (useSeparator)
+  {
+    fwprintf(stdout, L"%10s|%d|%s|%s|%lld|%3s|%2d|%4d|%s\n",
+      maskString, hardlinkCount, ownerName, groupName, fileSize.QuadPart,
+      MONTHS[stFileWriteTime.wMonth], stFileWriteTime.wDay,
+      stFileWriteTime.wYear, path);
+  }
+  else
+  {
+    fwprintf(stdout, L"%10s %d %s %s %lld %3s %2d %4d %s\n",
+      maskString, hardlinkCount, ownerName, groupName, fileSize.QuadPart,
+      MONTHS[stFileWriteTime.wMonth], stFileWriteTime.wDay,
+      stFileWriteTime.wYear, path);
+  }
 
   ret = TRUE;
 
@@ -130,6 +143,88 @@ LsPrintLineEnd:
   return ret;
 }
 
+// List of command line options supported by "winutils ls"
+enum CmdLineOption
+{
+  CmdLineOptionFollowSymlink = 0x1,  // "-L"
+  CmdLineOptionSeparator = 0x2  // "-F"
+  // options should be powers of 2 (aka next is 0x4)
+};
+
+static wchar_t* CurrentDir = L".";
+
+//----------------------------------------------------------------------------
+// Function: ParseCommandLine
+//
+// Description:
+//   Parses the command line
+//
+// Returns:
+//   TRUE on the valid command line, FALSE otherwise
+//
+BOOL ParseCommandLine(
+  int argc, wchar_t *argv[], wchar_t** path, int *optionsMask)
+{
+  int MaxOptions = 2; // Should be equal to the number of elems in CmdLineOption
+  int i = 0;
+
+  assert(optionsMask != NULL);
+  assert(argv != NULL);
+  assert(path != NULL);
+
+  *optionsMask = 0;
+
+  if (argc == 1)
+  {
+    // no path specified, assume "."
+    *path = CurrentDir;
+    return TRUE;
+  }
+
+  if (argc == 2)
+  {
+    // only path specified, no other options
+    *path = argv[1];
+    return TRUE;
+  }
+
+  if (argc > 2 + MaxOptions)
+  {
+    // too many parameters
+    return FALSE;
+  }
+
+  for (i = 1; i < argc - 1; ++i)
+  {
+    if (wcscmp(argv[i], L"-L") == 0)
+    {
+      // Check if this option was already specified
+      BOOL alreadySet = *optionsMask & CmdLineOptionFollowSymlink;
+      if (alreadySet)
+        return FALSE;
+
+      *optionsMask |= CmdLineOptionFollowSymlink;
+    }
+    else if (wcscmp(argv[i], L"-F") == 0)
+    {
+      // Check if this option was already specified
+      BOOL alreadySet = *optionsMask & CmdLineOptionSeparator;
+      if (alreadySet)
+        return FALSE;
+
+      *optionsMask |= CmdLineOptionSeparator;
+    }
+    else
+    {
+      return FALSE;
+    }
+  }
+
+  *path = argv[argc - 1];
+
+  return TRUE;
+}
+
 //----------------------------------------------------------------------------
 // Function: Ls
 //
@@ -158,19 +253,16 @@ int Ls(int argc, wchar_t *argv[])
   BOOL isSymlink = FALSE;
 
   int ret = EXIT_FAILURE;
+  int optionsMask = 0;
 
-  if (argc > 2)
+  if (!ParseCommandLine(argc, argv, &pathName, &optionsMask))
   {
     fwprintf(stderr, L"Incorrect command line arguments.\n\n");
     LsUsage(argv[0]);
     return EXIT_FAILURE;
   }
 
-  if (argc == 2)
-    pathName = argv[1];
-
-  if (pathName == NULL || wcslen(pathName) == 0)
-    pathName = L".";
+  assert(pathName != NULL);
 
   if (wcsspn(pathName, L"/?|><:*\"") != 0)
   {
@@ -187,7 +279,8 @@ int Ls(int argc, wchar_t *argv[])
     goto LsEnd;
   }
 
-  dwErrorCode = GetFileInformationByName(longPathName, FALSE, &fileInformation);
+  dwErrorCode = GetFileInformationByName(
+    longPathName, optionsMask & CmdLineOptionFollowSymlink, &fileInformation);
   if (dwErrorCode != ERROR_SUCCESS)
   {
     ReportErrorCode(L"GetFileInformationByName", dwErrorCode);
@@ -224,7 +317,8 @@ int Ls(int argc, wchar_t *argv[])
     ownerName, groupName,
     &fileInformation.ftLastWriteTime,
     fileSize,
-    pathName))
+    pathName,
+    optionsMask & CmdLineOptionSeparator))
     goto LsEnd;
 
   ret = EXIT_SUCCESS;
@@ -240,10 +334,13 @@ LsEnd:
 void LsUsage(LPCWSTR program)
 {
   fwprintf(stdout, L"\
-Usage: %s [FILE]\n\
+Usage: %s [OPTIONS] [FILE]\n\
 List information about the FILE (the current directory by default).\n\
 Using long listing format and list directory entries instead of contents,\n\
 and do not dereference symbolic links.\n\
-Provide equivalent or similar function as 'ls -ld' on GNU/Linux.\n",
+Provides equivalent or similar function as 'ls -ld' on GNU/Linux.\n\
+\n\
+OPTIONS: -L dereference symbolic links\n\
+         -F format the output by separating tokens with a pipe\n",
 program);
 }