You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ks...@apache.org on 2020/04/20 19:22:10 UTC

[arrow] 28/28: ARROW-8477: [C++] Enable reading and writing of long filenames for Windows

This is an automated email from the ASF dual-hosted git repository.

kszucs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit 5093b809d63ac8db99aec9caa7ad7e723f277c46
Author: TP Boudreau <tp...@gmail.com>
AuthorDate: Mon Apr 20 21:03:40 2020 +0200

    ARROW-8477: [C++] Enable reading and writing of long filenames for Windows
    
    This patch enables reading/writing of files with long (>260 characters) pathnames in Windows.
    
    In order for the new test to run under Windows, both (1) the test host must have long paths enabled in its registry, and (2) the test executable (arrow_utility_test.exe) must include a manifest indicating support for long paths (see https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file?redirectedfrom=MSDN#enable-long-paths-in-windows-10-version-1607-and-later).  The test source code checks for (1) and the cmake file changes ensure (2).
    
    Closes #6993 from tpboudreau/ARROW-8477
    
    Lead-authored-by: TP Boudreau <tp...@gmail.com>
    Co-authored-by: Antoine Pitrou <an...@python.org>
    Signed-off-by: Antoine Pitrou <an...@python.org>
---
 .github/workflows/cpp.yml                |  4 ++-
 cpp/src/arrow/util/CMakeLists.txt        | 14 +++++++-
 cpp/src/arrow/util/io_util.cc            | 40 +++++++++++++++++----
 cpp/src/arrow/util/io_util_test.cc       | 60 ++++++++++++++++++++++++++++++++
 cpp/src/arrow/util/io_util_test.manifest | 39 +++++++++++++++++++++
 cpp/src/arrow/util/io_util_test.rc       | 44 +++++++++++++++++++++++
 6 files changed, 192 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml
index 287c0a5..a4db36f 100644
--- a/.github/workflows/cpp.yml
+++ b/.github/workflows/cpp.yml
@@ -228,7 +228,9 @@ jobs:
         run: ci/scripts/util_checkout.sh
       - name: Build
         shell: bash
-        run: ci/scripts/cpp_build.sh $(pwd) $(pwd)/build
+        run: |
+          export BOOST_ROOT=$BOOST_ROOT_1_72_0
+          ci/scripts/cpp_build.sh $(pwd) $(pwd)/build
       - name: Test
         shell: bash
         run: ci/scripts/cpp_test.sh $(pwd) $(pwd)/build
diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
index 4c2ed4e..ce08839 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -26,6 +26,18 @@ arrow_install_all_headers("arrow/util")
 # arrow_test_main
 #
 
+if(WIN32)
+  # This manifest enables long file paths on Windows 10+
+  # See https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file#enable-long-paths-in-windows-10-version-1607-and-later
+  if(MSVC)
+    set(IO_UTIL_TEST_SOURCES io_util_test.cc io_util_test.manifest)
+  else()
+    set(IO_UTIL_TEST_SOURCES io_util_test.cc io_util_test.rc)
+  endif()
+else()
+  set(IO_UTIL_TEST_SOURCES io_util_test.cc)
+endif()
+
 add_arrow_test(utility-test
                SOURCES
                align_util_test.cc
@@ -37,7 +49,7 @@ add_arrow_test(utility-test
                key_value_metadata_test.cc
                hashing_test.cc
                int_util_test.cc
-               io_util_test.cc
+               ${IO_UTIL_TEST_SOURCES}
                iterator_test.cc
                logging_test.cc
                parsing_util_test.cc
diff --git a/cpp/src/arrow/util/io_util.cc b/cpp/src/arrow/util/io_util.cc
index c7efa15..2e589fd 100644
--- a/cpp/src/arrow/util/io_util.cc
+++ b/cpp/src/arrow/util/io_util.cc
@@ -837,8 +837,19 @@ Result<int> FileOpenReadable(const PlatformFilename& file_name) {
   int fd, errno_actual;
 #if defined(_WIN32)
   SetLastError(0);
-  errno_actual = _wsopen_s(&fd, file_name.ToNative().c_str(),
-                           _O_RDONLY | _O_BINARY | _O_NOINHERIT, _SH_DENYNO, _S_IREAD);
+  HANDLE file_handle = CreateFileW(file_name.ToNative().c_str(), GENERIC_READ,
+                                   FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
+                                   OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+
+  DWORD last_error = GetLastError();
+  if (last_error == ERROR_SUCCESS) {
+    errno_actual = 0;
+    fd = _open_osfhandle(reinterpret_cast<intptr_t>(file_handle),
+                         _O_RDONLY | _O_BINARY | _O_NOINHERIT);
+  } else {
+    return IOErrorFromWinError(last_error, "Failed to open local file '",
+                               file_name.ToString(), "'");
+  }
 #else
   fd = open(file_name.ToNative().c_str(), O_RDONLY);
   errno_actual = errno;
@@ -868,23 +879,38 @@ Result<int> FileOpenWritable(const PlatformFilename& file_name, bool write_only,
 #if defined(_WIN32)
   SetLastError(0);
   int oflag = _O_CREAT | _O_BINARY | _O_NOINHERIT;
-  int pmode = _S_IREAD | _S_IWRITE;
+  DWORD desired_access = GENERIC_WRITE;
+  DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE;
+  DWORD creation_disposition = OPEN_ALWAYS;
 
-  if (truncate) {
-    oflag |= _O_TRUNC;
-  }
   if (append) {
     oflag |= _O_APPEND;
   }
 
+  if (truncate) {
+    oflag |= _O_TRUNC;
+    creation_disposition = CREATE_ALWAYS;
+  }
+
   if (write_only) {
     oflag |= _O_WRONLY;
   } else {
     oflag |= _O_RDWR;
+    desired_access |= GENERIC_READ;
   }
 
-  errno_actual = _wsopen_s(&fd, file_name.ToNative().c_str(), oflag, _SH_DENYNO, pmode);
+  HANDLE file_handle =
+      CreateFileW(file_name.ToNative().c_str(), desired_access, share_mode, NULL,
+                  creation_disposition, FILE_ATTRIBUTE_NORMAL, NULL);
 
+  DWORD last_error = GetLastError();
+  if (last_error == ERROR_SUCCESS || last_error == ERROR_ALREADY_EXISTS) {
+    errno_actual = 0;
+    fd = _open_osfhandle(reinterpret_cast<intptr_t>(file_handle), oflag);
+  } else {
+    return IOErrorFromWinError(last_error, "Failed to open local file '",
+                               file_name.ToString(), "'");
+  }
 #else
   int oflag = O_CREAT;
 
diff --git a/cpp/src/arrow/util/io_util_test.cc b/cpp/src/arrow/util/io_util_test.cc
index eb6ed0d..b432d33 100644
--- a/cpp/src/arrow/util/io_util_test.cc
+++ b/cpp/src/arrow/util/io_util_test.cc
@@ -394,6 +394,10 @@ TEST(DeleteDirContents, Basics) {
 #else
   ASSERT_EQ(ErrnoFromStatus(status), ENOENT);
 #endif
+
+  // Now actually delete the test directory
+  ASSERT_OK_AND_ASSIGN(deleted, DeleteDirTree(parent));
+  ASSERT_TRUE(deleted);
 }
 
 TEST(TemporaryDir, Basics) {
@@ -523,5 +527,61 @@ TEST(DeleteFile, Basics) {
   ASSERT_RAISES(IOError, DeleteFile(fn));
 }
 
+#ifndef __APPLE__
+TEST(FileUtils, LongPaths) {
+  // ARROW-8477: check using long file paths under Windows (> 260 characters).
+  bool created, deleted;
+#ifdef _WIN32
+  const char* kRegKeyName = R"(SYSTEM\CurrentControlSet\Control\FileSystem)";
+  const char* kRegValueName = "LongPathsEnabled";
+  DWORD value = 0;
+  DWORD size = sizeof(value);
+  LSTATUS status = RegGetValueA(HKEY_LOCAL_MACHINE, kRegKeyName, kRegValueName,
+                                RRF_RT_REG_DWORD, NULL, &value, &size);
+  bool test_long_paths = (status == ERROR_SUCCESS && value == 1);
+  if (!test_long_paths) {
+    ARROW_LOG(WARNING)
+        << "Tests for accessing files with long path names have been disabled. "
+        << "To enable these tests, set the value of " << kRegValueName
+        << " in registry key \\HKEY_LOCAL_MACHINE\\" << kRegKeyName
+        << " to 1 on the test host.";
+    return;
+  }
+#endif
+
+  const std::string BASE = "xxx-io-util-test-dir-long";
+  PlatformFilename base_path, long_path, long_filename;
+  int fd = -1;
+  std::stringstream fs;
+  fs << BASE;
+  for (int i = 0; i < 64; ++i) {
+    fs << "/123456789ABCDEF";
+  }
+  ASSERT_OK_AND_ASSIGN(base_path,
+                       PlatformFilename::FromString(BASE));  // long_path length > 1024
+  ASSERT_OK_AND_ASSIGN(
+      long_path, PlatformFilename::FromString(fs.str()));  // long_path length > 1024
+  ASSERT_OK_AND_ASSIGN(created, CreateDirTree(long_path));
+  ASSERT_TRUE(created);
+  AssertExists(long_path);
+  ASSERT_OK_AND_ASSIGN(long_filename,
+                       PlatformFilename::FromString(fs.str() + "/file.txt"));
+  ASSERT_OK_AND_ASSIGN(fd, FileOpenWritable(long_filename));
+  ASSERT_OK(FileClose(fd));
+  AssertExists(long_filename);
+  fd = -1;
+  ASSERT_OK_AND_ASSIGN(fd, FileOpenReadable(long_filename));
+  ASSERT_OK(FileClose(fd));
+  ASSERT_OK_AND_ASSIGN(deleted, DeleteDirContents(long_path));
+  ASSERT_TRUE(deleted);
+  ASSERT_OK_AND_ASSIGN(deleted, DeleteDirTree(long_path));
+  ASSERT_TRUE(deleted);
+
+  // Now delete the whole test directory tree
+  ASSERT_OK_AND_ASSIGN(deleted, DeleteDirTree(base_path));
+  ASSERT_TRUE(deleted);
+}
+#endif
+
 }  // namespace internal
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/io_util_test.manifest b/cpp/src/arrow/util/io_util_test.manifest
new file mode 100644
index 0000000..de5c0d8
--- /dev/null
+++ b/cpp/src/arrow/util/io_util_test.manifest
@@ -0,0 +1,39 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied.  See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+<!--
+  Enable long file paths on the target application
+  See https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file#enable-long-paths-in-windows-10-version-1607-and-later
+  -->
+<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
+  <assemblyIdentity type="win32" name="ArrowUtilityTest" version="1.1.1.1"/>
+  <application xmlns="urn:schemas-microsoft-com:asm.v3">
+    <windowsSettings xmlns:ws2="http://schemas.microsoft.com/SMI/2016/WindowsSettings">
+      <ws2:longPathAware>true</ws2:longPathAware>
+    </windowsSettings>
+  </application>
+  <trustInfo xmlns="urn:schemas-microsoft-com:asm.v3">
+    <security>
+      <requestedPrivileges>
+        <requestedExecutionLevel level="asInvoker" uiAccess="false"/>
+      </requestedPrivileges>
+    </security>
+  </trustInfo>
+</assembly>
diff --git a/cpp/src/arrow/util/io_util_test.rc b/cpp/src/arrow/util/io_util_test.rc
new file mode 100644
index 0000000..c3236cb
--- /dev/null
+++ b/cpp/src/arrow/util/io_util_test.rc
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef CREATEPROCESS_MANIFEST_RESOURCE_ID
+#define CREATEPROCESS_MANIFEST_RESOURCE_ID  1
+#endif
+#ifndef RT_MANIFEST
+#define RT_MANIFEST  24
+#endif
+
+CREATEPROCESS_MANIFEST_RESOURCE_ID RT_MANIFEST
+BEGIN
+  "<?xml version=""1.0"" encoding=""UTF-8"" standalone=""yes""?>"
+  "<assembly xmlns=""urn:schemas-microsoft-com:asm.v1"" manifestVersion=""1.0"">"
+    "<assemblyIdentity type=""win32"" name=""ArrowUtilityTest"" version=""1.1.1.1""/>"
+    "<application xmlns=""urn:schemas-microsoft-com:asm.v3"">"
+      "<windowsSettings xmlns:ws2=""http://schemas.microsoft.com/SMI/2016/WindowsSettings"">"
+        "<ws2:longPathAware>true</ws2:longPathAware>"
+      "</windowsSettings>"
+    "</application>"
+    "<trustInfo xmlns=""urn:schemas-microsoft-com:asm.v3"">"
+      "<security>"
+        "<requestedPrivileges>"
+          "<requestedExecutionLevel level=""asInvoker"" uiAccess=""false""/>"
+        "</requestedPrivileges>"
+      "</security>"
+    "</trustInfo>"
+  "</assembly>"
+END
+