You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2021/01/21 16:49:37 UTC

[arrow] branch master updated: ARROW-11320: [C++] Try to strengthen temporary dir creation

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new bc5d8bf  ARROW-11320: [C++] Try to strengthen temporary dir creation
bc5d8bf is described below

commit bc5d8bf76ef3cf8898cdfb7e7feb089fe2944731
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Thu Jan 21 17:48:49 2021 +0100

    ARROW-11320: [C++] Try to strengthen temporary dir creation
    
    * Make the random generator seed a bit more unique.
    * When a temporary name already exists, retry with another one instead of bailing out.
    
    Closes #9265 from pitrou/ARROW-11320-temp-dir-retry
    
    Authored-by: Antoine Pitrou <an...@python.org>
    Signed-off-by: Antoine Pitrou <an...@python.org>
---
 cpp/src/arrow/util/io_util.cc | 69 ++++++++++++++++++++++++++++++-------------
 1 file changed, 49 insertions(+), 20 deletions(-)

diff --git a/cpp/src/arrow/util/io_util.cc b/cpp/src/arrow/util/io_util.cc
index 5d36ba9..cabbfb3 100644
--- a/cpp/src/arrow/util/io_util.cc
+++ b/cpp/src/arrow/util/io_util.cc
@@ -1482,31 +1482,51 @@ std::string MakeRandomName(int num_chars) {
 }  // namespace
 
 Result<std::unique_ptr<TemporaryDir>> TemporaryDir::Make(const std::string& prefix) {
-  std::string suffix = MakeRandomName(8);
+  const int kNumChars = 8;
+
   NativePathString base_name;
-  ARROW_ASSIGN_OR_RAISE(base_name, StringToNative(prefix + suffix));
+
+  auto MakeBaseName = [&]() {
+    std::string suffix = MakeRandomName(kNumChars);
+    return StringToNative(prefix + suffix);
+  };
+
+  auto TryCreatingDirectory =
+      [&](const NativePathString& base_dir) -> Result<std::unique_ptr<TemporaryDir>> {
+    Status st;
+    for (int attempt = 0; attempt < 3; ++attempt) {
+      PlatformFilename fn(base_dir + kNativeSep + base_name + kNativeSep);
+      auto result = CreateDir(fn);
+      if (!result.ok()) {
+        // Probably a permissions error or a non-existing base_dir
+        return nullptr;
+      }
+      if (*result) {
+        return std::unique_ptr<TemporaryDir>(new TemporaryDir(std::move(fn)));
+      }
+      // The random name already exists in base_dir, try with another name
+      st = Status::IOError("Path already exists: '", fn.ToString(), "'");
+      ARROW_ASSIGN_OR_RAISE(base_name, MakeBaseName());
+    }
+    return st;
+  };
+
+  ARROW_ASSIGN_OR_RAISE(base_name, MakeBaseName());
 
   auto base_dirs = GetPlatformTemporaryDirs();
   DCHECK_NE(base_dirs.size(), 0);
 
-  auto st = Status::OK();
-  for (const auto& p : base_dirs) {
-    PlatformFilename fn(p + kNativeSep + base_name + kNativeSep);
-    auto result = CreateDir(fn);
-    if (!result.ok()) {
-      st = result.status();
-      continue;
-    }
-    if (!*result) {
-      // XXX Should we retry with another random name?
-      return Status::IOError("Path already exists: '", fn.ToString(), "'");
-    } else {
-      return std::unique_ptr<TemporaryDir>(new TemporaryDir(std::move(fn)));
+  for (const auto& base_dir : base_dirs) {
+    ARROW_ASSIGN_OR_RAISE(auto ptr, TryCreatingDirectory(base_dir));
+    if (ptr) {
+      return std::move(ptr);
     }
+    // Cannot create in this directory, try the next one
   }
 
-  DCHECK(!st.ok());
-  return st;
+  return Status::IOError(
+      "Cannot create temporary subdirectory in any "
+      "of the platform temporary directories");
 }
 
 TemporaryDir::TemporaryDir(PlatformFilename&& path) : path_(std::move(path)) {}
@@ -1590,20 +1610,29 @@ Result<SignalHandler> SetSignalHandler(int signum, const SignalHandler& handler)
 
 namespace {
 
+int64_t GetPid() {
+#ifdef _WIN32
+  return GetCurrentProcessId();
+#else
+  return getpid();
+#endif
+}
+
 std::mt19937_64 GetSeedGenerator() {
   // Initialize Mersenne Twister PRNG with a true random seed.
+  // Make sure to mix in process id to minimize risks of clashes when parallel testing.
 #ifdef ARROW_VALGRIND
   // Valgrind can crash, hang or enter an infinite loop on std::random_device,
   // use a crude initializer instead.
-  // Make sure to mix in process id to avoid clashes when parallel testing.
   const uint8_t dummy = 0;
   ARROW_UNUSED(dummy);
   std::mt19937_64 seed_gen(reinterpret_cast<uintptr_t>(&dummy) ^
-                           static_cast<uintptr_t>(getpid()));
+                           static_cast<uintptr_t>(GetPid()));
 #else
   std::random_device true_random;
   std::mt19937_64 seed_gen(static_cast<uint64_t>(true_random()) ^
-                           (static_cast<uint64_t>(true_random()) << 32));
+                           (static_cast<uint64_t>(true_random()) << 32) ^
+                           static_cast<uint64_t>(GetPid()));
 #endif
   return seed_gen;
 }