You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/01/19 18:05:13 UTC

[GitHub] [arrow] westonpace commented on a change in pull request #9265: ARROW-11320: [C++] Try to strengthen temporary dir creation

westonpace commented on a change in pull request #9265:
URL: https://github.com/apache/arrow/pull/9265#discussion_r560366357



##########
File path: cpp/src/arrow/util/io_util.cc
##########
@@ -1482,31 +1482,51 @@ std::string MakeRandomName(int num_chars) {
 }  // namespace
 
 Result<std::unique_ptr<TemporaryDir>> TemporaryDir::Make(const std::string& prefix) {
-  std::string suffix = MakeRandomName(8);
+  const int kNumChars = 8;
+
   NativePathString base_name;
-  ARROW_ASSIGN_OR_RAISE(base_name, StringToNative(prefix + suffix));
+
+  auto MakeBaseName = [&]() {
+    std::string suffix = MakeRandomName(kNumChars);
+    return StringToNative(prefix + suffix);
+  };
+
+  auto TryCreatingDirectory =

Review comment:
       Why not simply use a static/anon-namespace functions for TryCreatingDirectory and MakeBaseName?  MakeRandomName is already one.

##########
File path: cpp/src/arrow/util/io_util.cc
##########
@@ -1482,31 +1482,51 @@ std::string MakeRandomName(int num_chars) {
 }  // namespace
 
 Result<std::unique_ptr<TemporaryDir>> TemporaryDir::Make(const std::string& prefix) {
-  std::string suffix = MakeRandomName(8);
+  const int kNumChars = 8;
+
   NativePathString base_name;
-  ARROW_ASSIGN_OR_RAISE(base_name, StringToNative(prefix + suffix));
+
+  auto MakeBaseName = [&]() {
+    std::string suffix = MakeRandomName(kNumChars);
+    return StringToNative(prefix + suffix);
+  };
+
+  auto TryCreatingDirectory =
+      [&](const NativePathString& base_dir) -> Result<std::unique_ptr<TemporaryDir>> {
+    Status st;
+    for (int attempt = 0; attempt < 3; ++attempt) {
+      PlatformFilename fn(base_dir + kNativeSep + base_name + kNativeSep);
+      auto result = CreateDir(fn);
+      if (!result.ok()) {
+        // Probably a permissions error or a non-existing base_dir
+        return nullptr;
+      }
+      if (*result) {
+        return std::unique_ptr<TemporaryDir>(new TemporaryDir(std::move(fn)));
+      }
+      // The random name already exists in base_dir, try with another name
+      st = Status::IOError("Path already exists: '", fn.ToString(), "'");
+      ARROW_ASSIGN_OR_RAISE(base_name, MakeBaseName());
+    }
+    return st;
+  };
+
+  ARROW_ASSIGN_OR_RAISE(base_name, MakeBaseName());
 
   auto base_dirs = GetPlatformTemporaryDirs();
   DCHECK_NE(base_dirs.size(), 0);
 
-  auto st = Status::OK();
-  for (const auto& p : base_dirs) {
-    PlatformFilename fn(p + kNativeSep + base_name + kNativeSep);
-    auto result = CreateDir(fn);
-    if (!result.ok()) {
-      st = result.status();
-      continue;
-    }
-    if (!*result) {
-      // XXX Should we retry with another random name?
-      return Status::IOError("Path already exists: '", fn.ToString(), "'");
-    } else {
-      return std::unique_ptr<TemporaryDir>(new TemporaryDir(std::move(fn)));
+  for (const auto& base_dir : base_dirs) {
+    ARROW_ASSIGN_OR_RAISE(auto ptr, TryCreatingDirectory(base_dir));

Review comment:
       The way it is at the moment you will try the next directory if you get a permissions error or non-existing directory but you won't try the next directory if you tried three times and failed.  I think this is probably ok, just making sure this is the behavior you want.

##########
File path: cpp/src/arrow/util/io_util.cc
##########
@@ -1482,31 +1482,51 @@ std::string MakeRandomName(int num_chars) {
 }  // namespace
 
 Result<std::unique_ptr<TemporaryDir>> TemporaryDir::Make(const std::string& prefix) {
-  std::string suffix = MakeRandomName(8);
+  const int kNumChars = 8;
+
   NativePathString base_name;
-  ARROW_ASSIGN_OR_RAISE(base_name, StringToNative(prefix + suffix));
+
+  auto MakeBaseName = [&]() {
+    std::string suffix = MakeRandomName(kNumChars);
+    return StringToNative(prefix + suffix);
+  };
+
+  auto TryCreatingDirectory =
+      [&](const NativePathString& base_dir) -> Result<std::unique_ptr<TemporaryDir>> {
+    Status st;
+    for (int attempt = 0; attempt < 3; ++attempt) {
+      PlatformFilename fn(base_dir + kNativeSep + base_name + kNativeSep);
+      auto result = CreateDir(fn);
+      if (!result.ok()) {
+        // Probably a permissions error or a non-existing base_dir
+        return nullptr;
+      }
+      if (*result) {
+        return std::unique_ptr<TemporaryDir>(new TemporaryDir(std::move(fn)));
+      }
+      // The random name already exists in base_dir, try with another name
+      st = Status::IOError("Path already exists: '", fn.ToString(), "'");
+      ARROW_ASSIGN_OR_RAISE(base_name, MakeBaseName());
+    }
+    return st;
+  };
+
+  ARROW_ASSIGN_OR_RAISE(base_name, MakeBaseName());
 
   auto base_dirs = GetPlatformTemporaryDirs();
   DCHECK_NE(base_dirs.size(), 0);
 
-  auto st = Status::OK();
-  for (const auto& p : base_dirs) {
-    PlatformFilename fn(p + kNativeSep + base_name + kNativeSep);
-    auto result = CreateDir(fn);
-    if (!result.ok()) {
-      st = result.status();
-      continue;
-    }
-    if (!*result) {
-      // XXX Should we retry with another random name?
-      return Status::IOError("Path already exists: '", fn.ToString(), "'");
-    } else {
-      return std::unique_ptr<TemporaryDir>(new TemporaryDir(std::move(fn)));
+  for (const auto& base_dir : base_dirs) {
+    ARROW_ASSIGN_OR_RAISE(auto ptr, TryCreatingDirectory(base_dir));
+    if (ptr) {
+      return std::move(ptr);

Review comment:
       Why are you applying `std::move` to a return value?

##########
File path: cpp/src/arrow/util/io_util.cc
##########
@@ -1590,20 +1610,29 @@ Result<SignalHandler> SetSignalHandler(int signum, const SignalHandler& handler)
 
 namespace {
 
+int64_t GetPid() {
+#ifdef _WIN32
+  return GetCurrentProcessId();
+#else
+  return getpid();
+#endif
+}
+
 std::mt19937_64 GetSeedGenerator() {
   // Initialize Mersenne Twister PRNG with a true random seed.
+  // Make sure to mix in process id to minimize risks of clashes when parallel testing.
 #ifdef ARROW_VALGRIND
   // Valgrind can crash, hang or enter an infinite loop on std::random_device,
   // use a crude initializer instead.
-  // Make sure to mix in process id to avoid clashes when parallel testing.
   const uint8_t dummy = 0;
   ARROW_UNUSED(dummy);
   std::mt19937_64 seed_gen(reinterpret_cast<uintptr_t>(&dummy) ^
-                           static_cast<uintptr_t>(getpid()));
+                           static_cast<uintptr_t>(GetPid()));
 #else
   std::random_device true_random;
   std::mt19937_64 seed_gen(static_cast<uint64_t>(true_random()) ^
-                           (static_cast<uint64_t>(true_random()) << 32));
+                           (static_cast<uint64_t>(true_random()) << 32) ^
+                           (static_cast<uint64_t>(GetPid()) << 17));

Review comment:
       Why `<< 17`?  Won't this leave the last 17 bits as 0?  It appears PID is at least 32 bits.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org