You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2021/01/21 16:49:37 UTC
[arrow] branch master updated: ARROW-11320: [C++] Try to strengthen
temporary dir creation
This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new bc5d8bf ARROW-11320: [C++] Try to strengthen temporary dir creation
bc5d8bf is described below
commit bc5d8bf76ef3cf8898cdfb7e7feb089fe2944731
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Thu Jan 21 17:48:49 2021 +0100
ARROW-11320: [C++] Try to strengthen temporary dir creation
* Make the random generator seed a bit more unique.
* When a temporary name already exists, retry with another one instead of bailing out.
Closes #9265 from pitrou/ARROW-11320-temp-dir-retry
Authored-by: Antoine Pitrou <an...@python.org>
Signed-off-by: Antoine Pitrou <an...@python.org>
---
cpp/src/arrow/util/io_util.cc | 69 ++++++++++++++++++++++++++++++-------------
1 file changed, 49 insertions(+), 20 deletions(-)
diff --git a/cpp/src/arrow/util/io_util.cc b/cpp/src/arrow/util/io_util.cc
index 5d36ba9..cabbfb3 100644
--- a/cpp/src/arrow/util/io_util.cc
+++ b/cpp/src/arrow/util/io_util.cc
@@ -1482,31 +1482,51 @@ std::string MakeRandomName(int num_chars) {
} // namespace
Result<std::unique_ptr<TemporaryDir>> TemporaryDir::Make(const std::string& prefix) {
- std::string suffix = MakeRandomName(8);
+ const int kNumChars = 8;
+
NativePathString base_name;
- ARROW_ASSIGN_OR_RAISE(base_name, StringToNative(prefix + suffix));
+
+ auto MakeBaseName = [&]() {
+ std::string suffix = MakeRandomName(kNumChars);
+ return StringToNative(prefix + suffix);
+ };
+
+ auto TryCreatingDirectory =
+ [&](const NativePathString& base_dir) -> Result<std::unique_ptr<TemporaryDir>> {
+ Status st;
+ for (int attempt = 0; attempt < 3; ++attempt) {
+ PlatformFilename fn(base_dir + kNativeSep + base_name + kNativeSep);
+ auto result = CreateDir(fn);
+ if (!result.ok()) {
+ // Probably a permissions error or a non-existing base_dir
+ return nullptr;
+ }
+ if (*result) {
+ return std::unique_ptr<TemporaryDir>(new TemporaryDir(std::move(fn)));
+ }
+ // The random name already exists in base_dir, try with another name
+ st = Status::IOError("Path already exists: '", fn.ToString(), "'");
+ ARROW_ASSIGN_OR_RAISE(base_name, MakeBaseName());
+ }
+ return st;
+ };
+
+ ARROW_ASSIGN_OR_RAISE(base_name, MakeBaseName());
auto base_dirs = GetPlatformTemporaryDirs();
DCHECK_NE(base_dirs.size(), 0);
- auto st = Status::OK();
- for (const auto& p : base_dirs) {
- PlatformFilename fn(p + kNativeSep + base_name + kNativeSep);
- auto result = CreateDir(fn);
- if (!result.ok()) {
- st = result.status();
- continue;
- }
- if (!*result) {
- // XXX Should we retry with another random name?
- return Status::IOError("Path already exists: '", fn.ToString(), "'");
- } else {
- return std::unique_ptr<TemporaryDir>(new TemporaryDir(std::move(fn)));
+ for (const auto& base_dir : base_dirs) {
+ ARROW_ASSIGN_OR_RAISE(auto ptr, TryCreatingDirectory(base_dir));
+ if (ptr) {
+ return std::move(ptr);
}
+ // Cannot create in this directory, try the next one
}
- DCHECK(!st.ok());
- return st;
+ return Status::IOError(
+ "Cannot create temporary subdirectory in any "
+ "of the platform temporary directories");
}
TemporaryDir::TemporaryDir(PlatformFilename&& path) : path_(std::move(path)) {}
@@ -1590,20 +1610,29 @@ Result<SignalHandler> SetSignalHandler(int signum, const SignalHandler& handler)
namespace {
+int64_t GetPid() {
+#ifdef _WIN32
+ return GetCurrentProcessId();
+#else
+ return getpid();
+#endif
+}
+
std::mt19937_64 GetSeedGenerator() {
// Initialize Mersenne Twister PRNG with a true random seed.
+ // Make sure to mix in process id to minimize risks of clashes when parallel testing.
#ifdef ARROW_VALGRIND
// Valgrind can crash, hang or enter an infinite loop on std::random_device,
// use a crude initializer instead.
- // Make sure to mix in process id to avoid clashes when parallel testing.
const uint8_t dummy = 0;
ARROW_UNUSED(dummy);
std::mt19937_64 seed_gen(reinterpret_cast<uintptr_t>(&dummy) ^
- static_cast<uintptr_t>(getpid()));
+ static_cast<uintptr_t>(GetPid()));
#else
std::random_device true_random;
std::mt19937_64 seed_gen(static_cast<uint64_t>(true_random()) ^
- (static_cast<uint64_t>(true_random()) << 32));
+ (static_cast<uint64_t>(true_random()) << 32) ^
+ static_cast<uint64_t>(GetPid()));
#endif
return seed_gen;
}