You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/01/25 01:33:11 UTC
[arrow] branch master updated: ARROW-2025: [C++] Creating multiple
equivalent `HadoopFileSystem`s works fine
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 2126ebf ARROW-2025: [C++] Creating multiple equivalent `HadoopFileSystem`s works fine
2126ebf is described below
commit 2126ebf8a755e3ee884058be4aae83585a55107e
Author: Jim Crist <ji...@gmail.com>
AuthorDate: Wed Jan 24 20:33:06 2018 -0500
ARROW-2025: [C++] Creating multiple equivalent `HadoopFileSystem`s works fine
Previously, creating two instances of `HadoopFileSystem` using the same init parameters would result in both pointing to the same `hdfsFS` object. If one `HadoopFileSystem` disconnected, the underlying `hdfsFS` would be closed for both instances.
To fix this, we force a new instance of `hdfsFS` on connect, removing this caching behavior.
Author: Jim Crist <ji...@gmail.com>
Closes #1499 from jcrist/no-cache-hdfs and squashes the following commits:
f8ff1351 [Jim Crist] Add test
bf6627e8 [Jim Crist] Force libhdfs/libhdfs3 to return new FS on connect
---
cpp/src/arrow/io/hdfs-internal.cc | 5 +++++
cpp/src/arrow/io/hdfs-internal.h | 4 ++++
cpp/src/arrow/io/hdfs.cc | 1 +
cpp/src/arrow/io/io-hdfs-test.cc | 15 +++++++++++++++
4 files changed, 25 insertions(+)
diff --git a/cpp/src/arrow/io/hdfs-internal.cc b/cpp/src/arrow/io/hdfs-internal.cc
index 545b2d1..efceb8a 100644
--- a/cpp/src/arrow/io/hdfs-internal.cc
+++ b/cpp/src/arrow/io/hdfs-internal.cc
@@ -310,6 +310,10 @@ void LibHdfsShim::BuilderSetKerbTicketCachePath(hdfsBuilder* bld,
this->hdfsBuilderSetKerbTicketCachePath(bld, kerbTicketCachePath);
}
+void LibHdfsShim::BuilderSetForceNewInstance(hdfsBuilder* bld) {
+ this->hdfsBuilderSetForceNewInstance(bld);
+}
+
hdfsFS LibHdfsShim::BuilderConnect(hdfsBuilder* bld) {
return this->hdfsBuilderConnect(bld);
}
@@ -490,6 +494,7 @@ Status LibHdfsShim::GetRequiredSymbols() {
GET_SYMBOL_REQUIRED(this, hdfsBuilderSetNameNodePort);
GET_SYMBOL_REQUIRED(this, hdfsBuilderSetUserName);
GET_SYMBOL_REQUIRED(this, hdfsBuilderSetKerbTicketCachePath);
+ GET_SYMBOL_REQUIRED(this, hdfsBuilderSetForceNewInstance);
GET_SYMBOL_REQUIRED(this, hdfsBuilderConnect);
GET_SYMBOL_REQUIRED(this, hdfsCreateDirectory);
GET_SYMBOL_REQUIRED(this, hdfsDelete);
diff --git a/cpp/src/arrow/io/hdfs-internal.h b/cpp/src/arrow/io/hdfs-internal.h
index df925cf..f0fce23 100644
--- a/cpp/src/arrow/io/hdfs-internal.h
+++ b/cpp/src/arrow/io/hdfs-internal.h
@@ -51,6 +51,7 @@ struct LibHdfsShim {
void (*hdfsBuilderSetUserName)(hdfsBuilder* bld, const char* userName);
void (*hdfsBuilderSetKerbTicketCachePath)(hdfsBuilder* bld,
const char* kerbTicketCachePath);
+ void (*hdfsBuilderSetForceNewInstance)(hdfsBuilder* bld);
hdfsFS (*hdfsBuilderConnect)(hdfsBuilder* bld);
int (*hdfsDisconnect)(hdfsFS fs);
@@ -95,6 +96,7 @@ struct LibHdfsShim {
this->hdfsBuilderSetNameNodePort = nullptr;
this->hdfsBuilderSetUserName = nullptr;
this->hdfsBuilderSetKerbTicketCachePath = nullptr;
+ this->hdfsBuilderSetForceNewInstance = nullptr;
this->hdfsBuilderConnect = nullptr;
this->hdfsDisconnect = nullptr;
this->hdfsOpenFile = nullptr;
@@ -138,6 +140,8 @@ struct LibHdfsShim {
void BuilderSetKerbTicketCachePath(hdfsBuilder* bld, const char* kerbTicketCachePath);
+ void BuilderSetForceNewInstance(hdfsBuilder* bld);
+
hdfsFS BuilderConnect(hdfsBuilder* bld);
int Disconnect(hdfsFS fs);
diff --git a/cpp/src/arrow/io/hdfs.cc b/cpp/src/arrow/io/hdfs.cc
index 6e3e4a7..6c569ae 100644
--- a/cpp/src/arrow/io/hdfs.cc
+++ b/cpp/src/arrow/io/hdfs.cc
@@ -335,6 +335,7 @@ class HadoopFileSystem::HadoopFileSystemImpl {
if (!config->kerb_ticket.empty()) {
driver_->BuilderSetKerbTicketCachePath(builder, config->kerb_ticket.c_str());
}
+ driver_->BuilderSetForceNewInstance(builder);
fs_ = driver_->BuilderConnect(builder);
if (fs_ == nullptr) {
diff --git a/cpp/src/arrow/io/io-hdfs-test.cc b/cpp/src/arrow/io/io-hdfs-test.cc
index 5305b47..f2ded6f 100644
--- a/cpp/src/arrow/io/io-hdfs-test.cc
+++ b/cpp/src/arrow/io/io-hdfs-test.cc
@@ -178,6 +178,21 @@ TYPED_TEST(TestHadoopFileSystem, ConnectsAgain) {
ASSERT_OK(client->Disconnect());
}
+TYPED_TEST(TestHadoopFileSystem, MultipleClients) {
+ SKIP_IF_NO_DRIVER();
+
+ std::shared_ptr<HadoopFileSystem> client1;
+ std::shared_ptr<HadoopFileSystem> client2;
+ ASSERT_OK(HadoopFileSystem::Connect(&this->conf_, &client1));
+ ASSERT_OK(HadoopFileSystem::Connect(&this->conf_, &client2));
+ ASSERT_OK(client1->Disconnect());
+
+ // client2 continues to function after equivalent client1 has shutdown
+ std::vector<HdfsPathInfo> listing;
+ EXPECT_OK(client2->ListDirectory(this->scratch_dir_, &listing));
+ ASSERT_OK(client2->Disconnect());
+}
+
TYPED_TEST(TestHadoopFileSystem, MakeDirectory) {
SKIP_IF_NO_DRIVER();
--
To stop receiving notification emails like this one, please contact
wesm@apache.org.