You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/01/25 01:33:11 UTC

[arrow] branch master updated: ARROW-2025: [C++] Creating multiple equivalent `HadoopFileSystem`s works fine

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 2126ebf  ARROW-2025: [C++] Creating multiple equivalent `HadoopFileSystem`s works fine
2126ebf is described below

commit 2126ebf8a755e3ee884058be4aae83585a55107e
Author: Jim Crist <ji...@gmail.com>
AuthorDate: Wed Jan 24 20:33:06 2018 -0500

    ARROW-2025: [C++] Creating multiple equivalent `HadoopFileSystem`s works fine
    
    Previously, creating two instances of `HadoopFileSystem` using the same init parameters would result in both pointing to the same `hdfsFS` object. If one `HadoopFileSystem` disconnected, then the underlying `hdfsFS` would be closed for both instances.
    
    To fix this, we force a new instance of `hdfsFS` on connect, removing this caching behavior.
    
    Author: Jim Crist <ji...@gmail.com>
    
    Closes #1499 from jcrist/no-cache-hdfs and squashes the following commits:
    
    f8ff1351 [Jim Crist] Add test
    bf6627e8 [Jim Crist] Force libhdfs/libhdfs3 to return new FS on connect
---
 cpp/src/arrow/io/hdfs-internal.cc |  5 +++++
 cpp/src/arrow/io/hdfs-internal.h  |  4 ++++
 cpp/src/arrow/io/hdfs.cc          |  1 +
 cpp/src/arrow/io/io-hdfs-test.cc  | 15 +++++++++++++++
 4 files changed, 25 insertions(+)

diff --git a/cpp/src/arrow/io/hdfs-internal.cc b/cpp/src/arrow/io/hdfs-internal.cc
index 545b2d1..efceb8a 100644
--- a/cpp/src/arrow/io/hdfs-internal.cc
+++ b/cpp/src/arrow/io/hdfs-internal.cc
@@ -310,6 +310,10 @@ void LibHdfsShim::BuilderSetKerbTicketCachePath(hdfsBuilder* bld,
   this->hdfsBuilderSetKerbTicketCachePath(bld, kerbTicketCachePath);
 }
 
+void LibHdfsShim::BuilderSetForceNewInstance(hdfsBuilder* bld) {
+  this->hdfsBuilderSetForceNewInstance(bld);
+}
+
 hdfsFS LibHdfsShim::BuilderConnect(hdfsBuilder* bld) {
   return this->hdfsBuilderConnect(bld);
 }
@@ -490,6 +494,7 @@ Status LibHdfsShim::GetRequiredSymbols() {
   GET_SYMBOL_REQUIRED(this, hdfsBuilderSetNameNodePort);
   GET_SYMBOL_REQUIRED(this, hdfsBuilderSetUserName);
   GET_SYMBOL_REQUIRED(this, hdfsBuilderSetKerbTicketCachePath);
+  GET_SYMBOL_REQUIRED(this, hdfsBuilderSetForceNewInstance);
   GET_SYMBOL_REQUIRED(this, hdfsBuilderConnect);
   GET_SYMBOL_REQUIRED(this, hdfsCreateDirectory);
   GET_SYMBOL_REQUIRED(this, hdfsDelete);
diff --git a/cpp/src/arrow/io/hdfs-internal.h b/cpp/src/arrow/io/hdfs-internal.h
index df925cf..f0fce23 100644
--- a/cpp/src/arrow/io/hdfs-internal.h
+++ b/cpp/src/arrow/io/hdfs-internal.h
@@ -51,6 +51,7 @@ struct LibHdfsShim {
   void (*hdfsBuilderSetUserName)(hdfsBuilder* bld, const char* userName);
   void (*hdfsBuilderSetKerbTicketCachePath)(hdfsBuilder* bld,
                                             const char* kerbTicketCachePath);
+  void (*hdfsBuilderSetForceNewInstance)(hdfsBuilder* bld);
   hdfsFS (*hdfsBuilderConnect)(hdfsBuilder* bld);
 
   int (*hdfsDisconnect)(hdfsFS fs);
@@ -95,6 +96,7 @@ struct LibHdfsShim {
     this->hdfsBuilderSetNameNodePort = nullptr;
     this->hdfsBuilderSetUserName = nullptr;
     this->hdfsBuilderSetKerbTicketCachePath = nullptr;
+    this->hdfsBuilderSetForceNewInstance = nullptr;
     this->hdfsBuilderConnect = nullptr;
     this->hdfsDisconnect = nullptr;
     this->hdfsOpenFile = nullptr;
@@ -138,6 +140,8 @@ struct LibHdfsShim {
 
   void BuilderSetKerbTicketCachePath(hdfsBuilder* bld, const char* kerbTicketCachePath);
 
+  void BuilderSetForceNewInstance(hdfsBuilder* bld);
+
   hdfsFS BuilderConnect(hdfsBuilder* bld);
 
   int Disconnect(hdfsFS fs);
diff --git a/cpp/src/arrow/io/hdfs.cc b/cpp/src/arrow/io/hdfs.cc
index 6e3e4a7..6c569ae 100644
--- a/cpp/src/arrow/io/hdfs.cc
+++ b/cpp/src/arrow/io/hdfs.cc
@@ -335,6 +335,7 @@ class HadoopFileSystem::HadoopFileSystemImpl {
     if (!config->kerb_ticket.empty()) {
       driver_->BuilderSetKerbTicketCachePath(builder, config->kerb_ticket.c_str());
     }
+    driver_->BuilderSetForceNewInstance(builder);
     fs_ = driver_->BuilderConnect(builder);
 
     if (fs_ == nullptr) {
diff --git a/cpp/src/arrow/io/io-hdfs-test.cc b/cpp/src/arrow/io/io-hdfs-test.cc
index 5305b47..f2ded6f 100644
--- a/cpp/src/arrow/io/io-hdfs-test.cc
+++ b/cpp/src/arrow/io/io-hdfs-test.cc
@@ -178,6 +178,21 @@ TYPED_TEST(TestHadoopFileSystem, ConnectsAgain) {
   ASSERT_OK(client->Disconnect());
 }
 
+TYPED_TEST(TestHadoopFileSystem, MultipleClients) {
+  SKIP_IF_NO_DRIVER();
+
+  std::shared_ptr<HadoopFileSystem> client1;
+  std::shared_ptr<HadoopFileSystem> client2;
+  ASSERT_OK(HadoopFileSystem::Connect(&this->conf_, &client1));
+  ASSERT_OK(HadoopFileSystem::Connect(&this->conf_, &client2));
+  ASSERT_OK(client1->Disconnect());
+
+  // client2 continues to function after equivalent client1 has shutdown
+  std::vector<HdfsPathInfo> listing;
+  EXPECT_OK(client2->ListDirectory(this->scratch_dir_, &listing));
+  ASSERT_OK(client2->Disconnect());
+}
+
 TYPED_TEST(TestHadoopFileSystem, MakeDirectory) {
   SKIP_IF_NO_DRIVER();
 

-- 
To stop receiving notification emails like this one, please contact
wesm@apache.org.