You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by fe...@apache.org on 2023/12/23 15:03:55 UTC

(arrow) branch main updated: GH-39320: [C++][FS][Azure] Add managed identity auth configuration (#39321)

This is an automated email from the ASF dual-hosted git repository.

felipecrv pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new d519544158 GH-39320: [C++][FS][Azure] Add managed identity auth configuration (#39321)
d519544158 is described below

commit d51954415882423584f2a95b0897aa4d073a4e1c
Author: Thomas Newton <th...@gmail.com>
AuthorDate: Sat Dec 23 15:03:47 2023 +0000

    GH-39320: [C++][FS][Azure] Add managed identity auth configuration (#39321)
    
    
    
    ### Rationale for this change
    Workload identity is a useful Azure authentication method. Also I failed to set the account_name correctly for a bunch of auths (I think this got lost in a rebase then I copy pasted the broken code).
    
    ### What changes are included in this PR?
    - Make filesystem initialisation fail if `account_name_.empty()`. This prevents the account name configuration bug we had. Also added a test asserting that filesystem initialization fails in this case.
    - Remove account name configuration on all auth configs, in favour of setting in separately from the auth configuration.
    - Implement `AzureOptions::ConfigureManagedIdentityCredential`
    
    ### Are these changes tested?
    Added a simple test initialising a filesystem using `ConfigureManagedIdentityCredential`. This is not the most comprehensive test but its the same as what we agreed on for https://github.com/apache/arrow/pull/39263.
    
    ### Are there any user-facing changes?
    Managed identity authentication is now supported.
    
    * Closes: #39320
    
    Authored-by: Thomas Newton <th...@gmail.com>
    Signed-off-by: Felipe Oliveira Carvalho <fe...@gmail.com>
---
 cpp/src/arrow/filesystem/azurefs.cc      | 38 +++++++++++++++++++++-----------
 cpp/src/arrow/filesystem/azurefs.h       | 16 ++++++++------
 cpp/src/arrow/filesystem/azurefs_test.cc | 34 +++++++++++++++++++++++-----
 3 files changed, 62 insertions(+), 26 deletions(-)

diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc
index 26c2761886..21350a4904 100644
--- a/cpp/src/arrow/filesystem/azurefs.cc
+++ b/cpp/src/arrow/filesystem/azurefs.cc
@@ -58,7 +58,7 @@ bool AzureOptions::Equals(const AzureOptions& other) const {
                       blob_storage_scheme == other.blob_storage_scheme &&
                       dfs_storage_scheme == other.dfs_storage_scheme &&
                       default_metadata == other.default_metadata &&
-                      account_name_ == other.account_name_ &&
+                      account_name == other.account_name &&
                       credential_kind_ == other.credential_kind_;
   if (!equals) {
     return false;
@@ -104,17 +104,17 @@ std::string AzureOptions::AccountDfsUrl(const std::string& account_name) const {
   return BuildBaseUrl(dfs_storage_scheme, dfs_storage_authority, account_name);
 }
 
-Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_name,
-                                                   const std::string& account_key) {
+Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_key) {
   credential_kind_ = CredentialKind::kStorageSharedKeyCredential;
-  account_name_ = account_name;
+  if (account_name.empty()) {
+    return Status::Invalid("AzureOptions doesn't contain a valid account name");
+  }
   storage_shared_key_credential_ =
       std::make_shared<Storage::StorageSharedKeyCredential>(account_name, account_key);
   return Status::OK();
 }
 
-Status AzureOptions::ConfigureClientSecretCredential(const std::string& account_name,
-                                                     const std::string& tenant_id,
+Status AzureOptions::ConfigureClientSecretCredential(const std::string& tenant_id,
                                                      const std::string& client_id,
                                                      const std::string& client_secret) {
   credential_kind_ = CredentialKind::kTokenCredential;
@@ -123,14 +123,20 @@ Status AzureOptions::ConfigureClientSecretCredential(const std::string& account_
   return Status::OK();
 }
 
-Status AzureOptions::ConfigureDefaultCredential(const std::string& account_name) {
+Status AzureOptions::ConfigureDefaultCredential() {
   credential_kind_ = CredentialKind::kTokenCredential;
   token_credential_ = std::make_shared<Azure::Identity::DefaultAzureCredential>();
   return Status::OK();
 }
 
-Status AzureOptions::ConfigureWorkloadIdentityCredential(
-    const std::string& account_name) {
+Status AzureOptions::ConfigureManagedIdentityCredential(const std::string& client_id) {
+  credential_kind_ = CredentialKind::kTokenCredential;
+  token_credential_ =
+      std::make_shared<Azure::Identity::ManagedIdentityCredential>(client_id);
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureWorkloadIdentityCredential() {
   credential_kind_ = CredentialKind::kTokenCredential;
   token_credential_ = std::make_shared<Azure::Identity::WorkloadIdentityCredential>();
   return Status::OK();
@@ -138,14 +144,17 @@ Status AzureOptions::ConfigureWorkloadIdentityCredential(
 
 Result<std::unique_ptr<Blobs::BlobServiceClient>> AzureOptions::MakeBlobServiceClient()
     const {
+  if (account_name.empty()) {
+    return Status::Invalid("AzureOptions doesn't contain a valid account name");
+  }
   switch (credential_kind_) {
     case CredentialKind::kAnonymous:
       break;
     case CredentialKind::kTokenCredential:
-      return std::make_unique<Blobs::BlobServiceClient>(AccountBlobUrl(account_name_),
+      return std::make_unique<Blobs::BlobServiceClient>(AccountBlobUrl(account_name),
                                                         token_credential_);
     case CredentialKind::kStorageSharedKeyCredential:
-      return std::make_unique<Blobs::BlobServiceClient>(AccountBlobUrl(account_name_),
+      return std::make_unique<Blobs::BlobServiceClient>(AccountBlobUrl(account_name),
                                                         storage_shared_key_credential_);
   }
   return Status::Invalid("AzureOptions doesn't contain a valid auth configuration");
@@ -153,15 +162,18 @@ Result<std::unique_ptr<Blobs::BlobServiceClient>> AzureOptions::MakeBlobServiceC
 
 Result<std::unique_ptr<DataLake::DataLakeServiceClient>>
 AzureOptions::MakeDataLakeServiceClient() const {
+  if (account_name.empty()) {
+    return Status::Invalid("AzureOptions doesn't contain a valid account name");
+  }
   switch (credential_kind_) {
     case CredentialKind::kAnonymous:
       break;
     case CredentialKind::kTokenCredential:
       return std::make_unique<DataLake::DataLakeServiceClient>(
-          AccountDfsUrl(account_name_), token_credential_);
+          AccountDfsUrl(account_name), token_credential_);
     case CredentialKind::kStorageSharedKeyCredential:
       return std::make_unique<DataLake::DataLakeServiceClient>(
-          AccountDfsUrl(account_name_), storage_shared_key_credential_);
+          AccountDfsUrl(account_name), storage_shared_key_credential_);
   }
   return Status::Invalid("AzureOptions doesn't contain a valid auth configuration");
 }
diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h
index 346dd349e9..78e0a8148c 100644
--- a/cpp/src/arrow/filesystem/azurefs.h
+++ b/cpp/src/arrow/filesystem/azurefs.h
@@ -48,6 +48,9 @@ class TestAzureFileSystem;
 
 /// Options for the AzureFileSystem implementation.
 struct ARROW_EXPORT AzureOptions {
+  /// \brief account name of the Azure Storage account.
+  std::string account_name;
+
   /// \brief hostname[:port] of the Azure Blob Storage Service.
   ///
   /// If the hostname is a relative domain name (one that starts with a '.'), then storage
@@ -94,7 +97,6 @@ struct ARROW_EXPORT AzureOptions {
     kStorageSharedKeyCredential,
   } credential_kind_ = CredentialKind::kAnonymous;
 
-  std::string account_name_;
   std::shared_ptr<Azure::Core::Credentials::TokenCredential> token_credential_;
   std::shared_ptr<Azure::Storage::StorageSharedKeyCredential>
       storage_shared_key_credential_;
@@ -103,15 +105,15 @@ struct ARROW_EXPORT AzureOptions {
   AzureOptions();
   ~AzureOptions();
 
-  Status ConfigureDefaultCredential(const std::string& account_name);
+  Status ConfigureDefaultCredential();
+
+  Status ConfigureManagedIdentityCredential(const std::string& client_id = std::string());
 
-  Status ConfigureWorkloadIdentityCredential(const std::string& account_name);
+  Status ConfigureWorkloadIdentityCredential();
 
-  Status ConfigureAccountKeyCredential(const std::string& account_name,
-                                       const std::string& account_key);
+  Status ConfigureAccountKeyCredential(const std::string& account_key);
 
-  Status ConfigureClientSecretCredential(const std::string& account_name,
-                                         const std::string& tenant_id,
+  Status ConfigureClientSecretCredential(const std::string& tenant_id,
                                          const std::string& client_id,
                                          const std::string& client_secret);
 
diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc
index 62c5ef2232..f6af9f722d 100644
--- a/cpp/src/arrow/filesystem/azurefs_test.cc
+++ b/cpp/src/arrow/filesystem/azurefs_test.cc
@@ -271,22 +271,44 @@ class AzureHierarchicalNSEnv : public AzureEnvImpl<AzureHierarchicalNSEnv> {
   bool WithHierarchicalNamespace() const final { return true; }
 };
 
+TEST(AzureFileSystem, InitializingFilesystemWithoutAccountNameFails) {
+  AzureOptions options;
+  ASSERT_RAISES(Invalid, options.ConfigureAccountKeyCredential("account_key"));
+
+  ARROW_EXPECT_OK(
+      options.ConfigureClientSecretCredential("tenant_id", "client_id", "client_secret"));
+  ASSERT_RAISES(Invalid, AzureFileSystem::Make(options));
+}
+
 TEST(AzureFileSystem, InitializeFilesystemWithClientSecretCredential) {
   AzureOptions options;
-  ARROW_EXPECT_OK(options.ConfigureClientSecretCredential(
-      "dummy-account-name", "tenant_id", "client_id", "client_secret"));
+  options.account_name = "dummy-account-name";
+  ARROW_EXPECT_OK(
+      options.ConfigureClientSecretCredential("tenant_id", "client_id", "client_secret"));
   EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
 }
 
 TEST(AzureFileSystem, InitializeFilesystemWithDefaultCredential) {
   AzureOptions options;
-  ARROW_EXPECT_OK(options.ConfigureDefaultCredential("dummy-account-name"));
+  options.account_name = "dummy-account-name";
+  ARROW_EXPECT_OK(options.ConfigureDefaultCredential());
   EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
 }
 
+TEST(AzureFileSystem, InitializeFilesystemWithManagedIdentityCredential) {
+  AzureOptions options;
+  options.account_name = "dummy-account-name";
+  ARROW_EXPECT_OK(options.ConfigureManagedIdentityCredential());
+  EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
+
+  ARROW_EXPECT_OK(options.ConfigureManagedIdentityCredential("specific-client-id"));
+  EXPECT_OK_AND_ASSIGN(fs, AzureFileSystem::Make(options));
+}
+
 TEST(AzureFileSystem, InitializeFilesystemWithWorkloadIdentityCredential) {
   AzureOptions options;
-  ARROW_EXPECT_OK(options.ConfigureWorkloadIdentityCredential("dummy-account-name"));
+  options.account_name = "dummy-account-name";
+  ARROW_EXPECT_OK(options.ConfigureWorkloadIdentityCredential());
   EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
 }
 
@@ -383,6 +405,7 @@ class TestAzureFileSystem : public ::testing::Test {
 
   static Result<AzureOptions> MakeOptions(BaseAzureEnv* env) {
     AzureOptions options;
+    options.account_name = env->account_name();
     switch (env->backend()) {
       case AzureBackend::kAzurite:
         options.blob_storage_authority = "127.0.0.1:10000";
@@ -394,8 +417,7 @@ class TestAzureFileSystem : public ::testing::Test {
         // Use the default values
         break;
     }
-    ARROW_EXPECT_OK(
-        options.ConfigureAccountKeyCredential(env->account_name(), env->account_key()));
+    ARROW_EXPECT_OK(options.ConfigureAccountKeyCredential(env->account_key()));
     return options;
   }