You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by fe...@apache.org on 2023/12/23 15:03:55 UTC
(arrow) branch main updated: GH-39320: [C++][FS][Azure] Add managed identity auth configuration (#39321)
This is an automated email from the ASF dual-hosted git repository.
felipecrv pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new d519544158 GH-39320: [C++][FS][Azure] Add managed identity auth configuration (#39321)
d519544158 is described below
commit d51954415882423584f2a95b0897aa4d073a4e1c
Author: Thomas Newton <th...@gmail.com>
AuthorDate: Sat Dec 23 15:03:47 2023 +0000
GH-39320: [C++][FS][Azure] Add managed identity auth configuration (#39321)
### Rationale for this change
Workload identity is a useful Azure authentication method. Also I failed to set the account_name correctly for a bunch of auths (I think this got lost in a rebase then I copy pasted the broken code).
### What changes are included in this PR?
- Make filesystem initialisation fail if `account_name_.empty()`. This prevents the account name configuration bug we had. Also added a test asserting that filesystem initialization fails in this case.
- Remove account name configuration on all auth configs, in favour of setting in separately from the auth configuration.
- Implement `AzureOptions::ConfigureManagedIdentityCredential`
### Are these changes tested?
Added a simple test initialising a filesystem using `ConfigureManagedIdentityCredential`. This is not the most comprehensive test but its the same as what we agreed on for https://github.com/apache/arrow/pull/39263.
### Are there any user-facing changes?
Managed identity authentication is now supported.
* Closes: #39320
Authored-by: Thomas Newton <th...@gmail.com>
Signed-off-by: Felipe Oliveira Carvalho <fe...@gmail.com>
---
cpp/src/arrow/filesystem/azurefs.cc | 38 +++++++++++++++++++++-----------
cpp/src/arrow/filesystem/azurefs.h | 16 ++++++++------
cpp/src/arrow/filesystem/azurefs_test.cc | 34 +++++++++++++++++++++++-----
3 files changed, 62 insertions(+), 26 deletions(-)
diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc
index 26c2761886..21350a4904 100644
--- a/cpp/src/arrow/filesystem/azurefs.cc
+++ b/cpp/src/arrow/filesystem/azurefs.cc
@@ -58,7 +58,7 @@ bool AzureOptions::Equals(const AzureOptions& other) const {
blob_storage_scheme == other.blob_storage_scheme &&
dfs_storage_scheme == other.dfs_storage_scheme &&
default_metadata == other.default_metadata &&
- account_name_ == other.account_name_ &&
+ account_name == other.account_name &&
credential_kind_ == other.credential_kind_;
if (!equals) {
return false;
@@ -104,17 +104,17 @@ std::string AzureOptions::AccountDfsUrl(const std::string& account_name) const {
return BuildBaseUrl(dfs_storage_scheme, dfs_storage_authority, account_name);
}
-Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_name,
- const std::string& account_key) {
+Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_key) {
credential_kind_ = CredentialKind::kStorageSharedKeyCredential;
- account_name_ = account_name;
+ if (account_name.empty()) {
+ return Status::Invalid("AzureOptions doesn't contain a valid account name");
+ }
storage_shared_key_credential_ =
std::make_shared<Storage::StorageSharedKeyCredential>(account_name, account_key);
return Status::OK();
}
-Status AzureOptions::ConfigureClientSecretCredential(const std::string& account_name,
- const std::string& tenant_id,
+Status AzureOptions::ConfigureClientSecretCredential(const std::string& tenant_id,
const std::string& client_id,
const std::string& client_secret) {
credential_kind_ = CredentialKind::kTokenCredential;
@@ -123,14 +123,20 @@ Status AzureOptions::ConfigureClientSecretCredential(const std::string& account_
return Status::OK();
}
-Status AzureOptions::ConfigureDefaultCredential(const std::string& account_name) {
+Status AzureOptions::ConfigureDefaultCredential() {
credential_kind_ = CredentialKind::kTokenCredential;
token_credential_ = std::make_shared<Azure::Identity::DefaultAzureCredential>();
return Status::OK();
}
-Status AzureOptions::ConfigureWorkloadIdentityCredential(
- const std::string& account_name) {
+Status AzureOptions::ConfigureManagedIdentityCredential(const std::string& client_id) {
+ credential_kind_ = CredentialKind::kTokenCredential;
+ token_credential_ =
+ std::make_shared<Azure::Identity::ManagedIdentityCredential>(client_id);
+ return Status::OK();
+}
+
+Status AzureOptions::ConfigureWorkloadIdentityCredential() {
credential_kind_ = CredentialKind::kTokenCredential;
token_credential_ = std::make_shared<Azure::Identity::WorkloadIdentityCredential>();
return Status::OK();
@@ -138,14 +144,17 @@ Status AzureOptions::ConfigureWorkloadIdentityCredential(
Result<std::unique_ptr<Blobs::BlobServiceClient>> AzureOptions::MakeBlobServiceClient()
const {
+ if (account_name.empty()) {
+ return Status::Invalid("AzureOptions doesn't contain a valid account name");
+ }
switch (credential_kind_) {
case CredentialKind::kAnonymous:
break;
case CredentialKind::kTokenCredential:
- return std::make_unique<Blobs::BlobServiceClient>(AccountBlobUrl(account_name_),
+ return std::make_unique<Blobs::BlobServiceClient>(AccountBlobUrl(account_name),
token_credential_);
case CredentialKind::kStorageSharedKeyCredential:
- return std::make_unique<Blobs::BlobServiceClient>(AccountBlobUrl(account_name_),
+ return std::make_unique<Blobs::BlobServiceClient>(AccountBlobUrl(account_name),
storage_shared_key_credential_);
}
return Status::Invalid("AzureOptions doesn't contain a valid auth configuration");
@@ -153,15 +162,18 @@ Result<std::unique_ptr<Blobs::BlobServiceClient>> AzureOptions::MakeBlobServiceC
Result<std::unique_ptr<DataLake::DataLakeServiceClient>>
AzureOptions::MakeDataLakeServiceClient() const {
+ if (account_name.empty()) {
+ return Status::Invalid("AzureOptions doesn't contain a valid account name");
+ }
switch (credential_kind_) {
case CredentialKind::kAnonymous:
break;
case CredentialKind::kTokenCredential:
return std::make_unique<DataLake::DataLakeServiceClient>(
- AccountDfsUrl(account_name_), token_credential_);
+ AccountDfsUrl(account_name), token_credential_);
case CredentialKind::kStorageSharedKeyCredential:
return std::make_unique<DataLake::DataLakeServiceClient>(
- AccountDfsUrl(account_name_), storage_shared_key_credential_);
+ AccountDfsUrl(account_name), storage_shared_key_credential_);
}
return Status::Invalid("AzureOptions doesn't contain a valid auth configuration");
}
diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h
index 346dd349e9..78e0a8148c 100644
--- a/cpp/src/arrow/filesystem/azurefs.h
+++ b/cpp/src/arrow/filesystem/azurefs.h
@@ -48,6 +48,9 @@ class TestAzureFileSystem;
/// Options for the AzureFileSystem implementation.
struct ARROW_EXPORT AzureOptions {
+ /// \brief account name of the Azure Storage account.
+ std::string account_name;
+
/// \brief hostname[:port] of the Azure Blob Storage Service.
///
/// If the hostname is a relative domain name (one that starts with a '.'), then storage
@@ -94,7 +97,6 @@ struct ARROW_EXPORT AzureOptions {
kStorageSharedKeyCredential,
} credential_kind_ = CredentialKind::kAnonymous;
- std::string account_name_;
std::shared_ptr<Azure::Core::Credentials::TokenCredential> token_credential_;
std::shared_ptr<Azure::Storage::StorageSharedKeyCredential>
storage_shared_key_credential_;
@@ -103,15 +105,15 @@ struct ARROW_EXPORT AzureOptions {
AzureOptions();
~AzureOptions();
- Status ConfigureDefaultCredential(const std::string& account_name);
+ Status ConfigureDefaultCredential();
+
+ Status ConfigureManagedIdentityCredential(const std::string& client_id = std::string());
- Status ConfigureWorkloadIdentityCredential(const std::string& account_name);
+ Status ConfigureWorkloadIdentityCredential();
- Status ConfigureAccountKeyCredential(const std::string& account_name,
- const std::string& account_key);
+ Status ConfigureAccountKeyCredential(const std::string& account_key);
- Status ConfigureClientSecretCredential(const std::string& account_name,
- const std::string& tenant_id,
+ Status ConfigureClientSecretCredential(const std::string& tenant_id,
const std::string& client_id,
const std::string& client_secret);
diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc
index 62c5ef2232..f6af9f722d 100644
--- a/cpp/src/arrow/filesystem/azurefs_test.cc
+++ b/cpp/src/arrow/filesystem/azurefs_test.cc
@@ -271,22 +271,44 @@ class AzureHierarchicalNSEnv : public AzureEnvImpl<AzureHierarchicalNSEnv> {
bool WithHierarchicalNamespace() const final { return true; }
};
+TEST(AzureFileSystem, InitializingFilesystemWithoutAccountNameFails) {
+ AzureOptions options;
+ ASSERT_RAISES(Invalid, options.ConfigureAccountKeyCredential("account_key"));
+
+ ARROW_EXPECT_OK(
+ options.ConfigureClientSecretCredential("tenant_id", "client_id", "client_secret"));
+ ASSERT_RAISES(Invalid, AzureFileSystem::Make(options));
+}
+
TEST(AzureFileSystem, InitializeFilesystemWithClientSecretCredential) {
AzureOptions options;
- ARROW_EXPECT_OK(options.ConfigureClientSecretCredential(
- "dummy-account-name", "tenant_id", "client_id", "client_secret"));
+ options.account_name = "dummy-account-name";
+ ARROW_EXPECT_OK(
+ options.ConfigureClientSecretCredential("tenant_id", "client_id", "client_secret"));
EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
}
TEST(AzureFileSystem, InitializeFilesystemWithDefaultCredential) {
AzureOptions options;
- ARROW_EXPECT_OK(options.ConfigureDefaultCredential("dummy-account-name"));
+ options.account_name = "dummy-account-name";
+ ARROW_EXPECT_OK(options.ConfigureDefaultCredential());
EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
}
+TEST(AzureFileSystem, InitializeFilesystemWithManagedIdentityCredential) {
+ AzureOptions options;
+ options.account_name = "dummy-account-name";
+ ARROW_EXPECT_OK(options.ConfigureManagedIdentityCredential());
+ EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
+
+ ARROW_EXPECT_OK(options.ConfigureManagedIdentityCredential("specific-client-id"));
+ EXPECT_OK_AND_ASSIGN(fs, AzureFileSystem::Make(options));
+}
+
TEST(AzureFileSystem, InitializeFilesystemWithWorkloadIdentityCredential) {
AzureOptions options;
- ARROW_EXPECT_OK(options.ConfigureWorkloadIdentityCredential("dummy-account-name"));
+ options.account_name = "dummy-account-name";
+ ARROW_EXPECT_OK(options.ConfigureWorkloadIdentityCredential());
EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
}
@@ -383,6 +405,7 @@ class TestAzureFileSystem : public ::testing::Test {
static Result<AzureOptions> MakeOptions(BaseAzureEnv* env) {
AzureOptions options;
+ options.account_name = env->account_name();
switch (env->backend()) {
case AzureBackend::kAzurite:
options.blob_storage_authority = "127.0.0.1:10000";
@@ -394,8 +417,7 @@ class TestAzureFileSystem : public ::testing::Test {
// Use the default values
break;
}
- ARROW_EXPECT_OK(
- options.ConfigureAccountKeyCredential(env->account_name(), env->account_key()));
+ ARROW_EXPECT_OK(options.ConfigureAccountKeyCredential(env->account_key()));
return options;
}