You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by fe...@apache.org on 2023/12/20 18:28:38 UTC
(arrow) branch main updated: GH-39322: [C++] Forward arguments to ExceptionToStatus all the way to Status::FromArgs (#39323)
This is an automated email from the ASF dual-hosted git repository.
felipecrv pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 91b2243e27 GH-39322: [C++] Forward arguments to ExceptionToStatus all the way to Status::FromArgs (#39323)
91b2243e27 is described below
commit 91b2243e2753bb1a4ccd645dd41d74b1d0b077c0
Author: Felipe Oliveira Carvalho <fe...@gmail.com>
AuthorDate: Wed Dec 20 15:28:29 2023 -0300
GH-39322: [C++] Forward arguments to ExceptionToStatus all the way to Status::FromArgs (#39323)
### Rationale for this change
This simplifies the creation of long error messages: forwarding the arguments all the way to Status::FromArgs lets its string builder construct the error message, instead of building it up-front with ad-hoc std::string concatenation at every call site.
### What changes are included in this PR?
- std::forward in ExceptionToStatus
- A few nitpicky changes
- Simplification of the error message text
- Moving the signature of `CheckIfHierarchicalNamespaceIsEnabled` to `azurefs_internal.h` to reduce the size of `azurefs.h` -- implementation remains in `azurefs.cc`
### Are these changes tested?
Yes. By existing tests.
* Closes: #39322
Authored-by: Felipe Oliveira Carvalho <fe...@gmail.com>
Signed-off-by: Felipe Oliveira Carvalho <fe...@gmail.com>
---
cpp/src/arrow/filesystem/azurefs.cc | 195 +++++++++++++---------------
cpp/src/arrow/filesystem/azurefs.h | 48 -------
cpp/src/arrow/filesystem/azurefs_internal.h | 78 +++++++++++
cpp/src/arrow/filesystem/azurefs_test.cc | 13 +-
4 files changed, 177 insertions(+), 157 deletions(-)
diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc
index 032cd034e7..a9795e40a6 100644
--- a/cpp/src/arrow/filesystem/azurefs.cc
+++ b/cpp/src/arrow/filesystem/azurefs.cc
@@ -16,6 +16,7 @@
// under the License.
#include "arrow/filesystem/azurefs.h"
+#include "arrow/filesystem/azurefs_internal.h"
#include <azure/identity.hpp>
#include <azure/storage/blobs.hpp>
@@ -41,7 +42,7 @@ namespace DataLake = Azure::Storage::Files::DataLake;
namespace Http = Azure::Core::Http;
namespace Storage = Azure::Storage;
-using internal::HNSSupport;
+using HNSSupport = internal::HierarchicalNamespaceSupport;
// -----------------------------------------------------------------------
// AzureOptions Implementation
@@ -217,9 +218,11 @@ struct AzureLocation {
}
};
-Status ExceptionToStatus(const std::string& prefix,
- const Azure::Storage::StorageException& exception) {
- return Status::IOError(prefix, " Azure Error: ", exception.what());
+template <typename... PrefixArgs>
+Status ExceptionToStatus(const Storage::StorageException& exception,
+ PrefixArgs&&... prefix_args) {
+ return Status::IOError(std::forward<PrefixArgs>(prefix_args)...,
+ " Azure Error: ", exception.what());
}
Status PathNotFound(const AzureLocation& location) {
@@ -418,6 +421,15 @@ std::shared_ptr<const KeyValueMetadata> PropertiesToMetadata(
return metadata;
}
+Storage::Metadata ArrowMetadataToAzureMetadata(
+ const std::shared_ptr<const KeyValueMetadata>& arrow_metadata) {
+ Storage::Metadata azure_metadata;
+ for (auto key_value : arrow_metadata->sorted_pairs()) {
+ azure_metadata[key_value.first] = key_value.second;
+ }
+ return azure_metadata;
+}
+
class ObjectInputFile final : public io::RandomAccessFile {
public:
ObjectInputFile(std::shared_ptr<Blobs::BlobClient> blob_client,
@@ -443,9 +455,8 @@ class ObjectInputFile final : public io::RandomAccessFile {
return PathNotFound(location_);
}
return ExceptionToStatus(
- "GetProperties failed for '" + blob_client_->GetUrl() +
- "'. Cannot initialise an ObjectInputFile without knowing the file size.",
- exception);
+ exception, "GetProperties failed for '", blob_client_->GetUrl(),
+ "'. Cannot initialise an ObjectInputFile without knowing the file size.");
}
}
@@ -523,10 +534,9 @@ class ObjectInputFile final : public io::RandomAccessFile {
.Value.ContentRange.Length.Value();
} catch (const Storage::StorageException& exception) {
return ExceptionToStatus(
- "DownloadTo from '" + blob_client_->GetUrl() + "' at position " +
- std::to_string(position) + " for " + std::to_string(nbytes) +
- " bytes failed. ReadAt failed to read the required byte range.",
- exception);
+ exception, "DownloadTo from '", blob_client_->GetUrl(), "' at position ",
+ position, " for ", nbytes,
+ " bytes failed. ReadAt failed to read the required byte range.");
}
}
@@ -571,15 +581,14 @@ class ObjectInputFile final : public io::RandomAccessFile {
std::shared_ptr<const KeyValueMetadata> metadata_;
};
-Status CreateEmptyBlockBlob(std::shared_ptr<Blobs::BlockBlobClient> block_blob_client) {
+Status CreateEmptyBlockBlob(const Blobs::BlockBlobClient& block_blob_client) {
try {
- block_blob_client->UploadFrom(nullptr, 0);
+ block_blob_client.UploadFrom(nullptr, 0);
} catch (const Storage::StorageException& exception) {
return ExceptionToStatus(
- "UploadFrom failed for '" + block_blob_client->GetUrl() +
- "'. There is no existing blob at this location or the existing blob must be "
- "replaced so ObjectAppendStream must create a new empty block blob.",
- exception);
+ exception, "UploadFrom failed for '", block_blob_client.GetUrl(),
+ "'. There is no existing blob at this location or the existing blob must be "
+ "replaced so ObjectAppendStream must create a new empty block blob.");
}
return Status::OK();
}
@@ -590,19 +599,9 @@ Result<Blobs::Models::GetBlockListResult> GetBlockList(
return block_blob_client->GetBlockList().Value;
} catch (Storage::StorageException& exception) {
return ExceptionToStatus(
- "GetBlockList failed for '" + block_blob_client->GetUrl() +
- "'. Cannot write to a file without first fetching the existing block list.",
- exception);
- }
-}
-
-Storage::Metadata ArrowMetadataToAzureMetadata(
- const std::shared_ptr<const KeyValueMetadata>& arrow_metadata) {
- Storage::Metadata azure_metadata;
- for (auto key_value : arrow_metadata->sorted_pairs()) {
- azure_metadata[key_value.first] = key_value.second;
+ exception, "GetBlockList failed for '", block_blob_client->GetUrl(),
+ "'. Cannot write to a file without first fetching the existing block list.");
}
- return azure_metadata;
}
Status CommitBlockList(std::shared_ptr<Storage::Blobs::BlockBlobClient> block_blob_client,
@@ -618,9 +617,8 @@ Status CommitBlockList(std::shared_ptr<Storage::Blobs::BlockBlobClient> block_bl
block_blob_client->CommitBlockList(block_ids, options);
} catch (const Storage::StorageException& exception) {
return ExceptionToStatus(
- "CommitBlockList failed for '" + block_blob_client->GetUrl() +
- "'. Committing is required to flush an output/append stream.",
- exception);
+ exception, "CommitBlockList failed for '", block_blob_client->GetUrl(),
+ "'. Committing is required to flush an output/append stream.");
}
return Status::OK();
}
@@ -659,13 +657,12 @@ class ObjectAppendStream final : public io::OutputStream {
pos_ = content_length_;
} catch (const Storage::StorageException& exception) {
if (exception.StatusCode == Http::HttpStatusCode::NotFound) {
- RETURN_NOT_OK(CreateEmptyBlockBlob(block_blob_client_));
+ RETURN_NOT_OK(CreateEmptyBlockBlob(*block_blob_client_));
} else {
return ExceptionToStatus(
- "GetProperties failed for '" + block_blob_client_->GetUrl() +
- "'. Cannot initialise an ObjectAppendStream without knowing whether a "
- "file already exists at this path, and if it exists, its size.",
- exception);
+ exception, "GetProperties failed for '", block_blob_client_->GetUrl(),
+ "'. Cannot initialise an ObjectAppendStream without knowing whether a "
+ "file already exists at this path, and if it exists, its size.");
}
content_length_ = 0;
}
@@ -760,10 +757,9 @@ class ObjectAppendStream final : public io::OutputStream {
block_blob_client_->StageBlock(new_block_id, block_content);
} catch (const Storage::StorageException& exception) {
return ExceptionToStatus(
- "StageBlock failed for '" + block_blob_client_->GetUrl() + "' new_block_id: '" +
- new_block_id +
- "'. Staging new blocks is fundamental to streaming writes to blob storage.",
- exception);
+ exception, "StageBlock failed for '", block_blob_client_->GetUrl(),
+ "' new_block_id: '", new_block_id,
+ "'. Staging new blocks is fundamental to streaming writes to blob storage.");
}
block_ids_.push_back(new_block_id);
pos_ += nbytes;
@@ -835,9 +831,9 @@ Result<HNSSupport> CheckIfHierarchicalNamespaceIsEnabled(
if (exception.ErrorCode == "HierarchicalNamespaceNotEnabled") {
return HNSSupport::kDisabled;
}
- return ExceptionToStatus("Check for Hierarchical Namespace support on '" +
- adlfs_client.GetUrl() + "' failed.",
- exception);
+ return ExceptionToStatus(exception,
+ "Check for Hierarchical Namespace support on '",
+ adlfs_client.GetUrl(), "' failed.");
}
}
}
@@ -855,6 +851,8 @@ namespace {
// "filesystem". Creating a container using the Blob Storage API will make it
// accessible using the Data Lake Storage Gen 2 API and vice versa.
+const char kDelimiter[] = {internal::kSep, '\0'};
+
template <class ContainerClient>
Result<FileInfo> GetContainerPropsAsFileInfo(const std::string& container_name,
ContainerClient& container_client) {
@@ -869,8 +867,8 @@ Result<FileInfo> GetContainerPropsAsFileInfo(const std::string& container_name,
info.set_type(FileType::NotFound);
return info;
}
- return ExceptionToStatus(
- "GetProperties for '" + container_client.GetUrl() + "' failed.", exception);
+ return ExceptionToStatus(exception, "GetProperties for '", container_client.GetUrl(),
+ "' failed.");
}
}
@@ -1011,16 +1009,14 @@ class AzureFileSystem::Impl {
return info;
} catch (const Storage::StorageException& exception) {
return ExceptionToStatus(
- "ListBlobs failed for prefix='" + *list_blob_options.Prefix +
- "' failed. GetFileInfo is unable to determine whether the path should "
- "be considered an implied directory.",
- exception);
+ exception, "ListBlobs failed for prefix='", *list_blob_options.Prefix,
+ "' failed. GetFileInfo is unable to determine whether the path should "
+ "be considered an implied directory.");
}
}
return ExceptionToStatus(
- "GetProperties failed for '" + file_client.GetUrl() +
- "' GetFileInfo is unable to determine whether the path exists.",
- exception);
+ exception, "GetProperties failed for '", file_client.GetUrl(),
+ "' GetFileInfo is unable to determine whether the path exists.");
}
}
@@ -1038,7 +1034,7 @@ class AzureFileSystem::Impl {
}
}
} catch (const Storage::StorageException& exception) {
- return ExceptionToStatus("Failed to list account containers.", exception);
+ return ExceptionToStatus(exception, "Failed to list account containers.");
}
return Status::OK();
}
@@ -1153,9 +1149,9 @@ class AzureFileSystem::Impl {
if (IsContainerNotFound(exception)) {
found = false;
} else {
- return ExceptionToStatus("Failed to list blobs in a directory: " +
- select.base_dir + ": " + container_client.GetUrl(),
- exception);
+ return ExceptionToStatus(exception,
+ "Failed to list blobs in a directory: ", select.base_dir,
+ ": ", container_client.GetUrl());
}
}
@@ -1241,7 +1237,7 @@ class AzureFileSystem::Impl {
Status CreateDir(const AzureLocation& location) {
if (location.container.empty()) {
- return Status::Invalid("Cannot create an empty container");
+ return Status::Invalid("CreateDir requires a non-empty path.");
}
auto container_client =
@@ -1249,17 +1245,13 @@ class AzureFileSystem::Impl {
if (location.path.empty()) {
try {
auto response = container_client.Create();
- if (response.Value.Created) {
- return Status::OK();
- } else {
- return StatusFromErrorResponse(
- container_client.GetUrl(), *response.RawResponse,
- "Failed to create a container: " + location.container);
- }
+ return response.Value.Created
+ ? Status::OK()
+ : Status::AlreadyExists("Directory already exists: " + location.all);
} catch (const Storage::StorageException& exception) {
- return ExceptionToStatus("Failed to create a container: " + location.container +
- ": " + container_client.GetUrl(),
- exception);
+ return ExceptionToStatus(exception,
+ "Failed to create a container: ", location.container,
+ ": ", container_client.GetUrl());
}
}
@@ -1291,15 +1283,14 @@ class AzureFileSystem::Impl {
"Failed to create a directory: " + location.path);
}
} catch (const Storage::StorageException& exception) {
- return ExceptionToStatus("Failed to create a directory: " + location.path + ": " +
- directory_client.GetUrl(),
- exception);
+ return ExceptionToStatus(exception, "Failed to create a directory: ", location.path,
+ ": ", directory_client.GetUrl());
}
}
Status CreateDirRecursive(const AzureLocation& location) {
if (location.container.empty()) {
- return Status::Invalid("Cannot create an empty container");
+ return Status::Invalid("CreateDir requires a non-empty path.");
}
auto container_client =
@@ -1307,9 +1298,9 @@ class AzureFileSystem::Impl {
try {
container_client.CreateIfNotExists();
} catch (const Storage::StorageException& exception) {
- return ExceptionToStatus("Failed to create a container: " + location.container +
- " (" + container_client.GetUrl() + ")",
- exception);
+ return ExceptionToStatus(exception,
+ "Failed to create a container: ", location.container, " (",
+ container_client.GetUrl(), ")");
}
auto adlfs_client = datalake_service_client_->GetFileSystemClient(location.container);
@@ -1328,9 +1319,9 @@ class AzureFileSystem::Impl {
try {
directory_client.CreateIfNotExists();
} catch (const Storage::StorageException& exception) {
- return ExceptionToStatus("Failed to create a directory: " + location.path + " (" +
- directory_client.GetUrl() + ")",
- exception);
+ return ExceptionToStatus(exception,
+ "Failed to create a directory: ", location.path, " (",
+ directory_client.GetUrl(), ")");
}
}
@@ -1349,7 +1340,7 @@ class AzureFileSystem::Impl {
std::shared_ptr<ObjectAppendStream> stream;
if (truncate) {
- RETURN_NOT_OK(CreateEmptyBlockBlob(block_blob_client));
+ RETURN_NOT_OK(CreateEmptyBlockBlob(*block_blob_client));
stream = std::make_shared<ObjectAppendStream>(block_blob_client, fs->io_context(),
location, metadata, options_, 0);
} else {
@@ -1393,9 +1384,8 @@ class AzureFileSystem::Impl {
try {
container_client.SubmitBatch(batch);
} catch (const Storage::StorageException& exception) {
- return ExceptionToStatus("Failed to delete blobs in a directory: " +
- location.path + ": " + container_client.GetUrl(),
- exception);
+ return ExceptionToStatus(exception, "Failed to delete blobs in a directory: ",
+ location.path, ": ", container_client.GetUrl());
}
std::vector<std::string> failed_blob_names;
for (size_t i = 0; i < deferred_responses.size(); ++i) {
@@ -1424,9 +1414,9 @@ class AzureFileSystem::Impl {
}
}
} catch (const Storage::StorageException& exception) {
- return ExceptionToStatus("Failed to list blobs in a directory: " + location.path +
- ": " + container_client.GetUrl(),
- exception);
+ return ExceptionToStatus(exception,
+ "Failed to list blobs in a directory: ", location.path,
+ ": ", container_client.GetUrl());
}
return Status::OK();
}
@@ -1434,7 +1424,7 @@ class AzureFileSystem::Impl {
public:
Status DeleteDir(const AzureLocation& location) {
if (location.container.empty()) {
- return Status::Invalid("Cannot delete an empty container");
+ return Status::Invalid("DeleteDir requires a non-empty path.");
}
auto adlfs_client = datalake_service_client_->GetFileSystemClient(location.container);
@@ -1456,9 +1446,9 @@ class AzureFileSystem::Impl {
"Failed to delete a container: " + location.container);
}
} catch (const Storage::StorageException& exception) {
- return ExceptionToStatus("Failed to delete a container: " + location.container +
- ": " + container_client.GetUrl(),
- exception);
+ return ExceptionToStatus(exception,
+ "Failed to delete a container: ", location.container,
+ ": ", container_client.GetUrl());
}
}
@@ -1474,9 +1464,9 @@ class AzureFileSystem::Impl {
"Failed to delete a directory: " + location.path);
}
} catch (const Storage::StorageException& exception) {
- return ExceptionToStatus("Failed to delete a directory: " + location.path + ": " +
- directory_client.GetUrl(),
- exception);
+ return ExceptionToStatus(exception,
+ "Failed to delete a directory: ", location.path, ": ",
+ directory_client.GetUrl());
}
} else {
return DeleteDirContentsWithoutHierarchicalNamespace(location,
@@ -1507,9 +1497,8 @@ class AzureFileSystem::Impl {
sub_directory_client.DeleteRecursive();
} catch (const Storage::StorageException& exception) {
return ExceptionToStatus(
- "Failed to delete a sub directory: " + location.container +
- internal::kSep + path.Name + ": " + sub_directory_client.GetUrl(),
- exception);
+ exception, "Failed to delete a sub directory: ", location.container,
+ kDelimiter, path.Name, ": ", sub_directory_client.GetUrl());
}
} else {
auto sub_file_client = adlfs_client.GetFileClient(path.Name);
@@ -1517,9 +1506,8 @@ class AzureFileSystem::Impl {
sub_file_client.Delete();
} catch (const Storage::StorageException& exception) {
return ExceptionToStatus(
- "Failed to delete a sub file: " + location.container +
- internal::kSep + path.Name + ": " + sub_file_client.GetUrl(),
- exception);
+ exception, "Failed to delete a sub file: ", location.container,
+ kDelimiter, path.Name, ": ", sub_file_client.GetUrl());
}
}
}
@@ -1528,9 +1516,9 @@ class AzureFileSystem::Impl {
if (missing_dir_ok && exception.StatusCode == Http::HttpStatusCode::NotFound) {
return Status::OK();
} else {
- return ExceptionToStatus("Failed to delete directory contents: " +
- location.path + ": " + directory_client.GetUrl(),
- exception);
+ return ExceptionToStatus(exception,
+ "Failed to delete directory contents: ", location.path,
+ ": ", directory_client.GetUrl());
}
}
return Status::OK();
@@ -1553,9 +1541,8 @@ class AzureFileSystem::Impl {
try {
dest_blob_client.CopyFromUri(src_url);
} catch (const Storage::StorageException& exception) {
- return ExceptionToStatus(
- "Failed to copy a blob. (" + src_url + " -> " + dest_blob_client.GetUrl() + ")",
- exception);
+ return ExceptionToStatus(exception, "Failed to copy a blob. (", src_url, " -> ",
+ dest_blob_client.GetUrl(), ")");
}
return Status::OK();
}
diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h
index b7ef2bb313..0c41c42928 100644
--- a/cpp/src/arrow/filesystem/azurefs.h
+++ b/cpp/src/arrow/filesystem/azurefs.h
@@ -118,54 +118,6 @@ struct ARROW_EXPORT AzureOptions {
MakeDataLakeServiceClient() const;
};
-namespace internal {
-
-enum class HNSSupport {
- kUnknown = 0,
- kContainerNotFound = 1,
- kDisabled = 2,
- kEnabled = 3,
-};
-
-/// \brief Performs a request to check if the storage account has Hierarchical
-/// Namespace support enabled.
-///
-/// This check requires a DataLakeFileSystemClient for any container of the
-/// storage account. If the container doesn't exist yet, we just forward that
-/// error to the caller (kContainerNotFound) since that's a proper error to the operation
-/// on that container anyways -- no need to try again with or without the knowledge of
-/// Hierarchical Namespace support.
-///
-/// Hierarchical Namespace support can't easily be changed after the storage account is
-/// created and the feature is shared by all containers in the storage account.
-/// This means the result of this check can (and should!) be cached as soon as
-/// it returns a successful result on any container of the storage account (see
-/// AzureFileSystem::Impl).
-///
-/// The check consists of a call to DataLakeFileSystemClient::GetAccessControlList()
-/// on the root directory of the container. An approach taken by the Hadoop Azure
-/// project [1]. A more obvious approach would be to call
-/// BlobServiceClient::GetAccountInfo(), but that endpoint requires elevated
-/// permissions [2] that we can't generally rely on.
-///
-/// [1]:
-/// https://github.com/apache/hadoop/blob/7c6af6a5f626d18d68b656d085cc23e4c1f7a1ef/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java#L356.
-/// [2]:
-/// https://learn.microsoft.com/en-us/rest/api/storageservices/get-blob-service-properties?tabs=azure-ad#authorization
-///
-/// IMPORTANT: If the result is kEnabled or kDisabled, it doesn't necessarily mean that
-/// the container exists.
-///
-/// \param adlfs_client A DataLakeFileSystemClient for a container of the storage
-/// account.
-/// \return kEnabled/kDisabled/kContainerNotFound (kUnknown is never
-/// returned).
-Result<HNSSupport> CheckIfHierarchicalNamespaceIsEnabled(
- Azure::Storage::Files::DataLake::DataLakeFileSystemClient& adlfs_client,
- const AzureOptions& options);
-
-} // namespace internal
-
/// \brief FileSystem implementation backed by Azure Blob Storage (ABS) [1] and
/// Azure Data Lake Storage Gen2 (ADLS Gen2) [2].
///
diff --git a/cpp/src/arrow/filesystem/azurefs_internal.h b/cpp/src/arrow/filesystem/azurefs_internal.h
new file mode 100644
index 0000000000..13d84c9b54
--- /dev/null
+++ b/cpp/src/arrow/filesystem/azurefs_internal.h
@@ -0,0 +1,78 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/result.h"
+
+namespace Azure::Storage::Files::DataLake {
+class DataLakeFileSystemClient;
+class DataLakeServiceClient;
+} // namespace Azure::Storage::Files::DataLake
+
+namespace arrow::fs {
+
+struct AzureOptions;
+
+namespace internal {
+
+enum class HierarchicalNamespaceSupport {
+ kUnknown = 0,
+ kContainerNotFound = 1,
+ kDisabled = 2,
+ kEnabled = 3,
+};
+
+/// \brief Performs a request to check if the storage account has Hierarchical
+/// Namespace support enabled.
+///
+/// This check requires a DataLakeFileSystemClient for any container of the
+/// storage account. If the container doesn't exist yet, we just forward that
+/// error to the caller (kContainerNotFound) since that's a proper error to the operation
+/// on that container anyways -- no need to try again with or without the knowledge of
+/// Hierarchical Namespace support.
+///
+/// Hierarchical Namespace support can't easily be changed after the storage account is
+/// created and the feature is shared by all containers in the storage account.
+/// This means the result of this check can (and should!) be cached as soon as
+/// it returns a successful result on any container of the storage account (see
+/// AzureFileSystem::Impl).
+///
+/// The check consists of a call to DataLakeFileSystemClient::GetAccessControlList()
+/// on the root directory of the container. An approach taken by the Hadoop Azure
+/// project [1]. A more obvious approach would be to call
+/// BlobServiceClient::GetAccountInfo(), but that endpoint requires elevated
+/// permissions [2] that we can't generally rely on.
+///
+/// [1]:
+/// https://github.com/apache/hadoop/blob/7c6af6a5f626d18d68b656d085cc23e4c1f7a1ef/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java#L356.
+/// [2]:
+/// https://learn.microsoft.com/en-us/rest/api/storageservices/get-blob-service-properties?tabs=azure-ad#authorization
+///
+/// IMPORTANT: If the result is kEnabled or kDisabled, it doesn't necessarily mean that
+/// the container exists.
+///
+/// \param adlfs_client A DataLakeFileSystemClient for a container of the storage
+/// account.
+/// \return kEnabled/kDisabled/kContainerNotFound (kUnknown is never
+/// returned).
+Result<HierarchicalNamespaceSupport> CheckIfHierarchicalNamespaceIsEnabled(
+ Azure::Storage::Files::DataLake::DataLakeFileSystemClient& adlfs_client,
+ const arrow::fs::AzureOptions& options);
+
+} // namespace internal
+} // namespace arrow::fs
diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc
index db0e133e0d..53e71f3658 100644
--- a/cpp/src/arrow/filesystem/azurefs_test.cc
+++ b/cpp/src/arrow/filesystem/azurefs_test.cc
@@ -34,6 +34,7 @@
#include <boost/process.hpp>
#include "arrow/filesystem/azurefs.h"
+#include "arrow/filesystem/azurefs_internal.h"
#include <memory>
#include <random>
@@ -72,6 +73,8 @@ namespace Blobs = Azure::Storage::Blobs;
namespace Core = Azure::Core;
namespace DataLake = Azure::Storage::Files::DataLake;
+using HNSSupport = internal::HierarchicalNamespaceSupport;
+
enum class AzureBackend {
/// \brief Official Azure Remote Backend
kAzure,
@@ -629,9 +632,9 @@ void TestAzureFileSystem::TestDetectHierarchicalNamespace(bool trip_up_azurite)
ASSERT_OK_AND_ASSIGN(auto hns_support, internal::CheckIfHierarchicalNamespaceIsEnabled(
adlfs_client, options_));
if (env->WithHierarchicalNamespace()) {
- ASSERT_EQ(hns_support, internal::HNSSupport::kEnabled);
+ ASSERT_EQ(hns_support, HNSSupport::kEnabled);
} else {
- ASSERT_EQ(hns_support, internal::HNSSupport::kDisabled);
+ ASSERT_EQ(hns_support, HNSSupport::kDisabled);
}
}
@@ -643,13 +646,13 @@ void TestAzureFileSystem::TestDetectHierarchicalNamespaceOnMissingContainer() {
EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv());
switch (env->backend()) {
case AzureBackend::kAzurite:
- ASSERT_EQ(hns_support, internal::HNSSupport::kDisabled);
+ ASSERT_EQ(hns_support, HNSSupport::kDisabled);
break;
case AzureBackend::kAzure:
if (env->WithHierarchicalNamespace()) {
- ASSERT_EQ(hns_support, internal::HNSSupport::kContainerNotFound);
+ ASSERT_EQ(hns_support, HNSSupport::kContainerNotFound);
} else {
- ASSERT_EQ(hns_support, internal::HNSSupport::kDisabled);
+ ASSERT_EQ(hns_support, HNSSupport::kDisabled);
}
break;
}