You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "felipecrv (via GitHub)" <gi...@apache.org> on 2023/12/13 02:09:29 UTC

[PR] GH-39119 [C++]: Refactor the Azure FS tests [arrow]

felipecrv opened a new pull request, #39207:
URL: https://github.com/apache/arrow/pull/39207

   ### Rationale for this change
   
   This PR contains some unrelated improvements (like better docs) and some nitpicky fixes. The test refactoring makes it easier to see on which environments tests run and make them able to be instantiated with different options in the future once we extend the `AzureOptions`.
   
   ### What changes are included in this PR?
   
    - Random cleanups
    - Refactoring of the tests (multiple environments, TYPED_TEST_SUITE, explicit preexisting data setup)
    - Cleanup of the `AzureOptions` class
   
   ### Are these changes tested?
   
   Yes. I created Azure Storage accounts to test with and without Hierarchical Namespace support. I also ran the tests in a shell without my environment variables to ensure the test-skipping behavior is correct.
   
   ### Are there any user-facing changes?
   
   Changes to `AzureOptions`, but since `AzureFileSystem` is not really used yet, these breaking changes won't be a problem.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119 [C++]: Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1424772483


##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -194,49 +265,90 @@ TEST(AzureFileSystem, OptionsCompare) {
   EXPECT_TRUE(options.Equals(options));
 }
 
+auto const* kLoremIpsum = R"""(
+Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
+incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
+nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
+Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
+fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
+culpa qui officia deserunt mollit anim id est laborum.
+)""";
+
 class AzureFileSystemTest : public ::testing::Test {
- public:
+ protected:
+  // Set in constructor
+  std::mt19937_64 generator_;
+
+  // Set in SetUp()
+  int64_t debug_log_start_ = 0;
+  bool set_up_succeeded_ = false;
+  AzureOptions options_;
+
   std::shared_ptr<FileSystem> fs_;
   std::unique_ptr<Blobs::BlobServiceClient> blob_service_client_;
   std::unique_ptr<DataLake::DataLakeServiceClient> datalake_service_client_;
-  AzureOptions options_;
-  std::mt19937_64 generator_;
+
+  // Set in SetUpPreexistingData()
   std::string container_name_;
-  bool suite_skipped_ = false;
 
+ public:
   AzureFileSystemTest() : generator_(std::random_device()()) {}
 
-  virtual Result<AzureOptions> MakeOptions() = 0;
+  virtual Result<BaseAzureEnv*> GetAzureEnv() const = 0;
+
+  static Result<AzureOptions> MakeOptions(BaseAzureEnv* env) {
+    AzureOptions options;
+    options.backend = env->backend();
+    ARROW_EXPECT_OK(
+        options.ConfigureAccountKeyCredential(env->account_name(), env->account_key()));
+    return options;
+  }
 
   void SetUp() override {
-    auto options = MakeOptions();
-    if (options.ok()) {
-      options_ = *options;
+    auto make_options = [this]() -> Result<AzureOptions> {
+      ARROW_ASSIGN_OR_RAISE(auto env, GetAzureEnv());
+      EXPECT_THAT(env, NotNull());
+      ARROW_ASSIGN_OR_RAISE(debug_log_start_, env->GetDebugLogSize());
+      return MakeOptions(env);
+    };
+    auto options_res = make_options();
+    if (!options_res.ok() && options_res.status().IsCancelled()) {
+      GTEST_SKIP() << options_res.status().message();
     } else {
-      suite_skipped_ = true;
-      GTEST_SKIP() << options.status().message();
+      EXPECT_OK_AND_ASSIGN(options_, std::move(options_res));
     }

Review Comment:
   @kou I'm explicitly checking that the `Status::IsCancelled()` to make sure other failures don't make the tests be skipped quietly.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests and filesystem class instantiation [arrow]

Posted by "conbench-apache-arrow[bot] (via GitHub)" <gi...@apache.org>.
conbench-apache-arrow[bot] commented on PR #39207:
URL: https://github.com/apache/arrow/pull/39207#issuecomment-1857196874

   After merging your PR, Conbench analyzed the 6 benchmarking runs that have been run so far on merge-commit 4e58f7ca0016c2b2d8a859a0c5965df3b15523e0.
   
   There was 1 benchmark result indicating a performance regression:
   
   - Commit Run on `ursa-i9-9960x` at [2023-12-14 19:15:38Z](https://conbench.ursa.dev/compare/runs/7c4744cba092451e80559841e1f14dcf...fdbf90617722448fb926eb7da99b3194/)
     - [`csv-read` (Python) with compression=uncompressed, dataset=nyctaxi_2010-01, output_format=arrow_table, streaming=file](https://conbench.ursa.dev/compare/benchmarks/0657b18eedc371fb80003c9bf0c86457...0657b547bdfd71698000577a7e2d669b)
   
   The [full Conbench report](https://github.com/apache/arrow/runs/19665245558) has more details. It also includes information about 2 possible false positives for unstable benchmarks that are known to sometimes produce them.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119 [C++]: Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1424771319


##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -158,25 +215,39 @@ class AzuriteEnv : public ::testing::Environment {
     std::cerr << std::endl;
     return Status::OK();
   }
+};
 
-  const std::string& account_name() const { return account_name_; }
-  const std::string& account_key() const { return account_key_; }
-  const Status status() const { return status_; }
-
+class AzureFlatNSEnv : public AzureEnvImpl<AzureFlatNSEnv> {
  private:
-  std::string account_name_;
-  std::string account_key_;
-  bp::child server_process_;
-  Status status_;

Review Comment:
   No more awkward `status_` being set on ctor now that `Make` is used to construct the environments.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1425804698


##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -193,51 +265,123 @@ TEST(AzureFileSystem, OptionsCompare) {
   EXPECT_TRUE(options.Equals(options));
 }
 
-class AzureFileSystemTest : public ::testing::Test {
+struct PreexistingData {
+ public:
+  using RNG = std::mt19937_64;

Review Comment:
   Haha. Yes. I didn't want to do it in this PR, but I will end up using pgc32 here :)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "pitrou (via GitHub)" <gi...@apache.org>.
pitrou commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1426746367


##########
cpp/src/arrow/filesystem/azurefs.h:
##########
@@ -25,86 +25,104 @@
 #include "arrow/util/macros.h"
 #include "arrow/util/uri.h"
 
-namespace Azure {
-namespace Core {
-namespace Credentials {
-
+namespace Azure::Core::Credentials {
 class TokenCredential;
+}
 
-}  // namespace Credentials
-}  // namespace Core
-namespace Storage {
-
+namespace Azure::Storage {
 class StorageSharedKeyCredential;
+}
 
-}  // namespace Storage
-}  // namespace Azure
-
-namespace arrow {
-namespace fs {
-
-enum class AzureCredentialsKind : int8_t {
-  /// Anonymous access (no credentials used), public
-  Anonymous,
-  /// Use explicitly-provided access key pair
-  StorageCredentials,
-  /// Use ServicePrincipleCredentials
-  ServicePrincipleCredentials,
-  /// Use Sas Token to authenticate
-  Sas,
-  /// Use Connection String
-  ConnectionString
-};
+namespace Azure::Storage::Blobs {
+class BlobServiceClient;
+}
+
+namespace Azure::Storage::Files::DataLake {
+class DataLakeServiceClient;
+}
+
+namespace arrow::fs {
 
 enum class AzureBackend : bool {
-  /// Official Azure Remote Backend
-  Azure,
-  /// Local Simulated Storage
-  Azurite
+  /// \brief Official Azure Remote Backend
+  kAzure,
+  /// \brief Local Simulated Storage
+  kAzurite
 };
 
 /// Options for the AzureFileSystem implementation.
 struct ARROW_EXPORT AzureOptions {
-  std::string account_dfs_url;
-  std::string account_blob_url;
-  AzureBackend backend = AzureBackend::Azure;
-  AzureCredentialsKind credentials_kind = AzureCredentialsKind::Anonymous;
+  /// \brief The backend to connect to: Azure or Azurite (for testing).
+  AzureBackend backend = AzureBackend::kAzure;

Review Comment:
   > Now that I have the environments, I think I can remove this option completely. 🤔
   
   Do you plan to do that here or in a separate PR?



##########
cpp/src/arrow/filesystem/azurefs.h:
##########
@@ -25,86 +25,104 @@
 #include "arrow/util/macros.h"
 #include "arrow/util/uri.h"
 
-namespace Azure {
-namespace Core {
-namespace Credentials {
-
+namespace Azure::Core::Credentials {
 class TokenCredential;
+}
 
-}  // namespace Credentials
-}  // namespace Core
-namespace Storage {
-
+namespace Azure::Storage {
 class StorageSharedKeyCredential;
+}
 
-}  // namespace Storage
-}  // namespace Azure
-
-namespace arrow {
-namespace fs {
-
-enum class AzureCredentialsKind : int8_t {
-  /// Anonymous access (no credentials used), public
-  Anonymous,
-  /// Use explicitly-provided access key pair
-  StorageCredentials,
-  /// Use ServicePrincipleCredentials
-  ServicePrincipleCredentials,
-  /// Use Sas Token to authenticate
-  Sas,
-  /// Use Connection String
-  ConnectionString
-};
+namespace Azure::Storage::Blobs {
+class BlobServiceClient;
+}
+
+namespace Azure::Storage::Files::DataLake {
+class DataLakeServiceClient;
+}
+
+namespace arrow::fs {
 
 enum class AzureBackend : bool {
-  /// Official Azure Remote Backend
-  Azure,
-  /// Local Simulated Storage
-  Azurite
+  /// \brief Official Azure Remote Backend
+  kAzure,
+  /// \brief Local Simulated Storage
+  kAzurite
 };
 
 /// Options for the AzureFileSystem implementation.
 struct ARROW_EXPORT AzureOptions {
-  std::string account_dfs_url;
-  std::string account_blob_url;
-  AzureBackend backend = AzureBackend::Azure;
-  AzureCredentialsKind credentials_kind = AzureCredentialsKind::Anonymous;
+  /// \brief The backend to connect to: Azure or Azurite (for testing).
+  AzureBackend backend = AzureBackend::kAzure;

Review Comment:
   > Now that I have the environments, I think I can remove this option completely. 🤔
   
   Do you plan to do that here or in a separate PR?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1425797597


##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -71,56 +72,113 @@ using ::testing::Not;
 using ::testing::NotNull;
 
 namespace Blobs = Azure::Storage::Blobs;
-namespace Files = Azure::Storage::Files;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
 
-auto const* kLoremIpsum = R"""(
-Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
-incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
-nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
-Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
-fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
-culpa qui officia deserunt mollit anim id est laborum.
-)""";
+class BaseAzureEnv : public ::testing::Environment {
+ protected:
+  std::string account_name_;
+  std::string account_key_;
+
+  BaseAzureEnv(std::string account_name, std::string account_key)
+      : account_name_(std::move(account_name)), account_key_(std::move(account_key)) {}
 
-class AzuriteEnv : public ::testing::Environment {
  public:
-  AzuriteEnv() {
-    account_name_ = "devstoreaccount1";
-    account_key_ =
-        "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/"
-        "KBHBeksoGMGw==";
-    auto exe_path = bp::search_path("azurite");
-    if (exe_path.empty()) {
-      auto error = std::string("Could not find Azurite emulator.");
-      status_ = Status::Invalid(error);
-      return;
+  ~BaseAzureEnv() override = default;

Review Comment:
   It's a, perhaps over-defensive, habit I have: declaring a dtor and marking it with `override` guarantees that the root of the class-hierarchy has a virtual destructor.
   
   It's also a good habit if you care about binary size in the codebase (not the case here): because you can have `~Klass() override;` in the header and make the dtor non-inline.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "pitrou (via GitHub)" <gi...@apache.org>.
pitrou commented on PR #39207:
URL: https://github.com/apache/arrow/pull/39207#issuecomment-1856229349

   The CI failures look unexpected, can you perhaps rebase on main?
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1426942886


##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -71,56 +72,113 @@ using ::testing::Not;
 using ::testing::NotNull;
 
 namespace Blobs = Azure::Storage::Blobs;
-namespace Files = Azure::Storage::Files;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
 
-auto const* kLoremIpsum = R"""(
-Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
-incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
-nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
-Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
-fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
-culpa qui officia deserunt mollit anim id est laborum.
-)""";
+class BaseAzureEnv : public ::testing::Environment {
+ protected:
+  std::string account_name_;
+  std::string account_key_;
+
+  BaseAzureEnv(std::string account_name, std::string account_key)
+      : account_name_(std::move(account_name)), account_key_(std::move(account_key)) {}
 
-class AzuriteEnv : public ::testing::Environment {
  public:
-  AzuriteEnv() {
-    account_name_ = "devstoreaccount1";
-    account_key_ =
-        "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/"
-        "KBHBeksoGMGw==";
-    auto exe_path = bp::search_path("azurite");
-    if (exe_path.empty()) {
-      auto error = std::string("Could not find Azurite emulator.");
-      status_ = Status::Invalid(error);
-      return;
+  ~BaseAzureEnv() override = default;

Review Comment:
   It's a "don't think, just do it" rule, but I will remove all the dtors here to cleanup.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119 [C++]: Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1424774501


##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -889,45 +1096,6 @@ TEST_F(AzuriteFileSystemTest, DeleteDirSuccessHaveBlobs) {
   }
 }
 
-TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirSuccessEmpty) {

Review Comment:
   This is scary to look at, but I just re-ordered the tests: grouping them by environment.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1425877985


##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -377,199 +487,337 @@ class AzureFileSystemTest : public ::testing::Test {
                    strlen(kSubData));
     AssertFileInfo(infos[10], "container/somefile", FileType::File, strlen(kSomeData));
     AssertFileInfo(infos[11], "empty-container", FileType::Directory);
-    AssertFileInfo(infos[12], PreexistingContainerName(), FileType::Directory);
-    AssertFileInfo(infos[13], PreexistingObjectPath(), FileType::File);
   }
-};
 
-class AzuriteFileSystemTest : public AzureFileSystemTest {
-  Result<AzureOptions> MakeOptions() override {
-    EXPECT_THAT(GetAzuriteEnv(), NotNull());
-    ARROW_EXPECT_OK(GetAzuriteEnv()->status());
-    ARROW_ASSIGN_OR_RAISE(debug_log_start_, GetAzuriteEnv()->GetDebugLogSize());
-    AzureOptions options;
-    options.backend = AzureBackend::Azurite;
-    ARROW_EXPECT_OK(options.ConfigureAccountKeyCredentials(
-        GetAzuriteEnv()->account_name(), GetAzuriteEnv()->account_key()));
-    return options;
+  bool WithHierarchicalNamespace() const {
+    EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv());
+    return env->WithHierarchicalNamespace();
   }
 
-  void TearDown() override {
-    AzureFileSystemTest::TearDown();
-    if (HasFailure()) {
-      // XXX: This may not include all logs in the target test because
-      // Azurite doesn't flush debug logs immediately... You may want
-      // to check the log manually...
-      ARROW_IGNORE_EXPR(GetAzuriteEnv()->DumpDebugLog(debug_log_start_));
+  // Tests that are called from more than one implementation of AzureFileSystemTest
+
+  void DetectHierarchicalNamespaceTest();

Review Comment:
   Yes! I noticed the weird names, but I was unsure what was the conventions.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1425914623


##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -377,199 +487,337 @@ class AzureFileSystemTest : public ::testing::Test {
                    strlen(kSubData));
     AssertFileInfo(infos[10], "container/somefile", FileType::File, strlen(kSomeData));
     AssertFileInfo(infos[11], "empty-container", FileType::Directory);
-    AssertFileInfo(infos[12], PreexistingContainerName(), FileType::Directory);
-    AssertFileInfo(infos[13], PreexistingObjectPath(), FileType::File);
   }
-};
 
-class AzuriteFileSystemTest : public AzureFileSystemTest {
-  Result<AzureOptions> MakeOptions() override {
-    EXPECT_THAT(GetAzuriteEnv(), NotNull());
-    ARROW_EXPECT_OK(GetAzuriteEnv()->status());
-    ARROW_ASSIGN_OR_RAISE(debug_log_start_, GetAzuriteEnv()->GetDebugLogSize());
-    AzureOptions options;
-    options.backend = AzureBackend::Azurite;
-    ARROW_EXPECT_OK(options.ConfigureAccountKeyCredentials(
-        GetAzuriteEnv()->account_name(), GetAzuriteEnv()->account_key()));
-    return options;
+  bool WithHierarchicalNamespace() const {
+    EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv());
+    return env->WithHierarchicalNamespace();
   }
 
-  void TearDown() override {
-    AzureFileSystemTest::TearDown();
-    if (HasFailure()) {
-      // XXX: This may not include all logs in the target test because
-      // Azurite doesn't flush debug logs immediately... You may want
-      // to check the log manually...
-      ARROW_IGNORE_EXPR(GetAzuriteEnv()->DumpDebugLog(debug_log_start_));
+  // Tests that are called from more than one implementation of AzureFileSystemTest
+
+  void DetectHierarchicalNamespaceTest();
+  void GetFileInfoObjectTest();
+  void GetFileInfoObjectWithNestedStructureTest();
+
+  void DeleteDirSuccessEmptyTest() {
+    auto data = SetUpPreexistingData();
+    const auto directory_path =
+        internal::ConcatAbstractPath(data.container_name, data.RandomDirectoryName(rng_));
+
+    if (WithHierarchicalNamespace()) {
+      ASSERT_OK(fs_->CreateDir(directory_path, true));
+      arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::Directory);
+      ASSERT_OK(fs_->DeleteDir(directory_path));
+      arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound);
+    } else {
+      // There is only virtual directory without hierarchical namespace
+      // support. So the CreateDir() and DeleteDir() do nothing.
+      ASSERT_OK(fs_->CreateDir(directory_path));
+      arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound);
+      ASSERT_OK(fs_->DeleteDir(directory_path));
+      arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound);
     }
   }
 
-  int64_t debug_log_start_ = 0;
-};
+  void CreateDirSuccessContainerAndDirectoryTest() {
+    auto data = SetUpPreexistingData();
+    const auto path = data.RandomDirectoryPath(rng_);
+    ASSERT_OK(fs_->CreateDir(path, false));
+    if (WithHierarchicalNamespace()) {
+      arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory);
+    } else {
+      // There is only virtual directory without hierarchical namespace
+      // support. So the CreateDir() does nothing.
+      arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound);
+    }
+  }
 
-class AzureFlatNamespaceFileSystemTest : public AzureFileSystemTest {
-  Result<AzureOptions> MakeOptions() override {
-    AzureOptions options;
-    const auto account_key = std::getenv("AZURE_FLAT_NAMESPACE_ACCOUNT_KEY");
-    const auto account_name = std::getenv("AZURE_FLAT_NAMESPACE_ACCOUNT_NAME");
-    if (account_key && account_name) {
-      RETURN_NOT_OK(options.ConfigureAccountKeyCredentials(account_name, account_key));
-      return options;
+  void CreateDirRecursiveSuccessContainerOnlyTest() {
+    auto container_name = PreexistingData::RandomContainerName(rng_);
+    ASSERT_OK(fs_->CreateDir(container_name, true));
+    arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory);
+  }
+
+  void CreateDirRecursiveSuccessDirectoryOnlyTest() {
+    auto data = SetUpPreexistingData();
+    const auto parent = data.RandomDirectoryPath(rng_);
+    const auto path = internal::ConcatAbstractPath(parent, "new-sub");
+    ASSERT_OK(fs_->CreateDir(path, true));
+    if (WithHierarchicalNamespace()) {
+      arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory);
+      arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory);
+    } else {
+      // There is only virtual directory without hierarchical namespace
+      // support. So the CreateDir() does nothing.
+      arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound);
+      arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound);
     }
-    return Status::Cancelled(
-        "Connection details not provided for a real flat namespace "
-        "account.");
   }
-};
 
-// How to enable this test:
-//
-// You need an Azure account. You should be able to create a free
-// account at https://azure.microsoft.com/en-gb/free/ . You should be
-// able to create a storage account through the portal Web UI.
-//
-// See also the official document how to create a storage account:
-// https://learn.microsoft.com/en-us/azure/storage/blobs/create-data-lake-storage-account
-//
-// A few suggestions on configuration:
-//
-// * Use Standard general-purpose v2 not premium
-// * Use LRS redundancy
-// * Obviously you need to enable hierarchical namespace.
-// * Set the default access tier to hot
-// * SFTP, NFS and file shares are not required.
-class AzureHierarchicalNamespaceFileSystemTest : public AzureFileSystemTest {
-  Result<AzureOptions> MakeOptions() override {
-    AzureOptions options;
-    const auto account_key = std::getenv("AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_KEY");
-    const auto account_name = std::getenv("AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_NAME");
-    if (account_key && account_name) {
-      RETURN_NOT_OK(options.ConfigureAccountKeyCredentials(account_name, account_key));
-      return options;
+  void CreateDirRecursiveSuccessContainerAndDirectoryTest() {
+    auto data = SetUpPreexistingData();
+    const auto parent = data.RandomDirectoryPath(rng_);
+    const auto path = internal::ConcatAbstractPath(parent, "new-sub");
+    ASSERT_OK(fs_->CreateDir(path, true));
+    if (WithHierarchicalNamespace()) {
+      arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory);
+      arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory);
+      arrow::fs::AssertFileInfo(fs_.get(), data.container_name, FileType::Directory);
+    } else {
+      // There is only virtual directory without hierarchical namespace
+      // support. So the CreateDir() does nothing.
+      arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound);
+      arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound);
+      arrow::fs::AssertFileInfo(fs_.get(), data.container_name, FileType::Directory);
     }
-    return Status::Cancelled(
-        "Connection details not provided for a real hierarchical namespace "
-        "account.");
   }
-};
 
-TEST_F(AzureFlatNamespaceFileSystemTest, DetectHierarchicalNamespace) {
-  auto hierarchical_namespace = internal::HierarchicalNamespaceDetector();
-  ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get()));
-  ASSERT_OK_AND_EQ(false, hierarchical_namespace.Enabled(PreexistingContainerName()));
-}
+  void DeleteDirContentsSuccessNonexistentTest() {
+    auto data = SetUpPreexistingData();
+    const auto directory_path = data.RandomDirectoryPath(rng_);
+    ASSERT_OK(fs_->DeleteDirContents(directory_path, true));
+    arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound);
+  }
 
-TEST_F(AzureHierarchicalNamespaceFileSystemTest, DetectHierarchicalNamespace) {
-  auto hierarchical_namespace = internal::HierarchicalNamespaceDetector();
-  ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get()));
-  ASSERT_OK_AND_EQ(true, hierarchical_namespace.Enabled(PreexistingContainerName()));
-}
+  void DeleteDirContentsFailureNonexistentTest() {
+    auto data = SetUpPreexistingData();
+    const auto directory_path = data.RandomDirectoryPath(rng_);
+    ASSERT_RAISES(IOError, fs_->DeleteDirContents(directory_path, false));
+  }
+};
 
-TEST_F(AzuriteFileSystemTest, DetectHierarchicalNamespace) {
-  auto hierarchical_namespace = internal::HierarchicalNamespaceDetector();
-  ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get()));
-  ASSERT_OK_AND_EQ(false, hierarchical_namespace.Enabled(PreexistingContainerName()));
-}
+void AzureFileSystemTest::DetectHierarchicalNamespaceTest() {
+  // Check the environments are implemented and injected here correctly.
+  auto expected = WithHierarchicalNamespace();
 
-TEST_F(AzuriteFileSystemTest, DetectHierarchicalNamespaceFailsWithMissingContainer) {
+  auto data = SetUpPreexistingData();
   auto hierarchical_namespace = internal::HierarchicalNamespaceDetector();
   ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get()));
-  ASSERT_NOT_OK(hierarchical_namespace.Enabled("nonexistent-container"));
+  ASSERT_OK_AND_EQ(expected, hierarchical_namespace.Enabled(data.container_name));
 }
 
-TEST_F(AzuriteFileSystemTest, GetFileInfoAccount) {
-  AssertFileInfo(fs_.get(), "", FileType::Directory);
-
-  // URI
-  ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://"));
-}
-
-TEST_F(AzuriteFileSystemTest, GetFileInfoContainer) {
-  AssertFileInfo(fs_.get(), PreexistingContainerName(), FileType::Directory);
+void AzureFileSystemTest::GetFileInfoObjectTest() {
+  auto data = SetUpPreexistingData();
+  auto object_properties =
+      blob_service_client_->GetBlobContainerClient(data.container_name)
+          .GetBlobClient(data.kObjectName)
+          .GetProperties()
+          .Value;
 
-  AssertFileInfo(fs_.get(), "nonexistent-container", FileType::NotFound);
+  AssertFileInfo(fs_.get(), data.ObjectPath(), FileType::File,
+                 std::chrono::system_clock::time_point{object_properties.LastModified},
+                 static_cast<int64_t>(object_properties.BlobSize));
 
   // URI
-  ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + PreexistingContainerName()));
+  ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + std::string{data.kObjectName}));
 }
 
-void AzureFileSystemTest::RunGetFileInfoObjectWithNestedStructureTest() {
+void AzureFileSystemTest::GetFileInfoObjectWithNestedStructureTest() {
+  auto data = SetUpPreexistingData();
   // Adds detailed tests to handle cases of different edge cases
   // with directory naming conventions (e.g. with and without slashes).
   constexpr auto kObjectName = "test-object-dir/some_other_dir/another_dir/foo";
   ASSERT_OK_AND_ASSIGN(
       auto output,
-      fs_->OpenOutputStream(PreexistingContainerPath() + kObjectName, /*metadata=*/{}));
-  const std::string_view data(kLoremIpsum);
-  ASSERT_OK(output->Write(data));
+      fs_->OpenOutputStream(data.ContainerPath() + kObjectName, /*metadata=*/{}));
+  const std::string_view lorem_ipsum(PreexistingData::kLoremIpsum);
+  ASSERT_OK(output->Write(lorem_ipsum));
   ASSERT_OK(output->Close());
 
   // 0 is immediately after "/" lexicographically, ensure that this doesn't
   // cause unexpected issues.
-  ASSERT_OK_AND_ASSIGN(output,
-                       fs_->OpenOutputStream(
-                           PreexistingContainerPath() + "test-object-dir/some_other_dir0",
-                           /*metadata=*/{}));
-  ASSERT_OK(output->Write(data));
-  ASSERT_OK(output->Close());
   ASSERT_OK_AND_ASSIGN(
-      output, fs_->OpenOutputStream(PreexistingContainerPath() + kObjectName + "0",
-                                    /*metadata=*/{}));
-  ASSERT_OK(output->Write(data));
+      output,
+      fs_->OpenOutputStream(data.ContainerPath() + "test-object-dir/some_other_dir0",
+                            /*metadata=*/{}));
+  ASSERT_OK(output->Write(lorem_ipsum));
+  ASSERT_OK(output->Close());
+  ASSERT_OK_AND_ASSIGN(output,
+                       fs_->OpenOutputStream(data.ContainerPath() + kObjectName + "0",
+                                             /*metadata=*/{}));
+  ASSERT_OK(output->Write(lorem_ipsum));
   ASSERT_OK(output->Close());
 
-  AssertFileInfo(fs_.get(), PreexistingContainerPath() + kObjectName, FileType::File);
-  AssertFileInfo(fs_.get(), PreexistingContainerPath() + kObjectName + "/",
-                 FileType::NotFound);
-  AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir",
+  AssertFileInfo(fs_.get(), data.ContainerPath() + kObjectName, FileType::File);
+  AssertFileInfo(fs_.get(), data.ContainerPath() + kObjectName + "/", FileType::NotFound);
+  AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-dir",
                  FileType::Directory);
-  AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir/",
+  AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-dir/",
                  FileType::Directory);
-  AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir/some_other_dir",
+  AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-dir/some_other_dir",
                  FileType::Directory);
-  AssertFileInfo(fs_.get(),
-                 PreexistingContainerPath() + "test-object-dir/some_other_dir/",
+  AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-dir/some_other_dir/",
                  FileType::Directory);
 
-  AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-di",
-                 FileType::NotFound);
-  AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir/some_other_di",
+  AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-di", FileType::NotFound);
+  AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-dir/some_other_di",
                  FileType::NotFound);
+
+  if (WithHierarchicalNamespace()) {
+    datalake_service_client_->GetFileSystemClient(data.container_name)
+        .GetDirectoryClient("test-empty-object-dir")
+        .Create();
+
+    AssertFileInfo(fs_.get(), data.ContainerPath() + "test-empty-object-dir",
+                   FileType::Directory);
+  }
 }
 
-TEST_F(AzuriteFileSystemTest, GetFileInfoObjectWithNestedStructure) {
-  RunGetFileInfoObjectWithNestedStructureTest();
+template <class AzureEnvClass>
+class AzureFileSystemTestImpl : public AzureFileSystemTest {
+ public:
+  using AzureFileSystemTest::AzureFileSystemTest;
+
+  Result<BaseAzureEnv*> GetAzureEnv() const final { return AzureEnvClass::GetInstance(); }
+};
+
+// How to enable the non-Azurite tests:
+//
+// You need an Azure account. You should be able to create a free account [1].
+// Through the portal Web UI, you should create a storage account [2].
+//
+// A few suggestions on configuration:
+//
+// * Use Standard general-purpose v2 not premium
+// * Use LRS redundancy
+// * Set the default access tier to hot
+// * SFTP, NFS and file shares are not required.
+//
+// You must not enable Hierarchical Namespace on the storage account used for
+// AzureFlatNSFileSystemTest, but you must enable it on the storage account
+// used for AzureHierarchicalNSFileSystemTest.
+//
+// The credentials should be placed in the correct environment variables:
+//
+// * AZURE_FLAT_NAMESPACE_ACCOUNT_NAME
+// * AZURE_FLAT_NAMESPACE_ACCOUNT_KEY
+// * AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_NAME
+// * AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_KEY
+//
+// [1]: https://azure.microsoft.com/en-gb/free/
+// [2]:
+// https://learn.microsoft.com/en-us/azure/storage/blobs/create-data-lake-storage-account
+using AzureFlatNSFileSystemTest = AzureFileSystemTestImpl<AzureFlatNSEnv>;
+using AzureHierarchicalNSFileSystemTest = AzureFileSystemTestImpl<AzureHierarchicalNSEnv>;
+using AzuriteFileSystemTest = AzureFileSystemTestImpl<AzuriteEnv>;
+
+// Tests using all the 3 environments (Azurite, Azure w/o HNS (flat), Azure w/ HNS)
+
+template <class AzureEnvClass>
+using AzureFileSystemTestOnAllEnvs = AzureFileSystemTestImpl<AzureEnvClass>;
+
+using AllEnvironments =
+    ::testing::Types<AzuriteEnv, AzureFlatNSEnv, AzureHierarchicalNSEnv>;
+
+TYPED_TEST_SUITE(AzureFileSystemTestOnAllEnvs, AllEnvironments);
+
+TYPED_TEST(AzureFileSystemTestOnAllEnvs, DetectHierarchicalNamespace) {
+  this->DetectHierarchicalNamespaceTest();
 }
 
-TEST_F(AzureHierarchicalNamespaceFileSystemTest, GetFileInfoObjectWithNestedStructure) {
-  RunGetFileInfoObjectWithNestedStructureTest();
-  datalake_service_client_->GetFileSystemClient(PreexistingContainerName())
-      .GetDirectoryClient("test-empty-object-dir")
-      .Create();
+TYPED_TEST(AzureFileSystemTestOnAllEnvs, GetFileInfoObject) {
+  this->GetFileInfoObjectTest();
+}
 
-  AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-empty-object-dir",
-                 FileType::Directory);
+TYPED_TEST(AzureFileSystemTestOnAllEnvs, DeleteDirSuccessEmpty) {
+  this->DeleteDirSuccessEmptyTest();
 }
 
-void AzureFileSystemTest::RunGetFileInfoObjectTest() {
-  auto object_properties =
-      blob_service_client_->GetBlobContainerClient(PreexistingContainerName())
-          .GetBlobClient(PreexistingObjectName())
-          .GetProperties()
-          .Value;
+TYPED_TEST(AzureFileSystemTestOnAllEnvs, GetFileInfoObjectWithNestedStructure) {
+  this->GetFileInfoObjectWithNestedStructureTest();
+}
 
-  AssertFileInfo(fs_.get(), PreexistingObjectPath(), FileType::File,
-                 std::chrono::system_clock::time_point(object_properties.LastModified),
-                 static_cast<int64_t>(object_properties.BlobSize));
+TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirSuccessContainerAndDirectory) {
+  this->CreateDirSuccessContainerAndDirectoryTest();
+}
+
+TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessContainerOnly) {
+  this->CreateDirRecursiveSuccessContainerOnlyTest();
+}
+
+TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessDirectoryOnly) {
+  this->CreateDirRecursiveSuccessDirectoryOnlyTest();
+}
+
+TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessContainerAndDirectory) {
+  this->CreateDirRecursiveSuccessContainerAndDirectoryTest();
+}
+
+// Tests using a real storage account *with Hierarchical Namespace enabled*
+
+TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirFailureNonexistent) {
+  auto data = SetUpPreexistingData();
+  const auto path = data.RandomDirectoryPath(rng_);
+  ASSERT_RAISES(IOError, fs_->DeleteDir(path));
+}
+
+TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirSuccessHaveBlob) {
+  auto data = SetUpPreexistingData();
+  const auto directory_path = data.RandomDirectoryPath(rng_);
+  const auto blob_path = internal::ConcatAbstractPath(directory_path, "hello.txt");
+  ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(blob_path));
+  ASSERT_OK(output->Write(std::string_view("hello")));
+  ASSERT_OK(output->Close());
+  arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::File);
+  ASSERT_OK(fs_->DeleteDir(directory_path));
+  arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::NotFound);
+}
+
+TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirSuccessHaveDirectory) {
+  auto data = SetUpPreexistingData();
+  const auto parent = data.RandomDirectoryPath(rng_);
+  const auto path = internal::ConcatAbstractPath(parent, "new-sub");
+  ASSERT_OK(fs_->CreateDir(path, true));
+  arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory);
+  arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory);
+  ASSERT_OK(fs_->DeleteDir(parent));
+  arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound);
+  arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound);
+}
+
+TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirContentsSuccessExist) {
+  auto preexisting_data = SetUpPreexistingData();
+  HierarchicalPaths paths;
+  CreateHierarchicalData(&paths);
+  ASSERT_OK(fs_->DeleteDirContents(paths.directory));
+  arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::Directory);
+  for (const auto& sub_path : paths.sub_paths) {
+    arrow::fs::AssertFileInfo(fs_.get(), sub_path, FileType::NotFound);
+  }
+}
+
+TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirContentsSuccessNonexistent) {
+  this->DeleteDirContentsSuccessNonexistentTest();
+}
+
+TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirContentsFailureNonexistent) {
+  this->DeleteDirContentsFailureNonexistentTest();
+}
+
+// Tests using Azurite (the local Azure emulator)
+
+TEST_F(AzuriteFileSystemTest, DetectHierarchicalNamespaceFailsWithMissingContainer) {
+  auto hierarchical_namespace = internal::HierarchicalNamespaceDetector();
+  ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get()));
+  ASSERT_NOT_OK(hierarchical_namespace.Enabled("nonexistent-container"));

Review Comment:
   `IOError`. Changing.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1426946437


##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -193,51 +265,123 @@ TEST(AzureFileSystem, OptionsCompare) {
   EXPECT_TRUE(options.Equals(options));
 }
 
-class AzureFileSystemTest : public ::testing::Test {
+struct PreexistingData {
+ public:
+  using RNG = std::mt19937_64;

Review Comment:
   When I change this, I will change all the filesystem classes that use mt19937. I have a "fs_nitpicks" branch with that already done.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1425877985


##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -377,199 +487,337 @@ class AzureFileSystemTest : public ::testing::Test {
                    strlen(kSubData));
     AssertFileInfo(infos[10], "container/somefile", FileType::File, strlen(kSomeData));
     AssertFileInfo(infos[11], "empty-container", FileType::Directory);
-    AssertFileInfo(infos[12], PreexistingContainerName(), FileType::Directory);
-    AssertFileInfo(infos[13], PreexistingObjectPath(), FileType::File);
   }
-};
 
-class AzuriteFileSystemTest : public AzureFileSystemTest {
-  Result<AzureOptions> MakeOptions() override {
-    EXPECT_THAT(GetAzuriteEnv(), NotNull());
-    ARROW_EXPECT_OK(GetAzuriteEnv()->status());
-    ARROW_ASSIGN_OR_RAISE(debug_log_start_, GetAzuriteEnv()->GetDebugLogSize());
-    AzureOptions options;
-    options.backend = AzureBackend::Azurite;
-    ARROW_EXPECT_OK(options.ConfigureAccountKeyCredentials(
-        GetAzuriteEnv()->account_name(), GetAzuriteEnv()->account_key()));
-    return options;
+  bool WithHierarchicalNamespace() const {
+    EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv());
+    return env->WithHierarchicalNamespace();
   }
 
-  void TearDown() override {
-    AzureFileSystemTest::TearDown();
-    if (HasFailure()) {
-      // XXX: This may not include all logs in the target test because
-      // Azurite doesn't flush debug logs immediately... You may want
-      // to check the log manually...
-      ARROW_IGNORE_EXPR(GetAzuriteEnv()->DumpDebugLog(debug_log_start_));
+  // Tests that are called from more than one implementation of AzureFileSystemTest
+
+  void DetectHierarchicalNamespaceTest();

Review Comment:
   Yes! I noticed the weird names, but I was unsure what was the conventions.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119 [C++]: Refactor the Azure FS tests [arrow]

Posted by "github-actions[bot] (via GitHub)" <gi...@apache.org>.
github-actions[bot] commented on PR #39207:
URL: https://github.com/apache/arrow/pull/39207#issuecomment-1853155794

   :warning: GitHub issue #39119 **has been automatically assigned in GitHub** to PR creator.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "pitrou (via GitHub)" <gi...@apache.org>.
pitrou commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1426881299


##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -71,56 +72,113 @@ using ::testing::Not;
 using ::testing::NotNull;
 
 namespace Blobs = Azure::Storage::Blobs;
-namespace Files = Azure::Storage::Files;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
 
-auto const* kLoremIpsum = R"""(
-Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
-incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
-nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
-Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
-fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
-culpa qui officia deserunt mollit anim id est laborum.
-)""";
+class BaseAzureEnv : public ::testing::Environment {
+ protected:
+  std::string account_name_;
+  std::string account_key_;
+
+  BaseAzureEnv(std::string account_name, std::string account_key)
+      : account_name_(std::move(account_name)), account_key_(std::move(account_key)) {}
 
-class AzuriteEnv : public ::testing::Environment {
  public:
-  AzuriteEnv() {
-    account_name_ = "devstoreaccount1";
-    account_key_ =
-        "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/"
-        "KBHBeksoGMGw==";
-    auto exe_path = bp::search_path("azurite");
-    if (exe_path.empty()) {
-      auto error = std::string("Could not find Azurite emulator.");
-      status_ = Status::Invalid(error);
-      return;
+  ~BaseAzureEnv() override = default;
+
+  const std::string& account_name() const { return account_name_; }
+  const std::string& account_key() const { return account_key_; }
+
+  virtual AzureBackend backend() const = 0;
+
+  virtual bool WithHierarchicalNamespace() const { return false; }
+
+  virtual Result<int64_t> GetDebugLogSize() { return 0; }
+  virtual Status DumpDebugLog(int64_t position) {
+    return Status::NotImplemented("BaseAzureEnv::DumpDebugLog");
+  }
+};
+
+template <class AzureEnvClass>
+class AzureEnvImpl : public BaseAzureEnv {
+ protected:
+  static Result<BaseAzureEnv*> MakeAndAddToGlobalTestEnvironment() {
+    ARROW_ASSIGN_OR_RAISE(auto instance, AzureEnvClass::Make());
+    auto* heap_ptr = instance.release();
+    ::testing::AddGlobalTestEnvironment(heap_ptr);
+    return heap_ptr;
+  }
+
+  using BaseAzureEnv::BaseAzureEnv;
+
+  static Result<std::unique_ptr<AzureEnvClass>> MakeFromEnvVars(
+      const std::string& account_name_var, const std::string& account_key_var) {
+    const auto account_name = std::getenv(account_name_var.c_str());
+    const auto account_key = std::getenv(account_key_var.c_str());
+    if (!account_name) {
+      return Status::Cancelled(account_name_var + " not set.");
     }
-    auto temp_dir_ = *TemporaryDir::Make("azurefs-test-");
-    auto debug_log_path_result = temp_dir_->path().Join("debug.log");
-    if (!debug_log_path_result.ok()) {
-      status_ = debug_log_path_result.status();
-      return;
+    if (!account_key) {
+      return Status::Cancelled(account_key_var + " not set.");
     }
-    debug_log_path_ = *debug_log_path_result;
-    server_process_ =
-        bp::child(boost::this_process::environment(), exe_path, "--silent", "--location",
-                  temp_dir_->path().ToString(), "--debug", debug_log_path_.ToString());
-    if (!(server_process_.valid() && server_process_.running())) {
-      auto error = "Could not start Azurite emulator.";
-      server_process_.terminate();
-      server_process_.wait();
-      status_ = Status::Invalid(error);
-      return;
+    return std::unique_ptr<AzureEnvClass>{new AzureEnvClass(account_name, account_key)};
+  }
+
+ public:
+  ~AzureEnvImpl() override = default;

Review Comment:
   Same answer :-D



##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -193,51 +265,123 @@ TEST(AzureFileSystem, OptionsCompare) {
   EXPECT_TRUE(options.Equals(options));
 }
 
-class AzureFileSystemTest : public ::testing::Test {
+struct PreexistingData {
+ public:
+  using RNG = std::mt19937_64;

Review Comment:
   Note that even `std::default_engine` can be a better choice.



##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -377,199 +487,337 @@ class AzureFileSystemTest : public ::testing::Test {
                    strlen(kSubData));
     AssertFileInfo(infos[10], "container/somefile", FileType::File, strlen(kSomeData));
     AssertFileInfo(infos[11], "empty-container", FileType::Directory);
-    AssertFileInfo(infos[12], PreexistingContainerName(), FileType::Directory);
-    AssertFileInfo(infos[13], PreexistingObjectPath(), FileType::File);
   }
-};
 
-class AzuriteFileSystemTest : public AzureFileSystemTest {
-  Result<AzureOptions> MakeOptions() override {
-    EXPECT_THAT(GetAzuriteEnv(), NotNull());
-    ARROW_EXPECT_OK(GetAzuriteEnv()->status());
-    ARROW_ASSIGN_OR_RAISE(debug_log_start_, GetAzuriteEnv()->GetDebugLogSize());
-    AzureOptions options;
-    options.backend = AzureBackend::Azurite;
-    ARROW_EXPECT_OK(options.ConfigureAccountKeyCredentials(
-        GetAzuriteEnv()->account_name(), GetAzuriteEnv()->account_key()));
-    return options;
+  bool WithHierarchicalNamespace() const {

Review Comment:
   Oh, I see, thanks.



##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -71,56 +72,113 @@ using ::testing::Not;
 using ::testing::NotNull;
 
 namespace Blobs = Azure::Storage::Blobs;
-namespace Files = Azure::Storage::Files;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
 
-auto const* kLoremIpsum = R"""(
-Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
-incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
-nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
-Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
-fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
-culpa qui officia deserunt mollit anim id est laborum.
-)""";
+class BaseAzureEnv : public ::testing::Environment {
+ protected:
+  std::string account_name_;
+  std::string account_key_;
+
+  BaseAzureEnv(std::string account_name, std::string account_key)
+      : account_name_(std::move(account_name)), account_key_(std::move(account_key)) {}
 
-class AzuriteEnv : public ::testing::Environment {
  public:
-  AzuriteEnv() {
-    account_name_ = "devstoreaccount1";
-    account_key_ =
-        "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/"
-        "KBHBeksoGMGw==";
-    auto exe_path = bp::search_path("azurite");
-    if (exe_path.empty()) {
-      auto error = std::string("Could not find Azurite emulator.");
-      status_ = Status::Invalid(error);
-      return;
+  ~BaseAzureEnv() override = default;

Review Comment:
   Hmm, I understand it if you're deriving an application-specific class, but you can pretty much count on `testing::Environment` having a virtual destructor. I don't understand the need to verify it explicitly.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "pitrou (via GitHub)" <gi...@apache.org>.
pitrou commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1426754801


##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -33,42 +33,85 @@
 #include "arrow/util/logging.h"
 #include "arrow/util/string.h"
 
-namespace arrow {
-namespace fs {
+namespace arrow::fs {
+
+namespace Blobs = Azure::Storage::Blobs;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
+namespace Storage = Azure::Storage;
 
 // -----------------------------------------------------------------------
 // AzureOptions Implementation
 
 AzureOptions::AzureOptions() = default;
 
+AzureOptions::~AzureOptions() = default;
+
 bool AzureOptions::Equals(const AzureOptions& other) const {
-  return (account_dfs_url == other.account_dfs_url &&
-          account_blob_url == other.account_blob_url &&
-          credentials_kind == other.credentials_kind &&
-          default_metadata == other.default_metadata);
+  // TODO(GH-38598): update here when more auth methods are added.
+  const bool ret = backend == other.backend &&
+                   default_metadata == other.default_metadata &&
+                   account_blob_url_ == other.account_blob_url_ &&
+                   account_dfs_url_ == other.account_dfs_url_ &&
+                   credential_kind_ == other.credential_kind_;
+  if (!ret) {
+    return false;
+  }
+  switch (credential_kind_) {
+    case CredentialKind::kAnonymous:
+      return true;
+    case CredentialKind::kStorageSharedKeyCredential:
+      return storage_shared_key_credential_->AccountName ==
+             other.storage_shared_key_credential_->AccountName;
+  }
+  DCHECK(false);
+  return false;

Review Comment:
   Or perhaps use `arrow/util/unreachable.h`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119 [C++]: Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1424773160


##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -299,9 +411,6 @@ class AzureFileSystemTest : public ::testing::Test {
     ASSERT_OK(output->Close());
   }
 
-  void RunGetFileInfoObjectWithNestedStructureTest();
-  void RunGetFileInfoObjectTest();

Review Comment:
   I moved these to the end of the class.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1425835890


##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -71,56 +72,113 @@ using ::testing::Not;
 using ::testing::NotNull;
 
 namespace Blobs = Azure::Storage::Blobs;
-namespace Files = Azure::Storage::Files;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
 
-auto const* kLoremIpsum = R"""(
-Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
-incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
-nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
-Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
-fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
-culpa qui officia deserunt mollit anim id est laborum.
-)""";
+class BaseAzureEnv : public ::testing::Environment {
+ protected:
+  std::string account_name_;
+  std::string account_key_;
+
+  BaseAzureEnv(std::string account_name, std::string account_key)
+      : account_name_(std::move(account_name)), account_key_(std::move(account_key)) {}
 
-class AzuriteEnv : public ::testing::Environment {
  public:
-  AzuriteEnv() {
-    account_name_ = "devstoreaccount1";
-    account_key_ =
-        "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/"
-        "KBHBeksoGMGw==";
-    auto exe_path = bp::search_path("azurite");
-    if (exe_path.empty()) {
-      auto error = std::string("Could not find Azurite emulator.");
-      status_ = Status::Invalid(error);
-      return;
+  ~BaseAzureEnv() override = default;
+
+  const std::string& account_name() const { return account_name_; }
+  const std::string& account_key() const { return account_key_; }
+
+  virtual AzureBackend backend() const = 0;
+
+  virtual bool WithHierarchicalNamespace() const { return false; }
+
+  virtual Result<int64_t> GetDebugLogSize() { return 0; }
+  virtual Status DumpDebugLog(int64_t position) {
+    return Status::NotImplemented("BaseAzureEnv::DumpDebugLog");
+  }
+};
+
+template <class AzureEnvClass>
+class AzureEnvImpl : public BaseAzureEnv {
+ protected:
+  static Result<BaseAzureEnv*> MakeAndAddToGlobalTestEnvironment() {
+    ARROW_ASSIGN_OR_RAISE(auto instance, AzureEnvClass::Make());
+    auto* heap_ptr = instance.release();
+    ::testing::AddGlobalTestEnvironment(heap_ptr);
+    return heap_ptr;
+  }
+
+  using BaseAzureEnv::BaseAzureEnv;
+
+  static Result<std::unique_ptr<AzureEnvClass>> MakeFromEnvVars(
+      const std::string& account_name_var, const std::string& account_key_var) {
+    const auto account_name = std::getenv(account_name_var.c_str());
+    const auto account_key = std::getenv(account_key_var.c_str());
+    if (!account_name) {
+      return Status::Cancelled(account_name_var + " not set.");
     }
-    auto temp_dir_ = *TemporaryDir::Make("azurefs-test-");
-    auto debug_log_path_result = temp_dir_->path().Join("debug.log");
-    if (!debug_log_path_result.ok()) {
-      status_ = debug_log_path_result.status();
-      return;
+    if (!account_key) {
+      return Status::Cancelled(account_key_var + " not set.");
     }
-    debug_log_path_ = *debug_log_path_result;
-    server_process_ =
-        bp::child(boost::this_process::environment(), exe_path, "--silent", "--location",
-                  temp_dir_->path().ToString(), "--debug", debug_log_path_.ToString());
-    if (!(server_process_.valid() && server_process_.running())) {
-      auto error = "Could not start Azurite emulator.";
-      server_process_.terminate();
-      server_process_.wait();
-      status_ = Status::Invalid(error);
-      return;
+    return std::unique_ptr<AzureEnvClass>{new AzureEnvClass(account_name, account_key)};
+  }
+
+ public:
+  ~AzureEnvImpl() override = default;
+
+  static Result<BaseAzureEnv*> GetInstance() {
+    static auto env = MakeAndAddToGlobalTestEnvironment();
+    if (env.ok()) {
+      return env;
     }
-    status_ = Status::OK();
+    return env.status();
   }
 
+  AzureBackend backend() const final { return AzureEnvClass::kBackend; }
+};
+
+class AzuriteEnv : public AzureEnvImpl<AzuriteEnv> {
+ private:
+  std::unique_ptr<TemporaryDir> temp_dir_;
+  arrow::internal::PlatformFilename debug_log_path_;
+  bp::child server_process_;
+
+  using AzureEnvImpl::AzureEnvImpl;
+
+ public:
+  static const AzureBackend kBackend = AzureBackend::kAzurite;
+
   ~AzuriteEnv() override {
     server_process_.terminate();
     server_process_.wait();
   }
 
-  Result<int64_t> GetDebugLogSize() {
+  static Result<std::unique_ptr<AzureEnvImpl>> Make() {

Review Comment:
   `AzuriteEnv::Make()` is the only factory (of the 3) that doesn't use `MakeFromVars(_, _)`. I'm adding comments that clarify the structure here.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1425933200


##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -193,51 +265,123 @@ TEST(AzureFileSystem, OptionsCompare) {
   EXPECT_TRUE(options.Equals(options));
 }
 
-class AzureFileSystemTest : public ::testing::Test {
+struct PreexistingData {
+ public:
+  using RNG = std::mt19937_64;
+
  public:
+  const std::string container_name;
+  static constexpr char const* kObjectName = "test-object-name";
+
+  static constexpr char const* kLoremIpsum = R"""(
+Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
+incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
+nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
+Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
+fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
+culpa qui officia deserunt mollit anim id est laborum.
+)""";
+
+ public:
+  explicit PreexistingData(RNG& rng) : container_name{RandomContainerName(rng)} {}
+
+  // Accessors
+  std::string ContainerPath() const { return container_name + '/'; }
+  std::string ObjectPath() const { return ContainerPath() + kObjectName; }
+  std::string NotFoundObjectPath() const { return ContainerPath() + "not-found"; }
+
+  std::string RandomDirectoryPath(RNG& rng) {
+    return internal::ConcatAbstractPath(container_name, RandomDirectoryName(rng));
+  }
+
+  // Utilities
+
+  static std::string RandomContainerName(RNG& rng) { return RandomChars(32, rng); }
+  static std::string RandomDirectoryName(RNG& rng) { return RandomChars(32, rng); }
+
+  static std::string RandomLine(int lineno, std::size_t width, RNG& rng) {
+    auto line = std::to_string(lineno) + ":    ";
+    line += RandomChars(width - line.size() - 1, rng);
+    line += '\n';
+    return line;
+  }
+
+  static std::size_t RandomIndex(std::size_t end, RNG& rng) {
+    return std::uniform_int_distribution<std::size_t>(0, end - 1)(rng);
+  }
+
+  static std::string RandomChars(std::size_t count, RNG& rng) {
+    auto const fillers = std::string("abcdefghijlkmnopqrstuvwxyz0123456789");
+    std::uniform_int_distribution<std::size_t> d(0, fillers.size() - 1);
+    std::string s;
+    std::generate_n(std::back_inserter(s), count, [&] { return fillers[d(rng)]; });
+    return s;
+  }
+};
+
+class AzureFileSystemTest : public ::testing::Test {
+ protected:
+  // Set in constructor
+  std::mt19937_64 rng_;
+
+  // Set in SetUp()
+  int64_t debug_log_start_ = 0;
+  bool set_up_succeeded_ = false;
+  AzureOptions options_;
+
   std::shared_ptr<FileSystem> fs_;
   std::unique_ptr<Blobs::BlobServiceClient> blob_service_client_;
-  std::unique_ptr<Files::DataLake::DataLakeServiceClient> datalake_service_client_;
-  AzureOptions options_;
-  std::mt19937_64 generator_;
-  std::string container_name_;
-  bool suite_skipped_ = false;
+  std::unique_ptr<DataLake::DataLakeServiceClient> datalake_service_client_;
 
-  AzureFileSystemTest() : generator_(std::random_device()()) {}
+ public:
+  AzureFileSystemTest() : rng_(std::random_device()()) {}
+
+  virtual Result<BaseAzureEnv*> GetAzureEnv() const = 0;
 
-  virtual Result<AzureOptions> MakeOptions() = 0;
+  static Result<AzureOptions> MakeOptions(BaseAzureEnv* env) {
+    AzureOptions options;
+    options.backend = env->backend();
+    ARROW_EXPECT_OK(
+        options.ConfigureAccountKeyCredential(env->account_name(), env->account_key()));
+    return options;
+  }
 
   void SetUp() override {
-    auto options = MakeOptions();
-    if (options.ok()) {
-      options_ = *options;
+    auto make_options = [this]() -> Result<AzureOptions> {
+      ARROW_ASSIGN_OR_RAISE(auto env, GetAzureEnv());
+      EXPECT_THAT(env, NotNull());
+      ARROW_ASSIGN_OR_RAISE(debug_log_start_, env->GetDebugLogSize());
+      return MakeOptions(env);
+    };
+    auto options_res = make_options();
+    if (!options_res.ok() && options_res.status().IsCancelled()) {
+      GTEST_SKIP() << options_res.status().message();
     } else {
-      suite_skipped_ = true;
-      GTEST_SKIP() << options.status().message();
+      EXPECT_OK_AND_ASSIGN(options_, std::move(options_res));

Review Comment:
   Hmmm. I didn't know `EXPECT_OK_AND_ASSIGN` always moved. I thought it only moved when the `rexpr` was an `&&`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1425875126


##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -193,51 +265,123 @@ TEST(AzureFileSystem, OptionsCompare) {
   EXPECT_TRUE(options.Equals(options));
 }
 
-class AzureFileSystemTest : public ::testing::Test {
+struct PreexistingData {
+ public:
+  using RNG = std::mt19937_64;
+
  public:
+  const std::string container_name;
+  static constexpr char const* kObjectName = "test-object-name";
+
+  static constexpr char const* kLoremIpsum = R"""(
+Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
+incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
+nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
+Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
+fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
+culpa qui officia deserunt mollit anim id est laborum.
+)""";
+
+ public:
+  explicit PreexistingData(RNG& rng) : container_name{RandomContainerName(rng)} {}
+
+  // Accessors
+  std::string ContainerPath() const { return container_name + '/'; }
+  std::string ObjectPath() const { return ContainerPath() + kObjectName; }
+  std::string NotFoundObjectPath() const { return ContainerPath() + "not-found"; }
+
+  std::string RandomDirectoryPath(RNG& rng) {
+    return internal::ConcatAbstractPath(container_name, RandomDirectoryName(rng));
+  }
+
+  // Utilities
+
+  static std::string RandomContainerName(RNG& rng) { return RandomChars(32, rng); }
+  static std::string RandomDirectoryName(RNG& rng) { return RandomChars(32, rng); }
+
+  static std::string RandomLine(int lineno, std::size_t width, RNG& rng) {
+    auto line = std::to_string(lineno) + ":    ";
+    line += RandomChars(width - line.size() - 1, rng);
+    line += '\n';
+    return line;
+  }
+
+  static std::size_t RandomIndex(std::size_t end, RNG& rng) {
+    return std::uniform_int_distribution<std::size_t>(0, end - 1)(rng);
+  }
+
+  static std::string RandomChars(std::size_t count, RNG& rng) {
+    auto const fillers = std::string("abcdefghijlkmnopqrstuvwxyz0123456789");
+    std::uniform_int_distribution<std::size_t> d(0, fillers.size() - 1);
+    std::string s;
+    std::generate_n(std::back_inserter(s), count, [&] { return fillers[d(rng)]; });
+    return s;
+  }
+};
+
+class AzureFileSystemTest : public ::testing::Test {
+ protected:
+  // Set in constructor
+  std::mt19937_64 rng_;
+
+  // Set in SetUp()
+  int64_t debug_log_start_ = 0;
+  bool set_up_succeeded_ = false;
+  AzureOptions options_;
+
   std::shared_ptr<FileSystem> fs_;
   std::unique_ptr<Blobs::BlobServiceClient> blob_service_client_;
-  std::unique_ptr<Files::DataLake::DataLakeServiceClient> datalake_service_client_;
-  AzureOptions options_;
-  std::mt19937_64 generator_;
-  std::string container_name_;
-  bool suite_skipped_ = false;
+  std::unique_ptr<DataLake::DataLakeServiceClient> datalake_service_client_;
 
-  AzureFileSystemTest() : generator_(std::random_device()()) {}
+ public:
+  AzureFileSystemTest() : rng_(std::random_device()()) {}

Review Comment:
   We want that so that we don't create container names that collide between test runs. Test cleanups try to delete all containers from the storage account, but it's never guaranteed.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119 [C++]: Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1424772978


##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -194,49 +265,90 @@ TEST(AzureFileSystem, OptionsCompare) {
   EXPECT_TRUE(options.Equals(options));
 }
 
+auto const* kLoremIpsum = R"""(
+Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
+incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
+nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
+Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
+fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
+culpa qui officia deserunt mollit anim id est laborum.
+)""";
+
 class AzureFileSystemTest : public ::testing::Test {
- public:
+ protected:
+  // Set in constructor
+  std::mt19937_64 generator_;
+
+  // Set in SetUp()
+  int64_t debug_log_start_ = 0;
+  bool set_up_succeeded_ = false;
+  AzureOptions options_;
+
   std::shared_ptr<FileSystem> fs_;
   std::unique_ptr<Blobs::BlobServiceClient> blob_service_client_;
   std::unique_ptr<DataLake::DataLakeServiceClient> datalake_service_client_;
-  AzureOptions options_;
-  std::mt19937_64 generator_;
+
+  // Set in SetUpPreexistingData()
   std::string container_name_;
-  bool suite_skipped_ = false;
 
+ public:
   AzureFileSystemTest() : generator_(std::random_device()()) {}
 
-  virtual Result<AzureOptions> MakeOptions() = 0;
+  virtual Result<BaseAzureEnv*> GetAzureEnv() const = 0;
+
+  static Result<AzureOptions> MakeOptions(BaseAzureEnv* env) {
+    AzureOptions options;
+    options.backend = env->backend();
+    ARROW_EXPECT_OK(
+        options.ConfigureAccountKeyCredential(env->account_name(), env->account_key()));
+    return options;
+  }
 
   void SetUp() override {
-    auto options = MakeOptions();
-    if (options.ok()) {
-      options_ = *options;
+    auto make_options = [this]() -> Result<AzureOptions> {
+      ARROW_ASSIGN_OR_RAISE(auto env, GetAzureEnv());
+      EXPECT_THAT(env, NotNull());
+      ARROW_ASSIGN_OR_RAISE(debug_log_start_, env->GetDebugLogSize());
+      return MakeOptions(env);
+    };
+    auto options_res = make_options();
+    if (!options_res.ok() && options_res.status().IsCancelled()) {
+      GTEST_SKIP() << options_res.status().message();
     } else {
-      suite_skipped_ = true;
-      GTEST_SKIP() << options.status().message();
+      EXPECT_OK_AND_ASSIGN(options_, std::move(options_res));
     }
-    // Stop-gap solution before GH-39119 is fixed.
-    container_name_ = "z" + RandomChars(31);
+
+    ASSERT_OK_AND_ASSIGN(fs_, AzureFileSystem::Make(options_));
     blob_service_client_ = options_.MakeBlobServiceClient();
     datalake_service_client_ = options_.MakeDataLakeServiceClient();
-    ASSERT_OK_AND_ASSIGN(fs_, AzureFileSystem::Make(options_));
-    auto container_client = CreateContainer(container_name_);
+    set_up_succeeded_ = true;
 
-    auto blob_client = container_client.GetBlockBlobClient(PreexistingObjectName());
-    blob_client.UploadFrom(reinterpret_cast<const uint8_t*>(kLoremIpsum),
-                           strlen(kLoremIpsum));
+    SetUpPreexistingData();

Review Comment:
   This gets moved in later commits.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1425791869


##########
cpp/src/arrow/filesystem/azurefs.h:
##########
@@ -25,86 +25,104 @@
 #include "arrow/util/macros.h"
 #include "arrow/util/uri.h"
 
-namespace Azure {
-namespace Core {
-namespace Credentials {
-
+namespace Azure::Core::Credentials {
 class TokenCredential;
+}
 
-}  // namespace Credentials
-}  // namespace Core
-namespace Storage {
-
+namespace Azure::Storage {
 class StorageSharedKeyCredential;
+}
 
-}  // namespace Storage
-}  // namespace Azure
-
-namespace arrow {
-namespace fs {
-
-enum class AzureCredentialsKind : int8_t {
-  /// Anonymous access (no credentials used), public
-  Anonymous,
-  /// Use explicitly-provided access key pair
-  StorageCredentials,
-  /// Use ServicePrincipleCredentials
-  ServicePrincipleCredentials,
-  /// Use Sas Token to authenticate
-  Sas,
-  /// Use Connection String
-  ConnectionString
-};
+namespace Azure::Storage::Blobs {
+class BlobServiceClient;
+}
+
+namespace Azure::Storage::Files::DataLake {
+class DataLakeServiceClient;
+}
+
+namespace arrow::fs {
 
 enum class AzureBackend : bool {
-  /// Official Azure Remote Backend
-  Azure,
-  /// Local Simulated Storage
-  Azurite
+  /// \brief Official Azure Remote Backend
+  kAzure,
+  /// \brief Local Simulated Storage
+  kAzurite
 };
 
 /// Options for the AzureFileSystem implementation.
 struct ARROW_EXPORT AzureOptions {
-  std::string account_dfs_url;
-  std::string account_blob_url;
-  AzureBackend backend = AzureBackend::Azure;
-  AzureCredentialsKind credentials_kind = AzureCredentialsKind::Anonymous;
+  /// \brief The backend to connect to: Azure or Azurite (for testing).
+  AzureBackend backend = AzureBackend::kAzure;

Review Comment:
   Now that I have the environments, I think I can remove this option completely. 🤔 



##########
cpp/src/arrow/filesystem/azurefs.h:
##########
@@ -25,86 +25,104 @@
 #include "arrow/util/macros.h"
 #include "arrow/util/uri.h"
 
-namespace Azure {
-namespace Core {
-namespace Credentials {
-
+namespace Azure::Core::Credentials {
 class TokenCredential;
+}
 
-}  // namespace Credentials
-}  // namespace Core
-namespace Storage {
-
+namespace Azure::Storage {
 class StorageSharedKeyCredential;
+}
 
-}  // namespace Storage
-}  // namespace Azure
-
-namespace arrow {
-namespace fs {
-
-enum class AzureCredentialsKind : int8_t {
-  /// Anonymous access (no credentials used), public
-  Anonymous,
-  /// Use explicitly-provided access key pair
-  StorageCredentials,
-  /// Use ServicePrincipleCredentials
-  ServicePrincipleCredentials,
-  /// Use Sas Token to authenticate
-  Sas,
-  /// Use Connection String
-  ConnectionString
-};
+namespace Azure::Storage::Blobs {
+class BlobServiceClient;
+}
+
+namespace Azure::Storage::Files::DataLake {
+class DataLakeServiceClient;
+}
+
+namespace arrow::fs {
 
 enum class AzureBackend : bool {
-  /// Official Azure Remote Backend
-  Azure,
-  /// Local Simulated Storage
-  Azurite
+  /// \brief Official Azure Remote Backend
+  kAzure,
+  /// \brief Local Simulated Storage
+  kAzurite
 };
 
 /// Options for the AzureFileSystem implementation.
 struct ARROW_EXPORT AzureOptions {
-  std::string account_dfs_url;
-  std::string account_blob_url;
-  AzureBackend backend = AzureBackend::Azure;
-  AzureCredentialsKind credentials_kind = AzureCredentialsKind::Anonymous;
+  /// \brief The backend to connect to: Azure or Azurite (for testing).
+  AzureBackend backend = AzureBackend::kAzure;

Review Comment:
   Now that I have the environments, I think I can remove this option completely. 🤔 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "pitrou (via GitHub)" <gi...@apache.org>.
pitrou commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1425685047


##########
cpp/src/arrow/filesystem/azurefs.h:
##########
@@ -25,86 +25,104 @@
 #include "arrow/util/macros.h"
 #include "arrow/util/uri.h"
 
-namespace Azure {
-namespace Core {
-namespace Credentials {
-
+namespace Azure::Core::Credentials {
 class TokenCredential;
+}
 
-}  // namespace Credentials
-}  // namespace Core
-namespace Storage {
-
+namespace Azure::Storage {
 class StorageSharedKeyCredential;
+}
 
-}  // namespace Storage
-}  // namespace Azure
-
-namespace arrow {
-namespace fs {
-
-enum class AzureCredentialsKind : int8_t {
-  /// Anonymous access (no credentials used), public
-  Anonymous,
-  /// Use explicitly-provided access key pair
-  StorageCredentials,
-  /// Use ServicePrincipleCredentials
-  ServicePrincipleCredentials,
-  /// Use Sas Token to authenticate
-  Sas,
-  /// Use Connection String
-  ConnectionString
-};
+namespace Azure::Storage::Blobs {
+class BlobServiceClient;
+}
+
+namespace Azure::Storage::Files::DataLake {
+class DataLakeServiceClient;
+}
+
+namespace arrow::fs {
 
 enum class AzureBackend : bool {

Review Comment:
   Hmm... I don't really want to think about the consequences of using something else than a regular integer type for the enum base type. Why not simply `int`? We're not trying to pack bits here.



##########
cpp/src/arrow/filesystem/azurefs.h:
##########
@@ -25,86 +25,104 @@
 #include "arrow/util/macros.h"
 #include "arrow/util/uri.h"
 
-namespace Azure {
-namespace Core {
-namespace Credentials {
-
+namespace Azure::Core::Credentials {
 class TokenCredential;
+}
 
-}  // namespace Credentials
-}  // namespace Core
-namespace Storage {
-
+namespace Azure::Storage {
 class StorageSharedKeyCredential;
+}
 
-}  // namespace Storage
-}  // namespace Azure
-
-namespace arrow {
-namespace fs {
-
-enum class AzureCredentialsKind : int8_t {
-  /// Anonymous access (no credentials used), public
-  Anonymous,
-  /// Use explicitly-provided access key pair
-  StorageCredentials,
-  /// Use ServicePrincipleCredentials
-  ServicePrincipleCredentials,
-  /// Use Sas Token to authenticate
-  Sas,
-  /// Use Connection String
-  ConnectionString
-};
+namespace Azure::Storage::Blobs {
+class BlobServiceClient;
+}
+
+namespace Azure::Storage::Files::DataLake {
+class DataLakeServiceClient;
+}
+
+namespace arrow::fs {
 
 enum class AzureBackend : bool {
-  /// Official Azure Remote Backend
-  Azure,
-  /// Local Simulated Storage
-  Azurite
+  /// \brief Official Azure Remote Backend
+  kAzure,
+  /// \brief Local Simulated Storage
+  kAzurite
 };
 
 /// Options for the AzureFileSystem implementation.
 struct ARROW_EXPORT AzureOptions {
-  std::string account_dfs_url;
-  std::string account_blob_url;
-  AzureBackend backend = AzureBackend::Azure;
-  AzureCredentialsKind credentials_kind = AzureCredentialsKind::Anonymous;
+  /// \brief The backend to connect to: Azure or Azurite (for testing).
+  AzureBackend backend = AzureBackend::kAzure;

Review Comment:
   Do we plan to autodetect the backend at some point? We do that in S3 (the `DetectS3Backend` function).



##########
cpp/src/arrow/filesystem/azurefs.h:
##########
@@ -25,86 +25,104 @@
 #include "arrow/util/macros.h"
 #include "arrow/util/uri.h"
 
-namespace Azure {
-namespace Core {
-namespace Credentials {
-
+namespace Azure::Core::Credentials {
 class TokenCredential;
+}
 
-}  // namespace Credentials
-}  // namespace Core
-namespace Storage {
-
+namespace Azure::Storage {
 class StorageSharedKeyCredential;
+}
 
-}  // namespace Storage
-}  // namespace Azure
-
-namespace arrow {
-namespace fs {
-
-enum class AzureCredentialsKind : int8_t {
-  /// Anonymous access (no credentials used), public
-  Anonymous,
-  /// Use explicitly-provided access key pair
-  StorageCredentials,
-  /// Use ServicePrincipleCredentials
-  ServicePrincipleCredentials,
-  /// Use Sas Token to authenticate
-  Sas,
-  /// Use Connection String
-  ConnectionString
-};
+namespace Azure::Storage::Blobs {
+class BlobServiceClient;
+}
+
+namespace Azure::Storage::Files::DataLake {
+class DataLakeServiceClient;
+}
+
+namespace arrow::fs {
 
 enum class AzureBackend : bool {
-  /// Official Azure Remote Backend
-  Azure,
-  /// Local Simulated Storage
-  Azurite
+  /// \brief Official Azure Remote Backend
+  kAzure,
+  /// \brief Local Simulated Storage
+  kAzurite
 };
 
 /// Options for the AzureFileSystem implementation.
 struct ARROW_EXPORT AzureOptions {
-  std::string account_dfs_url;
-  std::string account_blob_url;
-  AzureBackend backend = AzureBackend::Azure;
-  AzureCredentialsKind credentials_kind = AzureCredentialsKind::Anonymous;
+  /// \brief The backend to connect to: Azure or Azurite (for testing).
+  AzureBackend backend = AzureBackend::kAzure;
 
-  std::string sas_token;
-  std::string connection_string;
-  std::shared_ptr<Azure::Storage::StorageSharedKeyCredential>
-      storage_credentials_provider;
-  std::shared_ptr<Azure::Core::Credentials::TokenCredential>
-      service_principle_credentials_provider;
+  // TODO(GH-38598): Add support for more auth methods.
+  // std::string connection_string;
+  // std::string sas_token;
 
   /// \brief Default metadata for OpenOutputStream.
   ///
   /// This will be ignored if non-empty metadata is passed to OpenOutputStream.
   std::shared_ptr<const KeyValueMetadata> default_metadata;
 
+ private:
+  std::string account_blob_url_;
+  std::string account_dfs_url_;
+
+  enum class CredentialKind {
+    kAnonymous,
+    kStorageSharedKeyCredential,
+  } credential_kind_ = CredentialKind::kAnonymous;
+
+  std::shared_ptr<Azure::Storage::StorageSharedKeyCredential>
+      storage_shared_key_credential_;
+
+ public:
   AzureOptions();
+  ~AzureOptions();
 
-  Status ConfigureAccountKeyCredentials(const std::string& account_name,
-                                        const std::string& account_key);
+  Status ConfigureAccountKeyCredential(const std::string& account_name,
+                                       const std::string& account_key);
 
   bool Equals(const AzureOptions& other) const;
+
+  const std::string& AccountBlobUrl() const { return account_blob_url_; }
+  const std::string& AccountDfsUrl() const { return account_dfs_url_; }
+
+  std::unique_ptr<Azure::Storage::Blobs::BlobServiceClient> MakeBlobServiceClient() const;
+
+  std::unique_ptr<Azure::Storage::Files::DataLake::DataLakeServiceClient>
+  MakeDataLakeServiceClient() const;

Review Comment:
   Is it useful to expose these in the public API? We are not rewriting a Azure SDK API...



##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -193,51 +265,123 @@ TEST(AzureFileSystem, OptionsCompare) {
   EXPECT_TRUE(options.Equals(options));
 }
 
-class AzureFileSystemTest : public ::testing::Test {
+struct PreexistingData {
+ public:
+  using RNG = std::mt19937_64;

Review Comment:
   I don't know if this is used to generate large-ish data, but if it is, there are faster random generators available.



##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -193,51 +265,123 @@ TEST(AzureFileSystem, OptionsCompare) {
   EXPECT_TRUE(options.Equals(options));
 }
 
-class AzureFileSystemTest : public ::testing::Test {
+struct PreexistingData {
+ public:
+  using RNG = std::mt19937_64;
+
  public:
+  const std::string container_name;
+  static constexpr char const* kObjectName = "test-object-name";
+
+  static constexpr char const* kLoremIpsum = R"""(
+Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
+incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
+nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
+Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
+fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
+culpa qui officia deserunt mollit anim id est laborum.
+)""";
+
+ public:
+  explicit PreexistingData(RNG& rng) : container_name{RandomContainerName(rng)} {}
+
+  // Accessors
+  std::string ContainerPath() const { return container_name + '/'; }
+  std::string ObjectPath() const { return ContainerPath() + kObjectName; }
+  std::string NotFoundObjectPath() const { return ContainerPath() + "not-found"; }
+
+  std::string RandomDirectoryPath(RNG& rng) {
+    return internal::ConcatAbstractPath(container_name, RandomDirectoryName(rng));

Review Comment:
   I'm curious: why use `ContainerPath()` above and `ConcatAbstractPath()` here?



##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -310,10 +420,10 @@ class AzureFileSystemTest : public ::testing::Test {
   };
 
   // Need to use "void" as the return type to use ASSERT_* in this method.
-  void CreateHierarchicalData(HierarchicalPaths& paths) {
-    const auto container_path = RandomContainerName();
-    const auto directory_path =
-        internal::ConcatAbstractPath(container_path, RandomDirectoryName());
+  void CreateHierarchicalData(HierarchicalPaths* paths) {

Review Comment:
   +1



##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -377,199 +487,337 @@ class AzureFileSystemTest : public ::testing::Test {
                    strlen(kSubData));
     AssertFileInfo(infos[10], "container/somefile", FileType::File, strlen(kSomeData));
     AssertFileInfo(infos[11], "empty-container", FileType::Directory);
-    AssertFileInfo(infos[12], PreexistingContainerName(), FileType::Directory);
-    AssertFileInfo(infos[13], PreexistingObjectPath(), FileType::File);
   }
-};
 
-class AzuriteFileSystemTest : public AzureFileSystemTest {
-  Result<AzureOptions> MakeOptions() override {
-    EXPECT_THAT(GetAzuriteEnv(), NotNull());
-    ARROW_EXPECT_OK(GetAzuriteEnv()->status());
-    ARROW_ASSIGN_OR_RAISE(debug_log_start_, GetAzuriteEnv()->GetDebugLogSize());
-    AzureOptions options;
-    options.backend = AzureBackend::Azurite;
-    ARROW_EXPECT_OK(options.ConfigureAccountKeyCredentials(
-        GetAzuriteEnv()->account_name(), GetAzuriteEnv()->account_key()));
-    return options;
+  bool WithHierarchicalNamespace() const {
+    EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv());
+    return env->WithHierarchicalNamespace();
   }
 
-  void TearDown() override {
-    AzureFileSystemTest::TearDown();
-    if (HasFailure()) {
-      // XXX: This may not include all logs in the target test because
-      // Azurite doesn't flush debug logs immediately... You may want
-      // to check the log manually...
-      ARROW_IGNORE_EXPR(GetAzuriteEnv()->DumpDebugLog(debug_log_start_));
+  // Tests that are called from more than one implementation of AzureFileSystemTest
+
+  void DetectHierarchicalNamespaceTest();
+  void GetFileInfoObjectTest();
+  void GetFileInfoObjectWithNestedStructureTest();
+
+  void DeleteDirSuccessEmptyTest() {
+    auto data = SetUpPreexistingData();
+    const auto directory_path =
+        internal::ConcatAbstractPath(data.container_name, data.RandomDirectoryName(rng_));
+
+    if (WithHierarchicalNamespace()) {
+      ASSERT_OK(fs_->CreateDir(directory_path, true));
+      arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::Directory);
+      ASSERT_OK(fs_->DeleteDir(directory_path));
+      arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound);
+    } else {
+      // There is only virtual directory without hierarchical namespace
+      // support. So the CreateDir() and DeleteDir() do nothing.
+      ASSERT_OK(fs_->CreateDir(directory_path));
+      arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound);
+      ASSERT_OK(fs_->DeleteDir(directory_path));
+      arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound);
     }
   }
 
-  int64_t debug_log_start_ = 0;
-};
+  void CreateDirSuccessContainerAndDirectoryTest() {
+    auto data = SetUpPreexistingData();
+    const auto path = data.RandomDirectoryPath(rng_);
+    ASSERT_OK(fs_->CreateDir(path, false));
+    if (WithHierarchicalNamespace()) {
+      arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory);
+    } else {
+      // There is only virtual directory without hierarchical namespace
+      // support. So the CreateDir() does nothing.
+      arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound);
+    }
+  }
 
-class AzureFlatNamespaceFileSystemTest : public AzureFileSystemTest {
-  Result<AzureOptions> MakeOptions() override {
-    AzureOptions options;
-    const auto account_key = std::getenv("AZURE_FLAT_NAMESPACE_ACCOUNT_KEY");
-    const auto account_name = std::getenv("AZURE_FLAT_NAMESPACE_ACCOUNT_NAME");
-    if (account_key && account_name) {
-      RETURN_NOT_OK(options.ConfigureAccountKeyCredentials(account_name, account_key));
-      return options;
+  void CreateDirRecursiveSuccessContainerOnlyTest() {
+    auto container_name = PreexistingData::RandomContainerName(rng_);
+    ASSERT_OK(fs_->CreateDir(container_name, true));
+    arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory);
+  }
+
+  void CreateDirRecursiveSuccessDirectoryOnlyTest() {
+    auto data = SetUpPreexistingData();
+    const auto parent = data.RandomDirectoryPath(rng_);
+    const auto path = internal::ConcatAbstractPath(parent, "new-sub");
+    ASSERT_OK(fs_->CreateDir(path, true));
+    if (WithHierarchicalNamespace()) {
+      arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory);
+      arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory);
+    } else {
+      // There is only virtual directory without hierarchical namespace
+      // support. So the CreateDir() does nothing.
+      arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound);
+      arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound);
     }
-    return Status::Cancelled(
-        "Connection details not provided for a real flat namespace "
-        "account.");
   }
-};
 
-// How to enable this test:
-//
-// You need an Azure account. You should be able to create a free
-// account at https://azure.microsoft.com/en-gb/free/ . You should be
-// able to create a storage account through the portal Web UI.
-//
-// See also the official document how to create a storage account:
-// https://learn.microsoft.com/en-us/azure/storage/blobs/create-data-lake-storage-account
-//
-// A few suggestions on configuration:
-//
-// * Use Standard general-purpose v2 not premium
-// * Use LRS redundancy
-// * Obviously you need to enable hierarchical namespace.
-// * Set the default access tier to hot
-// * SFTP, NFS and file shares are not required.
-class AzureHierarchicalNamespaceFileSystemTest : public AzureFileSystemTest {
-  Result<AzureOptions> MakeOptions() override {
-    AzureOptions options;
-    const auto account_key = std::getenv("AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_KEY");
-    const auto account_name = std::getenv("AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_NAME");
-    if (account_key && account_name) {
-      RETURN_NOT_OK(options.ConfigureAccountKeyCredentials(account_name, account_key));
-      return options;
+  void CreateDirRecursiveSuccessContainerAndDirectoryTest() {
+    auto data = SetUpPreexistingData();
+    const auto parent = data.RandomDirectoryPath(rng_);
+    const auto path = internal::ConcatAbstractPath(parent, "new-sub");
+    ASSERT_OK(fs_->CreateDir(path, true));
+    if (WithHierarchicalNamespace()) {
+      arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory);
+      arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory);
+      arrow::fs::AssertFileInfo(fs_.get(), data.container_name, FileType::Directory);
+    } else {
+      // There is only virtual directory without hierarchical namespace
+      // support. So the CreateDir() does nothing.
+      arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound);
+      arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound);
+      arrow::fs::AssertFileInfo(fs_.get(), data.container_name, FileType::Directory);
     }
-    return Status::Cancelled(
-        "Connection details not provided for a real hierarchical namespace "
-        "account.");
   }
-};
 
-TEST_F(AzureFlatNamespaceFileSystemTest, DetectHierarchicalNamespace) {
-  auto hierarchical_namespace = internal::HierarchicalNamespaceDetector();
-  ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get()));
-  ASSERT_OK_AND_EQ(false, hierarchical_namespace.Enabled(PreexistingContainerName()));
-}
+  void DeleteDirContentsSuccessNonexistentTest() {
+    auto data = SetUpPreexistingData();
+    const auto directory_path = data.RandomDirectoryPath(rng_);
+    ASSERT_OK(fs_->DeleteDirContents(directory_path, true));
+    arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound);
+  }
 
-TEST_F(AzureHierarchicalNamespaceFileSystemTest, DetectHierarchicalNamespace) {
-  auto hierarchical_namespace = internal::HierarchicalNamespaceDetector();
-  ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get()));
-  ASSERT_OK_AND_EQ(true, hierarchical_namespace.Enabled(PreexistingContainerName()));
-}
+  void DeleteDirContentsFailureNonexistentTest() {
+    auto data = SetUpPreexistingData();
+    const auto directory_path = data.RandomDirectoryPath(rng_);
+    ASSERT_RAISES(IOError, fs_->DeleteDirContents(directory_path, false));
+  }
+};
 
-TEST_F(AzuriteFileSystemTest, DetectHierarchicalNamespace) {
-  auto hierarchical_namespace = internal::HierarchicalNamespaceDetector();
-  ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get()));
-  ASSERT_OK_AND_EQ(false, hierarchical_namespace.Enabled(PreexistingContainerName()));
-}
+void AzureFileSystemTest::DetectHierarchicalNamespaceTest() {
+  // Check the environments are implemented and injected here correctly.
+  auto expected = WithHierarchicalNamespace();
 
-TEST_F(AzuriteFileSystemTest, DetectHierarchicalNamespaceFailsWithMissingContainer) {
+  auto data = SetUpPreexistingData();
   auto hierarchical_namespace = internal::HierarchicalNamespaceDetector();
   ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get()));
-  ASSERT_NOT_OK(hierarchical_namespace.Enabled("nonexistent-container"));
+  ASSERT_OK_AND_EQ(expected, hierarchical_namespace.Enabled(data.container_name));
 }
 
-TEST_F(AzuriteFileSystemTest, GetFileInfoAccount) {
-  AssertFileInfo(fs_.get(), "", FileType::Directory);
-
-  // URI
-  ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://"));
-}
-
-TEST_F(AzuriteFileSystemTest, GetFileInfoContainer) {
-  AssertFileInfo(fs_.get(), PreexistingContainerName(), FileType::Directory);
+void AzureFileSystemTest::GetFileInfoObjectTest() {
+  auto data = SetUpPreexistingData();
+  auto object_properties =
+      blob_service_client_->GetBlobContainerClient(data.container_name)
+          .GetBlobClient(data.kObjectName)
+          .GetProperties()
+          .Value;
 
-  AssertFileInfo(fs_.get(), "nonexistent-container", FileType::NotFound);
+  AssertFileInfo(fs_.get(), data.ObjectPath(), FileType::File,
+                 std::chrono::system_clock::time_point{object_properties.LastModified},
+                 static_cast<int64_t>(object_properties.BlobSize));
 
   // URI
-  ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + PreexistingContainerName()));
+  ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + std::string{data.kObjectName}));
 }
 
-void AzureFileSystemTest::RunGetFileInfoObjectWithNestedStructureTest() {
+void AzureFileSystemTest::GetFileInfoObjectWithNestedStructureTest() {
+  auto data = SetUpPreexistingData();
   // Adds detailed tests to handle cases of different edge cases
   // with directory naming conventions (e.g. with and without slashes).
   constexpr auto kObjectName = "test-object-dir/some_other_dir/another_dir/foo";
   ASSERT_OK_AND_ASSIGN(
       auto output,
-      fs_->OpenOutputStream(PreexistingContainerPath() + kObjectName, /*metadata=*/{}));
-  const std::string_view data(kLoremIpsum);
-  ASSERT_OK(output->Write(data));
+      fs_->OpenOutputStream(data.ContainerPath() + kObjectName, /*metadata=*/{}));
+  const std::string_view lorem_ipsum(PreexistingData::kLoremIpsum);
+  ASSERT_OK(output->Write(lorem_ipsum));
   ASSERT_OK(output->Close());
 
   // 0 is immediately after "/" lexicographically, ensure that this doesn't
   // cause unexpected issues.
-  ASSERT_OK_AND_ASSIGN(output,
-                       fs_->OpenOutputStream(
-                           PreexistingContainerPath() + "test-object-dir/some_other_dir0",
-                           /*metadata=*/{}));
-  ASSERT_OK(output->Write(data));
-  ASSERT_OK(output->Close());
   ASSERT_OK_AND_ASSIGN(
-      output, fs_->OpenOutputStream(PreexistingContainerPath() + kObjectName + "0",
-                                    /*metadata=*/{}));
-  ASSERT_OK(output->Write(data));
+      output,
+      fs_->OpenOutputStream(data.ContainerPath() + "test-object-dir/some_other_dir0",
+                            /*metadata=*/{}));
+  ASSERT_OK(output->Write(lorem_ipsum));
+  ASSERT_OK(output->Close());
+  ASSERT_OK_AND_ASSIGN(output,
+                       fs_->OpenOutputStream(data.ContainerPath() + kObjectName + "0",
+                                             /*metadata=*/{}));
+  ASSERT_OK(output->Write(lorem_ipsum));
   ASSERT_OK(output->Close());
 
-  AssertFileInfo(fs_.get(), PreexistingContainerPath() + kObjectName, FileType::File);
-  AssertFileInfo(fs_.get(), PreexistingContainerPath() + kObjectName + "/",
-                 FileType::NotFound);
-  AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir",
+  AssertFileInfo(fs_.get(), data.ContainerPath() + kObjectName, FileType::File);
+  AssertFileInfo(fs_.get(), data.ContainerPath() + kObjectName + "/", FileType::NotFound);
+  AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-dir",
                  FileType::Directory);
-  AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir/",
+  AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-dir/",
                  FileType::Directory);
-  AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir/some_other_dir",
+  AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-dir/some_other_dir",
                  FileType::Directory);
-  AssertFileInfo(fs_.get(),
-                 PreexistingContainerPath() + "test-object-dir/some_other_dir/",
+  AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-dir/some_other_dir/",
                  FileType::Directory);
 
-  AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-di",
-                 FileType::NotFound);
-  AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir/some_other_di",
+  AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-di", FileType::NotFound);
+  AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-dir/some_other_di",
                  FileType::NotFound);
+
+  if (WithHierarchicalNamespace()) {
+    datalake_service_client_->GetFileSystemClient(data.container_name)
+        .GetDirectoryClient("test-empty-object-dir")
+        .Create();
+
+    AssertFileInfo(fs_.get(), data.ContainerPath() + "test-empty-object-dir",
+                   FileType::Directory);
+  }
 }
 
-TEST_F(AzuriteFileSystemTest, GetFileInfoObjectWithNestedStructure) {
-  RunGetFileInfoObjectWithNestedStructureTest();
+template <class AzureEnvClass>
+class AzureFileSystemTestImpl : public AzureFileSystemTest {
+ public:
+  using AzureFileSystemTest::AzureFileSystemTest;
+
+  Result<BaseAzureEnv*> GetAzureEnv() const final { return AzureEnvClass::GetInstance(); }
+};
+
+// How to enable the non-Azurite tests:
+//
+// You need an Azure account. You should be able to create a free account [1].
+// Through the portal Web UI, you should create a storage account [2].
+//
+// A few suggestions on configuration:
+//
+// * Use Standard general-purpose v2 not premium
+// * Use LRS redundancy
+// * Set the default access tier to hot
+// * SFTP, NFS and file shares are not required.
+//
+// You must not enable Hierarchical Namespace on the storage account used for
+// AzureFlatNSFileSystemTest, but you must enable it on the storage account
+// used for AzureHierarchicalNSFileSystemTest.
+//
+// The credentials should be placed in the correct environment variables:
+//
+// * AZURE_FLAT_NAMESPACE_ACCOUNT_NAME
+// * AZURE_FLAT_NAMESPACE_ACCOUNT_KEY
+// * AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_NAME
+// * AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_KEY
+//
+// [1]: https://azure.microsoft.com/en-gb/free/
+// [2]:
+// https://learn.microsoft.com/en-us/azure/storage/blobs/create-data-lake-storage-account
+using AzureFlatNSFileSystemTest = AzureFileSystemTestImpl<AzureFlatNSEnv>;
+using AzureHierarchicalNSFileSystemTest = AzureFileSystemTestImpl<AzureHierarchicalNSEnv>;
+using AzuriteFileSystemTest = AzureFileSystemTestImpl<AzuriteEnv>;
+
+// Tests using all the 3 environments (Azurite, Azure w/o HNS (flat), Azure w/ HNS)
+
+template <class AzureEnvClass>
+using AzureFileSystemTestOnAllEnvs = AzureFileSystemTestImpl<AzureEnvClass>;
+
+using AllEnvironments =
+    ::testing::Types<AzuriteEnv, AzureFlatNSEnv, AzureHierarchicalNSEnv>;
+
+TYPED_TEST_SUITE(AzureFileSystemTestOnAllEnvs, AllEnvironments);
+
+TYPED_TEST(AzureFileSystemTestOnAllEnvs, DetectHierarchicalNamespace) {
+  this->DetectHierarchicalNamespaceTest();
 }
 
-TEST_F(AzureHierarchicalNamespaceFileSystemTest, GetFileInfoObjectWithNestedStructure) {
-  RunGetFileInfoObjectWithNestedStructureTest();
-  datalake_service_client_->GetFileSystemClient(PreexistingContainerName())
-      .GetDirectoryClient("test-empty-object-dir")
-      .Create();
+TYPED_TEST(AzureFileSystemTestOnAllEnvs, GetFileInfoObject) {
+  this->GetFileInfoObjectTest();
+}
 
-  AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-empty-object-dir",
-                 FileType::Directory);
+TYPED_TEST(AzureFileSystemTestOnAllEnvs, DeleteDirSuccessEmpty) {
+  this->DeleteDirSuccessEmptyTest();
 }
 
-void AzureFileSystemTest::RunGetFileInfoObjectTest() {
-  auto object_properties =
-      blob_service_client_->GetBlobContainerClient(PreexistingContainerName())
-          .GetBlobClient(PreexistingObjectName())
-          .GetProperties()
-          .Value;
+TYPED_TEST(AzureFileSystemTestOnAllEnvs, GetFileInfoObjectWithNestedStructure) {
+  this->GetFileInfoObjectWithNestedStructureTest();
+}
 
-  AssertFileInfo(fs_.get(), PreexistingObjectPath(), FileType::File,
-                 std::chrono::system_clock::time_point(object_properties.LastModified),
-                 static_cast<int64_t>(object_properties.BlobSize));
+TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirSuccessContainerAndDirectory) {
+  this->CreateDirSuccessContainerAndDirectoryTest();
+}
+
+TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessContainerOnly) {
+  this->CreateDirRecursiveSuccessContainerOnlyTest();
+}
+
+TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessDirectoryOnly) {
+  this->CreateDirRecursiveSuccessDirectoryOnlyTest();
+}
+
+TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessContainerAndDirectory) {
+  this->CreateDirRecursiveSuccessContainerAndDirectoryTest();
+}
+
+// Tests using a real storage account *with Hierarchical Namespace enabled*
+
+TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirFailureNonexistent) {
+  auto data = SetUpPreexistingData();
+  const auto path = data.RandomDirectoryPath(rng_);
+  ASSERT_RAISES(IOError, fs_->DeleteDir(path));
+}
+
+TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirSuccessHaveBlob) {
+  auto data = SetUpPreexistingData();
+  const auto directory_path = data.RandomDirectoryPath(rng_);
+  const auto blob_path = internal::ConcatAbstractPath(directory_path, "hello.txt");
+  ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(blob_path));
+  ASSERT_OK(output->Write(std::string_view("hello")));
+  ASSERT_OK(output->Close());
+  arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::File);
+  ASSERT_OK(fs_->DeleteDir(directory_path));
+  arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::NotFound);
+}
+
+TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirSuccessHaveDirectory) {
+  auto data = SetUpPreexistingData();
+  const auto parent = data.RandomDirectoryPath(rng_);
+  const auto path = internal::ConcatAbstractPath(parent, "new-sub");
+  ASSERT_OK(fs_->CreateDir(path, true));
+  arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory);
+  arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory);
+  ASSERT_OK(fs_->DeleteDir(parent));
+  arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound);
+  arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound);
+}
+
+TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirContentsSuccessExist) {
+  auto preexisting_data = SetUpPreexistingData();
+  HierarchicalPaths paths;
+  CreateHierarchicalData(&paths);
+  ASSERT_OK(fs_->DeleteDirContents(paths.directory));
+  arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::Directory);
+  for (const auto& sub_path : paths.sub_paths) {
+    arrow::fs::AssertFileInfo(fs_.get(), sub_path, FileType::NotFound);
+  }
+}
+
+TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirContentsSuccessNonexistent) {
+  this->DeleteDirContentsSuccessNonexistentTest();
+}
+
+TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirContentsFailureNonexistent) {
+  this->DeleteDirContentsFailureNonexistentTest();
+}
+
+// Tests using Azurite (the local Azure emulator)
+
+TEST_F(AzuriteFileSystemTest, DetectHierarchicalNamespaceFailsWithMissingContainer) {
+  auto hierarchical_namespace = internal::HierarchicalNamespaceDetector();
+  ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get()));
+  ASSERT_NOT_OK(hierarchical_namespace.Enabled("nonexistent-container"));

Review Comment:
   Do we expect it to raise a specific error? Can we use `ASSERT_RAISES`?



##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -71,56 +72,113 @@ using ::testing::Not;
 using ::testing::NotNull;
 
 namespace Blobs = Azure::Storage::Blobs;
-namespace Files = Azure::Storage::Files;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
 
-auto const* kLoremIpsum = R"""(
-Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
-incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
-nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
-Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
-fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
-culpa qui officia deserunt mollit anim id est laborum.
-)""";
+class BaseAzureEnv : public ::testing::Environment {
+ protected:
+  std::string account_name_;
+  std::string account_key_;
+
+  BaseAzureEnv(std::string account_name, std::string account_key)
+      : account_name_(std::move(account_name)), account_key_(std::move(account_key)) {}
 
-class AzuriteEnv : public ::testing::Environment {
  public:
-  AzuriteEnv() {
-    account_name_ = "devstoreaccount1";
-    account_key_ =
-        "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/"
-        "KBHBeksoGMGw==";
-    auto exe_path = bp::search_path("azurite");
-    if (exe_path.empty()) {
-      auto error = std::string("Could not find Azurite emulator.");
-      status_ = Status::Invalid(error);
-      return;
+  ~BaseAzureEnv() override = default;
+
+  const std::string& account_name() const { return account_name_; }
+  const std::string& account_key() const { return account_key_; }
+
+  virtual AzureBackend backend() const = 0;
+
+  virtual bool WithHierarchicalNamespace() const { return false; }
+
+  virtual Result<int64_t> GetDebugLogSize() { return 0; }
+  virtual Status DumpDebugLog(int64_t position) {
+    return Status::NotImplemented("BaseAzureEnv::DumpDebugLog");
+  }
+};
+
+template <class AzureEnvClass>
+class AzureEnvImpl : public BaseAzureEnv {
+ protected:
+  static Result<BaseAzureEnv*> MakeAndAddToGlobalTestEnvironment() {
+    ARROW_ASSIGN_OR_RAISE(auto instance, AzureEnvClass::Make());
+    auto* heap_ptr = instance.release();
+    ::testing::AddGlobalTestEnvironment(heap_ptr);
+    return heap_ptr;
+  }
+
+  using BaseAzureEnv::BaseAzureEnv;
+
+  static Result<std::unique_ptr<AzureEnvClass>> MakeFromEnvVars(

Review Comment:
   There are several different factory functions here, with differing signatures and no obvious clue as to which one should be used and when. Can you comment a bit on the purpose/motivation?



##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -71,56 +72,113 @@ using ::testing::Not;
 using ::testing::NotNull;
 
 namespace Blobs = Azure::Storage::Blobs;
-namespace Files = Azure::Storage::Files;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
 
-auto const* kLoremIpsum = R"""(
-Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
-incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
-nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
-Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
-fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
-culpa qui officia deserunt mollit anim id est laborum.
-)""";
+class BaseAzureEnv : public ::testing::Environment {
+ protected:
+  std::string account_name_;
+  std::string account_key_;
+
+  BaseAzureEnv(std::string account_name, std::string account_key)
+      : account_name_(std::move(account_name)), account_key_(std::move(account_key)) {}
 
-class AzuriteEnv : public ::testing::Environment {
  public:
-  AzuriteEnv() {
-    account_name_ = "devstoreaccount1";
-    account_key_ =
-        "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/"
-        "KBHBeksoGMGw==";
-    auto exe_path = bp::search_path("azurite");
-    if (exe_path.empty()) {
-      auto error = std::string("Could not find Azurite emulator.");
-      status_ = Status::Invalid(error);
-      return;
+  ~BaseAzureEnv() override = default;
+
+  const std::string& account_name() const { return account_name_; }
+  const std::string& account_key() const { return account_key_; }
+
+  virtual AzureBackend backend() const = 0;
+
+  virtual bool WithHierarchicalNamespace() const { return false; }
+
+  virtual Result<int64_t> GetDebugLogSize() { return 0; }
+  virtual Status DumpDebugLog(int64_t position) {
+    return Status::NotImplemented("BaseAzureEnv::DumpDebugLog");
+  }
+};
+
+template <class AzureEnvClass>
+class AzureEnvImpl : public BaseAzureEnv {
+ protected:
+  static Result<BaseAzureEnv*> MakeAndAddToGlobalTestEnvironment() {
+    ARROW_ASSIGN_OR_RAISE(auto instance, AzureEnvClass::Make());
+    auto* heap_ptr = instance.release();
+    ::testing::AddGlobalTestEnvironment(heap_ptr);
+    return heap_ptr;
+  }
+
+  using BaseAzureEnv::BaseAzureEnv;
+
+  static Result<std::unique_ptr<AzureEnvClass>> MakeFromEnvVars(
+      const std::string& account_name_var, const std::string& account_key_var) {
+    const auto account_name = std::getenv(account_name_var.c_str());
+    const auto account_key = std::getenv(account_key_var.c_str());
+    if (!account_name) {
+      return Status::Cancelled(account_name_var + " not set.");
     }
-    auto temp_dir_ = *TemporaryDir::Make("azurefs-test-");
-    auto debug_log_path_result = temp_dir_->path().Join("debug.log");
-    if (!debug_log_path_result.ok()) {
-      status_ = debug_log_path_result.status();
-      return;
+    if (!account_key) {
+      return Status::Cancelled(account_key_var + " not set.");
     }
-    debug_log_path_ = *debug_log_path_result;
-    server_process_ =
-        bp::child(boost::this_process::environment(), exe_path, "--silent", "--location",
-                  temp_dir_->path().ToString(), "--debug", debug_log_path_.ToString());
-    if (!(server_process_.valid() && server_process_.running())) {
-      auto error = "Could not start Azurite emulator.";
-      server_process_.terminate();
-      server_process_.wait();
-      status_ = Status::Invalid(error);
-      return;
+    return std::unique_ptr<AzureEnvClass>{new AzureEnvClass(account_name, account_key)};
+  }
+
+ public:
+  ~AzureEnvImpl() override = default;
+
+  static Result<BaseAzureEnv*> GetInstance() {
+    static auto env = MakeAndAddToGlobalTestEnvironment();
+    if (env.ok()) {
+      return env;
     }
-    status_ = Status::OK();
+    return env.status();
   }
 
+  AzureBackend backend() const final { return AzureEnvClass::kBackend; }
+};
+
+class AzuriteEnv : public AzureEnvImpl<AzuriteEnv> {
+ private:
+  std::unique_ptr<TemporaryDir> temp_dir_;
+  arrow::internal::PlatformFilename debug_log_path_;
+  bp::child server_process_;
+
+  using AzureEnvImpl::AzureEnvImpl;
+
+ public:
+  static const AzureBackend kBackend = AzureBackend::kAzurite;
+
   ~AzuriteEnv() override {
     server_process_.terminate();
     server_process_.wait();
   }
 
-  Result<int64_t> GetDebugLogSize() {
+  static Result<std::unique_ptr<AzureEnvImpl>> Make() {

Review Comment:
   Yet another factory function that doesn't even delegate to the parent :-)



##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -193,51 +265,123 @@ TEST(AzureFileSystem, OptionsCompare) {
   EXPECT_TRUE(options.Equals(options));
 }
 
-class AzureFileSystemTest : public ::testing::Test {
+struct PreexistingData {
+ public:
+  using RNG = std::mt19937_64;
+
  public:
+  const std::string container_name;
+  static constexpr char const* kObjectName = "test-object-name";
+
+  static constexpr char const* kLoremIpsum = R"""(
+Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
+incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
+nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
+Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
+fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
+culpa qui officia deserunt mollit anim id est laborum.
+)""";
+
+ public:
+  explicit PreexistingData(RNG& rng) : container_name{RandomContainerName(rng)} {}
+
+  // Accessors
+  std::string ContainerPath() const { return container_name + '/'; }
+  std::string ObjectPath() const { return ContainerPath() + kObjectName; }
+  std::string NotFoundObjectPath() const { return ContainerPath() + "not-found"; }
+
+  std::string RandomDirectoryPath(RNG& rng) {
+    return internal::ConcatAbstractPath(container_name, RandomDirectoryName(rng));
+  }
+
+  // Utilities
+
+  static std::string RandomContainerName(RNG& rng) { return RandomChars(32, rng); }
+  static std::string RandomDirectoryName(RNG& rng) { return RandomChars(32, rng); }
+
+  static std::string RandomLine(int lineno, std::size_t width, RNG& rng) {

Review Comment:
   The codebase generally uses `int`, `int32_t` or `int64_t`. There is nothing conceptually wrong with `size_t` but the multiplicity of integer types makes the code less pleasant to read.
   
   (also, _please_ omit the `std::` prefix)



##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -71,56 +72,113 @@ using ::testing::Not;
 using ::testing::NotNull;
 
 namespace Blobs = Azure::Storage::Blobs;
-namespace Files = Azure::Storage::Files;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
 
-auto const* kLoremIpsum = R"""(
-Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
-incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
-nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
-Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
-fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
-culpa qui officia deserunt mollit anim id est laborum.
-)""";
+class BaseAzureEnv : public ::testing::Environment {
+ protected:
+  std::string account_name_;
+  std::string account_key_;
+
+  BaseAzureEnv(std::string account_name, std::string account_key)
+      : account_name_(std::move(account_name)), account_key_(std::move(account_key)) {}
 
-class AzuriteEnv : public ::testing::Environment {
  public:
-  AzuriteEnv() {
-    account_name_ = "devstoreaccount1";
-    account_key_ =
-        "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/"
-        "KBHBeksoGMGw==";
-    auto exe_path = bp::search_path("azurite");
-    if (exe_path.empty()) {
-      auto error = std::string("Could not find Azurite emulator.");
-      status_ = Status::Invalid(error);
-      return;
+  ~BaseAzureEnv() override = default;
+
+  const std::string& account_name() const { return account_name_; }
+  const std::string& account_key() const { return account_key_; }
+
+  virtual AzureBackend backend() const = 0;
+
+  virtual bool WithHierarchicalNamespace() const { return false; }
+
+  virtual Result<int64_t> GetDebugLogSize() { return 0; }
+  virtual Status DumpDebugLog(int64_t position) {
+    return Status::NotImplemented("BaseAzureEnv::DumpDebugLog");
+  }
+};
+
+template <class AzureEnvClass>
+class AzureEnvImpl : public BaseAzureEnv {
+ protected:
+  static Result<BaseAzureEnv*> MakeAndAddToGlobalTestEnvironment() {
+    ARROW_ASSIGN_OR_RAISE(auto instance, AzureEnvClass::Make());
+    auto* heap_ptr = instance.release();
+    ::testing::AddGlobalTestEnvironment(heap_ptr);
+    return heap_ptr;
+  }
+
+  using BaseAzureEnv::BaseAzureEnv;
+
+  static Result<std::unique_ptr<AzureEnvClass>> MakeFromEnvVars(
+      const std::string& account_name_var, const std::string& account_key_var) {
+    const auto account_name = std::getenv(account_name_var.c_str());
+    const auto account_key = std::getenv(account_key_var.c_str());
+    if (!account_name) {
+      return Status::Cancelled(account_name_var + " not set.");
     }
-    auto temp_dir_ = *TemporaryDir::Make("azurefs-test-");
-    auto debug_log_path_result = temp_dir_->path().Join("debug.log");
-    if (!debug_log_path_result.ok()) {
-      status_ = debug_log_path_result.status();
-      return;
+    if (!account_key) {
+      return Status::Cancelled(account_key_var + " not set.");
     }
-    debug_log_path_ = *debug_log_path_result;
-    server_process_ =
-        bp::child(boost::this_process::environment(), exe_path, "--silent", "--location",
-                  temp_dir_->path().ToString(), "--debug", debug_log_path_.ToString());
-    if (!(server_process_.valid() && server_process_.running())) {
-      auto error = "Could not start Azurite emulator.";
-      server_process_.terminate();
-      server_process_.wait();
-      status_ = Status::Invalid(error);
-      return;
+    return std::unique_ptr<AzureEnvClass>{new AzureEnvClass(account_name, account_key)};
+  }
+
+ public:
+  ~AzureEnvImpl() override = default;

Review Comment:
   Is this useful?



##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -71,56 +72,113 @@ using ::testing::Not;
 using ::testing::NotNull;
 
 namespace Blobs = Azure::Storage::Blobs;
-namespace Files = Azure::Storage::Files;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
 
-auto const* kLoremIpsum = R"""(
-Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
-incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
-nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
-Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
-fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
-culpa qui officia deserunt mollit anim id est laborum.
-)""";
+class BaseAzureEnv : public ::testing::Environment {
+ protected:
+  std::string account_name_;
+  std::string account_key_;
+
+  BaseAzureEnv(std::string account_name, std::string account_key)
+      : account_name_(std::move(account_name)), account_key_(std::move(account_key)) {}
 
-class AzuriteEnv : public ::testing::Environment {
  public:
-  AzuriteEnv() {
-    account_name_ = "devstoreaccount1";
-    account_key_ =
-        "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/"
-        "KBHBeksoGMGw==";
-    auto exe_path = bp::search_path("azurite");
-    if (exe_path.empty()) {
-      auto error = std::string("Could not find Azurite emulator.");
-      status_ = Status::Invalid(error);
-      return;
+  ~BaseAzureEnv() override = default;
+
+  const std::string& account_name() const { return account_name_; }
+  const std::string& account_key() const { return account_key_; }
+
+  virtual AzureBackend backend() const = 0;
+
+  virtual bool WithHierarchicalNamespace() const { return false; }
+
+  virtual Result<int64_t> GetDebugLogSize() { return 0; }
+  virtual Status DumpDebugLog(int64_t position) {
+    return Status::NotImplemented("BaseAzureEnv::DumpDebugLog");
+  }
+};
+
+template <class AzureEnvClass>
+class AzureEnvImpl : public BaseAzureEnv {
+ protected:
+  static Result<BaseAzureEnv*> MakeAndAddToGlobalTestEnvironment() {
+    ARROW_ASSIGN_OR_RAISE(auto instance, AzureEnvClass::Make());
+    auto* heap_ptr = instance.release();
+    ::testing::AddGlobalTestEnvironment(heap_ptr);
+    return heap_ptr;
+  }
+
+  using BaseAzureEnv::BaseAzureEnv;
+
+  static Result<std::unique_ptr<AzureEnvClass>> MakeFromEnvVars(
+      const std::string& account_name_var, const std::string& account_key_var) {
+    const auto account_name = std::getenv(account_name_var.c_str());
+    const auto account_key = std::getenv(account_key_var.c_str());
+    if (!account_name) {
+      return Status::Cancelled(account_name_var + " not set.");
     }
-    auto temp_dir_ = *TemporaryDir::Make("azurefs-test-");
-    auto debug_log_path_result = temp_dir_->path().Join("debug.log");
-    if (!debug_log_path_result.ok()) {
-      status_ = debug_log_path_result.status();
-      return;
+    if (!account_key) {
+      return Status::Cancelled(account_key_var + " not set.");

Review Comment:
   If one env var is set but not the other, it should probably be an error rather than a skip.



##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -71,56 +72,113 @@ using ::testing::Not;
 using ::testing::NotNull;
 
 namespace Blobs = Azure::Storage::Blobs;
-namespace Files = Azure::Storage::Files;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
 
-auto const* kLoremIpsum = R"""(
-Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
-incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
-nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
-Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
-fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
-culpa qui officia deserunt mollit anim id est laborum.
-)""";
+class BaseAzureEnv : public ::testing::Environment {
+ protected:
+  std::string account_name_;
+  std::string account_key_;
+
+  BaseAzureEnv(std::string account_name, std::string account_key)
+      : account_name_(std::move(account_name)), account_key_(std::move(account_key)) {}
 
-class AzuriteEnv : public ::testing::Environment {
  public:
-  AzuriteEnv() {
-    account_name_ = "devstoreaccount1";
-    account_key_ =
-        "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/"
-        "KBHBeksoGMGw==";
-    auto exe_path = bp::search_path("azurite");
-    if (exe_path.empty()) {
-      auto error = std::string("Could not find Azurite emulator.");
-      status_ = Status::Invalid(error);
-      return;
+  ~BaseAzureEnv() override = default;

Review Comment:
   Is this useful?



##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -193,51 +265,123 @@ TEST(AzureFileSystem, OptionsCompare) {
   EXPECT_TRUE(options.Equals(options));
 }
 
-class AzureFileSystemTest : public ::testing::Test {
+struct PreexistingData {
+ public:
+  using RNG = std::mt19937_64;
+
  public:
+  const std::string container_name;
+  static constexpr char const* kObjectName = "test-object-name";
+
+  static constexpr char const* kLoremIpsum = R"""(
+Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
+incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
+nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
+Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
+fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
+culpa qui officia deserunt mollit anim id est laborum.
+)""";
+
+ public:
+  explicit PreexistingData(RNG& rng) : container_name{RandomContainerName(rng)} {}
+
+  // Accessors
+  std::string ContainerPath() const { return container_name + '/'; }
+  std::string ObjectPath() const { return ContainerPath() + kObjectName; }
+  std::string NotFoundObjectPath() const { return ContainerPath() + "not-found"; }
+
+  std::string RandomDirectoryPath(RNG& rng) {
+    return internal::ConcatAbstractPath(container_name, RandomDirectoryName(rng));
+  }
+
+  // Utilities
+
+  static std::string RandomContainerName(RNG& rng) { return RandomChars(32, rng); }
+  static std::string RandomDirectoryName(RNG& rng) { return RandomChars(32, rng); }
+
+  static std::string RandomLine(int lineno, std::size_t width, RNG& rng) {
+    auto line = std::to_string(lineno) + ":    ";
+    line += RandomChars(width - line.size() - 1, rng);
+    line += '\n';
+    return line;
+  }
+
+  static std::size_t RandomIndex(std::size_t end, RNG& rng) {
+    return std::uniform_int_distribution<std::size_t>(0, end - 1)(rng);
+  }
+
+  static std::string RandomChars(std::size_t count, RNG& rng) {
+    auto const fillers = std::string("abcdefghijlkmnopqrstuvwxyz0123456789");
+    std::uniform_int_distribution<std::size_t> d(0, fillers.size() - 1);
+    std::string s;
+    std::generate_n(std::back_inserter(s), count, [&] { return fillers[d(rng)]; });
+    return s;
+  }
+};
+
+class AzureFileSystemTest : public ::testing::Test {
+ protected:
+  // Set in constructor
+  std::mt19937_64 rng_;
+
+  // Set in SetUp()
+  int64_t debug_log_start_ = 0;
+  bool set_up_succeeded_ = false;
+  AzureOptions options_;
+
   std::shared_ptr<FileSystem> fs_;
   std::unique_ptr<Blobs::BlobServiceClient> blob_service_client_;
-  std::unique_ptr<Files::DataLake::DataLakeServiceClient> datalake_service_client_;
-  AzureOptions options_;
-  std::mt19937_64 generator_;
-  std::string container_name_;
-  bool suite_skipped_ = false;
+  std::unique_ptr<DataLake::DataLakeServiceClient> datalake_service_client_;
 
-  AzureFileSystemTest() : generator_(std::random_device()()) {}
+ public:
+  AzureFileSystemTest() : rng_(std::random_device()()) {}
+
+  virtual Result<BaseAzureEnv*> GetAzureEnv() const = 0;
 
-  virtual Result<AzureOptions> MakeOptions() = 0;
+  static Result<AzureOptions> MakeOptions(BaseAzureEnv* env) {
+    AzureOptions options;
+    options.backend = env->backend();
+    ARROW_EXPECT_OK(
+        options.ConfigureAccountKeyCredential(env->account_name(), env->account_key()));
+    return options;
+  }
 
   void SetUp() override {
-    auto options = MakeOptions();
-    if (options.ok()) {
-      options_ = *options;
+    auto make_options = [this]() -> Result<AzureOptions> {
+      ARROW_ASSIGN_OR_RAISE(auto env, GetAzureEnv());
+      EXPECT_THAT(env, NotNull());
+      ARROW_ASSIGN_OR_RAISE(debug_log_start_, env->GetDebugLogSize());
+      return MakeOptions(env);
+    };
+    auto options_res = make_options();
+    if (!options_res.ok() && options_res.status().IsCancelled()) {

Review Comment:
   Can probably simply be
   ```suggestion
       if (options_res.status().IsCancelled()) {
   ```



##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -254,54 +398,20 @@ class AzureFileSystemTest : public ::testing::Test {
     return blob_client;
   }
 
-  std::string PreexistingContainerName() const { return container_name_; }
-
-  std::string PreexistingContainerPath() const {
-    return PreexistingContainerName() + '/';
-  }
-
-  static std::string PreexistingObjectName() { return "test-object-name"; }
-
-  std::string PreexistingObjectPath() const {
-    return PreexistingContainerPath() + PreexistingObjectName();
-  }
-
-  std::string NotFoundObjectPath() { return PreexistingContainerPath() + "not-found"; }
-
-  std::string RandomLine(int lineno, std::size_t width) {
-    auto line = std::to_string(lineno) + ":    ";
-    line += RandomChars(width - line.size() - 1);
-    line += '\n';
-    return line;
-  }
-
-  std::size_t RandomIndex(std::size_t end) {
-    return std::uniform_int_distribution<std::size_t>(0, end - 1)(generator_);
-  }
-
-  std::string RandomChars(std::size_t count) {
-    auto const fillers = std::string("abcdefghijlkmnopqrstuvwxyz0123456789");
-    std::uniform_int_distribution<std::size_t> d(0, fillers.size() - 1);
-    std::string s;
-    std::generate_n(std::back_inserter(s), count, [&] { return fillers[d(generator_)]; });
-    return s;
-  }
-
-  std::string RandomContainerName() { return RandomChars(32); }
-
-  std::string RandomDirectoryName() { return RandomChars(32); }
-
-  void UploadLines(const std::vector<std::string>& lines, const char* path_to_file,
+  void UploadLines(const std::vector<std::string>& lines, const std::string& path,
                    int total_size) {
-    const auto path = PreexistingContainerPath() + path_to_file;
     ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {}));
     const auto all_lines = std::accumulate(lines.begin(), lines.end(), std::string(""));
     ASSERT_OK(output->Write(all_lines));
     ASSERT_OK(output->Close());
   }
 
-  void RunGetFileInfoObjectWithNestedStructureTest();
-  void RunGetFileInfoObjectTest();
+  PreexistingData SetUpPreexistingData() {
+    PreexistingData data(rng_);
+    auto container_client = CreateContainer(data.container_name);
+    CreateBlob(container_client, data.kObjectName, PreexistingData::kLoremIpsum);

Review Comment:
   Can `CreateBlob` throw an exception? 



##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -193,51 +265,123 @@ TEST(AzureFileSystem, OptionsCompare) {
   EXPECT_TRUE(options.Equals(options));
 }
 
-class AzureFileSystemTest : public ::testing::Test {
+struct PreexistingData {
+ public:
+  using RNG = std::mt19937_64;
+
  public:
+  const std::string container_name;
+  static constexpr char const* kObjectName = "test-object-name";
+
+  static constexpr char const* kLoremIpsum = R"""(
+Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
+incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
+nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
+Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
+fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
+culpa qui officia deserunt mollit anim id est laborum.
+)""";
+
+ public:
+  explicit PreexistingData(RNG& rng) : container_name{RandomContainerName(rng)} {}
+
+  // Accessors
+  std::string ContainerPath() const { return container_name + '/'; }
+  std::string ObjectPath() const { return ContainerPath() + kObjectName; }
+  std::string NotFoundObjectPath() const { return ContainerPath() + "not-found"; }
+
+  std::string RandomDirectoryPath(RNG& rng) {
+    return internal::ConcatAbstractPath(container_name, RandomDirectoryName(rng));
+  }
+
+  // Utilities
+
+  static std::string RandomContainerName(RNG& rng) { return RandomChars(32, rng); }
+  static std::string RandomDirectoryName(RNG& rng) { return RandomChars(32, rng); }
+
+  static std::string RandomLine(int lineno, std::size_t width, RNG& rng) {
+    auto line = std::to_string(lineno) + ":    ";
+    line += RandomChars(width - line.size() - 1, rng);
+    line += '\n';
+    return line;
+  }
+
+  static std::size_t RandomIndex(std::size_t end, RNG& rng) {
+    return std::uniform_int_distribution<std::size_t>(0, end - 1)(rng);
+  }
+
+  static std::string RandomChars(std::size_t count, RNG& rng) {
+    auto const fillers = std::string("abcdefghijlkmnopqrstuvwxyz0123456789");
+    std::uniform_int_distribution<std::size_t> d(0, fillers.size() - 1);
+    std::string s;
+    std::generate_n(std::back_inserter(s), count, [&] { return fillers[d(rng)]; });
+    return s;
+  }
+};
+
+class AzureFileSystemTest : public ::testing::Test {
+ protected:
+  // Set in constructor
+  std::mt19937_64 rng_;
+
+  // Set in SetUp()
+  int64_t debug_log_start_ = 0;
+  bool set_up_succeeded_ = false;
+  AzureOptions options_;
+
   std::shared_ptr<FileSystem> fs_;
   std::unique_ptr<Blobs::BlobServiceClient> blob_service_client_;
-  std::unique_ptr<Files::DataLake::DataLakeServiceClient> datalake_service_client_;
-  AzureOptions options_;
-  std::mt19937_64 generator_;
-  std::string container_name_;
-  bool suite_skipped_ = false;
+  std::unique_ptr<DataLake::DataLakeServiceClient> datalake_service_client_;
 
-  AzureFileSystemTest() : generator_(std::random_device()()) {}
+ public:
+  AzureFileSystemTest() : rng_(std::random_device()()) {}

Review Comment:
   I'm not sure it's a good idea to vary the seed on each test invocation. For reproducibility, it would probably be better to use a fixed seed as in most other tests.



##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -377,199 +487,337 @@ class AzureFileSystemTest : public ::testing::Test {
                    strlen(kSubData));
     AssertFileInfo(infos[10], "container/somefile", FileType::File, strlen(kSomeData));
     AssertFileInfo(infos[11], "empty-container", FileType::Directory);
-    AssertFileInfo(infos[12], PreexistingContainerName(), FileType::Directory);
-    AssertFileInfo(infos[13], PreexistingObjectPath(), FileType::File);
   }
-};
 
-class AzuriteFileSystemTest : public AzureFileSystemTest {
-  Result<AzureOptions> MakeOptions() override {
-    EXPECT_THAT(GetAzuriteEnv(), NotNull());
-    ARROW_EXPECT_OK(GetAzuriteEnv()->status());
-    ARROW_ASSIGN_OR_RAISE(debug_log_start_, GetAzuriteEnv()->GetDebugLogSize());
-    AzureOptions options;
-    options.backend = AzureBackend::Azurite;
-    ARROW_EXPECT_OK(options.ConfigureAccountKeyCredentials(
-        GetAzuriteEnv()->account_name(), GetAzuriteEnv()->account_key()));
-    return options;
+  bool WithHierarchicalNamespace() const {
+    EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv());
+    return env->WithHierarchicalNamespace();
   }
 
-  void TearDown() override {
-    AzureFileSystemTest::TearDown();
-    if (HasFailure()) {
-      // XXX: This may not include all logs in the target test because
-      // Azurite doesn't flush debug logs immediately... You may want
-      // to check the log manually...
-      ARROW_IGNORE_EXPR(GetAzuriteEnv()->DumpDebugLog(debug_log_start_));
+  // Tests that are called from more than one implementation of AzureFileSystemTest
+
+  void DetectHierarchicalNamespaceTest();

Review Comment:
   Can we keep the current convention of calling test methods `TestSomething` rather than `SomethingTest`?



##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -377,199 +487,337 @@ class AzureFileSystemTest : public ::testing::Test {
                    strlen(kSubData));
     AssertFileInfo(infos[10], "container/somefile", FileType::File, strlen(kSomeData));
     AssertFileInfo(infos[11], "empty-container", FileType::Directory);
-    AssertFileInfo(infos[12], PreexistingContainerName(), FileType::Directory);
-    AssertFileInfo(infos[13], PreexistingObjectPath(), FileType::File);
   }
-};
 
-class AzuriteFileSystemTest : public AzureFileSystemTest {
-  Result<AzureOptions> MakeOptions() override {
-    EXPECT_THAT(GetAzuriteEnv(), NotNull());
-    ARROW_EXPECT_OK(GetAzuriteEnv()->status());
-    ARROW_ASSIGN_OR_RAISE(debug_log_start_, GetAzuriteEnv()->GetDebugLogSize());
-    AzureOptions options;
-    options.backend = AzureBackend::Azurite;
-    ARROW_EXPECT_OK(options.ConfigureAccountKeyCredentials(
-        GetAzuriteEnv()->account_name(), GetAzuriteEnv()->account_key()));
-    return options;
+  bool WithHierarchicalNamespace() const {

Review Comment:
   Should be `override`?



##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -377,199 +487,337 @@ class AzureFileSystemTest : public ::testing::Test {
                    strlen(kSubData));
     AssertFileInfo(infos[10], "container/somefile", FileType::File, strlen(kSomeData));
     AssertFileInfo(infos[11], "empty-container", FileType::Directory);
-    AssertFileInfo(infos[12], PreexistingContainerName(), FileType::Directory);
-    AssertFileInfo(infos[13], PreexistingObjectPath(), FileType::File);
   }
-};
 
-class AzuriteFileSystemTest : public AzureFileSystemTest {
-  Result<AzureOptions> MakeOptions() override {
-    EXPECT_THAT(GetAzuriteEnv(), NotNull());
-    ARROW_EXPECT_OK(GetAzuriteEnv()->status());
-    ARROW_ASSIGN_OR_RAISE(debug_log_start_, GetAzuriteEnv()->GetDebugLogSize());
-    AzureOptions options;
-    options.backend = AzureBackend::Azurite;
-    ARROW_EXPECT_OK(options.ConfigureAccountKeyCredentials(
-        GetAzuriteEnv()->account_name(), GetAzuriteEnv()->account_key()));
-    return options;
+  bool WithHierarchicalNamespace() const {
+    EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv());
+    return env->WithHierarchicalNamespace();
   }
 
-  void TearDown() override {
-    AzureFileSystemTest::TearDown();
-    if (HasFailure()) {
-      // XXX: This may not include all logs in the target test because
-      // Azurite doesn't flush debug logs immediately... You may want
-      // to check the log manually...
-      ARROW_IGNORE_EXPR(GetAzuriteEnv()->DumpDebugLog(debug_log_start_));
+  // Tests that are called from more than one implementation of AzureFileSystemTest
+
+  void DetectHierarchicalNamespaceTest();
+  void GetFileInfoObjectTest();
+  void GetFileInfoObjectWithNestedStructureTest();
+
+  void DeleteDirSuccessEmptyTest() {
+    auto data = SetUpPreexistingData();
+    const auto directory_path =
+        internal::ConcatAbstractPath(data.container_name, data.RandomDirectoryName(rng_));
+
+    if (WithHierarchicalNamespace()) {
+      ASSERT_OK(fs_->CreateDir(directory_path, true));
+      arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::Directory);
+      ASSERT_OK(fs_->DeleteDir(directory_path));
+      arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound);
+    } else {
+      // There is only virtual directory without hierarchical namespace
+      // support. So the CreateDir() and DeleteDir() do nothing.
+      ASSERT_OK(fs_->CreateDir(directory_path));
+      arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound);
+      ASSERT_OK(fs_->DeleteDir(directory_path));
+      arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound);
     }
   }
 
-  int64_t debug_log_start_ = 0;
-};
+  void CreateDirSuccessContainerAndDirectoryTest() {
+    auto data = SetUpPreexistingData();
+    const auto path = data.RandomDirectoryPath(rng_);
+    ASSERT_OK(fs_->CreateDir(path, false));
+    if (WithHierarchicalNamespace()) {
+      arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory);
+    } else {
+      // There is only virtual directory without hierarchical namespace
+      // support. So the CreateDir() does nothing.
+      arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound);
+    }
+  }
 
-class AzureFlatNamespaceFileSystemTest : public AzureFileSystemTest {
-  Result<AzureOptions> MakeOptions() override {
-    AzureOptions options;
-    const auto account_key = std::getenv("AZURE_FLAT_NAMESPACE_ACCOUNT_KEY");
-    const auto account_name = std::getenv("AZURE_FLAT_NAMESPACE_ACCOUNT_NAME");
-    if (account_key && account_name) {
-      RETURN_NOT_OK(options.ConfigureAccountKeyCredentials(account_name, account_key));
-      return options;
+  void CreateDirRecursiveSuccessContainerOnlyTest() {
+    auto container_name = PreexistingData::RandomContainerName(rng_);
+    ASSERT_OK(fs_->CreateDir(container_name, true));
+    arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory);
+  }
+
+  void CreateDirRecursiveSuccessDirectoryOnlyTest() {
+    auto data = SetUpPreexistingData();
+    const auto parent = data.RandomDirectoryPath(rng_);
+    const auto path = internal::ConcatAbstractPath(parent, "new-sub");
+    ASSERT_OK(fs_->CreateDir(path, true));
+    if (WithHierarchicalNamespace()) {
+      arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory);
+      arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory);
+    } else {
+      // There is only virtual directory without hierarchical namespace
+      // support. So the CreateDir() does nothing.
+      arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound);
+      arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound);
     }
-    return Status::Cancelled(
-        "Connection details not provided for a real flat namespace "
-        "account.");
   }
-};
 
-// How to enable this test:
-//
-// You need an Azure account. You should be able to create a free
-// account at https://azure.microsoft.com/en-gb/free/ . You should be
-// able to create a storage account through the portal Web UI.
-//
-// See also the official document how to create a storage account:
-// https://learn.microsoft.com/en-us/azure/storage/blobs/create-data-lake-storage-account
-//
-// A few suggestions on configuration:
-//
-// * Use Standard general-purpose v2 not premium
-// * Use LRS redundancy
-// * Obviously you need to enable hierarchical namespace.
-// * Set the default access tier to hot
-// * SFTP, NFS and file shares are not required.
-class AzureHierarchicalNamespaceFileSystemTest : public AzureFileSystemTest {
-  Result<AzureOptions> MakeOptions() override {
-    AzureOptions options;
-    const auto account_key = std::getenv("AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_KEY");
-    const auto account_name = std::getenv("AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_NAME");
-    if (account_key && account_name) {
-      RETURN_NOT_OK(options.ConfigureAccountKeyCredentials(account_name, account_key));
-      return options;
+  void CreateDirRecursiveSuccessContainerAndDirectoryTest() {
+    auto data = SetUpPreexistingData();
+    const auto parent = data.RandomDirectoryPath(rng_);
+    const auto path = internal::ConcatAbstractPath(parent, "new-sub");
+    ASSERT_OK(fs_->CreateDir(path, true));
+    if (WithHierarchicalNamespace()) {
+      arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory);
+      arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory);
+      arrow::fs::AssertFileInfo(fs_.get(), data.container_name, FileType::Directory);
+    } else {
+      // There is only virtual directory without hierarchical namespace
+      // support. So the CreateDir() does nothing.
+      arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound);
+      arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound);
+      arrow::fs::AssertFileInfo(fs_.get(), data.container_name, FileType::Directory);
     }
-    return Status::Cancelled(
-        "Connection details not provided for a real hierarchical namespace "
-        "account.");
   }
-};
 
-TEST_F(AzureFlatNamespaceFileSystemTest, DetectHierarchicalNamespace) {
-  auto hierarchical_namespace = internal::HierarchicalNamespaceDetector();
-  ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get()));
-  ASSERT_OK_AND_EQ(false, hierarchical_namespace.Enabled(PreexistingContainerName()));
-}
+  void DeleteDirContentsSuccessNonexistentTest() {
+    auto data = SetUpPreexistingData();
+    const auto directory_path = data.RandomDirectoryPath(rng_);
+    ASSERT_OK(fs_->DeleteDirContents(directory_path, true));
+    arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound);
+  }
 
-TEST_F(AzureHierarchicalNamespaceFileSystemTest, DetectHierarchicalNamespace) {
-  auto hierarchical_namespace = internal::HierarchicalNamespaceDetector();
-  ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get()));
-  ASSERT_OK_AND_EQ(true, hierarchical_namespace.Enabled(PreexistingContainerName()));
-}
+  void DeleteDirContentsFailureNonexistentTest() {
+    auto data = SetUpPreexistingData();
+    const auto directory_path = data.RandomDirectoryPath(rng_);
+    ASSERT_RAISES(IOError, fs_->DeleteDirContents(directory_path, false));
+  }
+};
 
-TEST_F(AzuriteFileSystemTest, DetectHierarchicalNamespace) {
-  auto hierarchical_namespace = internal::HierarchicalNamespaceDetector();
-  ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get()));
-  ASSERT_OK_AND_EQ(false, hierarchical_namespace.Enabled(PreexistingContainerName()));
-}
+void AzureFileSystemTest::DetectHierarchicalNamespaceTest() {
+  // Check the environments are implemented and injected here correctly.
+  auto expected = WithHierarchicalNamespace();
 
-TEST_F(AzuriteFileSystemTest, DetectHierarchicalNamespaceFailsWithMissingContainer) {
+  auto data = SetUpPreexistingData();
   auto hierarchical_namespace = internal::HierarchicalNamespaceDetector();
   ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get()));
-  ASSERT_NOT_OK(hierarchical_namespace.Enabled("nonexistent-container"));
+  ASSERT_OK_AND_EQ(expected, hierarchical_namespace.Enabled(data.container_name));
 }
 
-TEST_F(AzuriteFileSystemTest, GetFileInfoAccount) {
-  AssertFileInfo(fs_.get(), "", FileType::Directory);
-
-  // URI
-  ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://"));
-}
-
-TEST_F(AzuriteFileSystemTest, GetFileInfoContainer) {
-  AssertFileInfo(fs_.get(), PreexistingContainerName(), FileType::Directory);
+void AzureFileSystemTest::GetFileInfoObjectTest() {
+  auto data = SetUpPreexistingData();
+  auto object_properties =
+      blob_service_client_->GetBlobContainerClient(data.container_name)
+          .GetBlobClient(data.kObjectName)
+          .GetProperties()
+          .Value;
 
-  AssertFileInfo(fs_.get(), "nonexistent-container", FileType::NotFound);
+  AssertFileInfo(fs_.get(), data.ObjectPath(), FileType::File,
+                 std::chrono::system_clock::time_point{object_properties.LastModified},
+                 static_cast<int64_t>(object_properties.BlobSize));
 
   // URI
-  ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + PreexistingContainerName()));
+  ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + std::string{data.kObjectName}));
 }
 
-void AzureFileSystemTest::RunGetFileInfoObjectWithNestedStructureTest() {
+void AzureFileSystemTest::GetFileInfoObjectWithNestedStructureTest() {
+  auto data = SetUpPreexistingData();
   // Adds detailed tests to handle cases of different edge cases
   // with directory naming conventions (e.g. with and without slashes).
   constexpr auto kObjectName = "test-object-dir/some_other_dir/another_dir/foo";
   ASSERT_OK_AND_ASSIGN(
       auto output,
-      fs_->OpenOutputStream(PreexistingContainerPath() + kObjectName, /*metadata=*/{}));
-  const std::string_view data(kLoremIpsum);
-  ASSERT_OK(output->Write(data));
+      fs_->OpenOutputStream(data.ContainerPath() + kObjectName, /*metadata=*/{}));
+  const std::string_view lorem_ipsum(PreexistingData::kLoremIpsum);
+  ASSERT_OK(output->Write(lorem_ipsum));
   ASSERT_OK(output->Close());
 
   // 0 is immediately after "/" lexicographically, ensure that this doesn't
   // cause unexpected issues.
-  ASSERT_OK_AND_ASSIGN(output,
-                       fs_->OpenOutputStream(
-                           PreexistingContainerPath() + "test-object-dir/some_other_dir0",
-                           /*metadata=*/{}));
-  ASSERT_OK(output->Write(data));
-  ASSERT_OK(output->Close());
   ASSERT_OK_AND_ASSIGN(
-      output, fs_->OpenOutputStream(PreexistingContainerPath() + kObjectName + "0",
-                                    /*metadata=*/{}));
-  ASSERT_OK(output->Write(data));
+      output,
+      fs_->OpenOutputStream(data.ContainerPath() + "test-object-dir/some_other_dir0",
+                            /*metadata=*/{}));
+  ASSERT_OK(output->Write(lorem_ipsum));
+  ASSERT_OK(output->Close());
+  ASSERT_OK_AND_ASSIGN(output,
+                       fs_->OpenOutputStream(data.ContainerPath() + kObjectName + "0",
+                                             /*metadata=*/{}));
+  ASSERT_OK(output->Write(lorem_ipsum));
   ASSERT_OK(output->Close());
 
-  AssertFileInfo(fs_.get(), PreexistingContainerPath() + kObjectName, FileType::File);
-  AssertFileInfo(fs_.get(), PreexistingContainerPath() + kObjectName + "/",
-                 FileType::NotFound);
-  AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir",
+  AssertFileInfo(fs_.get(), data.ContainerPath() + kObjectName, FileType::File);
+  AssertFileInfo(fs_.get(), data.ContainerPath() + kObjectName + "/", FileType::NotFound);
+  AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-dir",
                  FileType::Directory);
-  AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir/",
+  AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-dir/",
                  FileType::Directory);
-  AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir/some_other_dir",
+  AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-dir/some_other_dir",
                  FileType::Directory);
-  AssertFileInfo(fs_.get(),
-                 PreexistingContainerPath() + "test-object-dir/some_other_dir/",
+  AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-dir/some_other_dir/",
                  FileType::Directory);
 
-  AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-di",
-                 FileType::NotFound);
-  AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir/some_other_di",
+  AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-di", FileType::NotFound);
+  AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-dir/some_other_di",
                  FileType::NotFound);
+
+  if (WithHierarchicalNamespace()) {
+    datalake_service_client_->GetFileSystemClient(data.container_name)
+        .GetDirectoryClient("test-empty-object-dir")
+        .Create();
+
+    AssertFileInfo(fs_.get(), data.ContainerPath() + "test-empty-object-dir",
+                   FileType::Directory);
+  }
 }
 
-TEST_F(AzuriteFileSystemTest, GetFileInfoObjectWithNestedStructure) {
-  RunGetFileInfoObjectWithNestedStructureTest();
+template <class AzureEnvClass>
+class AzureFileSystemTestImpl : public AzureFileSystemTest {
+ public:
+  using AzureFileSystemTest::AzureFileSystemTest;
+
+  Result<BaseAzureEnv*> GetAzureEnv() const final { return AzureEnvClass::GetInstance(); }
+};
+
+// How to enable the non-Azurite tests:

Review Comment:
   Thanks for these explanations.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1425917099


##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -33,42 +33,85 @@
 #include "arrow/util/logging.h"
 #include "arrow/util/string.h"
 
-namespace arrow {
-namespace fs {
+namespace arrow::fs {
+
+namespace Blobs = Azure::Storage::Blobs;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
+namespace Storage = Azure::Storage;
 
 // -----------------------------------------------------------------------
 // AzureOptions Implementation
 
 AzureOptions::AzureOptions() = default;
 
+AzureOptions::~AzureOptions() = default;
+
 bool AzureOptions::Equals(const AzureOptions& other) const {
-  return (account_dfs_url == other.account_dfs_url &&
-          account_blob_url == other.account_blob_url &&
-          credentials_kind == other.credentials_kind &&
-          default_metadata == other.default_metadata);
+  // TODO(GH-38598): update here when more auth methods are added.
+  const bool ret = backend == other.backend &&
+                   default_metadata == other.default_metadata &&
+                   account_blob_url_ == other.account_blob_url_ &&
+                   account_dfs_url_ == other.account_dfs_url_ &&
+                   credential_kind_ == other.credential_kind_;
+  if (!ret) {
+    return false;
+  }
+  switch (credential_kind_) {
+    case CredentialKind::kAnonymous:
+      return true;
+    case CredentialKind::kStorageSharedKeyCredential:
+      return storage_shared_key_credential_->AccountName ==
+             other.storage_shared_key_credential_->AccountName;
+  }
+  DCHECK(false);
+  return false;

Review Comment:
   Not having the `default` means the compiler will complain when a new enum entry is added reminding the programmer that they need to add the case to this loop. Putting the `DCHECK(false); return false;` after the switch is required by MSVC (if warning are being treated as errors) so it doesn't complain about the function that doesn't return `void` not having a return statement at the tail.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1425840678


##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -193,51 +265,123 @@ TEST(AzureFileSystem, OptionsCompare) {
   EXPECT_TRUE(options.Equals(options));
 }
 
-class AzureFileSystemTest : public ::testing::Test {
+struct PreexistingData {
+ public:
+  using RNG = std::mt19937_64;
+
  public:
+  const std::string container_name;
+  static constexpr char const* kObjectName = "test-object-name";
+
+  static constexpr char const* kLoremIpsum = R"""(
+Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
+incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
+nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
+Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
+fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
+culpa qui officia deserunt mollit anim id est laborum.
+)""";
+
+ public:
+  explicit PreexistingData(RNG& rng) : container_name{RandomContainerName(rng)} {}
+
+  // Accessors
+  std::string ContainerPath() const { return container_name + '/'; }
+  std::string ObjectPath() const { return ContainerPath() + kObjectName; }
+  std::string NotFoundObjectPath() const { return ContainerPath() + "not-found"; }
+
+  std::string RandomDirectoryPath(RNG& rng) {
+    return internal::ConcatAbstractPath(container_name, RandomDirectoryName(rng));
+  }
+
+  // Utilities
+
+  static std::string RandomContainerName(RNG& rng) { return RandomChars(32, rng); }
+  static std::string RandomDirectoryName(RNG& rng) { return RandomChars(32, rng); }
+
+  static std::string RandomLine(int lineno, std::size_t width, RNG& rng) {

Review Comment:
   Yeah. Cleaning up. This is code I moved. I never write `std::size_t`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119 [C++]: Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on PR #39207:
URL: https://github.com/apache/arrow/pull/39207#issuecomment-1853156796

   @kou @Tom-Newton @av8or1


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119 [C++]: Refactor the Azure FS tests [arrow]

Posted by "kou (via GitHub)" <gi...@apache.org>.
kou commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1424774080


##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -33,42 +33,85 @@
 #include "arrow/util/logging.h"
 #include "arrow/util/string.h"
 
-namespace arrow {
-namespace fs {
+namespace arrow::fs {
+
+namespace Blobs = Azure::Storage::Blobs;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
+namespace Storage = Azure::Storage;
 
 // -----------------------------------------------------------------------
 // AzureOptions Implementation
 
 AzureOptions::AzureOptions() = default;
 
+AzureOptions::~AzureOptions() = default;
+
 bool AzureOptions::Equals(const AzureOptions& other) const {
-  return (account_dfs_url == other.account_dfs_url &&
-          account_blob_url == other.account_blob_url &&
-          credentials_kind == other.credentials_kind &&
-          default_metadata == other.default_metadata);
+  // TODO(GH-38598): update here when more auth methods are added.
+  const bool ret = backend == other.backend &&

Review Comment:
   How about using more meaningful name such as `equal`?



##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -33,42 +33,85 @@
 #include "arrow/util/logging.h"
 #include "arrow/util/string.h"
 
-namespace arrow {
-namespace fs {
+namespace arrow::fs {
+
+namespace Blobs = Azure::Storage::Blobs;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
+namespace Storage = Azure::Storage;
 
 // -----------------------------------------------------------------------
 // AzureOptions Implementation
 
 AzureOptions::AzureOptions() = default;
 
+AzureOptions::~AzureOptions() = default;
+
 bool AzureOptions::Equals(const AzureOptions& other) const {
-  return (account_dfs_url == other.account_dfs_url &&
-          account_blob_url == other.account_blob_url &&
-          credentials_kind == other.credentials_kind &&
-          default_metadata == other.default_metadata);
+  // TODO(GH-38598): update here when more auth methods are added.
+  const bool ret = backend == other.backend &&
+                   default_metadata == other.default_metadata &&
+                   account_blob_url_ == other.account_blob_url_ &&
+                   account_dfs_url_ == other.account_dfs_url_ &&
+                   credential_kind_ == other.credential_kind_;
+  if (!ret) {
+    return false;
+  }
+  switch (credential_kind_) {
+    case CredentialKind::kAnonymous:
+      return true;
+    case CredentialKind::kStorageSharedKeyCredential:
+      return storage_shared_key_credential_->AccountName ==
+             other.storage_shared_key_credential_->AccountName;
+  }
+  DCHECK(false);
+  return false;

Review Comment:
   How about using `default`?
   
   ```suggestion
       default:
         DCHECK(false);
         return false;
     }
   ```
   
   We may want to add a comment something like "never happen".



##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -193,51 +265,123 @@ TEST(AzureFileSystem, OptionsCompare) {
   EXPECT_TRUE(options.Equals(options));
 }
 
-class AzureFileSystemTest : public ::testing::Test {
+struct PreexistingData {
+ public:
+  using RNG = std::mt19937_64;
+
  public:
+  const std::string container_name;
+  static constexpr char const* kObjectName = "test-object-name";
+
+  static constexpr char const* kLoremIpsum = R"""(
+Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
+incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
+nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
+Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
+fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
+culpa qui officia deserunt mollit anim id est laborum.
+)""";
+
+ public:
+  explicit PreexistingData(RNG& rng) : container_name{RandomContainerName(rng)} {}
+
+  // Accessors
+  std::string ContainerPath() const { return container_name + '/'; }
+  std::string ObjectPath() const { return ContainerPath() + kObjectName; }
+  std::string NotFoundObjectPath() const { return ContainerPath() + "not-found"; }
+
+  std::string RandomDirectoryPath(RNG& rng) {
+    return internal::ConcatAbstractPath(container_name, RandomDirectoryName(rng));
+  }
+
+  // Utilities
+
+  static std::string RandomContainerName(RNG& rng) { return RandomChars(32, rng); }
+  static std::string RandomDirectoryName(RNG& rng) { return RandomChars(32, rng); }
+
+  static std::string RandomLine(int lineno, std::size_t width, RNG& rng) {
+    auto line = std::to_string(lineno) + ":    ";
+    line += RandomChars(width - line.size() - 1, rng);
+    line += '\n';
+    return line;
+  }
+
+  static std::size_t RandomIndex(std::size_t end, RNG& rng) {
+    return std::uniform_int_distribution<std::size_t>(0, end - 1)(rng);
+  }
+
+  static std::string RandomChars(std::size_t count, RNG& rng) {
+    auto const fillers = std::string("abcdefghijlkmnopqrstuvwxyz0123456789");
+    std::uniform_int_distribution<std::size_t> d(0, fillers.size() - 1);
+    std::string s;
+    std::generate_n(std::back_inserter(s), count, [&] { return fillers[d(rng)]; });
+    return s;
+  }
+};
+
+class AzureFileSystemTest : public ::testing::Test {
+ protected:
+  // Set in constructor
+  std::mt19937_64 rng_;
+
+  // Set in SetUp()
+  int64_t debug_log_start_ = 0;
+  bool set_up_succeeded_ = false;
+  AzureOptions options_;
+
   std::shared_ptr<FileSystem> fs_;
   std::unique_ptr<Blobs::BlobServiceClient> blob_service_client_;
-  std::unique_ptr<Files::DataLake::DataLakeServiceClient> datalake_service_client_;
-  AzureOptions options_;
-  std::mt19937_64 generator_;
-  std::string container_name_;
-  bool suite_skipped_ = false;
+  std::unique_ptr<DataLake::DataLakeServiceClient> datalake_service_client_;
 
-  AzureFileSystemTest() : generator_(std::random_device()()) {}
+ public:
+  AzureFileSystemTest() : rng_(std::random_device()()) {}
+
+  virtual Result<BaseAzureEnv*> GetAzureEnv() const = 0;
 
-  virtual Result<AzureOptions> MakeOptions() = 0;
+  static Result<AzureOptions> MakeOptions(BaseAzureEnv* env) {
+    AzureOptions options;
+    options.backend = env->backend();
+    ARROW_EXPECT_OK(
+        options.ConfigureAccountKeyCredential(env->account_name(), env->account_key()));
+    return options;
+  }
 
   void SetUp() override {
-    auto options = MakeOptions();
-    if (options.ok()) {
-      options_ = *options;
+    auto make_options = [this]() -> Result<AzureOptions> {
+      ARROW_ASSIGN_OR_RAISE(auto env, GetAzureEnv());
+      EXPECT_THAT(env, NotNull());
+      ARROW_ASSIGN_OR_RAISE(debug_log_start_, env->GetDebugLogSize());
+      return MakeOptions(env);
+    };
+    auto options_res = make_options();
+    if (!options_res.ok() && options_res.status().IsCancelled()) {
+      GTEST_SKIP() << options_res.status().message();
     } else {
-      suite_skipped_ = true;
-      GTEST_SKIP() << options.status().message();
+      EXPECT_OK_AND_ASSIGN(options_, std::move(options_res));

Review Comment:
   Do we need this explicit `std::move()`?



##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -71,56 +72,113 @@ using ::testing::Not;
 using ::testing::NotNull;
 
 namespace Blobs = Azure::Storage::Blobs;
-namespace Files = Azure::Storage::Files;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
 
-auto const* kLoremIpsum = R"""(
-Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
-incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
-nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
-Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
-fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
-culpa qui officia deserunt mollit anim id est laborum.
-)""";
+class BaseAzureEnv : public ::testing::Environment {
+ protected:
+  std::string account_name_;
+  std::string account_key_;
+
+  BaseAzureEnv(std::string account_name, std::string account_key)
+      : account_name_(std::move(account_name)), account_key_(std::move(account_key)) {}
 
-class AzuriteEnv : public ::testing::Environment {
  public:
-  AzuriteEnv() {
-    account_name_ = "devstoreaccount1";
-    account_key_ =
-        "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/"
-        "KBHBeksoGMGw==";
-    auto exe_path = bp::search_path("azurite");
-    if (exe_path.empty()) {
-      auto error = std::string("Could not find Azurite emulator.");
-      status_ = Status::Invalid(error);
-      return;
+  ~BaseAzureEnv() override = default;
+
+  const std::string& account_name() const { return account_name_; }
+  const std::string& account_key() const { return account_key_; }
+
+  virtual AzureBackend backend() const = 0;
+
+  virtual bool WithHierarchicalNamespace() const { return false; }
+
+  virtual Result<int64_t> GetDebugLogSize() { return 0; }
+  virtual Status DumpDebugLog(int64_t position) {
+    return Status::NotImplemented("BaseAzureEnv::DumpDebugLog");
+  }
+};
+
+template <class AzureEnvClass>
+class AzureEnvImpl : public BaseAzureEnv {
+ protected:
+  static Result<BaseAzureEnv*> MakeAndAddToGlobalTestEnvironment() {
+    ARROW_ASSIGN_OR_RAISE(auto instance, AzureEnvClass::Make());
+    auto* heap_ptr = instance.release();
+    ::testing::AddGlobalTestEnvironment(heap_ptr);
+    return heap_ptr;
+  }
+
+  using BaseAzureEnv::BaseAzureEnv;
+
+  static Result<std::unique_ptr<AzureEnvClass>> MakeFromEnvVars(
+      const std::string& account_name_var, const std::string& account_key_var) {
+    const auto account_name = std::getenv(account_name_var.c_str());
+    const auto account_key = std::getenv(account_key_var.c_str());
+    if (!account_name) {
+      return Status::Cancelled(account_name_var + " not set.");
     }
-    auto temp_dir_ = *TemporaryDir::Make("azurefs-test-");
-    auto debug_log_path_result = temp_dir_->path().Join("debug.log");
-    if (!debug_log_path_result.ok()) {
-      status_ = debug_log_path_result.status();
-      return;
+    if (!account_key) {
+      return Status::Cancelled(account_key_var + " not set.");
     }
-    debug_log_path_ = *debug_log_path_result;
-    server_process_ =
-        bp::child(boost::this_process::environment(), exe_path, "--silent", "--location",
-                  temp_dir_->path().ToString(), "--debug", debug_log_path_.ToString());
-    if (!(server_process_.valid() && server_process_.running())) {
-      auto error = "Could not start Azurite emulator.";
-      server_process_.terminate();
-      server_process_.wait();
-      status_ = Status::Invalid(error);
-      return;
+    return std::unique_ptr<AzureEnvClass>{new AzureEnvClass(account_name, account_key)};
+  }
+
+ public:
+  ~AzureEnvImpl() override = default;
+
+  static Result<BaseAzureEnv*> GetInstance() {
+    static auto env = MakeAndAddToGlobalTestEnvironment();
+    if (env.ok()) {
+      return env;
     }
-    status_ = Status::OK();
+    return env.status();

Review Comment:
   Can we just always return `env`?
   (`return env`)



##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -33,42 +33,85 @@
 #include "arrow/util/logging.h"
 #include "arrow/util/string.h"
 
-namespace arrow {
-namespace fs {
+namespace arrow::fs {
+
+namespace Blobs = Azure::Storage::Blobs;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
+namespace Storage = Azure::Storage;
 
 // -----------------------------------------------------------------------
 // AzureOptions Implementation
 
 AzureOptions::AzureOptions() = default;
 
+AzureOptions::~AzureOptions() = default;
+
 bool AzureOptions::Equals(const AzureOptions& other) const {
-  return (account_dfs_url == other.account_dfs_url &&
-          account_blob_url == other.account_blob_url &&
-          credentials_kind == other.credentials_kind &&
-          default_metadata == other.default_metadata);
+  // TODO(GH-38598): update here when more auth methods are added.
+  const bool ret = backend == other.backend &&
+                   default_metadata == other.default_metadata &&
+                   account_blob_url_ == other.account_blob_url_ &&
+                   account_dfs_url_ == other.account_dfs_url_ &&
+                   credential_kind_ == other.credential_kind_;
+  if (!ret) {
+    return false;
+  }
+  switch (credential_kind_) {
+    case CredentialKind::kAnonymous:
+      return true;
+    case CredentialKind::kStorageSharedKeyCredential:
+      return storage_shared_key_credential_->AccountName ==
+             other.storage_shared_key_credential_->AccountName;
+  }
+  DCHECK(false);
+  return false;
 }
 
-Status AzureOptions::ConfigureAccountKeyCredentials(const std::string& account_name,
-                                                    const std::string& account_key) {
-  if (this->backend == AzureBackend::Azurite) {
-    account_blob_url = "http://127.0.0.1:10000/" + account_name + "/";
-    account_dfs_url = "http://127.0.0.1:10000/" + account_name + "/";
+Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_name,
+                                                   const std::string& account_key) {
+  if (this->backend == AzureBackend::kAzurite) {
+    account_blob_url_ = "http://127.0.0.1:10000/" + account_name + "/";
+    account_dfs_url_ = "http://127.0.0.1:10000/" + account_name + "/";
   } else {
-    account_dfs_url = "https://" + account_name + ".dfs.core.windows.net/";
-    account_blob_url = "https://" + account_name + ".blob.core.windows.net/";
+    account_dfs_url_ = "https://" + account_name + ".dfs.core.windows.net/";
+    account_blob_url_ = "https://" + account_name + ".blob.core.windows.net/";
   }
-  storage_credentials_provider =
-      std::make_shared<Azure::Storage::StorageSharedKeyCredential>(account_name,
-                                                                   account_key);
-  credentials_kind = AzureCredentialsKind::StorageCredentials;
+  credential_kind_ = CredentialKind::kStorageSharedKeyCredential;
+  storage_shared_key_credential_ =
+      std::make_shared<Storage::StorageSharedKeyCredential>(account_name, account_key);
   return Status::OK();
 }
 
+std::unique_ptr<Blobs::BlobServiceClient> AzureOptions::MakeBlobServiceClient() const {
+  switch (credential_kind_) {
+    case CredentialKind::kAnonymous:
+      break;
+    case CredentialKind::kStorageSharedKeyCredential:
+      return std::make_unique<Blobs::BlobServiceClient>(account_blob_url_,
+                                                        storage_shared_key_credential_);
+  }
+  DCHECK(false) << "AzureOptions doesn't contain a valid auth configuration";
+  return nullptr;
+}
+
+std::unique_ptr<DataLake::DataLakeServiceClient> AzureOptions::MakeDataLakeServiceClient()
+    const {
+  switch (credential_kind_) {
+    case CredentialKind::kAnonymous:
+      break;
+    case CredentialKind::kStorageSharedKeyCredential:
+      return std::make_unique<DataLake::DataLakeServiceClient>(
+          account_dfs_url_, storage_shared_key_credential_);
+  }
+  DCHECK(false) << "AzureOptions doesn't contain a valid auth configuration";
+  return nullptr;

Review Comment:
   ditto.



##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -33,42 +33,85 @@
 #include "arrow/util/logging.h"
 #include "arrow/util/string.h"
 
-namespace arrow {
-namespace fs {
+namespace arrow::fs {
+
+namespace Blobs = Azure::Storage::Blobs;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
+namespace Storage = Azure::Storage;
 
 // -----------------------------------------------------------------------
 // AzureOptions Implementation
 
 AzureOptions::AzureOptions() = default;
 
+AzureOptions::~AzureOptions() = default;
+
 bool AzureOptions::Equals(const AzureOptions& other) const {
-  return (account_dfs_url == other.account_dfs_url &&
-          account_blob_url == other.account_blob_url &&
-          credentials_kind == other.credentials_kind &&
-          default_metadata == other.default_metadata);
+  // TODO(GH-38598): update here when more auth methods are added.
+  const bool ret = backend == other.backend &&
+                   default_metadata == other.default_metadata &&
+                   account_blob_url_ == other.account_blob_url_ &&
+                   account_dfs_url_ == other.account_dfs_url_ &&
+                   credential_kind_ == other.credential_kind_;
+  if (!ret) {
+    return false;
+  }
+  switch (credential_kind_) {
+    case CredentialKind::kAnonymous:
+      return true;
+    case CredentialKind::kStorageSharedKeyCredential:
+      return storage_shared_key_credential_->AccountName ==
+             other.storage_shared_key_credential_->AccountName;
+  }
+  DCHECK(false);
+  return false;
 }
 
-Status AzureOptions::ConfigureAccountKeyCredentials(const std::string& account_name,
-                                                    const std::string& account_key) {
-  if (this->backend == AzureBackend::Azurite) {
-    account_blob_url = "http://127.0.0.1:10000/" + account_name + "/";
-    account_dfs_url = "http://127.0.0.1:10000/" + account_name + "/";
+Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_name,
+                                                   const std::string& account_key) {
+  if (this->backend == AzureBackend::kAzurite) {
+    account_blob_url_ = "http://127.0.0.1:10000/" + account_name + "/";
+    account_dfs_url_ = "http://127.0.0.1:10000/" + account_name + "/";
   } else {
-    account_dfs_url = "https://" + account_name + ".dfs.core.windows.net/";
-    account_blob_url = "https://" + account_name + ".blob.core.windows.net/";
+    account_dfs_url_ = "https://" + account_name + ".dfs.core.windows.net/";
+    account_blob_url_ = "https://" + account_name + ".blob.core.windows.net/";
   }
-  storage_credentials_provider =
-      std::make_shared<Azure::Storage::StorageSharedKeyCredential>(account_name,
-                                                                   account_key);
-  credentials_kind = AzureCredentialsKind::StorageCredentials;
+  credential_kind_ = CredentialKind::kStorageSharedKeyCredential;
+  storage_shared_key_credential_ =
+      std::make_shared<Storage::StorageSharedKeyCredential>(account_name, account_key);
   return Status::OK();
 }
 
+std::unique_ptr<Blobs::BlobServiceClient> AzureOptions::MakeBlobServiceClient() const {
+  switch (credential_kind_) {
+    case CredentialKind::kAnonymous:
+      break;
+    case CredentialKind::kStorageSharedKeyCredential:
+      return std::make_unique<Blobs::BlobServiceClient>(account_blob_url_,
+                                                        storage_shared_key_credential_);
+  }
+  DCHECK(false) << "AzureOptions doesn't contain a valid auth configuration";
+  return nullptr;

Review Comment:
   How about using `Result<std::unique_ptr<Blobs::BlobServiceClient>>` instead?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1425797938


##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -71,56 +72,113 @@ using ::testing::Not;
 using ::testing::NotNull;
 
 namespace Blobs = Azure::Storage::Blobs;
-namespace Files = Azure::Storage::Files;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
 
-auto const* kLoremIpsum = R"""(
-Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
-incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
-nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
-Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
-fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
-culpa qui officia deserunt mollit anim id est laborum.
-)""";
+class BaseAzureEnv : public ::testing::Environment {
+ protected:
+  std::string account_name_;
+  std::string account_key_;
+
+  BaseAzureEnv(std::string account_name, std::string account_key)
+      : account_name_(std::move(account_name)), account_key_(std::move(account_key)) {}
 
-class AzuriteEnv : public ::testing::Environment {
  public:
-  AzuriteEnv() {
-    account_name_ = "devstoreaccount1";
-    account_key_ =
-        "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/"
-        "KBHBeksoGMGw==";
-    auto exe_path = bp::search_path("azurite");
-    if (exe_path.empty()) {
-      auto error = std::string("Could not find Azurite emulator.");
-      status_ = Status::Invalid(error);
-      return;
+  ~BaseAzureEnv() override = default;
+
+  const std::string& account_name() const { return account_name_; }
+  const std::string& account_key() const { return account_key_; }
+
+  virtual AzureBackend backend() const = 0;
+
+  virtual bool WithHierarchicalNamespace() const { return false; }
+
+  virtual Result<int64_t> GetDebugLogSize() { return 0; }
+  virtual Status DumpDebugLog(int64_t position) {
+    return Status::NotImplemented("BaseAzureEnv::DumpDebugLog");
+  }
+};
+
+template <class AzureEnvClass>
+class AzureEnvImpl : public BaseAzureEnv {
+ protected:
+  static Result<BaseAzureEnv*> MakeAndAddToGlobalTestEnvironment() {
+    ARROW_ASSIGN_OR_RAISE(auto instance, AzureEnvClass::Make());
+    auto* heap_ptr = instance.release();
+    ::testing::AddGlobalTestEnvironment(heap_ptr);
+    return heap_ptr;
+  }
+
+  using BaseAzureEnv::BaseAzureEnv;
+
+  static Result<std::unique_ptr<AzureEnvClass>> MakeFromEnvVars(
+      const std::string& account_name_var, const std::string& account_key_var) {
+    const auto account_name = std::getenv(account_name_var.c_str());
+    const auto account_key = std::getenv(account_key_var.c_str());
+    if (!account_name) {
+      return Status::Cancelled(account_name_var + " not set.");
     }
-    auto temp_dir_ = *TemporaryDir::Make("azurefs-test-");
-    auto debug_log_path_result = temp_dir_->path().Join("debug.log");
-    if (!debug_log_path_result.ok()) {
-      status_ = debug_log_path_result.status();
-      return;
+    if (!account_key) {
+      return Status::Cancelled(account_key_var + " not set.");
     }
-    debug_log_path_ = *debug_log_path_result;
-    server_process_ =
-        bp::child(boost::this_process::environment(), exe_path, "--silent", "--location",
-                  temp_dir_->path().ToString(), "--debug", debug_log_path_.ToString());
-    if (!(server_process_.valid() && server_process_.running())) {
-      auto error = "Could not start Azurite emulator.";
-      server_process_.terminate();
-      server_process_.wait();
-      status_ = Status::Invalid(error);
-      return;
+    return std::unique_ptr<AzureEnvClass>{new AzureEnvClass(account_name, account_key)};
+  }
+
+ public:
+  ~AzureEnvImpl() override = default;

Review Comment:
   Same explanation as above.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1425791053


##########
cpp/src/arrow/filesystem/azurefs.h:
##########
@@ -25,86 +25,104 @@
 #include "arrow/util/macros.h"
 #include "arrow/util/uri.h"
 
-namespace Azure {
-namespace Core {
-namespace Credentials {
-
+namespace Azure::Core::Credentials {
 class TokenCredential;
+}
 
-}  // namespace Credentials
-}  // namespace Core
-namespace Storage {
-
+namespace Azure::Storage {
 class StorageSharedKeyCredential;
+}
 
-}  // namespace Storage
-}  // namespace Azure
-
-namespace arrow {
-namespace fs {
-
-enum class AzureCredentialsKind : int8_t {
-  /// Anonymous access (no credentials used), public
-  Anonymous,
-  /// Use explicitly-provided access key pair
-  StorageCredentials,
-  /// Use ServicePrincipleCredentials
-  ServicePrincipleCredentials,
-  /// Use Sas Token to authenticate
-  Sas,
-  /// Use Connection String
-  ConnectionString
-};
+namespace Azure::Storage::Blobs {
+class BlobServiceClient;
+}
+
+namespace Azure::Storage::Files::DataLake {
+class DataLakeServiceClient;
+}
+
+namespace arrow::fs {
 
 enum class AzureBackend : bool {

Review Comment:
   Wow. I missed this in my refactoring. I'm removing it now. :)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1425850839


##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -71,56 +72,113 @@ using ::testing::Not;
 using ::testing::NotNull;
 
 namespace Blobs = Azure::Storage::Blobs;
-namespace Files = Azure::Storage::Files;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
 
-auto const* kLoremIpsum = R"""(
-Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
-incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
-nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
-Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
-fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
-culpa qui officia deserunt mollit anim id est laborum.
-)""";
+class BaseAzureEnv : public ::testing::Environment {
+ protected:
+  std::string account_name_;
+  std::string account_key_;
+
+  BaseAzureEnv(std::string account_name, std::string account_key)
+      : account_name_(std::move(account_name)), account_key_(std::move(account_key)) {}
 
-class AzuriteEnv : public ::testing::Environment {
  public:
-  AzuriteEnv() {
-    account_name_ = "devstoreaccount1";
-    account_key_ =
-        "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/"
-        "KBHBeksoGMGw==";
-    auto exe_path = bp::search_path("azurite");
-    if (exe_path.empty()) {
-      auto error = std::string("Could not find Azurite emulator.");
-      status_ = Status::Invalid(error);
-      return;
+  ~BaseAzureEnv() override = default;
+
+  const std::string& account_name() const { return account_name_; }
+  const std::string& account_key() const { return account_key_; }
+
+  virtual AzureBackend backend() const = 0;
+
+  virtual bool WithHierarchicalNamespace() const { return false; }
+
+  virtual Result<int64_t> GetDebugLogSize() { return 0; }
+  virtual Status DumpDebugLog(int64_t position) {
+    return Status::NotImplemented("BaseAzureEnv::DumpDebugLog");
+  }
+};
+
+template <class AzureEnvClass>
+class AzureEnvImpl : public BaseAzureEnv {
+ protected:
+  static Result<BaseAzureEnv*> MakeAndAddToGlobalTestEnvironment() {
+    ARROW_ASSIGN_OR_RAISE(auto instance, AzureEnvClass::Make());
+    auto* heap_ptr = instance.release();
+    ::testing::AddGlobalTestEnvironment(heap_ptr);
+    return heap_ptr;
+  }
+
+  using BaseAzureEnv::BaseAzureEnv;
+
+  static Result<std::unique_ptr<AzureEnvClass>> MakeFromEnvVars(
+      const std::string& account_name_var, const std::string& account_key_var) {
+    const auto account_name = std::getenv(account_name_var.c_str());
+    const auto account_key = std::getenv(account_key_var.c_str());
+    if (!account_name) {
+      return Status::Cancelled(account_name_var + " not set.");
     }
-    auto temp_dir_ = *TemporaryDir::Make("azurefs-test-");
-    auto debug_log_path_result = temp_dir_->path().Join("debug.log");
-    if (!debug_log_path_result.ok()) {
-      status_ = debug_log_path_result.status();
-      return;
+    if (!account_key) {
+      return Status::Cancelled(account_key_var + " not set.");

Review Comment:
   Makes sense! 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1425834283


##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -71,56 +72,113 @@ using ::testing::Not;
 using ::testing::NotNull;
 
 namespace Blobs = Azure::Storage::Blobs;
-namespace Files = Azure::Storage::Files;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
 
-auto const* kLoremIpsum = R"""(
-Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
-incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
-nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
-Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
-fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
-culpa qui officia deserunt mollit anim id est laborum.
-)""";
+class BaseAzureEnv : public ::testing::Environment {
+ protected:
+  std::string account_name_;
+  std::string account_key_;
+
+  BaseAzureEnv(std::string account_name, std::string account_key)
+      : account_name_(std::move(account_name)), account_key_(std::move(account_key)) {}
 
-class AzuriteEnv : public ::testing::Environment {
  public:
-  AzuriteEnv() {
-    account_name_ = "devstoreaccount1";
-    account_key_ =
-        "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/"
-        "KBHBeksoGMGw==";
-    auto exe_path = bp::search_path("azurite");
-    if (exe_path.empty()) {
-      auto error = std::string("Could not find Azurite emulator.");
-      status_ = Status::Invalid(error);
-      return;
+  ~BaseAzureEnv() override = default;
+
+  const std::string& account_name() const { return account_name_; }
+  const std::string& account_key() const { return account_key_; }
+
+  virtual AzureBackend backend() const = 0;
+
+  virtual bool WithHierarchicalNamespace() const { return false; }
+
+  virtual Result<int64_t> GetDebugLogSize() { return 0; }
+  virtual Status DumpDebugLog(int64_t position) {
+    return Status::NotImplemented("BaseAzureEnv::DumpDebugLog");
+  }
+};
+
+template <class AzureEnvClass>
+class AzureEnvImpl : public BaseAzureEnv {
+ protected:
+  static Result<BaseAzureEnv*> MakeAndAddToGlobalTestEnvironment() {
+    ARROW_ASSIGN_OR_RAISE(auto instance, AzureEnvClass::Make());
+    auto* heap_ptr = instance.release();
+    ::testing::AddGlobalTestEnvironment(heap_ptr);
+    return heap_ptr;
+  }
+
+  using BaseAzureEnv::BaseAzureEnv;
+
+  static Result<std::unique_ptr<AzureEnvClass>> MakeFromEnvVars(

Review Comment:
   I'm adding comments now.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119 [C++]: Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1424774210


##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -536,12 +653,78 @@ void AzureFileSystemTest::RunGetFileInfoObjectWithNestedStructureTest() {
                  FileType::NotFound);
 }
 
-TEST_F(AzuriteFileSystemTest, GetFileInfoObjectWithNestedStructure) {
-  RunGetFileInfoObjectWithNestedStructureTest();
+// How to enable the non-Azurite tests:
+//
+// You need an Azure account. You should be able to create a free account [1].
+// Through the portal Web UI, you should create a storage account [2].
+//
+//
+// A few suggestions on configuration:
+//
+// * Use Standard general-purpose v2 not premium
+// * Use LRS redundancy
+// * Set the default access tier to hot
+// * SFTP, NFS and file shares are not required.
+//
+// You need to enable Hierarchical Namespace on the storage account

Review Comment:
   I removed this line in a fixup commit.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1425793542


##########
cpp/src/arrow/filesystem/azurefs.h:
##########
@@ -25,86 +25,104 @@
 #include "arrow/util/macros.h"
 #include "arrow/util/uri.h"
 
-namespace Azure {
-namespace Core {
-namespace Credentials {
-
+namespace Azure::Core::Credentials {
 class TokenCredential;
+}
 
-}  // namespace Credentials
-}  // namespace Core
-namespace Storage {
-
+namespace Azure::Storage {
 class StorageSharedKeyCredential;
+}
 
-}  // namespace Storage
-}  // namespace Azure
-
-namespace arrow {
-namespace fs {
-
-enum class AzureCredentialsKind : int8_t {
-  /// Anonymous access (no credentials used), public
-  Anonymous,
-  /// Use explicitly-provided access key pair
-  StorageCredentials,
-  /// Use ServicePrincipleCredentials
-  ServicePrincipleCredentials,
-  /// Use Sas Token to authenticate
-  Sas,
-  /// Use Connection String
-  ConnectionString
-};
+namespace Azure::Storage::Blobs {
+class BlobServiceClient;
+}
+
+namespace Azure::Storage::Files::DataLake {
+class DataLakeServiceClient;
+}
+
+namespace arrow::fs {
 
 enum class AzureBackend : bool {
-  /// Official Azure Remote Backend
-  Azure,
-  /// Local Simulated Storage
-  Azurite
+  /// \brief Official Azure Remote Backend
+  kAzure,
+  /// \brief Local Simulated Storage
+  kAzurite
 };
 
 /// Options for the AzureFileSystem implementation.
 struct ARROW_EXPORT AzureOptions {
-  std::string account_dfs_url;
-  std::string account_blob_url;
-  AzureBackend backend = AzureBackend::Azure;
-  AzureCredentialsKind credentials_kind = AzureCredentialsKind::Anonymous;
+  /// \brief The backend to connect to: Azure or Azurite (for testing).
+  AzureBackend backend = AzureBackend::kAzure;
 
-  std::string sas_token;
-  std::string connection_string;
-  std::shared_ptr<Azure::Storage::StorageSharedKeyCredential>
-      storage_credentials_provider;
-  std::shared_ptr<Azure::Core::Credentials::TokenCredential>
-      service_principle_credentials_provider;
+  // TODO(GH-38598): Add support for more auth methods.
+  // std::string connection_string;
+  // std::string sas_token;
 
   /// \brief Default metadata for OpenOutputStream.
   ///
   /// This will be ignored if non-empty metadata is passed to OpenOutputStream.
   std::shared_ptr<const KeyValueMetadata> default_metadata;
 
+ private:
+  std::string account_blob_url_;
+  std::string account_dfs_url_;
+
+  enum class CredentialKind {
+    kAnonymous,
+    kStorageSharedKeyCredential,
+  } credential_kind_ = CredentialKind::kAnonymous;
+
+  std::shared_ptr<Azure::Storage::StorageSharedKeyCredential>
+      storage_shared_key_credential_;
+
+ public:
   AzureOptions();
+  ~AzureOptions();
 
-  Status ConfigureAccountKeyCredentials(const std::string& account_name,
-                                        const std::string& account_key);
+  Status ConfigureAccountKeyCredential(const std::string& account_name,
+                                       const std::string& account_key);
 
   bool Equals(const AzureOptions& other) const;
+
+  const std::string& AccountBlobUrl() const { return account_blob_url_; }
+  const std::string& AccountDfsUrl() const { return account_dfs_url_; }
+
+  std::unique_ptr<Azure::Storage::Blobs::BlobServiceClient> MakeBlobServiceClient() const;
+
+  std::unique_ptr<Azure::Storage::Files::DataLake::DataLakeServiceClient>
+  MakeDataLakeServiceClient() const;

Review Comment:
   This is a simple factory that uses the configured credentials to create the client instance. I expose this, so that I don't have to expose all the credentials fields.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1425799335


##########
cpp/src/arrow/filesystem/azurefs.h:
##########
@@ -25,86 +25,104 @@
 #include "arrow/util/macros.h"
 #include "arrow/util/uri.h"
 
-namespace Azure {
-namespace Core {
-namespace Credentials {
-
+namespace Azure::Core::Credentials {
 class TokenCredential;
+}
 
-}  // namespace Credentials
-}  // namespace Core
-namespace Storage {
-
+namespace Azure::Storage {
 class StorageSharedKeyCredential;
+}
 
-}  // namespace Storage
-}  // namespace Azure
-
-namespace arrow {
-namespace fs {
-
-enum class AzureCredentialsKind : int8_t {
-  /// Anonymous access (no credentials used), public
-  Anonymous,
-  /// Use explicitly-provided access key pair
-  StorageCredentials,
-  /// Use ServicePrincipleCredentials
-  ServicePrincipleCredentials,
-  /// Use Sas Token to authenticate
-  Sas,
-  /// Use Connection String
-  ConnectionString
-};
+namespace Azure::Storage::Blobs {
+class BlobServiceClient;
+}
+
+namespace Azure::Storage::Files::DataLake {
+class DataLakeServiceClient;
+}
+
+namespace arrow::fs {
 
 enum class AzureBackend : bool {

Review Comment:
   I didn't invent this rule. Someone explained it to me. It might be in the C++ Core Guidelines document which I haven't read, but I worked with people who did in the past.



##########
cpp/src/arrow/filesystem/azurefs.h:
##########
@@ -25,86 +25,104 @@
 #include "arrow/util/macros.h"
 #include "arrow/util/uri.h"
 
-namespace Azure {
-namespace Core {
-namespace Credentials {
-
+namespace Azure::Core::Credentials {
 class TokenCredential;
+}
 
-}  // namespace Credentials
-}  // namespace Core
-namespace Storage {
-
+namespace Azure::Storage {
 class StorageSharedKeyCredential;
+}
 
-}  // namespace Storage
-}  // namespace Azure
-
-namespace arrow {
-namespace fs {
-
-enum class AzureCredentialsKind : int8_t {
-  /// Anonymous access (no credentials used), public
-  Anonymous,
-  /// Use explicitly-provided access key pair
-  StorageCredentials,
-  /// Use ServicePrincipleCredentials
-  ServicePrincipleCredentials,
-  /// Use Sas Token to authenticate
-  Sas,
-  /// Use Connection String
-  ConnectionString
-};
+namespace Azure::Storage::Blobs {
+class BlobServiceClient;
+}
+
+namespace Azure::Storage::Files::DataLake {
+class DataLakeServiceClient;
+}
+
+namespace arrow::fs {
 
 enum class AzureBackend : bool {

Review Comment:
   I didn't invent this rule. Someone explained it to me. It might be in the C++ Core Guidelines document which I haven't read, but I worked with people who did in the past.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1425915169


##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -33,42 +33,85 @@
 #include "arrow/util/logging.h"
 #include "arrow/util/string.h"
 
-namespace arrow {
-namespace fs {
+namespace arrow::fs {
+
+namespace Blobs = Azure::Storage::Blobs;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
+namespace Storage = Azure::Storage;
 
 // -----------------------------------------------------------------------
 // AzureOptions Implementation
 
 AzureOptions::AzureOptions() = default;
 
+AzureOptions::~AzureOptions() = default;
+
 bool AzureOptions::Equals(const AzureOptions& other) const {
-  return (account_dfs_url == other.account_dfs_url &&
-          account_blob_url == other.account_blob_url &&
-          credentials_kind == other.credentials_kind &&
-          default_metadata == other.default_metadata);
+  // TODO(GH-38598): update here when more auth methods are added.
+  const bool ret = backend == other.backend &&

Review Comment:
   Fixed.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests and filesystem class instantiation [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv merged PR #39207:
URL: https://github.com/apache/arrow/pull/39207


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1426941084


##########
cpp/src/arrow/filesystem/azurefs.h:
##########
@@ -25,86 +25,104 @@
 #include "arrow/util/macros.h"
 #include "arrow/util/uri.h"
 
-namespace Azure {
-namespace Core {
-namespace Credentials {
-
+namespace Azure::Core::Credentials {
 class TokenCredential;
+}
 
-}  // namespace Credentials
-}  // namespace Core
-namespace Storage {
-
+namespace Azure::Storage {
 class StorageSharedKeyCredential;
+}
 
-}  // namespace Storage
-}  // namespace Azure
-
-namespace arrow {
-namespace fs {
-
-enum class AzureCredentialsKind : int8_t {
-  /// Anonymous access (no credentials used), public
-  Anonymous,
-  /// Use explicitly-provided access key pair
-  StorageCredentials,
-  /// Use ServicePrincipleCredentials
-  ServicePrincipleCredentials,
-  /// Use Sas Token to authenticate
-  Sas,
-  /// Use Connection String
-  ConnectionString
-};
+namespace Azure::Storage::Blobs {
+class BlobServiceClient;
+}
+
+namespace Azure::Storage::Files::DataLake {
+class DataLakeServiceClient;
+}
+
+namespace arrow::fs {
 
 enum class AzureBackend : bool {
-  /// Official Azure Remote Backend
-  Azure,
-  /// Local Simulated Storage
-  Azurite
+  /// \brief Official Azure Remote Backend
+  kAzure,
+  /// \brief Local Simulated Storage
+  kAzurite
 };
 
 /// Options for the AzureFileSystem implementation.
 struct ARROW_EXPORT AzureOptions {
-  std::string account_dfs_url;
-  std::string account_blob_url;
-  AzureBackend backend = AzureBackend::Azure;
-  AzureCredentialsKind credentials_kind = AzureCredentialsKind::Anonymous;
+  /// \brief The backend to connect to: Azure or Azurite (for testing).
+  AzureBackend backend = AzureBackend::kAzure;

Review Comment:
   I tried to do on this one, but this is used to configure the URLs in `AzureOptions`, so I will do it in another PR where I'm adding more options to `AzureOptions` and documenting it better.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1426951963


##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -33,42 +33,85 @@
 #include "arrow/util/logging.h"
 #include "arrow/util/string.h"
 
-namespace arrow {
-namespace fs {
+namespace arrow::fs {
+
+namespace Blobs = Azure::Storage::Blobs;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
+namespace Storage = Azure::Storage;
 
 // -----------------------------------------------------------------------
 // AzureOptions Implementation
 
 AzureOptions::AzureOptions() = default;
 
+AzureOptions::~AzureOptions() = default;
+
 bool AzureOptions::Equals(const AzureOptions& other) const {
-  return (account_dfs_url == other.account_dfs_url &&
-          account_blob_url == other.account_blob_url &&
-          credentials_kind == other.credentials_kind &&
-          default_metadata == other.default_metadata);
+  // TODO(GH-38598): update here when more auth methods are added.
+  const bool ret = backend == other.backend &&
+                   default_metadata == other.default_metadata &&
+                   account_blob_url_ == other.account_blob_url_ &&
+                   account_dfs_url_ == other.account_dfs_url_ &&
+                   credential_kind_ == other.credential_kind_;
+  if (!ret) {
+    return false;
+  }
+  switch (credential_kind_) {
+    case CredentialKind::kAnonymous:
+      return true;
+    case CredentialKind::kStorageSharedKeyCredential:
+      return storage_shared_key_credential_->AccountName ==
+             other.storage_shared_key_credential_->AccountName;
+  }
+  DCHECK(false);
+  return false;

Review Comment:
   I ended up with this:
   
   ```cpp
   Result<std::unique_ptr<Blobs::BlobServiceClient>> AzureOptions::MakeBlobServiceClient()
       const {
     switch (credential_kind_) {
       case CredentialKind::kAnonymous:
         break;
       case CredentialKind::kStorageSharedKeyCredential:
         return std::make_unique<Blobs::BlobServiceClient>(account_blob_url_,
                                                           storage_shared_key_credential_);
     }
     return Status::Invalid("AzureOptions doesn't contain a valid auth configuration");
   }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "pitrou (via GitHub)" <gi...@apache.org>.
pitrou commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1426955052


##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -33,42 +33,85 @@
 #include "arrow/util/logging.h"
 #include "arrow/util/string.h"
 
-namespace arrow {
-namespace fs {
+namespace arrow::fs {
+
+namespace Blobs = Azure::Storage::Blobs;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
+namespace Storage = Azure::Storage;
 
 // -----------------------------------------------------------------------
 // AzureOptions Implementation
 
 AzureOptions::AzureOptions() = default;
 
+AzureOptions::~AzureOptions() = default;
+
 bool AzureOptions::Equals(const AzureOptions& other) const {
-  return (account_dfs_url == other.account_dfs_url &&
-          account_blob_url == other.account_blob_url &&
-          credentials_kind == other.credentials_kind &&
-          default_metadata == other.default_metadata);
+  // TODO(GH-38598): update here when more auth methods are added.
+  const bool ret = backend == other.backend &&
+                   default_metadata == other.default_metadata &&
+                   account_blob_url_ == other.account_blob_url_ &&
+                   account_dfs_url_ == other.account_dfs_url_ &&
+                   credential_kind_ == other.credential_kind_;
+  if (!ret) {
+    return false;
+  }
+  switch (credential_kind_) {
+    case CredentialKind::kAnonymous:
+      return true;
+    case CredentialKind::kStorageSharedKeyCredential:
+      return storage_shared_key_credential_->AccountName ==
+             other.storage_shared_key_credential_->AccountName;
+  }
+  DCHECK(false);
+  return false;

Review Comment:
   +1, thank you.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1425799617


##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -71,56 +72,113 @@ using ::testing::Not;
 using ::testing::NotNull;
 
 namespace Blobs = Azure::Storage::Blobs;
-namespace Files = Azure::Storage::Files;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
 
-auto const* kLoremIpsum = R"""(
-Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
-incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
-nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
-Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
-fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
-culpa qui officia deserunt mollit anim id est laborum.
-)""";
+class BaseAzureEnv : public ::testing::Environment {
+ protected:
+  std::string account_name_;
+  std::string account_key_;
+
+  BaseAzureEnv(std::string account_name, std::string account_key)
+      : account_name_(std::move(account_name)), account_key_(std::move(account_key)) {}
 
-class AzuriteEnv : public ::testing::Environment {
  public:
-  AzuriteEnv() {
-    account_name_ = "devstoreaccount1";
-    account_key_ =
-        "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/"
-        "KBHBeksoGMGw==";
-    auto exe_path = bp::search_path("azurite");
-    if (exe_path.empty()) {
-      auto error = std::string("Could not find Azurite emulator.");
-      status_ = Status::Invalid(error);
-      return;
+  ~BaseAzureEnv() override = default;

Review Comment:
   I didn't invent this rule. Someone explained it to me. It might be in the C++ Core Guidelines document which I haven't read, but I worked with people who did in the past.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1425875580


##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -254,54 +398,20 @@ class AzureFileSystemTest : public ::testing::Test {
     return blob_client;
   }
 
-  std::string PreexistingContainerName() const { return container_name_; }
-
-  std::string PreexistingContainerPath() const {
-    return PreexistingContainerName() + '/';
-  }
-
-  static std::string PreexistingObjectName() { return "test-object-name"; }
-
-  std::string PreexistingObjectPath() const {
-    return PreexistingContainerPath() + PreexistingObjectName();
-  }
-
-  std::string NotFoundObjectPath() { return PreexistingContainerPath() + "not-found"; }
-
-  std::string RandomLine(int lineno, std::size_t width) {
-    auto line = std::to_string(lineno) + ":    ";
-    line += RandomChars(width - line.size() - 1);
-    line += '\n';
-    return line;
-  }
-
-  std::size_t RandomIndex(std::size_t end) {
-    return std::uniform_int_distribution<std::size_t>(0, end - 1)(generator_);
-  }
-
-  std::string RandomChars(std::size_t count) {
-    auto const fillers = std::string("abcdefghijlkmnopqrstuvwxyz0123456789");
-    std::uniform_int_distribution<std::size_t> d(0, fillers.size() - 1);
-    std::string s;
-    std::generate_n(std::back_inserter(s), count, [&] { return fillers[d(generator_)]; });
-    return s;
-  }
-
-  std::string RandomContainerName() { return RandomChars(32); }
-
-  std::string RandomDirectoryName() { return RandomChars(32); }
-
-  void UploadLines(const std::vector<std::string>& lines, const char* path_to_file,
+  void UploadLines(const std::vector<std::string>& lines, const std::string& path,
                    int total_size) {
-    const auto path = PreexistingContainerPath() + path_to_file;
     ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {}));
     const auto all_lines = std::accumulate(lines.begin(), lines.end(), std::string(""));
     ASSERT_OK(output->Write(all_lines));
     ASSERT_OK(output->Close());
   }
 
-  void RunGetFileInfoObjectWithNestedStructureTest();
-  void RunGetFileInfoObjectTest();
+  PreexistingData SetUpPreexistingData() {
+    PreexistingData data(rng_);
+    auto container_client = CreateContainer(data.container_name);
+    CreateBlob(container_client, data.kObjectName, PreexistingData::kLoremIpsum);

Review Comment:
   Yes. The Azure SDK uses exceptions.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1425878755


##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -377,199 +487,337 @@ class AzureFileSystemTest : public ::testing::Test {
                    strlen(kSubData));
     AssertFileInfo(infos[10], "container/somefile", FileType::File, strlen(kSomeData));
     AssertFileInfo(infos[11], "empty-container", FileType::Directory);
-    AssertFileInfo(infos[12], PreexistingContainerName(), FileType::Directory);
-    AssertFileInfo(infos[13], PreexistingObjectPath(), FileType::File);
   }
-};
 
-class AzuriteFileSystemTest : public AzureFileSystemTest {
-  Result<AzureOptions> MakeOptions() override {
-    EXPECT_THAT(GetAzuriteEnv(), NotNull());
-    ARROW_EXPECT_OK(GetAzuriteEnv()->status());
-    ARROW_ASSIGN_OR_RAISE(debug_log_start_, GetAzuriteEnv()->GetDebugLogSize());
-    AzureOptions options;
-    options.backend = AzureBackend::Azurite;
-    ARROW_EXPECT_OK(options.ConfigureAccountKeyCredentials(
-        GetAzuriteEnv()->account_name(), GetAzuriteEnv()->account_key()));
-    return options;
+  bool WithHierarchicalNamespace() const {

Review Comment:
   this is `AzureFileSystemTest::WithHierarchicalNamespace()` and not `AzureBaseEnv::WithHierarchicalNamespace()`.
   
   It gets called a lot, so I created this helper that doesn't have to return a `Result`, it can `EXPECT_OK` because it's defined within the test class.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1425940404


##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -377,199 +487,337 @@ class AzureFileSystemTest : public ::testing::Test {
                    strlen(kSubData));
     AssertFileInfo(infos[10], "container/somefile", FileType::File, strlen(kSomeData));
     AssertFileInfo(infos[11], "empty-container", FileType::Directory);
-    AssertFileInfo(infos[12], PreexistingContainerName(), FileType::Directory);
-    AssertFileInfo(infos[13], PreexistingObjectPath(), FileType::File);
   }
-};
 
-class AzuriteFileSystemTest : public AzureFileSystemTest {
-  Result<AzureOptions> MakeOptions() override {
-    EXPECT_THAT(GetAzuriteEnv(), NotNull());
-    ARROW_EXPECT_OK(GetAzuriteEnv()->status());
-    ARROW_ASSIGN_OR_RAISE(debug_log_start_, GetAzuriteEnv()->GetDebugLogSize());
-    AzureOptions options;
-    options.backend = AzureBackend::Azurite;
-    ARROW_EXPECT_OK(options.ConfigureAccountKeyCredentials(
-        GetAzuriteEnv()->account_name(), GetAzuriteEnv()->account_key()));
-    return options;
+  bool WithHierarchicalNamespace() const {
+    EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv());
+    return env->WithHierarchicalNamespace();
   }
 
-  void TearDown() override {
-    AzureFileSystemTest::TearDown();
-    if (HasFailure()) {
-      // XXX: This may not include all logs in the target test because
-      // Azurite doesn't flush debug logs immediately... You may want
-      // to check the log manually...
-      ARROW_IGNORE_EXPR(GetAzuriteEnv()->DumpDebugLog(debug_log_start_));
+  // Tests that are called from more than one implementation of AzureFileSystemTest
+
+  void DetectHierarchicalNamespaceTest();

Review Comment:
   Done.
   
   ```vim
   :%s/\(\w\+\)Test\>/Test\1/gc`
   ```



##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -377,199 +487,337 @@ class AzureFileSystemTest : public ::testing::Test {
                    strlen(kSubData));
     AssertFileInfo(infos[10], "container/somefile", FileType::File, strlen(kSomeData));
     AssertFileInfo(infos[11], "empty-container", FileType::Directory);
-    AssertFileInfo(infos[12], PreexistingContainerName(), FileType::Directory);
-    AssertFileInfo(infos[13], PreexistingObjectPath(), FileType::File);
   }
-};
 
-class AzuriteFileSystemTest : public AzureFileSystemTest {
-  Result<AzureOptions> MakeOptions() override {
-    EXPECT_THAT(GetAzuriteEnv(), NotNull());
-    ARROW_EXPECT_OK(GetAzuriteEnv()->status());
-    ARROW_ASSIGN_OR_RAISE(debug_log_start_, GetAzuriteEnv()->GetDebugLogSize());
-    AzureOptions options;
-    options.backend = AzureBackend::Azurite;
-    ARROW_EXPECT_OK(options.ConfigureAccountKeyCredentials(
-        GetAzuriteEnv()->account_name(), GetAzuriteEnv()->account_key()));
-    return options;
+  bool WithHierarchicalNamespace() const {
+    EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv());
+    return env->WithHierarchicalNamespace();
   }
 
-  void TearDown() override {
-    AzureFileSystemTest::TearDown();
-    if (HasFailure()) {
-      // XXX: This may not include all logs in the target test because
-      // Azurite doesn't flush debug logs immediately... You may want
-      // to check the log manually...
-      ARROW_IGNORE_EXPR(GetAzuriteEnv()->DumpDebugLog(debug_log_start_));
+  // Tests that are called from more than one implementation of AzureFileSystemTest
+
+  void DetectHierarchicalNamespaceTest();

Review Comment:
   Done.
   
   ```vim
   :%s/\(\w\+\)Test\>/Test\1/gc
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1425936535


##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -33,42 +33,85 @@
 #include "arrow/util/logging.h"
 #include "arrow/util/string.h"
 
-namespace arrow {
-namespace fs {
+namespace arrow::fs {
+
+namespace Blobs = Azure::Storage::Blobs;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
+namespace Storage = Azure::Storage;
 
 // -----------------------------------------------------------------------
 // AzureOptions Implementation
 
 AzureOptions::AzureOptions() = default;
 
+AzureOptions::~AzureOptions() = default;
+
 bool AzureOptions::Equals(const AzureOptions& other) const {
-  return (account_dfs_url == other.account_dfs_url &&
-          account_blob_url == other.account_blob_url &&
-          credentials_kind == other.credentials_kind &&
-          default_metadata == other.default_metadata);
+  // TODO(GH-38598): update here when more auth methods are added.
+  const bool ret = backend == other.backend &&
+                   default_metadata == other.default_metadata &&
+                   account_blob_url_ == other.account_blob_url_ &&
+                   account_dfs_url_ == other.account_dfs_url_ &&
+                   credential_kind_ == other.credential_kind_;
+  if (!ret) {
+    return false;
+  }
+  switch (credential_kind_) {
+    case CredentialKind::kAnonymous:
+      return true;
+    case CredentialKind::kStorageSharedKeyCredential:
+      return storage_shared_key_credential_->AccountName ==
+             other.storage_shared_key_credential_->AccountName;
+  }
+  DCHECK(false);
+  return false;
 }
 
-Status AzureOptions::ConfigureAccountKeyCredentials(const std::string& account_name,
-                                                    const std::string& account_key) {
-  if (this->backend == AzureBackend::Azurite) {
-    account_blob_url = "http://127.0.0.1:10000/" + account_name + "/";
-    account_dfs_url = "http://127.0.0.1:10000/" + account_name + "/";
+Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_name,
+                                                   const std::string& account_key) {
+  if (this->backend == AzureBackend::kAzurite) {
+    account_blob_url_ = "http://127.0.0.1:10000/" + account_name + "/";
+    account_dfs_url_ = "http://127.0.0.1:10000/" + account_name + "/";
   } else {
-    account_dfs_url = "https://" + account_name + ".dfs.core.windows.net/";
-    account_blob_url = "https://" + account_name + ".blob.core.windows.net/";
+    account_dfs_url_ = "https://" + account_name + ".dfs.core.windows.net/";
+    account_blob_url_ = "https://" + account_name + ".blob.core.windows.net/";
   }
-  storage_credentials_provider =
-      std::make_shared<Azure::Storage::StorageSharedKeyCredential>(account_name,
-                                                                   account_key);
-  credentials_kind = AzureCredentialsKind::StorageCredentials;
+  credential_kind_ = CredentialKind::kStorageSharedKeyCredential;
+  storage_shared_key_credential_ =
+      std::make_shared<Storage::StorageSharedKeyCredential>(account_name, account_key);
   return Status::OK();
 }
 
+std::unique_ptr<Blobs::BlobServiceClient> AzureOptions::MakeBlobServiceClient() const {
+  switch (credential_kind_) {
+    case CredentialKind::kAnonymous:
+      break;
+    case CredentialKind::kStorageSharedKeyCredential:
+      return std::make_unique<Blobs::BlobServiceClient>(account_blob_url_,
+                                                        storage_shared_key_credential_);
+  }
+  DCHECK(false) << "AzureOptions doesn't contain a valid auth configuration";
+  return nullptr;

Review Comment:
   Done.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1425852325


##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -71,56 +72,113 @@ using ::testing::Not;
 using ::testing::NotNull;
 
 namespace Blobs = Azure::Storage::Blobs;
-namespace Files = Azure::Storage::Files;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
 
-auto const* kLoremIpsum = R"""(
-Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
-incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
-nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
-Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
-fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
-culpa qui officia deserunt mollit anim id est laborum.
-)""";
+class BaseAzureEnv : public ::testing::Environment {
+ protected:
+  std::string account_name_;
+  std::string account_key_;
+
+  BaseAzureEnv(std::string account_name, std::string account_key)
+      : account_name_(std::move(account_name)), account_key_(std::move(account_key)) {}
 
-class AzuriteEnv : public ::testing::Environment {
  public:
-  AzuriteEnv() {
-    account_name_ = "devstoreaccount1";
-    account_key_ =
-        "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/"
-        "KBHBeksoGMGw==";
-    auto exe_path = bp::search_path("azurite");
-    if (exe_path.empty()) {
-      auto error = std::string("Could not find Azurite emulator.");
-      status_ = Status::Invalid(error);
-      return;
+  ~BaseAzureEnv() override = default;
+
+  const std::string& account_name() const { return account_name_; }
+  const std::string& account_key() const { return account_key_; }
+
+  virtual AzureBackend backend() const = 0;
+
+  virtual bool WithHierarchicalNamespace() const { return false; }
+
+  virtual Result<int64_t> GetDebugLogSize() { return 0; }
+  virtual Status DumpDebugLog(int64_t position) {
+    return Status::NotImplemented("BaseAzureEnv::DumpDebugLog");
+  }
+};
+
+template <class AzureEnvClass>
+class AzureEnvImpl : public BaseAzureEnv {
+ protected:
+  static Result<BaseAzureEnv*> MakeAndAddToGlobalTestEnvironment() {
+    ARROW_ASSIGN_OR_RAISE(auto instance, AzureEnvClass::Make());
+    auto* heap_ptr = instance.release();
+    ::testing::AddGlobalTestEnvironment(heap_ptr);
+    return heap_ptr;
+  }
+
+  using BaseAzureEnv::BaseAzureEnv;
+
+  static Result<std::unique_ptr<AzureEnvClass>> MakeFromEnvVars(
+      const std::string& account_name_var, const std::string& account_key_var) {
+    const auto account_name = std::getenv(account_name_var.c_str());
+    const auto account_key = std::getenv(account_key_var.c_str());
+    if (!account_name) {
+      return Status::Cancelled(account_name_var + " not set.");
     }
-    auto temp_dir_ = *TemporaryDir::Make("azurefs-test-");
-    auto debug_log_path_result = temp_dir_->path().Join("debug.log");
-    if (!debug_log_path_result.ok()) {
-      status_ = debug_log_path_result.status();
-      return;
+    if (!account_key) {
+      return Status::Cancelled(account_key_var + " not set.");

Review Comment:
   Helpful!
   <img width="680" alt="Screenshot 2023-12-13 at 17 28 45" src="https://github.com/apache/arrow/assets/207795/8233a3a6-c5e7-4a77-b38a-0ed8ba195c0e">
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1425851522


##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -193,51 +265,123 @@ TEST(AzureFileSystem, OptionsCompare) {
   EXPECT_TRUE(options.Equals(options));
 }
 
-class AzureFileSystemTest : public ::testing::Test {
+struct PreexistingData {
+ public:
+  using RNG = std::mt19937_64;
+
  public:
+  const std::string container_name;
+  static constexpr char const* kObjectName = "test-object-name";
+
+  static constexpr char const* kLoremIpsum = R"""(
+Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
+incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
+nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
+Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
+fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
+culpa qui officia deserunt mollit anim id est laborum.
+)""";
+
+ public:
+  explicit PreexistingData(RNG& rng) : container_name{RandomContainerName(rng)} {}
+
+  // Accessors
+  std::string ContainerPath() const { return container_name + '/'; }
+  std::string ObjectPath() const { return ContainerPath() + kObjectName; }
+  std::string NotFoundObjectPath() const { return ContainerPath() + "not-found"; }
+
+  std::string RandomDirectoryPath(RNG& rng) {
+    return internal::ConcatAbstractPath(container_name, RandomDirectoryName(rng));

Review Comment:
   Original code didn't use `ConcatAbstractPath` and I was inconsistent in my refactoring. I will add more changes.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1425917099


##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -33,42 +33,85 @@
 #include "arrow/util/logging.h"
 #include "arrow/util/string.h"
 
-namespace arrow {
-namespace fs {
+namespace arrow::fs {
+
+namespace Blobs = Azure::Storage::Blobs;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
+namespace Storage = Azure::Storage;
 
 // -----------------------------------------------------------------------
 // AzureOptions Implementation
 
 AzureOptions::AzureOptions() = default;
 
+AzureOptions::~AzureOptions() = default;
+
 bool AzureOptions::Equals(const AzureOptions& other) const {
-  return (account_dfs_url == other.account_dfs_url &&
-          account_blob_url == other.account_blob_url &&
-          credentials_kind == other.credentials_kind &&
-          default_metadata == other.default_metadata);
+  // TODO(GH-38598): update here when more auth methods are added.
+  const bool ret = backend == other.backend &&
+                   default_metadata == other.default_metadata &&
+                   account_blob_url_ == other.account_blob_url_ &&
+                   account_dfs_url_ == other.account_dfs_url_ &&
+                   credential_kind_ == other.credential_kind_;
+  if (!ret) {
+    return false;
+  }
+  switch (credential_kind_) {
+    case CredentialKind::kAnonymous:
+      return true;
+    case CredentialKind::kStorageSharedKeyCredential:
+      return storage_shared_key_credential_->AccountName ==
+             other.storage_shared_key_credential_->AccountName;
+  }
+  DCHECK(false);
+  return false;

Review Comment:
   Not having the `default` means the compiler will complain when a new enum entry is added reminding the programmer that they need to add the case to this loop. Putting the `DCHECK(false); return false;` after the switch is required by MSVC (if warning are being treated as errors).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] GH-39119: [C++] Refactor the Azure FS tests [arrow]

Posted by "felipecrv (via GitHub)" <gi...@apache.org>.
felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1425918013


##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -71,56 +72,113 @@ using ::testing::Not;
 using ::testing::NotNull;
 
 namespace Blobs = Azure::Storage::Blobs;
-namespace Files = Azure::Storage::Files;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
 
-auto const* kLoremIpsum = R"""(
-Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
-incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
-nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
-Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
-fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
-culpa qui officia deserunt mollit anim id est laborum.
-)""";
+class BaseAzureEnv : public ::testing::Environment {
+ protected:
+  std::string account_name_;
+  std::string account_key_;
+
+  BaseAzureEnv(std::string account_name, std::string account_key)
+      : account_name_(std::move(account_name)), account_key_(std::move(account_key)) {}
 
-class AzuriteEnv : public ::testing::Environment {
  public:
-  AzuriteEnv() {
-    account_name_ = "devstoreaccount1";
-    account_key_ =
-        "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/"
-        "KBHBeksoGMGw==";
-    auto exe_path = bp::search_path("azurite");
-    if (exe_path.empty()) {
-      auto error = std::string("Could not find Azurite emulator.");
-      status_ = Status::Invalid(error);
-      return;
+  ~BaseAzureEnv() override = default;
+
+  const std::string& account_name() const { return account_name_; }
+  const std::string& account_key() const { return account_key_; }
+
+  virtual AzureBackend backend() const = 0;
+
+  virtual bool WithHierarchicalNamespace() const { return false; }
+
+  virtual Result<int64_t> GetDebugLogSize() { return 0; }
+  virtual Status DumpDebugLog(int64_t position) {
+    return Status::NotImplemented("BaseAzureEnv::DumpDebugLog");
+  }
+};
+
+template <class AzureEnvClass>
+class AzureEnvImpl : public BaseAzureEnv {
+ protected:
+  static Result<BaseAzureEnv*> MakeAndAddToGlobalTestEnvironment() {
+    ARROW_ASSIGN_OR_RAISE(auto instance, AzureEnvClass::Make());
+    auto* heap_ptr = instance.release();
+    ::testing::AddGlobalTestEnvironment(heap_ptr);
+    return heap_ptr;
+  }
+
+  using BaseAzureEnv::BaseAzureEnv;
+
+  static Result<std::unique_ptr<AzureEnvClass>> MakeFromEnvVars(
+      const std::string& account_name_var, const std::string& account_key_var) {
+    const auto account_name = std::getenv(account_name_var.c_str());
+    const auto account_key = std::getenv(account_key_var.c_str());
+    if (!account_name) {
+      return Status::Cancelled(account_name_var + " not set.");
     }
-    auto temp_dir_ = *TemporaryDir::Make("azurefs-test-");
-    auto debug_log_path_result = temp_dir_->path().Join("debug.log");
-    if (!debug_log_path_result.ok()) {
-      status_ = debug_log_path_result.status();
-      return;
+    if (!account_key) {
+      return Status::Cancelled(account_key_var + " not set.");
     }
-    debug_log_path_ = *debug_log_path_result;
-    server_process_ =
-        bp::child(boost::this_process::environment(), exe_path, "--silent", "--location",
-                  temp_dir_->path().ToString(), "--debug", debug_log_path_.ToString());
-    if (!(server_process_.valid() && server_process_.running())) {
-      auto error = "Could not start Azurite emulator.";
-      server_process_.terminate();
-      server_process_.wait();
-      status_ = Status::Invalid(error);
-      return;
+    return std::unique_ptr<AzureEnvClass>{new AzureEnvClass(account_name, account_key)};
+  }
+
+ public:
+  ~AzureEnvImpl() override = default;
+
+  static Result<BaseAzureEnv*> GetInstance() {
+    static auto env = MakeAndAddToGlobalTestEnvironment();
+    if (env.ok()) {
+      return env;
     }
-    status_ = Status::OK();
+    return env.status();

Review Comment:
   Haha. Yes. I don't know how I ended up on this.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org