You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/05/27 06:25:25 UTC

[GitHub] [arrow] kou commented on a diff in pull request #12914: ARROW-2034: [C++] Filesystem implementation for Azure Blob Storage

kou commented on code in PR #12914:
URL: https://github.com/apache/arrow/pull/12914#discussion_r883055966


##########
ci/appveyor-cpp-build.bat:
##########
@@ -108,6 +108,7 @@ cmake -G "%GENERATOR%" %CMAKE_ARGS% ^
       -DARROW_PARQUET=ON ^
       -DARROW_PYTHON=ON ^
       -DARROW_S3=%ARROW_S3% ^
+      -DARROW_AZURE=OFF ^

Review Comment:
   Could you keep this list in alphabetical order?



##########
cpp/src/arrow/filesystem/CMakeLists.txt:
##########
@@ -37,6 +37,34 @@ if(ARROW_GCS)
                  Boost::system)
 endif()
 
+if(ARROW_AZURE)
+  set(AZURE_SRCS)
+  list(APPEND
+        AZURE_SRCS
+        azurefs_mock.cc
+        azurefs.cc)
+
+  add_arrow_lib(azurefs
+                SOURCES
+                ${AZURE_SRCS}
+                SHARED_LINK_LIBS
+                ${ARROW_LINK_LIBS}
+                SHARED_PRIVATE_LINK_LIBS
+                ${ARROW_SHARED_PRIVATE_LINK_LIBS}
+                STATIC_LINK_LIBS
+                ${ARROW_STATIC_LINK_LIBS})
+
+  set_target_properties(azurefs_objlib PROPERTIES CXX_STANDARD 14 CXX_STANDARD_REQUIRED ON)

Review Comment:
   Ah, sorry. I forgot that we embed filesystem modules into `libarrow.so` instead of creating a separate `libarrow_XXX.so`. (We don't have separate CMake targets for `libarrow.so` and the filesystem modules.)
   
   How about changing the default C++ version?
   
   ```diff
   diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake
   index d3a2a1a2d2..cdaafe379b 100644
   --- a/cpp/cmake_modules/SetupCxxFlags.cmake
   +++ b/cpp/cmake_modules/SetupCxxFlags.cmake
   @@ -118,12 +118,16 @@ if(NOT DEFINED CMAKE_C_STANDARD)
      set(CMAKE_C_STANDARD 11)
    endif()
    
   -# This ensures that things like c++11 get passed correctly
   +# This ensures that things like c++11/c++14 get passed correctly
    if(NOT DEFINED CMAKE_CXX_STANDARD)
   -  set(CMAKE_CXX_STANDARD 11)
   +  if(ARROW_AZURE)
   +    set(CMAKE_CXX_STANDARD 14)
   +  else()
   +    set(CMAKE_CXX_STANDARD 11)
   +  endif()
    endif()
    
   -# We require a C++11 compliant compiler
   +# We require a C++11/14 compliant compiler
    set(CMAKE_CXX_STANDARD_REQUIRED ON)
    
    # ARROW-6848: Do not use GNU (or other CXX) extensions
   diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
   index ec6cada1cd..1ded8e59d4 100644
   --- a/cpp/src/arrow/CMakeLists.txt
   +++ b/cpp/src/arrow/CMakeLists.txt
   @@ -469,6 +469,12 @@ if(ARROW_FILESYSTEM)
           filesystem/path_util.cc
           filesystem/util_internal.cc)
    
   +  if(ARROW_AZURE)
   +    list(APPEND ARROW_SRCS filesystem/azurefs.cc filesystem/azurefs_mock.cc)
   +    set_source_files_properties(filesystem/azurefs.cc filesystem/azurefs_mock.cc
   +                                PROPERTIES SKIP_PRECOMPILE_HEADERS ON
   +                                           SKIP_UNITY_BUILD_INCLUSION ON)
   +  endif()
      if(ARROW_GCS)
        list(APPEND ARROW_SRCS filesystem/gcsfs.cc filesystem/gcsfs_internal.cc)
        set_source_files_properties(filesystem/gcsfs.cc filesystem/gcsfs_internal.cc
   diff --git a/cpp/src/arrow/filesystem/CMakeLists.txt b/cpp/src/arrow/filesystem/CMakeLists.txt
   index 819eca08cf..bbca231baf 100644
   --- a/cpp/src/arrow/filesystem/CMakeLists.txt
   +++ b/cpp/src/arrow/filesystem/CMakeLists.txt
   @@ -28,8 +28,8 @@ add_arrow_test(filesystem-test
                   EXTRA_LABELS
                   filesystem)
    
   -if(ARROW_GCS)
   -  add_arrow_test(gcsfs_test
   +if(ARROW_AZURE)
   +  add_arrow_test(azurefs_test
                     EXTRA_LABELS
                     filesystem
                     EXTRA_LINK_LIBS
   @@ -37,32 +37,13 @@ if(ARROW_GCS)
                     Boost::system)
    endif()
    
   -if(ARROW_AZURE)
   -  set(AZURE_SRCS)
   -  list(APPEND
   -        AZURE_SRCS
   -        azurefs_mock.cc
   -        azurefs.cc)
   -
   -  add_arrow_lib(azurefs
   -                SOURCES
   -                ${AZURE_SRCS}
   -                SHARED_LINK_LIBS
   -                ${ARROW_LINK_LIBS}
   -                SHARED_PRIVATE_LINK_LIBS
   -                ${ARROW_SHARED_PRIVATE_LINK_LIBS}
   -                STATIC_LINK_LIBS
   -                ${ARROW_STATIC_LINK_LIBS})
   -
   -  set_target_properties(azurefs_objlib PROPERTIES CXX_STANDARD 14 CXX_STANDARD_REQUIRED ON)
   -
   -  set(TEST_LIBS_AZURE ${ARROW_TEST_LINK_LIBS})
   -  list(APPEND TEST_LIBS_AZURE azurefs_shared)
   -  add_arrow_test(azurefs_test EXTRA_LABELS filesystem
   -                  STATIC_LINK_LIBS
   -                  ${TEST_LIBS_AZURE}
   -  )
   -  set_target_properties(arrow-azurefs-test PROPERTIES CXX_STANDARD 14 CXX_STANDARD_REQUIRED ON)
   +if(ARROW_GCS)
   +  add_arrow_test(gcsfs_test
   +                 EXTRA_LABELS
   +                 filesystem
   +                 EXTRA_LINK_LIBS
   +                 Boost::filesystem
   +                 Boost::system)
    endif()
    
    if(ARROW_S3)
   ```



##########
cpp/cmake_modules/ThirdpartyToolchain.cmake:
##########
@@ -4553,6 +4593,105 @@ if(ARROW_S3)
   endif()
 endif()
 
+macro(build_azuresdk)
+  message(STATUS "Building Azure C++ SDK from source")
+
+  set(AZURESDK_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/azuresdk_ep-install")
+  set(AZURESDK_INCLUDE_DIR "${AZURESDK_PREFIX}/include")
+
+  set(AZURESDK_CMAKE_ARGS
+      ${EP_COMMON_CMAKE_ARGS}
+      -DBUILD_TESTING=OFF
+      -DCMAKE_INSTALL_LIBDIR=lib
+      "-DCMAKE_INSTALL_PREFIX=${AZURESDK_PREFIX}"
+      -DCMAKE_PREFIX_PATH=${AZURESDK_PREFIX})
+
+  file(MAKE_DIRECTORY ${AZURESDK_INCLUDE_DIR})
+
+  # Azure C++ SDK related libraries to link statically
+  set(_AZURESDK_LIBS
+      azure-core
+      azure-identity
+      azure-storage-blobs
+      azure-storage-common
+      azure-storage-files-datalake)
+  set(AZURESDK_LIBRARIES)
+  set(AZURESDK_LIBRARIES_CPP)
+  foreach(_AZURESDK_LIB ${_AZURESDK_LIBS})
+    string(TOUPPER ${_AZURESDK_LIB} _AZURESDK_LIB_UPPER)
+    string(REPLACE "-" "_" _AZURESDK_LIB_NAME_PREFIX ${_AZURESDK_LIB_UPPER})
+    list(APPEND AZURESDK_LIBRARIES_CPP "${_AZURESDK_LIB}-cpp")
+    set(_AZURESDK_TARGET_NAME Azure::${_AZURESDK_LIB})
+    set(_AZURESDK_STATIC_LIBRARY
+        "${AZURESDK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${_AZURESDK_LIB}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
+    add_library(${_AZURESDK_TARGET_NAME} STATIC IMPORTED)
+    set_target_properties(${_AZURESDK_TARGET_NAME}
+                          PROPERTIES IMPORTED_LOCATION ${_AZURESDK_STATIC_LIBRARY}
+                                     INTERFACE_INCLUDE_DIRECTORIES
+                                     "${AZURESDK_INCLUDE_DIR}")
+    set("${_AZURESDK_LIB_NAME_PREFIX}_STATIC_LIBRARY" ${_AZURESDK_STATIC_LIBRARY})
+    list(APPEND AZURESDK_LIBRARIES ${_AZURESDK_TARGET_NAME})
+  endforeach()
+
+  externalproject_add(azure_core_ep
+                      ${EP_LOG_OPTIONS}
+                      URL ${AZURE_CORE_SOURCE_URL}
+                      URL_HASH "SHA256=${ARROW_AZURE_CORE_BUILD_SHA256_CHECKSUM}"
+                      CMAKE_ARGS ${AZURESDK_CMAKE_ARGS}
+                      BUILD_BYPRODUCTS ${AZURE_CORE_STATIC_LIBRARY})
+  add_dependencies(Azure::azure-core azure_core_ep)
+
+  externalproject_add(azure_identity_ep
+                      ${EP_LOG_OPTIONS}
+                      URL ${AZURE_IDENTITY_SOURCE_URL}
+                      URL_HASH "SHA256=${ARROW_AZURE_IDENTITY_BUILD_SHA256_CHECKSUM}"
+                      CMAKE_ARGS ${AZURESDK_CMAKE_ARGS}
+                      BUILD_BYPRODUCTS ${AZURE_IDENTITY_STATIC_LIBRARY})
+  add_dependencies(Azure::azure-identity azure_identity_ep)
+
+  externalproject_add(azure_storage_blobs_ep
+                      ${EP_LOG_OPTIONS}
+                      URL ${AZURE_STORAGE_BLOB_SOURCE_URL}
+                      URL_HASH "SHA256=${ARROW_AZURE_STORAGE_BLOB_BUILD_SHA256_CHECKSUM}"
+                      CMAKE_ARGS ${AZURESDK_CMAKE_ARGS}
+                      BUILD_BYPRODUCTS ${AZURE_STORAGE_BLOBS_STATIC_LIBRARY})
+  add_dependencies(Azure::azure-storage-blobs azure_storage_blobs_ep)
+
+  externalproject_add(azure_storage_common_ep
+                      ${EP_LOG_OPTIONS}
+                      URL ${AZURE_STORAGE_COMMON_SOURCE_URL}
+                      URL_HASH "SHA256=${ARROW_AZURE_STORAGE_COMMON_BUILD_SHA256_CHECKSUM}"
+                      CMAKE_ARGS ${AZURESDK_CMAKE_ARGS}
+                      BUILD_BYPRODUCTS ${AZURE_STORAGE_COMMON_STATIC_LIBRARY})
+  add_dependencies(Azure::azure-storage-common azure_storage_common_ep)
+
+  externalproject_add(azure_storage_files_datalake_ep
+                      ${EP_LOG_OPTIONS}
+                      URL ${AZURE_STORAGE_FILES_DATALAKE_SOURCE_URL}
+                      URL_HASH "SHA256=${ARROW_AZURE_STORAGE_FILES_DATALAKE_BUILD_SHA256_CHECKSUM}"
+                      CMAKE_ARGS ${AZURESDK_CMAKE_ARGS}
+                      BUILD_BYPRODUCTS ${AZURE_STORAGE_FILES_DATALAKE_STATIC_LIBRARY})
+  add_dependencies(Azure::azure-storage-files-datalake azure_storage_files_datalake_ep)
+
+  set_property(TARGET Azure::azure-core
+                APPEND
+                PROPERTY INTERFACE_LINK_LIBRARIES CURL::libcurl LibXml2::LibXml2)
+
+  set(AZURESDK_LINK_LIBRARIES ${AZURESDK_LIBRARIES})
+endmacro()
+
+if(ARROW_AZURE)
+  # TODO - use resolve_dependency
+  build_azuresdk()
+  foreach(AZURESDK_LIBRARY_CPP ${AZURESDK_LIBRARIES_CPP})
+    find_package(${AZURESDK_LIBRARY_CPP} CONFIG REQUIRED)
+  endforeach()
+  include_directories(SYSTEM ${AZURESDK_INCLUDE_DIR})

Review Comment:
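   This `include_directories()` call is needless: the imported `Azure::*` targets already set `INTERFACE_INCLUDE_DIRECTORIES` to `${AZURESDK_INCLUDE_DIR}`.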



##########
cpp/cmake_modules/ThirdpartyToolchain.cmake:
##########
@@ -4553,6 +4593,105 @@ if(ARROW_S3)
   endif()
 endif()
 
+macro(build_azuresdk)
+  message(STATUS "Building Azure C++ SDK from source")
+
+  set(AZURESDK_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/azuresdk_ep-install")
+  set(AZURESDK_INCLUDE_DIR "${AZURESDK_PREFIX}/include")
+
+  set(AZURESDK_CMAKE_ARGS
+      ${EP_COMMON_CMAKE_ARGS}
+      -DBUILD_TESTING=OFF
+      -DCMAKE_INSTALL_LIBDIR=lib
+      "-DCMAKE_INSTALL_PREFIX=${AZURESDK_PREFIX}"
+      -DCMAKE_PREFIX_PATH=${AZURESDK_PREFIX})
+
+  file(MAKE_DIRECTORY ${AZURESDK_INCLUDE_DIR})
+
+  # Azure C++ SDK related libraries to link statically
+  set(_AZURESDK_LIBS
+      azure-core
+      azure-identity
+      azure-storage-blobs
+      azure-storage-common
+      azure-storage-files-datalake)
+  set(AZURESDK_LIBRARIES)
+  set(AZURESDK_LIBRARIES_CPP)
+  foreach(_AZURESDK_LIB ${_AZURESDK_LIBS})
+    string(TOUPPER ${_AZURESDK_LIB} _AZURESDK_LIB_UPPER)
+    string(REPLACE "-" "_" _AZURESDK_LIB_NAME_PREFIX ${_AZURESDK_LIB_UPPER})
+    list(APPEND AZURESDK_LIBRARIES_CPP "${_AZURESDK_LIB}-cpp")
+    set(_AZURESDK_TARGET_NAME Azure::${_AZURESDK_LIB})
+    set(_AZURESDK_STATIC_LIBRARY
+        "${AZURESDK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${_AZURESDK_LIB}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
+    add_library(${_AZURESDK_TARGET_NAME} STATIC IMPORTED)
+    set_target_properties(${_AZURESDK_TARGET_NAME}
+                          PROPERTIES IMPORTED_LOCATION ${_AZURESDK_STATIC_LIBRARY}
+                                     INTERFACE_INCLUDE_DIRECTORIES
+                                     "${AZURESDK_INCLUDE_DIR}")
+    set("${_AZURESDK_LIB_NAME_PREFIX}_STATIC_LIBRARY" ${_AZURESDK_STATIC_LIBRARY})
+    list(APPEND AZURESDK_LIBRARIES ${_AZURESDK_TARGET_NAME})
+  endforeach()
+
+  externalproject_add(azure_core_ep
+                      ${EP_LOG_OPTIONS}
+                      URL ${AZURE_CORE_SOURCE_URL}
+                      URL_HASH "SHA256=${ARROW_AZURE_CORE_BUILD_SHA256_CHECKSUM}"
+                      CMAKE_ARGS ${AZURESDK_CMAKE_ARGS}
+                      BUILD_BYPRODUCTS ${AZURE_CORE_STATIC_LIBRARY})
+  add_dependencies(Azure::azure-core azure_core_ep)
+
+  externalproject_add(azure_identity_ep
+                      ${EP_LOG_OPTIONS}
+                      URL ${AZURE_IDENTITY_SOURCE_URL}
+                      URL_HASH "SHA256=${ARROW_AZURE_IDENTITY_BUILD_SHA256_CHECKSUM}"
+                      CMAKE_ARGS ${AZURESDK_CMAKE_ARGS}
+                      BUILD_BYPRODUCTS ${AZURE_IDENTITY_STATIC_LIBRARY})
+  add_dependencies(Azure::azure-identity azure_identity_ep)
+
+  externalproject_add(azure_storage_blobs_ep
+                      ${EP_LOG_OPTIONS}
+                      URL ${AZURE_STORAGE_BLOB_SOURCE_URL}
+                      URL_HASH "SHA256=${ARROW_AZURE_STORAGE_BLOB_BUILD_SHA256_CHECKSUM}"
+                      CMAKE_ARGS ${AZURESDK_CMAKE_ARGS}
+                      BUILD_BYPRODUCTS ${AZURE_STORAGE_BLOBS_STATIC_LIBRARY})
+  add_dependencies(Azure::azure-storage-blobs azure_storage_blobs_ep)
+
+  externalproject_add(azure_storage_common_ep
+                      ${EP_LOG_OPTIONS}
+                      URL ${AZURE_STORAGE_COMMON_SOURCE_URL}
+                      URL_HASH "SHA256=${ARROW_AZURE_STORAGE_COMMON_BUILD_SHA256_CHECKSUM}"
+                      CMAKE_ARGS ${AZURESDK_CMAKE_ARGS}
+                      BUILD_BYPRODUCTS ${AZURE_STORAGE_COMMON_STATIC_LIBRARY})
+  add_dependencies(Azure::azure-storage-common azure_storage_common_ep)
+
+  externalproject_add(azure_storage_files_datalake_ep
+                      ${EP_LOG_OPTIONS}
+                      URL ${AZURE_STORAGE_FILES_DATALAKE_SOURCE_URL}
+                      URL_HASH "SHA256=${ARROW_AZURE_STORAGE_FILES_DATALAKE_BUILD_SHA256_CHECKSUM}"
+                      CMAKE_ARGS ${AZURESDK_CMAKE_ARGS}
+                      BUILD_BYPRODUCTS ${AZURE_STORAGE_FILES_DATALAKE_STATIC_LIBRARY})
+  add_dependencies(Azure::azure-storage-files-datalake azure_storage_files_datalake_ep)
+
+  set_property(TARGET Azure::azure-core
+                APPEND
+                PROPERTY INTERFACE_LINK_LIBRARIES CURL::libcurl LibXml2::LibXml2)
+
+  set(AZURESDK_LINK_LIBRARIES ${AZURESDK_LIBRARIES})
+endmacro()
+
+if(ARROW_AZURE)
+  # TODO - use resolve_dependency
+  build_azuresdk()
+  foreach(AZURESDK_LIBRARY_CPP ${AZURESDK_LIBRARIES_CPP})
+    find_package(${AZURESDK_LIBRARY_CPP} CONFIG REQUIRED)
+  endforeach()

Review Comment:
   This `find_package()` loop is needless: `build_azuresdk()` already defines the imported `Azure::*` targets.



##########
cpp/src/arrow/filesystem/azurefs.h:
##########
@@ -0,0 +1,161 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <azure/core/credentials/credentials.hpp>
+#include <azure/storage/common/storage_credential.hpp>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/filesystem/filesystem.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/uri.h"
+
+namespace arrow {
+namespace fs {
+
+enum class AzureCredentialsKind : int8_t {
+  /// Anonymous access (no credentials used), public
+  Anonymous,
+  /// Use explicitly-provided access key pair
+  StorageCredentials,
+  /// Use ServicePrincipleCredentials
+  ServicePrincipleCredentials,
+  /// Use Sas Token to authenticate
+  Sas,
+  /// Use Connection String
+  ConnectionString
+};
+
+/// Options for the AzureFileSystem implementation.
+struct ARROW_EXPORT AzureOptions {
+  std::string scheme;
+  std::string account_dfs_url;
+  std::string account_blob_url;
+  AzureCredentialsKind credentials_kind = AzureCredentialsKind::Anonymous;
+
+  std::string sas_token;
+  std::string connection_string;
+  std::shared_ptr<Azure::Storage::StorageSharedKeyCredential>
+      storage_credentials_provider;
+  std::shared_ptr<Azure::Core::Credentials::TokenCredential>
+      service_principle_credentials_provider;
+
+  AzureOptions();
+
+  std::string GetAccountNameFromConnectionString(const std::string& connectionString);
+
+  void ConfigureAnonymousCredentials(const std::string& account_name);
+
+  void ConfigureAccountKeyCredentials(const std::string& account_name,
+                                      const std::string& account_key);
+
+  void ConfigureConnectionStringCredentials(const std::string& connection_string);
+
+  void ConfigureServicePrincipleCredentials(const std::string& account_name,
+                                            const std::string& tenant_id,
+                                            const std::string& client_id,
+                                            const std::string& client_secret);
+
+  void ConfigureSasCredentials(const std::string& sas_token);
+
+  bool Equals(const AzureOptions& other) const;
+
+  static AzureOptions FromAnonymous(const std::string account_name);
+
+  static AzureOptions FromAccountKey(const std::string& account_name,
+                                     const std::string& account_key);
+
+  static AzureOptions FromConnectionString(const std::string& connection_string);
+
+  static AzureOptions FromServicePrincipleCredential(const std::string& account_name,
+                                                     const std::string& tenant_id,
+                                                     const std::string& client_id,
+                                                     const std::string& client_secret);
+
+  static AzureOptions FromSas(const std::string& uri);
+
+  static Result<AzureOptions> FromUri(const ::arrow::internal::Uri& uri,
+                                      std::string* out_path = NULLPTR);
+  static Result<AzureOptions> FromUri(const std::string& uri,
+                                      std::string* out_path = NULLPTR);
+};
+
+class ARROW_EXPORT AzureBlobFileSystem : public FileSystem {
+ public:
+  ~AzureBlobFileSystem() override;
+
+  std::string type_name() const override { return "abfs"; }
+
+  /// Return the original Azure options when constructing the filesystem
+  AzureOptions options() const;
+
+  bool Equals(const FileSystem& other) const override;
+
+  /// \cond FALSE
+  using FileSystem::GetFileInfo;
+  /// \endcond
+  Result<FileInfo> GetFileInfo(const std::string& path) override;
+  Result<std::vector<FileInfo>> GetFileInfo(const FileSelector& select) override;
+
+  /// FileInfoGenerator GetFileInfoGenerator(const FileSelector& select) override;
+
+  Status CreateDir(const std::string& path, bool recursive = true) override;
+
+  Status DeleteDir(const std::string& path) override;
+  Status DeleteDirContents(const std::string& path) override;

Review Comment:
   ```suggestion
     Status DeleteDirContents(const std::string& path,
                              bool missing_dir_ok = false) override;
   ```



##########
cpp/cmake_modules/ThirdpartyToolchain.cmake:
##########
@@ -4553,6 +4593,105 @@ if(ARROW_S3)
   endif()
 endif()
 
+macro(build_azuresdk)
+  message(STATUS "Building Azure C++ SDK from source")
+
+  set(AZURESDK_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/azuresdk_ep-install")
+  set(AZURESDK_INCLUDE_DIR "${AZURESDK_PREFIX}/include")
+
+  set(AZURESDK_CMAKE_ARGS
+      ${EP_COMMON_CMAKE_ARGS}
+      -DBUILD_TESTING=OFF
+      -DCMAKE_INSTALL_LIBDIR=lib
+      "-DCMAKE_INSTALL_PREFIX=${AZURESDK_PREFIX}"
+      -DCMAKE_PREFIX_PATH=${AZURESDK_PREFIX})
+
+  file(MAKE_DIRECTORY ${AZURESDK_INCLUDE_DIR})
+
+  # Azure C++ SDK related libraries to link statically
+  set(_AZURESDK_LIBS
+      azure-core
+      azure-identity
+      azure-storage-blobs
+      azure-storage-common
+      azure-storage-files-datalake)
+  set(AZURESDK_LIBRARIES)
+  set(AZURESDK_LIBRARIES_CPP)
+  foreach(_AZURESDK_LIB ${_AZURESDK_LIBS})
+    string(TOUPPER ${_AZURESDK_LIB} _AZURESDK_LIB_UPPER)
+    string(REPLACE "-" "_" _AZURESDK_LIB_NAME_PREFIX ${_AZURESDK_LIB_UPPER})
+    list(APPEND AZURESDK_LIBRARIES_CPP "${_AZURESDK_LIB}-cpp")
+    set(_AZURESDK_TARGET_NAME Azure::${_AZURESDK_LIB})
+    set(_AZURESDK_STATIC_LIBRARY
+        "${AZURESDK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${_AZURESDK_LIB}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
+    add_library(${_AZURESDK_TARGET_NAME} STATIC IMPORTED)
+    set_target_properties(${_AZURESDK_TARGET_NAME}
+                          PROPERTIES IMPORTED_LOCATION ${_AZURESDK_STATIC_LIBRARY}
+                                     INTERFACE_INCLUDE_DIRECTORIES
+                                     "${AZURESDK_INCLUDE_DIR}")
+    set("${_AZURESDK_LIB_NAME_PREFIX}_STATIC_LIBRARY" ${_AZURESDK_STATIC_LIBRARY})
+    list(APPEND AZURESDK_LIBRARIES ${_AZURESDK_TARGET_NAME})
+  endforeach()
+
+  externalproject_add(azure_core_ep
+                      ${EP_LOG_OPTIONS}
+                      URL ${AZURE_CORE_SOURCE_URL}
+                      URL_HASH "SHA256=${ARROW_AZURE_CORE_BUILD_SHA256_CHECKSUM}"
+                      CMAKE_ARGS ${AZURESDK_CMAKE_ARGS}
+                      BUILD_BYPRODUCTS ${AZURE_CORE_STATIC_LIBRARY})
+  add_dependencies(Azure::azure-core azure_core_ep)
+
+  externalproject_add(azure_identity_ep
+                      ${EP_LOG_OPTIONS}
+                      URL ${AZURE_IDENTITY_SOURCE_URL}
+                      URL_HASH "SHA256=${ARROW_AZURE_IDENTITY_BUILD_SHA256_CHECKSUM}"
+                      CMAKE_ARGS ${AZURESDK_CMAKE_ARGS}
+                      BUILD_BYPRODUCTS ${AZURE_IDENTITY_STATIC_LIBRARY})
+  add_dependencies(Azure::azure-identity azure_identity_ep)
+
+  externalproject_add(azure_storage_blobs_ep
+                      ${EP_LOG_OPTIONS}
+                      URL ${AZURE_STORAGE_BLOB_SOURCE_URL}
+                      URL_HASH "SHA256=${ARROW_AZURE_STORAGE_BLOB_BUILD_SHA256_CHECKSUM}"
+                      CMAKE_ARGS ${AZURESDK_CMAKE_ARGS}
+                      BUILD_BYPRODUCTS ${AZURE_STORAGE_BLOBS_STATIC_LIBRARY})
+  add_dependencies(Azure::azure-storage-blobs azure_storage_blobs_ep)
+
+  externalproject_add(azure_storage_common_ep
+                      ${EP_LOG_OPTIONS}
+                      URL ${AZURE_STORAGE_COMMON_SOURCE_URL}
+                      URL_HASH "SHA256=${ARROW_AZURE_STORAGE_COMMON_BUILD_SHA256_CHECKSUM}"
+                      CMAKE_ARGS ${AZURESDK_CMAKE_ARGS}
+                      BUILD_BYPRODUCTS ${AZURE_STORAGE_COMMON_STATIC_LIBRARY})
+  add_dependencies(Azure::azure-storage-common azure_storage_common_ep)
+
+  externalproject_add(azure_storage_files_datalake_ep
+                      ${EP_LOG_OPTIONS}
+                      URL ${AZURE_STORAGE_FILES_DATALAKE_SOURCE_URL}
+                      URL_HASH "SHA256=${ARROW_AZURE_STORAGE_FILES_DATALAKE_BUILD_SHA256_CHECKSUM}"
+                      CMAKE_ARGS ${AZURESDK_CMAKE_ARGS}
+                      BUILD_BYPRODUCTS ${AZURE_STORAGE_FILES_DATALAKE_STATIC_LIBRARY})
+  add_dependencies(Azure::azure-storage-files-datalake azure_storage_files_datalake_ep)
+
+  set_property(TARGET Azure::azure-core
+                APPEND
+                PROPERTY INTERFACE_LINK_LIBRARIES CURL::libcurl LibXml2::LibXml2)

Review Comment:
   We need to call `find_curl()` for `CURL::libcurl` and `find_package(LibXml2 REQUIRED)` for `LibXml2::LibXml2`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org