You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2022/10/15 05:31:21 UTC

[arrow] branch master updated: ARROW-17817: [C++] Let ORC compile on MSVC if it is activated (#14208)

This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new aeba61663f ARROW-17817: [C++] Let ORC compile on MSVC if it is activated (#14208)
aeba61663f is described below

commit aeba61663fdd82719e6cc0945aba216958ad6970
Author: LouisClt <lo...@hotmail.fr>
AuthorDate: Sat Oct 15 07:31:14 2022 +0200

    ARROW-17817: [C++] Let ORC compile on MSVC if it is activated (#14208)
    
    Lead-authored-by: LouisClt <lo...@hotmail.fr>
    Co-authored-by: Sutou Kouhei <ko...@clear-code.com>
    Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
 .github/workflows/cpp.yml                   |  6 +++++-
 ci/appveyor-cpp-build.bat                   |  7 +++----
 ci/conda_env_cpp.txt                        |  9 +++------
 ci/scripts/java_jni_windows_build.sh        |  3 +--
 cpp/cmake_modules/DefineOptions.cmake       |  2 --
 cpp/cmake_modules/ThirdpartyToolchain.cmake | 28 +++++++++++++++++++---------
 cpp/src/arrow/adapters/orc/CMakeLists.txt   | 15 +--------------
 cpp/src/arrow/adapters/orc/adapter_test.cc  | 27 ++++++++++++++-------------
 dev/tasks/java-jars/github.yml              |  5 +++--
 9 files changed, 49 insertions(+), 53 deletions(-)

diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml
index b420ec0639..3d6b89f2f5 100644
--- a/.github/workflows/cpp.yml
+++ b/.github/workflows/cpp.yml
@@ -217,6 +217,7 @@ jobs:
       ARROW_HOME: /usr
       ARROW_JEMALLOC: OFF
       ARROW_MIMALLOC: ON
+      ARROW_ORC: ON
       ARROW_PARQUET: ON
       ARROW_USE_GLOG: OFF
       ARROW_VERBOSE_THIRDPARTY_BUILD: OFF
@@ -286,7 +287,10 @@ jobs:
           bash -c "ci/scripts/cpp_build.sh $(pwd) $(pwd)/build"
       - name: Test
         shell: bash
-        run: ci/scripts/cpp_test.sh $(pwd) $(pwd)/build
+        run: |
+          # For ORC
+          export TZDIR=/c/msys64/usr/share/zoneinfo
+          ci/scripts/cpp_test.sh $(pwd) $(pwd)/build
 
   windows-mingw:
     name: AMD64 Windows MinGW ${{ matrix.mingw-n-bits }} C++
diff --git a/ci/appveyor-cpp-build.bat b/ci/appveyor-cpp-build.bat
index cbff03d1bb..5ddb8e370a 100644
--- a/ci/appveyor-cpp-build.bat
+++ b/ci/appveyor-cpp-build.bat
@@ -72,6 +72,7 @@ cmake -G "%GENERATOR%" %CMAKE_ARGS% ^
       -DARROW_HDFS=ON ^
       -DARROW_JSON=ON ^
       -DARROW_MIMALLOC=ON ^
+      -DARROW_ORC=ON ^
       -DARROW_PARQUET=ON ^
       -DARROW_S3=%ARROW_S3% ^
       -DARROW_SUBSTRAIT=ON ^
@@ -93,13 +94,11 @@ cmake -G "%GENERATOR%" %CMAKE_ARGS% ^
       ..  || exit /B
 cmake --build . --target install --config Release || exit /B
 
-@rem Needed so arrow-python-test.exe works
-set OLD_PYTHONHOME=%PYTHONHOME%
-set PYTHONHOME=%CONDA_PREFIX%
+@rem For ORC C++
+set TZDIR=%CONDA_PREFIX%\share\zoneinfo
 
 ctest --output-on-failure || exit /B
 
-set PYTHONHOME=%OLD_PYTHONHOME%
 popd
 
 @rem
diff --git a/ci/conda_env_cpp.txt b/ci/conda_env_cpp.txt
index dd313f19d7..98e9364fd1 100644
--- a/ci/conda_env_cpp.txt
+++ b/ci/conda_env_cpp.txt
@@ -22,21 +22,19 @@ brotli
 bzip2
 c-ares
 cmake
+flatbuffers
 gflags
 glog
 gmock>=1.10.0
 google-cloud-cpp>=1.34.0
-# 1.45.0 appears to segfault on Windows/AppVeyor
-grpc-cpp>=1.27.3,<1.45.0
+grpc-cpp
 gtest>=1.10.0
 libprotobuf
 libutf8proc
 lz4-c
 make
 ninja
-# Required by google-cloud-cpp, the Conda package is missing the dependency:
-#    https://github.com/conda-forge/google-cloud-cpp-feedstock/issues/28
-nlohmann_json
+orc
 pkg-config
 python
 rapidjson
@@ -46,4 +44,3 @@ thrift-cpp>=0.11.0
 xsimd
 zlib
 zstd
-flatbuffers
diff --git a/ci/scripts/java_jni_windows_build.sh b/ci/scripts/java_jni_windows_build.sh
index 0df60b7777..8d142bd425 100755
--- a/ci/scripts/java_jni_windows_build.sh
+++ b/ci/scripts/java_jni_windows_build.sh
@@ -33,8 +33,7 @@ install_dir=${build_dir}/cpp-install
 : ${ARROW_BUILD_TESTS:=ON}
 : ${ARROW_DATASET:=ON}
 export ARROW_DATASET
-# We can enable this after ARROW-17817 is resolved.
-: ${ARROW_ORC:=OFF}
+: ${ARROW_ORC:=ON}
 export ARROW_ORC
 : ${ARROW_PARQUET:=ON}
 : ${ARROW_S3:=ON}
diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake
index b56918e602..0a0f24b47f 100644
--- a/cpp/cmake_modules/DefineOptions.cmake
+++ b/cpp/cmake_modules/DefineOptions.cmake
@@ -122,8 +122,6 @@ endmacro()
 
 macro(resolve_option_dependencies)
   if(MSVC_TOOLCHAIN)
-    # ARROW-17817: ORC can't be built on Windows.
-    set(ARROW_ORC OFF)
     # Plasma using glog is not fully tested on windows.
     set(ARROW_USE_GLOG OFF)
   endif()
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index e9e71328af..1c9670208a 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -4286,6 +4286,9 @@ macro(build_orc)
   get_target_property(ORC_LZ4_ROOT LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES)
   get_filename_component(ORC_LZ4_ROOT "${ORC_LZ4_ROOT}" DIRECTORY)
 
+  get_target_property(ORC_ZSTD_ROOT ${ARROW_ZSTD_LIBZSTD} INTERFACE_INCLUDE_DIRECTORIES)
+  get_filename_component(ORC_ZSTD_ROOT "${ORC_ZSTD_ROOT}" DIRECTORY)
+
   # Weirdly passing in PROTOBUF_LIBRARY for PROTOC_LIBRARY still results in ORC finding
   # the protoc library.
   set(ORC_CMAKE_ARGS
@@ -4305,8 +4308,8 @@ macro(build_orc)
       "-DPROTOBUF_INCLUDE_DIR=${ORC_PROTOBUF_INCLUDE_DIR}"
       "-DPROTOBUF_LIBRARY=${ORC_PROTOBUF_LIBRARY}"
       "-DPROTOC_LIBRARY=${ORC_PROTOBUF_LIBRARY}"
-      "-DLZ4_HOME=${LZ4_HOME}"
-      "-DZSTD_HOME=${ZSTD_HOME}")
+      "-DLZ4_HOME=${ORC_LZ4_ROOT}"
+      "-DZSTD_HOME=${ORZ_ZSTD_ROOT}")
   if(ORC_PROTOBUF_EXECUTABLE)
     set(ORC_CMAKE_ARGS ${ORC_CMAKE_ARGS}
                        "-DPROTOBUF_EXECUTABLE:FILEPATH=${ORC_PROTOBUF_EXECUTABLE}")
@@ -4322,20 +4325,27 @@ macro(build_orc)
                       URL ${ORC_SOURCE_URL}
                       URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}"
                       BUILD_BYPRODUCTS ${ORC_STATIC_LIB}
-                      CMAKE_ARGS ${ORC_CMAKE_ARGS} ${EP_LOG_OPTIONS})
-
-  add_dependencies(toolchain orc_ep)
+                      CMAKE_ARGS ${ORC_CMAKE_ARGS} ${EP_LOG_OPTIONS}
+                      DEPENDS ${ARROW_PROTOBUF_LIBPROTOBUF}
+                              ${ARROW_ZSTD_LIBZSTD}
+                              ${Snappy_TARGET}
+                              LZ4::lz4
+                              ZLIB::ZLIB)
 
   set(ORC_VENDORED 1)
-  add_dependencies(orc_ep ZLIB::ZLIB)
-  add_dependencies(orc_ep LZ4::lz4)
-  add_dependencies(orc_ep ${Snappy_TARGET})
-  add_dependencies(orc_ep ${ARROW_PROTOBUF_LIBPROTOBUF})
 
   add_library(orc::liborc STATIC IMPORTED)
   set_target_properties(orc::liborc
                         PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}"
                                    INTERFACE_INCLUDE_DIRECTORIES "${ORC_INCLUDE_DIR}")
+  target_link_libraries(orc::liborc INTERFACE LZ4::lz4 ZLIB::ZLIB ${ARROW_ZSTD_LIBZSTD}
+                                              ${Snappy_TARGET})
+  if(NOT MSVC)
+    if(NOT APPLE)
+      target_link_libraries(orc::liborc INTERFACE Threads::Threads)
+    endif()
+    target_link_libraries(orc::liborc INTERFACE ${CMAKE_DL_LIBS})
+  endif()
 
   add_dependencies(toolchain orc_ep)
   add_dependencies(orc::liborc orc_ep)
diff --git a/cpp/src/arrow/adapters/orc/CMakeLists.txt b/cpp/src/arrow/adapters/orc/CMakeLists.txt
index 6b2536bb55..3c695abb5a 100644
--- a/cpp/src/arrow/adapters/orc/CMakeLists.txt
+++ b/cpp/src/arrow/adapters/orc/CMakeLists.txt
@@ -26,27 +26,14 @@ install(FILES adapter.h options.h
 # pkg-config support
 arrow_add_pkg_config("arrow-orc")
 
-set(ORC_MIN_TEST_LIBS
-    GTest::gtest_main
-    GTest::gtest
-    ${Snappy_TARGET}
-    LZ4::lz4
-    ZLIB::ZLIB)
-
 if(ARROW_BUILD_STATIC)
   set(ARROW_LIBRARIES_FOR_STATIC_TESTS arrow_testing_static arrow_static)
 else()
   set(ARROW_LIBRARIES_FOR_STATIC_TESTS arrow_testing_shared arrow_shared)
 endif()
 
-if(APPLE)
-  set(ORC_MIN_TEST_LIBS ${ORC_MIN_TEST_LIBS} ${CMAKE_DL_LIBS})
-elseif(NOT MSVC)
-  set(ORC_MIN_TEST_LIBS ${ORC_MIN_TEST_LIBS} pthread ${CMAKE_DL_LIBS})
-endif()
-
 set(ORC_STATIC_TEST_LINK_LIBS orc::liborc ${ARROW_LIBRARIES_FOR_STATIC_TESTS}
-                              ${ORC_MIN_TEST_LIBS})
+                              GTest::gtest_main GTest::gtest)
 
 add_arrow_test(adapter_test
                PREFIX
diff --git a/cpp/src/arrow/adapters/orc/adapter_test.cc b/cpp/src/arrow/adapters/orc/adapter_test.cc
index 1efc02bc40..5c234cc97c 100644
--- a/cpp/src/arrow/adapters/orc/adapter_test.cc
+++ b/cpp/src/arrow/adapters/orc/adapter_test.cc
@@ -42,22 +42,22 @@ namespace arrow {
 
 using internal::checked_pointer_cast;
 
-constexpr int kDefaultSmallMemStreamSize = 16384 * 5;  // 80KB
-constexpr int kDefaultMemStreamSize = 10 * 1024 * 1024;
+constexpr size_t kDefaultSmallMemStreamSize = 16384 * 5;  // 80KB
+constexpr size_t kDefaultMemStreamSize = 10 * 1024 * 1024;
 constexpr int64_t kNanoMax = std::numeric_limits<int64_t>::max();
 constexpr int64_t kNanoMin = std::numeric_limits<int64_t>::lowest();
-const int64_t kMicroMax = std::floor(kNanoMax / 1000);
-const int64_t kMicroMin = std::ceil(kNanoMin / 1000);
-const int64_t kMilliMax = std::floor(kMicroMax / 1000);
-const int64_t kMilliMin = std::ceil(kMicroMin / 1000);
-const int64_t kSecondMax = std::floor(kMilliMax / 1000);
-const int64_t kSecondMin = std::ceil(kMilliMin / 1000);
+const int64_t kMicroMax = static_cast<int64_t>(std::floor(kNanoMax / 1000));
+const int64_t kMicroMin = static_cast<int64_t>(std::ceil(kNanoMin / 1000));
+const int64_t kMilliMax = static_cast<int64_t>(std::floor(kMicroMax / 1000));
+const int64_t kMilliMin = static_cast<int64_t>(std::ceil(kMicroMin / 1000));
+const int64_t kSecondMax = static_cast<int64_t>(std::floor(kMilliMax / 1000));
+const int64_t kSecondMin = static_cast<int64_t>(std::ceil(kMilliMin / 1000));
 
 static constexpr random::SeedType kRandomSeed = 0x0ff1ce;
 
 class MemoryOutputStream : public liborc::OutputStream {
  public:
-  explicit MemoryOutputStream(ssize_t capacity)
+  explicit MemoryOutputStream(size_t capacity)
       : data_(capacity), name_("MemoryOutputStream"), length_(0) {}
 
   uint64_t getLength() const override { return length_; }
@@ -86,12 +86,13 @@ class MemoryOutputStream : public liborc::OutputStream {
 std::shared_ptr<Buffer> GenerateFixedDifferenceBuffer(int32_t fixed_length,
                                                       int64_t length) {
   BufferBuilder builder;
-  int32_t offsets[length];
+  std::vector<int32_t> offsets;
+  offsets.resize(length);
   ARROW_EXPECT_OK(builder.Resize(4 * length));
-  for (int32_t i = 0; i < length; i++) {
-    offsets[i] = fixed_length * i;
+  for (int64_t i = 0; i < length; i++) {
+    offsets[i] = static_cast<int32_t>(fixed_length * i);
   }
-  ARROW_EXPECT_OK(builder.Append(offsets, 4 * length));
+  ARROW_EXPECT_OK(builder.Append(offsets.data(), 4 * length));
   std::shared_ptr<Buffer> buffer;
   ARROW_EXPECT_OK(builder.Finish(&buffer));
   return buffer;
diff --git a/dev/tasks/java-jars/github.yml b/dev/tasks/java-jars/github.yml
index 6f7fdc82d5..763f5df5cd 100644
--- a/dev/tasks/java-jars/github.yml
+++ b/dev/tasks/java-jars/github.yml
@@ -101,6 +101,8 @@ jobs:
         shell: cmd
         run: |
           call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
+          REM For ORC
+          set TZDIR=/c/msys64/usr/share/zoneinfo
           bash -c "arrow/ci/scripts/java_jni_windows_build.sh $(pwd)/arrow $(pwd)/arrow/cpp-build $(pwd)/arrow/java-dist"
       - name: Compress into single artifact to keep directory structure
         shell: bash
@@ -148,8 +150,7 @@ jobs:
           test -f arrow/java-dist/arrow_dataset_jni.dll
           test -f arrow/java-dist/libarrow_orc_jni.dylib
           test -f arrow/java-dist/libarrow_orc_jni.so
-          # We can enable this after ARROW-17817 is resolved.
-          # test -f arrow/java-dist/arrow_orc_jni.dll
+          test -f arrow/java-dist/arrow_orc_jni.dll
           test -f arrow/java-dist/libgandiva_jni.dylib
           test -f arrow/java-dist/libgandiva_jni.so
           test -f arrow/java-dist/libplasma_java.dylib