You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2022/10/15 05:31:21 UTC
[arrow] branch master updated: ARROW-17817: [C++] Let ORC compile on MSVC if it is activated (#14208)
This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new aeba61663f ARROW-17817: [C++] Let ORC compile on MSVC if it is activated (#14208)
aeba61663f is described below
commit aeba61663fdd82719e6cc0945aba216958ad6970
Author: LouisClt <lo...@hotmail.fr>
AuthorDate: Sat Oct 15 07:31:14 2022 +0200
ARROW-17817: [C++] Let ORC compile on MSVC if it is activated (#14208)
Lead-authored-by: LouisClt <lo...@hotmail.fr>
Co-authored-by: Sutou Kouhei <ko...@clear-code.com>
Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
.github/workflows/cpp.yml | 6 +++++-
ci/appveyor-cpp-build.bat | 7 +++----
ci/conda_env_cpp.txt | 9 +++------
ci/scripts/java_jni_windows_build.sh | 3 +--
cpp/cmake_modules/DefineOptions.cmake | 2 --
cpp/cmake_modules/ThirdpartyToolchain.cmake | 28 +++++++++++++++++++---------
cpp/src/arrow/adapters/orc/CMakeLists.txt | 15 +--------------
cpp/src/arrow/adapters/orc/adapter_test.cc | 27 ++++++++++++++-------------
dev/tasks/java-jars/github.yml | 5 +++--
9 files changed, 49 insertions(+), 53 deletions(-)
diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml
index b420ec0639..3d6b89f2f5 100644
--- a/.github/workflows/cpp.yml
+++ b/.github/workflows/cpp.yml
@@ -217,6 +217,7 @@ jobs:
ARROW_HOME: /usr
ARROW_JEMALLOC: OFF
ARROW_MIMALLOC: ON
+ ARROW_ORC: ON
ARROW_PARQUET: ON
ARROW_USE_GLOG: OFF
ARROW_VERBOSE_THIRDPARTY_BUILD: OFF
@@ -286,7 +287,10 @@ jobs:
bash -c "ci/scripts/cpp_build.sh $(pwd) $(pwd)/build"
- name: Test
shell: bash
- run: ci/scripts/cpp_test.sh $(pwd) $(pwd)/build
+ run: |
+ # For ORC
+ export TZDIR=/c/msys64/usr/share/zoneinfo
+ ci/scripts/cpp_test.sh $(pwd) $(pwd)/build
windows-mingw:
name: AMD64 Windows MinGW ${{ matrix.mingw-n-bits }} C++
diff --git a/ci/appveyor-cpp-build.bat b/ci/appveyor-cpp-build.bat
index cbff03d1bb..5ddb8e370a 100644
--- a/ci/appveyor-cpp-build.bat
+++ b/ci/appveyor-cpp-build.bat
@@ -72,6 +72,7 @@ cmake -G "%GENERATOR%" %CMAKE_ARGS% ^
-DARROW_HDFS=ON ^
-DARROW_JSON=ON ^
-DARROW_MIMALLOC=ON ^
+ -DARROW_ORC=ON ^
-DARROW_PARQUET=ON ^
-DARROW_S3=%ARROW_S3% ^
-DARROW_SUBSTRAIT=ON ^
@@ -93,13 +94,11 @@ cmake -G "%GENERATOR%" %CMAKE_ARGS% ^
.. || exit /B
cmake --build . --target install --config Release || exit /B
-@rem Needed so arrow-python-test.exe works
-set OLD_PYTHONHOME=%PYTHONHOME%
-set PYTHONHOME=%CONDA_PREFIX%
+@rem For ORC C++
+set TZDIR=%CONDA_PREFIX%\share\zoneinfo
ctest --output-on-failure || exit /B
-set PYTHONHOME=%OLD_PYTHONHOME%
popd
@rem
diff --git a/ci/conda_env_cpp.txt b/ci/conda_env_cpp.txt
index dd313f19d7..98e9364fd1 100644
--- a/ci/conda_env_cpp.txt
+++ b/ci/conda_env_cpp.txt
@@ -22,21 +22,19 @@ brotli
bzip2
c-ares
cmake
+flatbuffers
gflags
glog
gmock>=1.10.0
google-cloud-cpp>=1.34.0
-# 1.45.0 appears to segfault on Windows/AppVeyor
-grpc-cpp>=1.27.3,<1.45.0
+grpc-cpp
gtest>=1.10.0
libprotobuf
libutf8proc
lz4-c
make
ninja
-# Required by google-cloud-cpp, the Conda package is missing the dependency:
-# https://github.com/conda-forge/google-cloud-cpp-feedstock/issues/28
-nlohmann_json
+orc
pkg-config
python
rapidjson
@@ -46,4 +44,3 @@ thrift-cpp>=0.11.0
xsimd
zlib
zstd
-flatbuffers
diff --git a/ci/scripts/java_jni_windows_build.sh b/ci/scripts/java_jni_windows_build.sh
index 0df60b7777..8d142bd425 100755
--- a/ci/scripts/java_jni_windows_build.sh
+++ b/ci/scripts/java_jni_windows_build.sh
@@ -33,8 +33,7 @@ install_dir=${build_dir}/cpp-install
: ${ARROW_BUILD_TESTS:=ON}
: ${ARROW_DATASET:=ON}
export ARROW_DATASET
-# We can enable this after ARROW-17817 is resolved.
-: ${ARROW_ORC:=OFF}
+: ${ARROW_ORC:=ON}
export ARROW_ORC
: ${ARROW_PARQUET:=ON}
: ${ARROW_S3:=ON}
diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake
index b56918e602..0a0f24b47f 100644
--- a/cpp/cmake_modules/DefineOptions.cmake
+++ b/cpp/cmake_modules/DefineOptions.cmake
@@ -122,8 +122,6 @@ endmacro()
macro(resolve_option_dependencies)
if(MSVC_TOOLCHAIN)
- # ARROW-17817: ORC can't be built on Windows.
- set(ARROW_ORC OFF)
# Plasma using glog is not fully tested on windows.
set(ARROW_USE_GLOG OFF)
endif()
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index e9e71328af..1c9670208a 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -4286,6 +4286,9 @@ macro(build_orc)
get_target_property(ORC_LZ4_ROOT LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(ORC_LZ4_ROOT "${ORC_LZ4_ROOT}" DIRECTORY)
+ get_target_property(ORC_ZSTD_ROOT ${ARROW_ZSTD_LIBZSTD} INTERFACE_INCLUDE_DIRECTORIES)
+ get_filename_component(ORC_ZSTD_ROOT "${ORC_ZSTD_ROOT}" DIRECTORY)
+
# Weirdly passing in PROTOBUF_LIBRARY for PROTOC_LIBRARY still results in ORC finding
# the protoc library.
set(ORC_CMAKE_ARGS
@@ -4305,8 +4308,8 @@ macro(build_orc)
"-DPROTOBUF_INCLUDE_DIR=${ORC_PROTOBUF_INCLUDE_DIR}"
"-DPROTOBUF_LIBRARY=${ORC_PROTOBUF_LIBRARY}"
"-DPROTOC_LIBRARY=${ORC_PROTOBUF_LIBRARY}"
- "-DLZ4_HOME=${LZ4_HOME}"
- "-DZSTD_HOME=${ZSTD_HOME}")
+ "-DLZ4_HOME=${ORC_LZ4_ROOT}"
+ "-DZSTD_HOME=${ORZ_ZSTD_ROOT}")
if(ORC_PROTOBUF_EXECUTABLE)
set(ORC_CMAKE_ARGS ${ORC_CMAKE_ARGS}
"-DPROTOBUF_EXECUTABLE:FILEPATH=${ORC_PROTOBUF_EXECUTABLE}")
@@ -4322,20 +4325,27 @@ macro(build_orc)
URL ${ORC_SOURCE_URL}
URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}"
BUILD_BYPRODUCTS ${ORC_STATIC_LIB}
- CMAKE_ARGS ${ORC_CMAKE_ARGS} ${EP_LOG_OPTIONS})
-
- add_dependencies(toolchain orc_ep)
+ CMAKE_ARGS ${ORC_CMAKE_ARGS} ${EP_LOG_OPTIONS}
+ DEPENDS ${ARROW_PROTOBUF_LIBPROTOBUF}
+ ${ARROW_ZSTD_LIBZSTD}
+ ${Snappy_TARGET}
+ LZ4::lz4
+ ZLIB::ZLIB)
set(ORC_VENDORED 1)
- add_dependencies(orc_ep ZLIB::ZLIB)
- add_dependencies(orc_ep LZ4::lz4)
- add_dependencies(orc_ep ${Snappy_TARGET})
- add_dependencies(orc_ep ${ARROW_PROTOBUF_LIBPROTOBUF})
add_library(orc::liborc STATIC IMPORTED)
set_target_properties(orc::liborc
PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}"
INTERFACE_INCLUDE_DIRECTORIES "${ORC_INCLUDE_DIR}")
+ target_link_libraries(orc::liborc INTERFACE LZ4::lz4 ZLIB::ZLIB ${ARROW_ZSTD_LIBZSTD}
+ ${Snappy_TARGET})
+ if(NOT MSVC)
+ if(NOT APPLE)
+ target_link_libraries(orc::liborc INTERFACE Threads::Threads)
+ endif()
+ target_link_libraries(orc::liborc INTERFACE ${CMAKE_DL_LIBS})
+ endif()
add_dependencies(toolchain orc_ep)
add_dependencies(orc::liborc orc_ep)
diff --git a/cpp/src/arrow/adapters/orc/CMakeLists.txt b/cpp/src/arrow/adapters/orc/CMakeLists.txt
index 6b2536bb55..3c695abb5a 100644
--- a/cpp/src/arrow/adapters/orc/CMakeLists.txt
+++ b/cpp/src/arrow/adapters/orc/CMakeLists.txt
@@ -26,27 +26,14 @@ install(FILES adapter.h options.h
# pkg-config support
arrow_add_pkg_config("arrow-orc")
-set(ORC_MIN_TEST_LIBS
- GTest::gtest_main
- GTest::gtest
- ${Snappy_TARGET}
- LZ4::lz4
- ZLIB::ZLIB)
-
if(ARROW_BUILD_STATIC)
set(ARROW_LIBRARIES_FOR_STATIC_TESTS arrow_testing_static arrow_static)
else()
set(ARROW_LIBRARIES_FOR_STATIC_TESTS arrow_testing_shared arrow_shared)
endif()
-if(APPLE)
- set(ORC_MIN_TEST_LIBS ${ORC_MIN_TEST_LIBS} ${CMAKE_DL_LIBS})
-elseif(NOT MSVC)
- set(ORC_MIN_TEST_LIBS ${ORC_MIN_TEST_LIBS} pthread ${CMAKE_DL_LIBS})
-endif()
-
set(ORC_STATIC_TEST_LINK_LIBS orc::liborc ${ARROW_LIBRARIES_FOR_STATIC_TESTS}
- ${ORC_MIN_TEST_LIBS})
+ GTest::gtest_main GTest::gtest)
add_arrow_test(adapter_test
PREFIX
diff --git a/cpp/src/arrow/adapters/orc/adapter_test.cc b/cpp/src/arrow/adapters/orc/adapter_test.cc
index 1efc02bc40..5c234cc97c 100644
--- a/cpp/src/arrow/adapters/orc/adapter_test.cc
+++ b/cpp/src/arrow/adapters/orc/adapter_test.cc
@@ -42,22 +42,22 @@ namespace arrow {
using internal::checked_pointer_cast;
-constexpr int kDefaultSmallMemStreamSize = 16384 * 5; // 80KB
-constexpr int kDefaultMemStreamSize = 10 * 1024 * 1024;
+constexpr size_t kDefaultSmallMemStreamSize = 16384 * 5; // 80KB
+constexpr size_t kDefaultMemStreamSize = 10 * 1024 * 1024;
constexpr int64_t kNanoMax = std::numeric_limits<int64_t>::max();
constexpr int64_t kNanoMin = std::numeric_limits<int64_t>::lowest();
-const int64_t kMicroMax = std::floor(kNanoMax / 1000);
-const int64_t kMicroMin = std::ceil(kNanoMin / 1000);
-const int64_t kMilliMax = std::floor(kMicroMax / 1000);
-const int64_t kMilliMin = std::ceil(kMicroMin / 1000);
-const int64_t kSecondMax = std::floor(kMilliMax / 1000);
-const int64_t kSecondMin = std::ceil(kMilliMin / 1000);
+const int64_t kMicroMax = static_cast<int64_t>(std::floor(kNanoMax / 1000));
+const int64_t kMicroMin = static_cast<int64_t>(std::ceil(kNanoMin / 1000));
+const int64_t kMilliMax = static_cast<int64_t>(std::floor(kMicroMax / 1000));
+const int64_t kMilliMin = static_cast<int64_t>(std::ceil(kMicroMin / 1000));
+const int64_t kSecondMax = static_cast<int64_t>(std::floor(kMilliMax / 1000));
+const int64_t kSecondMin = static_cast<int64_t>(std::ceil(kMilliMin / 1000));
static constexpr random::SeedType kRandomSeed = 0x0ff1ce;
class MemoryOutputStream : public liborc::OutputStream {
public:
- explicit MemoryOutputStream(ssize_t capacity)
+ explicit MemoryOutputStream(size_t capacity)
: data_(capacity), name_("MemoryOutputStream"), length_(0) {}
uint64_t getLength() const override { return length_; }
@@ -86,12 +86,13 @@ class MemoryOutputStream : public liborc::OutputStream {
std::shared_ptr<Buffer> GenerateFixedDifferenceBuffer(int32_t fixed_length,
int64_t length) {
BufferBuilder builder;
- int32_t offsets[length];
+ std::vector<int32_t> offsets;
+ offsets.resize(length);
ARROW_EXPECT_OK(builder.Resize(4 * length));
- for (int32_t i = 0; i < length; i++) {
- offsets[i] = fixed_length * i;
+ for (int64_t i = 0; i < length; i++) {
+ offsets[i] = static_cast<int32_t>(fixed_length * i);
}
- ARROW_EXPECT_OK(builder.Append(offsets, 4 * length));
+ ARROW_EXPECT_OK(builder.Append(offsets.data(), 4 * length));
std::shared_ptr<Buffer> buffer;
ARROW_EXPECT_OK(builder.Finish(&buffer));
return buffer;
diff --git a/dev/tasks/java-jars/github.yml b/dev/tasks/java-jars/github.yml
index 6f7fdc82d5..763f5df5cd 100644
--- a/dev/tasks/java-jars/github.yml
+++ b/dev/tasks/java-jars/github.yml
@@ -101,6 +101,8 @@ jobs:
shell: cmd
run: |
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
+ REM For ORC
+ set TZDIR=/c/msys64/usr/share/zoneinfo
bash -c "arrow/ci/scripts/java_jni_windows_build.sh $(pwd)/arrow $(pwd)/arrow/cpp-build $(pwd)/arrow/java-dist"
- name: Compress into single artifact to keep directory structure
shell: bash
@@ -148,8 +150,7 @@ jobs:
test -f arrow/java-dist/arrow_dataset_jni.dll
test -f arrow/java-dist/libarrow_orc_jni.dylib
test -f arrow/java-dist/libarrow_orc_jni.so
- # We can enable this after ARROW-17817 is resolved.
- # test -f arrow/java-dist/arrow_orc_jni.dll
+ test -f arrow/java-dist/arrow_orc_jni.dll
test -f arrow/java-dist/libgandiva_jni.dylib
test -f arrow/java-dist/libgandiva_jni.so
test -f arrow/java-dist/libplasma_java.dylib