You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2023/06/30 06:55:46 UTC

[arrow] branch main updated: GH-36405: [C++][ORC] Upgrade ORC to 1.9.0 (#36406)

This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 377811f47d GH-36405: [C++][ORC] Upgrade ORC to 1.9.0 (#36406)
377811f47d is described below

commit 377811f47dfbcc57a4fd3b4b90aea96e2d6587fb
Author: Gang Wu <us...@gmail.com>
AuthorDate: Fri Jun 30 14:55:40 2023 +0800

    GH-36405: [C++][ORC] Upgrade ORC to 1.9.0 (#36406)
    
    ### Rationale for this change
    
    Apache ORC has released 1.9.0 recently: https://orc.apache.org/news/2023/06/28/ORC-1.9.0/
    
    The code base does not compile if we upgrade directly, because ORC 1.9.0 adds a second `createRowBatch` overload (shown below) that makes existing two-argument calls ambiguous:
    ```cpp
        virtual std::unique_ptr<ColumnVectorBatch> createRowBatch(
            uint64_t size, MemoryPool& pool,
            bool encoded = false) const = 0;
    
        virtual std::unique_ptr<ColumnVectorBatch> createRowBatch(
            uint64_t size, MemoryPool& pool, bool encoded = false,
            bool useTightNumericVector = false) const = 0;
    
    ```
    
    ### What changes are included in this PR?
    
    Explicitly specify which overload of `createRowBatch` to use in the orc test.
    
    ### Are these changes tested?
    
    Yes, by making sure all tests build and pass.
    
    ### Are there any user-facing changes?
    
    No.
    * Closes: #36405
    
    Authored-by: Gang Wu <us...@gmail.com>
    Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
 cpp/src/arrow/adapters/orc/adapter_test.cc | 10 ++++++++--
 cpp/thirdparty/versions.txt                |  4 ++--
 java/adapter/orc/pom.xml                   |  2 +-
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/cpp/src/arrow/adapters/orc/adapter_test.cc b/cpp/src/arrow/adapters/orc/adapter_test.cc
index cff9a6d7f9..93cc4f4649 100644
--- a/cpp/src/arrow/adapters/orc/adapter_test.cc
+++ b/cpp/src/arrow/adapters/orc/adapter_test.cc
@@ -81,7 +81,8 @@ class MemoryOutputStream : public liborc::OutputStream {
  private:
   std::vector<char> data_;
   std::string name_;
-  uint64_t length_, natural_write_size_;
+  uint64_t length_;
+  const uint64_t natural_write_size_ = 64 * 1024;
 };
 
 std::shared_ptr<Buffer> GenerateFixedDifferenceBuffer(int32_t fixed_length,
@@ -1041,7 +1042,12 @@ std::shared_ptr<Array> FlattenSparseUnionArray(std::shared_ptr<Array> array) {
 void TestUnionConversion(std::shared_ptr<Array> array) {
   auto length = array->length();
   auto orc_type = liborc::Type::buildTypeFromString("uniontype<string,int>");
-  auto orc_batch = orc_type->createRowBatch(array->length(), *liborc::getDefaultPool());
+
+  // Workaround for an unfortunate breaking change introduced by ORC-1.9.0.
+  MemoryOutputStream mem_stream(/*capacity=*/1024);
+  auto writer = CreateWriter(/*stripe_size=*/1024, *orc_type, &mem_stream);
+  auto orc_batch = writer->createRowBatch(length);
+  // auto orc_batch = orc_type->createRowBatch(length, *liborc::getDefaultPool());
 
   // Convert from arrow to orc
   int arrow_chunk_offset = 0;
diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt
index a229c34041..c05ff42284 100644
--- a/cpp/thirdparty/versions.txt
+++ b/cpp/thirdparty/versions.txt
@@ -87,8 +87,8 @@ ARROW_OPENTELEMETRY_BUILD_VERSION=v1.8.1
 ARROW_OPENTELEMETRY_BUILD_SHA256_CHECKSUM=3d640201594b07f08dade9cd1017bd0b59674daca26223b560b9bb6bf56264c2
 ARROW_OPENTELEMETRY_PROTO_BUILD_VERSION=v0.17.0
 ARROW_OPENTELEMETRY_PROTO_BUILD_SHA256_CHECKSUM=f269fbcb30e17b03caa1decd231ce826e59d7651c0f71c3b28eb5140b4bb5412
-ARROW_ORC_BUILD_VERSION=1.8.3
-ARROW_ORC_BUILD_SHA256_CHECKSUM=a78678ec425c8129d63370cb8a9bacb54186aa66af1e2bec01ce92e7eaf72e20
+ARROW_ORC_BUILD_VERSION=1.9.0
+ARROW_ORC_BUILD_SHA256_CHECKSUM=0dca8bbccdb2ee87e59ba964933436beebd02ea78c4134424828a8127fbc4faa
 ARROW_PROTOBUF_BUILD_VERSION=v21.3
 ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM=2f723218f6cb709ae4cdc4fb5ed56a5951fc5d466f0128ce4c946b8c78c8c49f
 # Because of https://github.com/Tencent/rapidjson/pull/1323, we require
diff --git a/java/adapter/orc/pom.xml b/java/adapter/orc/pom.xml
index a3f27de8ac..de3ffb9a57 100644
--- a/java/adapter/orc/pom.xml
+++ b/java/adapter/orc/pom.xml
@@ -37,7 +37,7 @@
         <dependency>
             <groupId>org.apache.orc</groupId>
             <artifactId>orc-core</artifactId>
-            <version>1.8.3</version>
+            <version>1.9.0</version>
             <scope>test</scope>
             <exclusions>
                 <exclusion>