You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2023/06/30 06:55:46 UTC
[arrow] branch main updated: GH-36405: [C++][ORC] Upgrade ORC to 1.9.0 (#36406)
This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 377811f47d GH-36405: [C++][ORC] Upgrade ORC to 1.9.0 (#36406)
377811f47d is described below
commit 377811f47dfbcc57a4fd3b4b90aea96e2d6587fb
Author: Gang Wu <us...@gmail.com>
AuthorDate: Fri Jun 30 14:55:40 2023 +0800
GH-36405: [C++][ORC] Upgrade ORC to 1.9.0 (#36406)
### Rationale for this change
Apache ORC has released 1.9.0 recently: https://orc.apache.org/news/2023/06/28/ORC-1.9.0/
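The code base does not compile if we upgrade directly, because ORC 1.9.0 adds a second `createRowBatch` overload (shown below) and calls that rely on the default arguments become ambiguous: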
```cpp
virtual std::unique_ptr<ColumnVectorBatch> createRowBatch(
uint64_t size, MemoryPool& pool,
bool encoded = false) const = 0;
virtual std::unique_ptr<ColumnVectorBatch> createRowBatch(
uint64_t size, MemoryPool& pool, bool encoded = false,
bool useTightNumericVector = false) const = 0;
```
### What changes are included in this PR?
Avoid the ambiguous `createRowBatch` call in the ORC test by creating the row batch through an ORC writer instead, and bump the ORC version to 1.9.0 in the C++ and Java builds.
### Are these changes tested?
Yes, I made sure all tests build and pass.
### Are there any user-facing changes?
No.
* Closes: #36405
Authored-by: Gang Wu <us...@gmail.com>
Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
cpp/src/arrow/adapters/orc/adapter_test.cc | 10 ++++++++--
cpp/thirdparty/versions.txt | 4 ++--
java/adapter/orc/pom.xml | 2 +-
3 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/cpp/src/arrow/adapters/orc/adapter_test.cc b/cpp/src/arrow/adapters/orc/adapter_test.cc
index cff9a6d7f9..93cc4f4649 100644
--- a/cpp/src/arrow/adapters/orc/adapter_test.cc
+++ b/cpp/src/arrow/adapters/orc/adapter_test.cc
@@ -81,7 +81,8 @@ class MemoryOutputStream : public liborc::OutputStream {
private:
std::vector<char> data_;
std::string name_;
- uint64_t length_, natural_write_size_;
+ uint64_t length_;
+ const uint64_t natural_write_size_ = 64 * 1024;
};
std::shared_ptr<Buffer> GenerateFixedDifferenceBuffer(int32_t fixed_length,
@@ -1041,7 +1042,12 @@ std::shared_ptr<Array> FlattenSparseUnionArray(std::shared_ptr<Array> array) {
void TestUnionConversion(std::shared_ptr<Array> array) {
auto length = array->length();
auto orc_type = liborc::Type::buildTypeFromString("uniontype<string,int>");
- auto orc_batch = orc_type->createRowBatch(array->length(), *liborc::getDefaultPool());
+
+ // Workaround for an unfortunate breaking change introduced by ORC-1.9.0.
+ MemoryOutputStream mem_stream(/*capacity=*/1024);
+ auto writer = CreateWriter(/*stripe_size=*/1024, *orc_type, &mem_stream);
+ auto orc_batch = writer->createRowBatch(length);
+ // auto orc_batch = orc_type->createRowBatch(length, *liborc::getDefaultPool());
// Convert from arrow to orc
int arrow_chunk_offset = 0;
diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt
index a229c34041..c05ff42284 100644
--- a/cpp/thirdparty/versions.txt
+++ b/cpp/thirdparty/versions.txt
@@ -87,8 +87,8 @@ ARROW_OPENTELEMETRY_BUILD_VERSION=v1.8.1
ARROW_OPENTELEMETRY_BUILD_SHA256_CHECKSUM=3d640201594b07f08dade9cd1017bd0b59674daca26223b560b9bb6bf56264c2
ARROW_OPENTELEMETRY_PROTO_BUILD_VERSION=v0.17.0
ARROW_OPENTELEMETRY_PROTO_BUILD_SHA256_CHECKSUM=f269fbcb30e17b03caa1decd231ce826e59d7651c0f71c3b28eb5140b4bb5412
-ARROW_ORC_BUILD_VERSION=1.8.3
-ARROW_ORC_BUILD_SHA256_CHECKSUM=a78678ec425c8129d63370cb8a9bacb54186aa66af1e2bec01ce92e7eaf72e20
+ARROW_ORC_BUILD_VERSION=1.9.0
+ARROW_ORC_BUILD_SHA256_CHECKSUM=0dca8bbccdb2ee87e59ba964933436beebd02ea78c4134424828a8127fbc4faa
ARROW_PROTOBUF_BUILD_VERSION=v21.3
ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM=2f723218f6cb709ae4cdc4fb5ed56a5951fc5d466f0128ce4c946b8c78c8c49f
# Because of https://github.com/Tencent/rapidjson/pull/1323, we require
diff --git a/java/adapter/orc/pom.xml b/java/adapter/orc/pom.xml
index a3f27de8ac..de3ffb9a57 100644
--- a/java/adapter/orc/pom.xml
+++ b/java/adapter/orc/pom.xml
@@ -37,7 +37,7 @@
<dependency>
<groupId>org.apache.orc</groupId>
<artifactId>orc-core</artifactId>
- <version>1.8.3</version>
+ <version>1.9.0</version>
<scope>test</scope>
<exclusions>
<exclusion>