You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by vi...@apache.org on 2022/10/16 19:35:07 UTC
[arrow-rs] branch master updated: Copying inappropriately aligned buffer in ipc reader (#2883)
This is an automated email from the ASF dual-hosted git repository.
viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new bfd87bd8d Copying inappropriately aligned buffer in ipc reader (#2883)
bfd87bd8d is described below
commit bfd87bd8d26d3d3d4851ea0e93035de28e59681e
Author: Liang-Chi Hsieh <vi...@gmail.com>
AuthorDate: Sun Oct 16 12:35:01 2022 -0700
Copying inappropriately aligned buffer in ipc reader (#2883)
* Fix ptr alignment error.
* Rewrite test
* Add values
* Copy buffer if it is not aligned properly
* Move to a function
* Cover IntervalMonthDayNanoType too
* Remove unnecessary change
* For review
* Make it generic for i256
* Lift into parent match and use minimum length.
---
arrow/src/ipc/reader.rs | 39 +++++++++++++++++++++++++++++++++++----
1 file changed, 35 insertions(+), 4 deletions(-)
diff --git a/arrow/src/ipc/reader.rs b/arrow/src/ipc/reader.rs
index 63c587455..cc45b2237 100644
--- a/arrow/src/ipc/reader.rs
+++ b/arrow/src/ipc/reader.rs
@@ -20,6 +20,7 @@
//! The `FileReader` and `StreamReader` have similar interfaces,
//! however the `FileReader` expects a reader that supports `Seek`ing
+use arrow_buffer::i256;
use std::collections::HashMap;
use std::fmt;
use std::io::{BufReader, Read, Seek, SeekFrom};
@@ -477,18 +478,30 @@ fn create_primitive_array(
| Timestamp(_, _)
| Date64
| Duration(_)
- | Interval(IntervalUnit::DayTime)
- | Interval(IntervalUnit::MonthDayNano) => ArrayData::builder(data_type.clone())
+ | Interval(IntervalUnit::DayTime) => ArrayData::builder(data_type.clone())
.len(length)
.add_buffer(buffers[1].clone())
.null_bit_buffer(null_buffer)
.build()
.unwrap(),
- Decimal128(_, _) | Decimal256(_, _) => {
+ Interval(IntervalUnit::MonthDayNano) | Decimal128(_, _) => {
+ let buffer = get_aligned_buffer::<i128>(&buffers[1], length);
+
// read 2 buffers: null buffer (optional) and data buffer
ArrayData::builder(data_type.clone())
.len(length)
- .add_buffer(buffers[1].clone())
+ .add_buffer(buffer)
+ .null_bit_buffer(null_buffer)
+ .build()
+ .unwrap()
+ }
+ Decimal256(_, _) => {
+ let buffer = get_aligned_buffer::<i256>(&buffers[1], length);
+
+ // read 2 buffers: null buffer (optional) and data buffer
+ ArrayData::builder(data_type.clone())
+ .len(length)
+ .add_buffer(buffer)
.null_bit_buffer(null_buffer)
.build()
.unwrap()
@@ -499,6 +512,24 @@ fn create_primitive_array(
make_array(array_data)
}
+/// Checks if given `Buffer` is properly aligned with `T`.
+/// If not, copying the data and padded it for alignment.
+fn get_aligned_buffer<T>(buffer: &Buffer, length: usize) -> Buffer {
+ let ptr = buffer.as_ptr();
+ let align_req = std::mem::align_of::<T>();
+ let align_offset = ptr.align_offset(align_req);
+ // The buffer is not aligned properly. The writer might use a smaller alignment
+ // e.g. 8 bytes, but on some platform (e.g. ARM) i128 requires 16 bytes alignment.
+ // We need to copy the buffer as fallback.
+ if align_offset != 0 {
+ let len_in_bytes = (length * std::mem::size_of::<T>()).min(buffer.len());
+ let slice = &buffer.as_slice()[0..len_in_bytes];
+ Buffer::from_slice_ref(&slice)
+ } else {
+ buffer.clone()
+ }
+}
+
/// Reads the correct number of buffers based on list type and null_count, and creates a
/// list array ref
fn create_list_array(