You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/08/09 10:33:12 UTC
[arrow-rs] branch master updated: refactor: Make read_num_bytes a function instead of a macro (#2364)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 56f7904ef refactor: Make read_num_bytes a function instead of a macro (#2364)
56f7904ef is described below
commit 56f7904ef89ea02b49e6b346d2456d79085f7502
Author: Markus Westerlind <mw...@influxdata.com>
AuthorDate: Tue Aug 9 12:33:07 2022 +0200
refactor: Make read_num_bytes a function instead of a macro (#2364)
* refactor: Make read_num_bytes a function instead of a macro
Reduces duplication of this code (now it is just once per type)
* chore: cargo clippy
---
parquet/src/column/reader.rs | 4 ++--
parquet/src/data_type.rs | 25 +++++--------------------
parquet/src/encodings/decoding.rs | 2 +-
parquet/src/encodings/levels.rs | 4 ++--
parquet/src/util/bit_util.rs | 28 ++++++++++++++--------------
5 files changed, 24 insertions(+), 39 deletions(-)
diff --git a/parquet/src/column/reader.rs b/parquet/src/column/reader.rs
index 8eee807c2..1432c72b5 100644
--- a/parquet/src/column/reader.rs
+++ b/parquet/src/column/reader.rs
@@ -28,7 +28,7 @@ use crate::column::reader::decoder::{
use crate::data_type::*;
use crate::errors::{ParquetError, Result};
use crate::schema::types::ColumnDescPtr;
-use crate::util::bit_util::{ceil, num_required_bits};
+use crate::util::bit_util::{ceil, num_required_bits, read_num_bytes};
use crate::util::memory::ByteBufferPtr;
pub(crate) mod decoder;
@@ -520,7 +520,7 @@ fn parse_v1_level(
match encoding {
Encoding::RLE => {
let i32_size = std::mem::size_of::<i32>();
- let data_size = read_num_bytes!(i32, i32_size, buf.as_ref()) as usize;
+ let data_size = read_num_bytes::<i32>(i32_size, buf.as_ref()) as usize;
Ok((i32_size + data_size, buf.range(i32_size, data_size)))
}
Encoding::BIT_PACKED => {
diff --git a/parquet/src/data_type.rs b/parquet/src/data_type.rs
index 005b35570..7870ca36a 100644
--- a/parquet/src/data_type.rs
+++ b/parquet/src/data_type.rs
@@ -565,7 +565,7 @@ impl AsBytes for str {
pub(crate) mod private {
use crate::encodings::decoding::PlainDecoderDetails;
- use crate::util::bit_util::{BitReader, BitWriter};
+ use crate::util::bit_util::{read_num_bytes, BitReader, BitWriter};
use crate::util::memory::ByteBufferPtr;
use crate::basic::Type;
@@ -892,21 +892,6 @@ pub(crate) mod private {
}
}
- // TODO - Why does macro importing fail?
- /// Reads `$size` of bytes from `$src`, and reinterprets them as type `$ty`, in
- /// little-endian order. `$ty` must implement the `Default` trait. Otherwise this won't
- /// compile.
- /// This is copied and modified from byteorder crate.
- macro_rules! read_num_bytes {
- ($ty:ty, $size:expr, $src:expr) => {{
- assert!($size <= $src.len());
- let mut buffer =
- <$ty as $crate::util::bit_util::FromBytes>::Buffer::default();
- buffer.as_mut()[..$size].copy_from_slice(&$src[..$size]);
- <$ty>::from_ne_bytes(buffer)
- }};
- }
-
impl ParquetValueType for super::ByteArray {
const PHYSICAL_TYPE: Type = Type::BYTE_ARRAY;
@@ -946,9 +931,9 @@ pub(crate) mod private {
.as_mut()
.expect("set_data should have been called");
let num_values = std::cmp::min(buffer.len(), decoder.num_values);
- for i in 0..num_values {
+ for val_array in buffer.iter_mut().take(num_values) {
let len: usize =
- read_num_bytes!(u32, 4, data.start_from(decoder.start).as_ref())
+ read_num_bytes::<u32>(4, data.start_from(decoder.start).as_ref())
as usize;
decoder.start += std::mem::size_of::<u32>();
@@ -956,7 +941,7 @@ pub(crate) mod private {
return Err(eof_err!("Not enough bytes to decode"));
}
- let val: &mut Self = buffer[i].as_mut_any().downcast_mut().unwrap();
+ let val: &mut Self = val_array.as_mut_any().downcast_mut().unwrap();
val.set_data(data.range(decoder.start, len));
decoder.start += len;
@@ -975,7 +960,7 @@ pub(crate) mod private {
for _ in 0..num_values {
let len: usize =
- read_num_bytes!(u32, 4, data.start_from(decoder.start).as_ref())
+ read_num_bytes::<u32>(4, data.start_from(decoder.start).as_ref())
as usize;
decoder.start += std::mem::size_of::<u32>() + len;
}
diff --git a/parquet/src/encodings/decoding.rs b/parquet/src/encodings/decoding.rs
index 3db050a0f..bb1e7137a 100644
--- a/parquet/src/encodings/decoding.rs
+++ b/parquet/src/encodings/decoding.rs
@@ -424,7 +424,7 @@ impl<T: DataType> Decoder<T> for RleValueDecoder<T> {
// We still need to remove prefix of i32 from the stream.
const I32_SIZE: usize = mem::size_of::<i32>();
- let data_size = read_num_bytes!(i32, I32_SIZE, data.as_ref()) as usize;
+ let data_size = bit_util::read_num_bytes::<i32>(I32_SIZE, data.as_ref()) as usize;
self.decoder = RleDecoder::new(1);
self.decoder.set_data(data.range(I32_SIZE, data_size));
self.values_left = num_values;
diff --git a/parquet/src/encodings/levels.rs b/parquet/src/encodings/levels.rs
index 40af135ff..95384926d 100644
--- a/parquet/src/encodings/levels.rs
+++ b/parquet/src/encodings/levels.rs
@@ -23,7 +23,7 @@ use crate::basic::Encoding;
use crate::data_type::AsBytes;
use crate::errors::Result;
use crate::util::{
- bit_util::{ceil, num_required_bits, BitReader, BitWriter},
+ bit_util::{ceil, num_required_bits, read_num_bytes, BitReader, BitWriter},
memory::ByteBufferPtr,
};
@@ -192,7 +192,7 @@ impl LevelDecoder {
LevelDecoder::Rle(ref mut num_values, ref mut decoder) => {
*num_values = Some(num_buffered_values);
let i32_size = mem::size_of::<i32>();
- let data_size = read_num_bytes!(i32, i32_size, data.as_ref()) as usize;
+ let data_size = read_num_bytes::<i32>(i32_size, data.as_ref()) as usize;
decoder.set_data(data.range(i32_size, data_size));
i32_size + data_size
}
diff --git a/parquet/src/util/bit_util.rs b/parquet/src/util/bit_util.rs
index c70384e7a..5d76a8dbf 100644
--- a/parquet/src/util/bit_util.rs
+++ b/parquet/src/util/bit_util.rs
@@ -88,17 +88,17 @@ impl FromBytes for bool {
from_le_bytes! { u8, u16, u32, u64, i8, i16, i32, i64, f32, f64 }
-/// Reads `$size` of bytes from `$src`, and reinterprets them as type `$ty`, in
-/// little-endian order. `$ty` must implement the `Default` trait. Otherwise this won't
-/// compile.
+/// Reads `size` of bytes from `src`, and reinterprets them as type `ty`, in
+/// little-endian order.
/// This is copied and modified from byteorder crate.
-macro_rules! read_num_bytes {
- ($ty:ty, $size:expr, $src:expr) => {{
- assert!($size <= $src.len());
- let mut buffer = <$ty as $crate::util::bit_util::FromBytes>::Buffer::default();
- buffer.as_mut()[..$size].copy_from_slice(&$src[..$size]);
- <$ty>::from_ne_bytes(buffer)
- }};
+pub(crate) fn read_num_bytes<T>(size: usize, src: &[u8]) -> T
+where
+ T: FromBytes,
+{
+ assert!(size <= src.len());
+ let mut buffer = <T as FromBytes>::Buffer::default();
+ buffer.as_mut()[..size].copy_from_slice(&src[..size]);
+ <T>::from_ne_bytes(buffer)
}
/// Returns the ceil of value/divisor.
@@ -341,7 +341,7 @@ impl BitReader {
pub fn new(buffer: ByteBufferPtr) -> Self {
let total_bytes = buffer.len();
let num_bytes = cmp::min(8, total_bytes);
- let buffered_values = read_num_bytes!(u64, num_bytes, buffer.as_ref());
+ let buffered_values = read_num_bytes::<u64>(num_bytes, buffer.as_ref());
BitReader {
buffer,
buffered_values,
@@ -355,7 +355,7 @@ impl BitReader {
self.buffer = buffer;
self.total_bytes = self.buffer.len();
let num_bytes = cmp::min(8, self.total_bytes);
- self.buffered_values = read_num_bytes!(u64, num_bytes, self.buffer.as_ref());
+ self.buffered_values = read_num_bytes::<u64>(num_bytes, self.buffer.as_ref());
self.byte_offset = 0;
self.bit_offset = 0;
}
@@ -624,7 +624,7 @@ impl BitReader {
// Advance byte_offset to next unread byte and read num_bytes
self.byte_offset += bytes_read;
- let v = read_num_bytes!(T, num_bytes, self.buffer.data()[self.byte_offset..]);
+ let v = read_num_bytes::<T>(num_bytes, &self.buffer.data()[self.byte_offset..]);
self.byte_offset += num_bytes;
// Reset buffered_values
@@ -675,7 +675,7 @@ impl BitReader {
fn reload_buffer_values(&mut self) {
let bytes_to_read = cmp::min(self.total_bytes - self.byte_offset, 8);
self.buffered_values =
- read_num_bytes!(u64, bytes_to_read, self.buffer.data()[self.byte_offset..]);
+ read_num_bytes::<u64>(bytes_to_read, &self.buffer.data()[self.byte_offset..]);
}
}