You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/08/09 10:33:12 UTC

[arrow-rs] branch master updated: refactor: Make read_num_bytes a function instead of a macro (#2364)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 56f7904ef refactor: Make read_num_bytes a function instead of a macro (#2364)
56f7904ef is described below

commit 56f7904ef89ea02b49e6b346d2456d79085f7502
Author: Markus Westerlind <mw...@influxdata.com>
AuthorDate: Tue Aug 9 12:33:07 2022 +0200

    refactor: Make read_num_bytes a function instead of a macro (#2364)
    
    * refactor: Make read_num_bytes a function instead of a macro
    
    Reduces duplication of this code (now it is just once per type)
    
    * chore: cargo clippy
---
 parquet/src/column/reader.rs      |  4 ++--
 parquet/src/data_type.rs          | 25 +++++--------------------
 parquet/src/encodings/decoding.rs |  2 +-
 parquet/src/encodings/levels.rs   |  4 ++--
 parquet/src/util/bit_util.rs      | 28 ++++++++++++++--------------
 5 files changed, 24 insertions(+), 39 deletions(-)

diff --git a/parquet/src/column/reader.rs b/parquet/src/column/reader.rs
index 8eee807c2..1432c72b5 100644
--- a/parquet/src/column/reader.rs
+++ b/parquet/src/column/reader.rs
@@ -28,7 +28,7 @@ use crate::column::reader::decoder::{
 use crate::data_type::*;
 use crate::errors::{ParquetError, Result};
 use crate::schema::types::ColumnDescPtr;
-use crate::util::bit_util::{ceil, num_required_bits};
+use crate::util::bit_util::{ceil, num_required_bits, read_num_bytes};
 use crate::util::memory::ByteBufferPtr;
 
 pub(crate) mod decoder;
@@ -520,7 +520,7 @@ fn parse_v1_level(
     match encoding {
         Encoding::RLE => {
             let i32_size = std::mem::size_of::<i32>();
-            let data_size = read_num_bytes!(i32, i32_size, buf.as_ref()) as usize;
+            let data_size = read_num_bytes::<i32>(i32_size, buf.as_ref()) as usize;
             Ok((i32_size + data_size, buf.range(i32_size, data_size)))
         }
         Encoding::BIT_PACKED => {
diff --git a/parquet/src/data_type.rs b/parquet/src/data_type.rs
index 005b35570..7870ca36a 100644
--- a/parquet/src/data_type.rs
+++ b/parquet/src/data_type.rs
@@ -565,7 +565,7 @@ impl AsBytes for str {
 
 pub(crate) mod private {
     use crate::encodings::decoding::PlainDecoderDetails;
-    use crate::util::bit_util::{BitReader, BitWriter};
+    use crate::util::bit_util::{read_num_bytes, BitReader, BitWriter};
     use crate::util::memory::ByteBufferPtr;
 
     use crate::basic::Type;
@@ -892,21 +892,6 @@ pub(crate) mod private {
         }
     }
 
-    // TODO - Why does macro importing fail?
-    /// Reads `$size` of bytes from `$src`, and reinterprets them as type `$ty`, in
-    /// little-endian order. `$ty` must implement the `Default` trait. Otherwise this won't
-    /// compile.
-    /// This is copied and modified from byteorder crate.
-    macro_rules! read_num_bytes {
-        ($ty:ty, $size:expr, $src:expr) => {{
-            assert!($size <= $src.len());
-            let mut buffer =
-                <$ty as $crate::util::bit_util::FromBytes>::Buffer::default();
-            buffer.as_mut()[..$size].copy_from_slice(&$src[..$size]);
-            <$ty>::from_ne_bytes(buffer)
-        }};
-    }
-
     impl ParquetValueType for super::ByteArray {
         const PHYSICAL_TYPE: Type = Type::BYTE_ARRAY;
 
@@ -946,9 +931,9 @@ pub(crate) mod private {
                 .as_mut()
                 .expect("set_data should have been called");
             let num_values = std::cmp::min(buffer.len(), decoder.num_values);
-            for i in 0..num_values {
+            for val_array in buffer.iter_mut().take(num_values) {
                 let len: usize =
-                    read_num_bytes!(u32, 4, data.start_from(decoder.start).as_ref())
+                    read_num_bytes::<u32>(4, data.start_from(decoder.start).as_ref())
                         as usize;
                 decoder.start += std::mem::size_of::<u32>();
 
@@ -956,7 +941,7 @@ pub(crate) mod private {
                     return Err(eof_err!("Not enough bytes to decode"));
                 }
 
-                let val: &mut Self = buffer[i].as_mut_any().downcast_mut().unwrap();
+                let val: &mut Self = val_array.as_mut_any().downcast_mut().unwrap();
 
                 val.set_data(data.range(decoder.start, len));
                 decoder.start += len;
@@ -975,7 +960,7 @@ pub(crate) mod private {
 
             for _ in 0..num_values {
                 let len: usize =
-                    read_num_bytes!(u32, 4, data.start_from(decoder.start).as_ref())
+                    read_num_bytes::<u32>(4, data.start_from(decoder.start).as_ref())
                         as usize;
                 decoder.start += std::mem::size_of::<u32>() + len;
             }
diff --git a/parquet/src/encodings/decoding.rs b/parquet/src/encodings/decoding.rs
index 3db050a0f..bb1e7137a 100644
--- a/parquet/src/encodings/decoding.rs
+++ b/parquet/src/encodings/decoding.rs
@@ -424,7 +424,7 @@ impl<T: DataType> Decoder<T> for RleValueDecoder<T> {
 
         // We still need to remove prefix of i32 from the stream.
         const I32_SIZE: usize = mem::size_of::<i32>();
-        let data_size = read_num_bytes!(i32, I32_SIZE, data.as_ref()) as usize;
+        let data_size = bit_util::read_num_bytes::<i32>(I32_SIZE, data.as_ref()) as usize;
         self.decoder = RleDecoder::new(1);
         self.decoder.set_data(data.range(I32_SIZE, data_size));
         self.values_left = num_values;
diff --git a/parquet/src/encodings/levels.rs b/parquet/src/encodings/levels.rs
index 40af135ff..95384926d 100644
--- a/parquet/src/encodings/levels.rs
+++ b/parquet/src/encodings/levels.rs
@@ -23,7 +23,7 @@ use crate::basic::Encoding;
 use crate::data_type::AsBytes;
 use crate::errors::Result;
 use crate::util::{
-    bit_util::{ceil, num_required_bits, BitReader, BitWriter},
+    bit_util::{ceil, num_required_bits, read_num_bytes, BitReader, BitWriter},
     memory::ByteBufferPtr,
 };
 
@@ -192,7 +192,7 @@ impl LevelDecoder {
             LevelDecoder::Rle(ref mut num_values, ref mut decoder) => {
                 *num_values = Some(num_buffered_values);
                 let i32_size = mem::size_of::<i32>();
-                let data_size = read_num_bytes!(i32, i32_size, data.as_ref()) as usize;
+                let data_size = read_num_bytes::<i32>(i32_size, data.as_ref()) as usize;
                 decoder.set_data(data.range(i32_size, data_size));
                 i32_size + data_size
             }
diff --git a/parquet/src/util/bit_util.rs b/parquet/src/util/bit_util.rs
index c70384e7a..5d76a8dbf 100644
--- a/parquet/src/util/bit_util.rs
+++ b/parquet/src/util/bit_util.rs
@@ -88,17 +88,17 @@ impl FromBytes for bool {
 
 from_le_bytes! { u8, u16, u32, u64, i8, i16, i32, i64, f32, f64 }
 
-/// Reads `$size` of bytes from `$src`, and reinterprets them as type `$ty`, in
-/// little-endian order. `$ty` must implement the `Default` trait. Otherwise this won't
-/// compile.
+/// Reads `size` of bytes from `src`, and reinterprets them as type `ty`, in
+/// little-endian order.
 /// This is copied and modified from byteorder crate.
-macro_rules! read_num_bytes {
-    ($ty:ty, $size:expr, $src:expr) => {{
-        assert!($size <= $src.len());
-        let mut buffer = <$ty as $crate::util::bit_util::FromBytes>::Buffer::default();
-        buffer.as_mut()[..$size].copy_from_slice(&$src[..$size]);
-        <$ty>::from_ne_bytes(buffer)
-    }};
+pub(crate) fn read_num_bytes<T>(size: usize, src: &[u8]) -> T
+where
+    T: FromBytes,
+{
+    assert!(size <= src.len());
+    let mut buffer = <T as FromBytes>::Buffer::default();
+    buffer.as_mut()[..size].copy_from_slice(&src[..size]);
+    <T>::from_ne_bytes(buffer)
 }
 
 /// Returns the ceil of value/divisor.
@@ -341,7 +341,7 @@ impl BitReader {
     pub fn new(buffer: ByteBufferPtr) -> Self {
         let total_bytes = buffer.len();
         let num_bytes = cmp::min(8, total_bytes);
-        let buffered_values = read_num_bytes!(u64, num_bytes, buffer.as_ref());
+        let buffered_values = read_num_bytes::<u64>(num_bytes, buffer.as_ref());
         BitReader {
             buffer,
             buffered_values,
@@ -355,7 +355,7 @@ impl BitReader {
         self.buffer = buffer;
         self.total_bytes = self.buffer.len();
         let num_bytes = cmp::min(8, self.total_bytes);
-        self.buffered_values = read_num_bytes!(u64, num_bytes, self.buffer.as_ref());
+        self.buffered_values = read_num_bytes::<u64>(num_bytes, self.buffer.as_ref());
         self.byte_offset = 0;
         self.bit_offset = 0;
     }
@@ -624,7 +624,7 @@ impl BitReader {
 
         // Advance byte_offset to next unread byte and read num_bytes
         self.byte_offset += bytes_read;
-        let v = read_num_bytes!(T, num_bytes, self.buffer.data()[self.byte_offset..]);
+        let v = read_num_bytes::<T>(num_bytes, &self.buffer.data()[self.byte_offset..]);
         self.byte_offset += num_bytes;
 
         // Reset buffered_values
@@ -675,7 +675,7 @@ impl BitReader {
     fn reload_buffer_values(&mut self) {
         let bytes_to_read = cmp::min(self.total_bytes - self.byte_offset, 8);
         self.buffered_values =
-            read_num_bytes!(u64, bytes_to_read, self.buffer.data()[self.byte_offset..]);
+            read_num_bytes::<u64>(bytes_to_read, &self.buffer.data()[self.byte_offset..]);
     }
 }