You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/02/07 15:03:35 UTC

[arrow-rs] branch master updated: Restrict Decoder to compatible types (#1276) (#1277)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 3e7b6c4  Restrict Decoder to compatible types (#1276) (#1277)
3e7b6c4 is described below

commit 3e7b6c4232e6985cddd62a7917607462e5156063
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Mon Feb 7 15:03:30 2022 +0000

    Restrict Decoder to compatible types (#1276) (#1277)
---
 parquet/src/data_type.rs          |   1 +
 parquet/src/encodings/decoding.rs | 142 +++++++++++++++++++++++++++++++++-----
 2 files changed, 126 insertions(+), 17 deletions(-)

diff --git a/parquet/src/data_type.rs b/parquet/src/data_type.rs
index 1a67c9d..fde46d5 100644
--- a/parquet/src/data_type.rs
+++ b/parquet/src/data_type.rs
@@ -599,6 +599,7 @@ pub(crate) mod private {
         + super::SliceAsBytes
         + PartialOrd
         + Send
+        + crate::encodings::decoding::private::GetDecoder
     {
         /// Encode the value directly from a higher level encoder
         fn encode<W: std::io::Write>(
diff --git a/parquet/src/encodings/decoding.rs b/parquet/src/encodings/decoding.rs
index b3f98d1..f044dd2 100644
--- a/parquet/src/encodings/decoding.rs
+++ b/parquet/src/encodings/decoding.rs
@@ -22,7 +22,7 @@ use std::{cmp, marker::PhantomData, mem};
 use super::rle::RleDecoder;
 
 use crate::basic::*;
-use crate::data_type::private::*;
+use crate::data_type::private::ParquetValueType;
 use crate::data_type::*;
 use crate::errors::{ParquetError, Result};
 use crate::schema::types::ColumnDescPtr;
@@ -31,6 +31,111 @@ use crate::util::{
     memory::{ByteBuffer, ByteBufferPtr},
 };
 
+pub(crate) mod private {
+    use super::*;
+
+    /// A trait that allows getting a [`Decoder`] implementation for a [`DataType`] with
+    /// the corresponding [`ParquetValueType`]. This is necessary to support
+    /// [`Decoder`] implementations that may not be applicable to every [`DataType`]
+    /// and, by extension, every [`ParquetValueType`].
+    pub trait GetDecoder {
+        fn get_decoder<T: DataType<T = Self>>(
+            descr: ColumnDescPtr,
+            encoding: Encoding,
+        ) -> Result<Box<dyn Decoder<T>>> {
+            get_decoder_default(descr, encoding)
+        }
+    }
+
+    fn get_decoder_default<T: DataType>(
+        descr: ColumnDescPtr,
+        encoding: Encoding,
+    ) -> Result<Box<dyn Decoder<T>>> {
+        match encoding {
+            Encoding::PLAIN => Ok(Box::new(PlainDecoder::new(descr.type_length()))),
+            Encoding::RLE_DICTIONARY | Encoding::PLAIN_DICTIONARY => Err(general_err!(
+                "Cannot initialize this encoding through this function"
+            )),
+            Encoding::RLE
+            | Encoding::DELTA_BINARY_PACKED
+            | Encoding::DELTA_BYTE_ARRAY
+            | Encoding::DELTA_LENGTH_BYTE_ARRAY => Err(general_err!(
+                "Encoding {} is not supported for type",
+                encoding
+            )),
+            e => Err(nyi_err!("Encoding {} is not supported", e)),
+        }
+    }
+
+    impl GetDecoder for bool {
+        fn get_decoder<T: DataType<T = Self>>(
+            descr: ColumnDescPtr,
+            encoding: Encoding,
+        ) -> Result<Box<dyn Decoder<T>>> {
+            match encoding {
+                Encoding::RLE => Ok(Box::new(RleValueDecoder::new())),
+                _ => get_decoder_default(descr, encoding),
+            }
+        }
+    }
+
+    impl GetDecoder for i32 {
+        fn get_decoder<T: DataType<T = Self>>(
+            descr: ColumnDescPtr,
+            encoding: Encoding,
+        ) -> Result<Box<dyn Decoder<T>>> {
+            match encoding {
+                Encoding::DELTA_BINARY_PACKED => Ok(Box::new(DeltaBitPackDecoder::new())),
+                _ => get_decoder_default(descr, encoding),
+            }
+        }
+    }
+
+    impl GetDecoder for i64 {
+        fn get_decoder<T: DataType<T = Self>>(
+            descr: ColumnDescPtr,
+            encoding: Encoding,
+        ) -> Result<Box<dyn Decoder<T>>> {
+            match encoding {
+                Encoding::DELTA_BINARY_PACKED => Ok(Box::new(DeltaBitPackDecoder::new())),
+                _ => get_decoder_default(descr, encoding),
+            }
+        }
+    }
+
+    impl GetDecoder for f32 {}
+    impl GetDecoder for f64 {}
+
+    impl GetDecoder for ByteArray {
+        fn get_decoder<T: DataType<T = Self>>(
+            descr: ColumnDescPtr,
+            encoding: Encoding,
+        ) -> Result<Box<dyn Decoder<T>>> {
+            match encoding {
+                Encoding::DELTA_BYTE_ARRAY => Ok(Box::new(DeltaByteArrayDecoder::new())),
+                Encoding::DELTA_LENGTH_BYTE_ARRAY => {
+                    Ok(Box::new(DeltaLengthByteArrayDecoder::new()))
+                }
+                _ => get_decoder_default(descr, encoding),
+            }
+        }
+    }
+
+    impl GetDecoder for FixedLenByteArray {
+        fn get_decoder<T: DataType<T = Self>>(
+            descr: ColumnDescPtr,
+            encoding: Encoding,
+        ) -> Result<Box<dyn Decoder<T>>> {
+            match encoding {
+                Encoding::DELTA_BYTE_ARRAY => Ok(Box::new(DeltaByteArrayDecoder::new())),
+                _ => get_decoder_default(descr, encoding),
+            }
+        }
+    }
+
+    impl GetDecoder for Int96 {}
+}
+
 // ----------------------------------------------------------------------
 // Decoders
 
@@ -109,20 +214,8 @@ pub fn get_decoder<T: DataType>(
     descr: ColumnDescPtr,
     encoding: Encoding,
 ) -> Result<Box<dyn Decoder<T>>> {
-    let decoder: Box<dyn Decoder<T>> = match encoding {
-        Encoding::PLAIN => Box::new(PlainDecoder::new(descr.type_length())),
-        Encoding::RLE_DICTIONARY | Encoding::PLAIN_DICTIONARY => {
-            return Err(general_err!(
-                "Cannot initialize this encoding through this function"
-            ));
-        }
-        Encoding::RLE => Box::new(RleValueDecoder::new()),
-        Encoding::DELTA_BINARY_PACKED => Box::new(DeltaBitPackDecoder::new()),
-        Encoding::DELTA_LENGTH_BYTE_ARRAY => Box::new(DeltaLengthByteArrayDecoder::new()),
-        Encoding::DELTA_BYTE_ARRAY => Box::new(DeltaByteArrayDecoder::new()),
-        e => return Err(nyi_err!("Encoding {} is not supported", e)),
-    };
-    Ok(decoder)
+    use self::private::GetDecoder;
+    T::T::get_decoder(descr, encoding)
 }
 
 // ----------------------------------------------------------------------
@@ -817,8 +910,11 @@ mod tests {
         // supported encodings
         create_and_check_decoder::<Int32Type>(Encoding::PLAIN, None);
         create_and_check_decoder::<Int32Type>(Encoding::DELTA_BINARY_PACKED, None);
-        create_and_check_decoder::<Int32Type>(Encoding::DELTA_LENGTH_BYTE_ARRAY, None);
-        create_and_check_decoder::<Int32Type>(Encoding::DELTA_BYTE_ARRAY, None);
+        create_and_check_decoder::<ByteArrayType>(
+            Encoding::DELTA_LENGTH_BYTE_ARRAY,
+            None,
+        );
+        create_and_check_decoder::<ByteArrayType>(Encoding::DELTA_BYTE_ARRAY, None);
         create_and_check_decoder::<BoolType>(Encoding::RLE, None);
 
         // error when initializing
@@ -834,6 +930,18 @@ mod tests {
                 "Cannot initialize this encoding through this function"
             )),
         );
+        create_and_check_decoder::<Int32Type>(
+            Encoding::DELTA_LENGTH_BYTE_ARRAY,
+            Some(general_err!(
+                "Encoding DELTA_LENGTH_BYTE_ARRAY is not supported for type"
+            )),
+        );
+        create_and_check_decoder::<Int32Type>(
+            Encoding::DELTA_BYTE_ARRAY,
+            Some(general_err!(
+                "Encoding DELTA_BYTE_ARRAY is not supported for type"
+            )),
+        );
 
         // unsupported
         create_and_check_decoder::<Int32Type>(