You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/04/28 17:43:42 UTC

[arrow-rs] branch master updated: Remove Type from NativeIndex (#4146)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 5e5561e61 Remove Type from NativeIndex (#4146)
5e5561e61 is described below

commit 5e5561e619739f383a54e34292d09998a693ad4f
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Fri Apr 28 13:43:36 2023 -0400

    Remove Type from NativeIndex (#4146)
    
    * Remove Type from NativeIndex
    
    * Review feedback
---
 parquet/src/file/page_index/index.rs        | 16 ++++++----------
 parquet/src/file/page_index/index_reader.rs | 18 ++++++++----------
 2 files changed, 14 insertions(+), 20 deletions(-)

diff --git a/parquet/src/file/page_index/index.rs b/parquet/src/file/page_index/index.rs
index 8f9cb6629..f3a09046a 100644
--- a/parquet/src/file/page_index/index.rs
+++ b/parquet/src/file/page_index/index.rs
@@ -19,7 +19,7 @@
 
 use crate::basic::Type;
 use crate::data_type::private::ParquetValueType;
-use crate::data_type::{ByteArray, Int96};
+use crate::data_type::{ByteArray, FixedLenByteArray, Int96};
 use crate::errors::ParquetError;
 use crate::format::{BoundaryOrder, ColumnIndex};
 use crate::util::bit_util::from_le_slice;
@@ -73,7 +73,7 @@ pub enum Index {
     FLOAT(NativeIndex<f32>),
     DOUBLE(NativeIndex<f64>),
     BYTE_ARRAY(NativeIndex<ByteArray>),
-    FIXED_LEN_BYTE_ARRAY(NativeIndex<ByteArray>),
+    FIXED_LEN_BYTE_ARRAY(NativeIndex<FixedLenByteArray>),
 }
 
 impl Index {
@@ -103,11 +103,9 @@ impl Index {
     }
 }
 
-/// Stores the [`PageIndex`] for each page of a column with [`Type`]
+/// Stores the [`PageIndex`] for each page of a column
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub struct NativeIndex<T: ParquetValueType> {
-    /// The physical type of this column
-    pub physical_type: Type,
     /// The indexes, one item per page
     pub indexes: Vec<PageIndex<T>>,
     /// If the min/max elements are ordered, and if so in which
@@ -118,11 +116,10 @@ pub struct NativeIndex<T: ParquetValueType> {
 }
 
 impl<T: ParquetValueType> NativeIndex<T> {
+    pub const PHYSICAL_TYPE: Type = T::PHYSICAL_TYPE;
+
     /// Creates a new [`NativeIndex`]
-    pub(crate) fn try_new(
-        index: ColumnIndex,
-        physical_type: Type,
-    ) -> Result<Self, ParquetError> {
+    pub(crate) fn try_new(index: ColumnIndex) -> Result<Self, ParquetError> {
         let len = index.min_values.len();
 
         let null_counts = index
@@ -153,7 +150,6 @@ impl<T: ParquetValueType> NativeIndex<T> {
             .collect::<Result<Vec<_>, ParquetError>>()?;
 
         Ok(Self {
-            physical_type,
             indexes,
             boundary_order: index.boundary_order,
         })
diff --git a/parquet/src/file/page_index/index_reader.rs b/parquet/src/file/page_index/index_reader.rs
index 3ae37cf87..27e9a6260 100644
--- a/parquet/src/file/page_index/index_reader.rs
+++ b/parquet/src/file/page_index/index_reader.rs
@@ -174,17 +174,15 @@ pub(crate) fn deserialize_column_index(
     let index = ColumnIndex::read_from_in_protocol(&mut prot)?;
 
     let index = match column_type {
-        Type::BOOLEAN => {
-            Index::BOOLEAN(NativeIndex::<bool>::try_new(index, column_type)?)
-        }
-        Type::INT32 => Index::INT32(NativeIndex::<i32>::try_new(index, column_type)?),
-        Type::INT64 => Index::INT64(NativeIndex::<i64>::try_new(index, column_type)?),
-        Type::INT96 => Index::INT96(NativeIndex::<Int96>::try_new(index, column_type)?),
-        Type::FLOAT => Index::FLOAT(NativeIndex::<f32>::try_new(index, column_type)?),
-        Type::DOUBLE => Index::DOUBLE(NativeIndex::<f64>::try_new(index, column_type)?),
-        Type::BYTE_ARRAY => Index::BYTE_ARRAY(NativeIndex::try_new(index, column_type)?),
+        Type::BOOLEAN => Index::BOOLEAN(NativeIndex::<bool>::try_new(index)?),
+        Type::INT32 => Index::INT32(NativeIndex::<i32>::try_new(index)?),
+        Type::INT64 => Index::INT64(NativeIndex::<i64>::try_new(index)?),
+        Type::INT96 => Index::INT96(NativeIndex::<Int96>::try_new(index)?),
+        Type::FLOAT => Index::FLOAT(NativeIndex::<f32>::try_new(index)?),
+        Type::DOUBLE => Index::DOUBLE(NativeIndex::<f64>::try_new(index)?),
+        Type::BYTE_ARRAY => Index::BYTE_ARRAY(NativeIndex::try_new(index)?),
         Type::FIXED_LEN_BYTE_ARRAY => {
-            Index::FIXED_LEN_BYTE_ARRAY(NativeIndex::try_new(index, column_type)?)
+            Index::FIXED_LEN_BYTE_ARRAY(NativeIndex::try_new(index)?)
         }
     };