You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain-text copy.
Posted to commits@arrow.apache.org by su...@apache.org on 2021/08/10 00:58:08 UTC

[arrow-rs] branch master updated: Write FixedLenByteArray stats for FixedLenByteArray columns (not ByteArray stats) (#662)

This is an automated email from the ASF dual-hosted git repository.

sunchao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new fa5acd9  Write FixedLenByteArray stats for FixedLenByteArray columns (not ByteArray stats) (#662)
fa5acd9 is described below

commit fa5acd971c973161f17e69d5c6b50d6e77c7da03
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Mon Aug 9 20:58:03 2021 -0400

    Write FixedLenByteArray stats for FixedLenByteArray columns (not ByteArray stats) (#662)
---
 parquet/src/column/writer.rs | 31 ++++++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/parquet/src/column/writer.rs b/parquet/src/column/writer.rs
index af76c84..0da9439 100644
--- a/parquet/src/column/writer.rs
+++ b/parquet/src/column/writer.rs
@@ -924,11 +924,28 @@ impl<T: DataType> ColumnWriterImpl<T> {
             Type::INT96 => gen_stats_section!(Int96, int96, min, max, distinct, nulls),
             Type::FLOAT => gen_stats_section!(f32, float, min, max, distinct, nulls),
             Type::DOUBLE => gen_stats_section!(f64, double, min, max, distinct, nulls),
-            Type::BYTE_ARRAY | Type::FIXED_LEN_BYTE_ARRAY => {
+            Type::BYTE_ARRAY => {
                 let min = min.as_ref().map(|v| ByteArray::from(v.as_bytes().to_vec()));
                 let max = max.as_ref().map(|v| ByteArray::from(v.as_bytes().to_vec()));
                 Statistics::byte_array(min, max, distinct, nulls, false)
             }
+            Type::FIXED_LEN_BYTE_ARRAY => {
+                let min = min
+                    .as_ref()
+                    .map(|v| ByteArray::from(v.as_bytes().to_vec()))
+                    .map(|ba| {
+                        let ba: FixedLenByteArray = ba.into();
+                        ba
+                    });
+                let max = max
+                    .as_ref()
+                    .map(|v| ByteArray::from(v.as_bytes().to_vec()))
+                    .map(|ba| {
+                        let ba: FixedLenByteArray = ba.into();
+                        ba
+                    });
+                Statistics::fixed_len_byte_array(min, max, distinct, nulls, false)
+            }
         }
     }
 
@@ -1797,13 +1814,13 @@ mod tests {
 
         let stats = statistics_roundtrip::<FixedLenByteArrayType>(&input);
         assert!(stats.has_min_max_set());
-        // should it be FixedLenByteArray?
-        // https://github.com/apache/arrow-rs/issues/660
-        if let Statistics::ByteArray(stats) = stats {
-            assert_eq!(stats.min(), &ByteArray::from("aaw  "));
-            assert_eq!(stats.max(), &ByteArray::from("zz   "));
+        if let Statistics::FixedLenByteArray(stats) = stats {
+            let expected_min: FixedLenByteArray = ByteArray::from("aaw  ").into();
+            assert_eq!(stats.min(), &expected_min);
+            let expected_max: FixedLenByteArray = ByteArray::from("zz   ").into();
+            assert_eq!(stats.max(), &expected_max);
         } else {
-            panic!("expecting Statistics::ByteArray, got {:?}", stats);
+            panic!("expecting Statistics::FixedLenByteArray, got {:?}", stats);
         }
     }