You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by su...@apache.org on 2021/08/10 00:58:08 UTC
[arrow-rs] branch master updated: Write FixedLenByteArray stats for FixedLenByteArray columns (not ByteArray stats) (#662)
This is an automated email from the ASF dual-hosted git repository.
sunchao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new fa5acd9 Write FixedLenByteArray stats for FixedLenByteArray columns (not ByteArray stats) (#662)
fa5acd9 is described below
commit fa5acd971c973161f17e69d5c6b50d6e77c7da03
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Mon Aug 9 20:58:03 2021 -0400
Write FixedLenByteArray stats for FixedLenByteArray columns (not ByteArray stats) (#662)
---
parquet/src/column/writer.rs | 31 ++++++++++++++++++++++++-------
1 file changed, 24 insertions(+), 7 deletions(-)
diff --git a/parquet/src/column/writer.rs b/parquet/src/column/writer.rs
index af76c84..0da9439 100644
--- a/parquet/src/column/writer.rs
+++ b/parquet/src/column/writer.rs
@@ -924,11 +924,28 @@ impl<T: DataType> ColumnWriterImpl<T> {
Type::INT96 => gen_stats_section!(Int96, int96, min, max, distinct, nulls),
Type::FLOAT => gen_stats_section!(f32, float, min, max, distinct, nulls),
Type::DOUBLE => gen_stats_section!(f64, double, min, max, distinct, nulls),
- Type::BYTE_ARRAY | Type::FIXED_LEN_BYTE_ARRAY => {
+ Type::BYTE_ARRAY => {
let min = min.as_ref().map(|v| ByteArray::from(v.as_bytes().to_vec()));
let max = max.as_ref().map(|v| ByteArray::from(v.as_bytes().to_vec()));
Statistics::byte_array(min, max, distinct, nulls, false)
}
+ Type::FIXED_LEN_BYTE_ARRAY => {
+ let min = min
+ .as_ref()
+ .map(|v| ByteArray::from(v.as_bytes().to_vec()))
+ .map(|ba| {
+ let ba: FixedLenByteArray = ba.into();
+ ba
+ });
+ let max = max
+ .as_ref()
+ .map(|v| ByteArray::from(v.as_bytes().to_vec()))
+ .map(|ba| {
+ let ba: FixedLenByteArray = ba.into();
+ ba
+ });
+ Statistics::fixed_len_byte_array(min, max, distinct, nulls, false)
+ }
}
}
@@ -1797,13 +1814,13 @@ mod tests {
let stats = statistics_roundtrip::<FixedLenByteArrayType>(&input);
assert!(stats.has_min_max_set());
- // should it be FixedLenByteArray?
- // https://github.com/apache/arrow-rs/issues/660
- if let Statistics::ByteArray(stats) = stats {
- assert_eq!(stats.min(), &ByteArray::from("aaw "));
- assert_eq!(stats.max(), &ByteArray::from("zz "));
+ if let Statistics::FixedLenByteArray(stats) = stats {
+ let expected_min: FixedLenByteArray = ByteArray::from("aaw ").into();
+ assert_eq!(stats.min(), &expected_min);
+ let expected_max: FixedLenByteArray = ByteArray::from("zz ").into();
+ assert_eq!(stats.max(), &expected_max);
} else {
- panic!("expecting Statistics::ByteArray, got {:?}", stats);
+ panic!("expecting Statistics::FixedLenByteArray, got {:?}", stats);
}
}