You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/07/12 23:26:09 UTC
[arrow-rs] branch master updated: Remove null count from write_batch_with_statistics (#2047)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 88e0de5d6 Remove null count from write_batch_with_statistics (#2047)
88e0de5d6 is described below
commit 88e0de5d661def7d7a45e4bc51314a366d017dda
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Tue Jul 12 19:26:04 2022 -0400
Remove null count from write_batch_with_statistics (#2047)
* Remove null count from write_batch_with_statistics
* Fix test
---
parquet/src/column/writer.rs | 13 ++-----------
1 file changed, 2 insertions(+), 11 deletions(-)
diff --git a/parquet/src/column/writer.rs b/parquet/src/column/writer.rs
index 5def72135..1fc5207f6 100644
--- a/parquet/src/column/writer.rs
+++ b/parquet/src/column/writer.rs
@@ -292,7 +292,6 @@ impl<'a, T: DataType> ColumnWriterImpl<'a, T> {
rep_levels: Option<&[i16]>,
min: Option<&T::T>,
max: Option<&T::T>,
- null_count: Option<u64>,
distinct_count: Option<u64>,
) -> Result<usize> {
// We check for DataPage limits only after we have inserted the values. If a user
@@ -346,10 +345,6 @@ impl<'a, T: DataType> ColumnWriterImpl<'a, T> {
self.column_distinct_count = None;
}
- if let Some(nulls) = null_count {
- self.num_column_nulls += nulls;
- }
-
let mut values_offset = 0;
let mut levels_offset = 0;
for _ in 0..num_batches {
@@ -389,7 +384,7 @@ impl<'a, T: DataType> ColumnWriterImpl<'a, T> {
def_levels: Option<&[i16]>,
rep_levels: Option<&[i16]>,
) -> Result<usize> {
- self.write_batch_internal(values, def_levels, rep_levels, None, None, None, None)
+ self.write_batch_internal(values, def_levels, rep_levels, None, None, None)
}
/// Writer may optionally provide pre-calculated statistics for use when computing
@@ -406,7 +401,6 @@ impl<'a, T: DataType> ColumnWriterImpl<'a, T> {
rep_levels: Option<&[i16]>,
min: Option<&T::T>,
max: Option<&T::T>,
- nulls_count: Option<u64>,
distinct_count: Option<u64>,
) -> Result<usize> {
self.write_batch_internal(
@@ -415,7 +409,6 @@ impl<'a, T: DataType> ColumnWriterImpl<'a, T> {
rep_levels,
min,
max,
- nulls_count,
distinct_count,
)
}
@@ -1726,7 +1719,6 @@ mod tests {
None,
Some(&-17),
Some(&9000),
- Some(21),
Some(55),
)
.unwrap();
@@ -1745,7 +1737,7 @@ mod tests {
assert_eq!(metadata.dictionary_page_offset(), Some(0));
if let Some(stats) = metadata.statistics() {
assert!(stats.has_min_max_set());
- assert_eq!(stats.null_count(), 21);
+ assert_eq!(stats.null_count(), 0);
assert_eq!(stats.distinct_count().unwrap_or(0), 55);
if let Statistics::Int32(stats) = stats {
assert_eq!(stats.min(), &-17);
@@ -1774,7 +1766,6 @@ mod tests {
None,
Some(&5),
Some(&7),
- Some(0),
Some(3),
)
.unwrap();