You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/07/12 23:26:09 UTC

[arrow-rs] branch master updated: Remove null count from write_batch_with_statistics (#2047)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 88e0de5d6 Remove null count from write_batch_with_statistics (#2047)
88e0de5d6 is described below

commit 88e0de5d661def7d7a45e4bc51314a366d017dda
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Tue Jul 12 19:26:04 2022 -0400

    Remove null count from write_batch_with_statistics (#2047)
    
    * Remove null count from write_batch_with_statistics
    
    * Fix test
---
 parquet/src/column/writer.rs | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/parquet/src/column/writer.rs b/parquet/src/column/writer.rs
index 5def72135..1fc5207f6 100644
--- a/parquet/src/column/writer.rs
+++ b/parquet/src/column/writer.rs
@@ -292,7 +292,6 @@ impl<'a, T: DataType> ColumnWriterImpl<'a, T> {
         rep_levels: Option<&[i16]>,
         min: Option<&T::T>,
         max: Option<&T::T>,
-        null_count: Option<u64>,
         distinct_count: Option<u64>,
     ) -> Result<usize> {
         // We check for DataPage limits only after we have inserted the values. If a user
@@ -346,10 +345,6 @@ impl<'a, T: DataType> ColumnWriterImpl<'a, T> {
             self.column_distinct_count = None;
         }
 
-        if let Some(nulls) = null_count {
-            self.num_column_nulls += nulls;
-        }
-
         let mut values_offset = 0;
         let mut levels_offset = 0;
         for _ in 0..num_batches {
@@ -389,7 +384,7 @@ impl<'a, T: DataType> ColumnWriterImpl<'a, T> {
         def_levels: Option<&[i16]>,
         rep_levels: Option<&[i16]>,
     ) -> Result<usize> {
-        self.write_batch_internal(values, def_levels, rep_levels, None, None, None, None)
+        self.write_batch_internal(values, def_levels, rep_levels, None, None, None)
     }
 
     /// Writer may optionally provide pre-calculated statistics for use when computing
@@ -406,7 +401,6 @@ impl<'a, T: DataType> ColumnWriterImpl<'a, T> {
         rep_levels: Option<&[i16]>,
         min: Option<&T::T>,
         max: Option<&T::T>,
-        nulls_count: Option<u64>,
         distinct_count: Option<u64>,
     ) -> Result<usize> {
         self.write_batch_internal(
@@ -415,7 +409,6 @@ impl<'a, T: DataType> ColumnWriterImpl<'a, T> {
             rep_levels,
             min,
             max,
-            nulls_count,
             distinct_count,
         )
     }
@@ -1726,7 +1719,6 @@ mod tests {
                 None,
                 Some(&-17),
                 Some(&9000),
-                Some(21),
                 Some(55),
             )
             .unwrap();
@@ -1745,7 +1737,7 @@ mod tests {
         assert_eq!(metadata.dictionary_page_offset(), Some(0));
         if let Some(stats) = metadata.statistics() {
             assert!(stats.has_min_max_set());
-            assert_eq!(stats.null_count(), 21);
+            assert_eq!(stats.null_count(), 0);
             assert_eq!(stats.distinct_count().unwrap_or(0), 55);
             if let Statistics::Int32(stats) = stats {
                 assert_eq!(stats.min(), &-17);
@@ -1774,7 +1766,6 @@ mod tests {
                 None,
                 Some(&5),
                 Some(&7),
-                Some(0),
                 Some(3),
             )
             .unwrap();