You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/06/28 18:53:56 UTC
[arrow-rs] branch master updated: Fix empty offset index for all null columns (#4459) (#4460)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 554aebe3b Fix empty offset index for all null columns (#4459) (#4460)
554aebe3b is described below
commit 554aebe3b523737b3aaf6109846f4735110b26f8
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Wed Jun 28 19:53:51 2023 +0100
Fix empty offset index for all null columns (#4459) (#4460)
---
parquet/src/arrow/arrow_writer/mod.rs | 27 +++++++++++++++++++++++++++
parquet/src/column/writer/mod.rs | 7 +++----
2 files changed, 30 insertions(+), 4 deletions(-)
diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs
index 0aca77f5b..ccec4ffb2 100644
--- a/parquet/src/arrow/arrow_writer/mod.rs
+++ b/parquet/src/arrow/arrow_writer/mod.rs
@@ -790,6 +790,7 @@ mod tests {
use arrow::util::pretty::pretty_format_batches;
use arrow::{array::*, buffer::Buffer};
use arrow_array::RecordBatch;
+ use arrow_buffer::NullBuffer;
use arrow_schema::Fields;
use crate::basic::Encoding;
@@ -2609,4 +2610,30 @@ mod tests {
writer.close().unwrap();
}
+
+ #[test]
+ fn test_writer_all_null() {
+ let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
+ let b = Int32Array::new(vec![0; 5].into(), Some(NullBuffer::new_null(5)));
+ let batch = RecordBatch::try_from_iter(vec![
+ ("a", Arc::new(a) as ArrayRef),
+ ("b", Arc::new(b) as ArrayRef),
+ ])
+ .unwrap();
+
+ let mut buf = Vec::with_capacity(1024);
+ let mut writer = ArrowWriter::try_new(&mut buf, batch.schema(), None).unwrap();
+ writer.write(&batch).unwrap();
+ writer.close().unwrap();
+
+ let bytes = Bytes::from(buf);
+ let options = ReadOptionsBuilder::new().with_page_index().build();
+ let reader = SerializedFileReader::new_with_options(bytes, options).unwrap();
+ let index = reader.metadata().offset_index().unwrap();
+
+ assert_eq!(index.len(), 1);
+ assert_eq!(index[0].len(), 2); // 2 columns
+ assert_eq!(index[0][0].len(), 1); // 1 page
+ assert_eq!(index[0][1].len(), 1); // 1 page
+ }
}
diff --git a/parquet/src/column/writer/mod.rs b/parquet/src/column/writer/mod.rs
index 4aefef98f..1cacfe793 100644
--- a/parquet/src/column/writer/mod.rs
+++ b/parquet/src/column/writer/mod.rs
@@ -690,11 +690,10 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> {
}
}
}
-
- // update the offset index
- self.offset_index_builder
- .append_row_count(self.page_metrics.num_buffered_rows as i64);
}
+ // update the offset index
+ self.offset_index_builder
+ .append_row_count(self.page_metrics.num_buffered_rows as i64);
}
fn truncate_min_value(&self, data: &[u8]) -> Vec<u8> {