You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by vi...@apache.org on 2022/05/03 16:14:19 UTC
[arrow-rs] branch master updated: Do not assume dictionaries exist in footer (#1631)
This is an automated email from the ASF dual-hosted git repository.
viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 8f24c45fb Do not assume dictionaries exist in footer (#1631)
8f24c45fb is described below
commit 8f24c45fbf944486b2f9fd921eeede0725b4316b
Author: Peter C. Jentsch <pj...@uwaterloo.ca>
AuthorDate: Tue May 3 09:14:13 2022 -0700
Do not assume dictionaries exist in footer (#1631)
* do not assume the footer contains dictionaries; fixes issue #1335
* fix cargo fmt and clippy errors
---
arrow/src/ipc/reader.rs | 74 ++++++++++++++++++++++++++++---------------------
1 file changed, 42 insertions(+), 32 deletions(-)
diff --git a/arrow/src/ipc/reader.rs b/arrow/src/ipc/reader.rs
index f3e46e27f..4b050229d 100644
--- a/arrow/src/ipc/reader.rs
+++ b/arrow/src/ipc/reader.rs
@@ -651,43 +651,53 @@ impl<R: Read + Seek> FileReader<R> {
// Create an array of optional dictionary value arrays, one per field.
let mut dictionaries_by_field = vec![None; schema.all_fields().len()];
- for block in footer.dictionaries().unwrap() {
- // read length from end of offset
- let mut message_size: [u8; 4] = [0; 4];
- reader.seek(SeekFrom::Start(block.offset() as u64))?;
- reader.read_exact(&mut message_size)?;
- if message_size == CONTINUATION_MARKER {
+ if let Some(dictionaries) = footer.dictionaries() {
+ for block in dictionaries {
+ // read length from end of offset
+ let mut message_size: [u8; 4] = [0; 4];
+ reader.seek(SeekFrom::Start(block.offset() as u64))?;
reader.read_exact(&mut message_size)?;
- }
- let footer_len = i32::from_le_bytes(message_size);
- let mut block_data = vec![0; footer_len as usize];
-
- reader.read_exact(&mut block_data)?;
-
- let message = ipc::root_as_message(&block_data[..]).map_err(|err| {
- ArrowError::IoError(format!("Unable to get root as message: {:?}", err))
- })?;
+ if message_size == CONTINUATION_MARKER {
+ reader.read_exact(&mut message_size)?;
+ }
+ let footer_len = i32::from_le_bytes(message_size);
+ let mut block_data = vec![0; footer_len as usize];
- match message.header_type() {
- ipc::MessageHeader::DictionaryBatch => {
- let batch = message.header_as_dictionary_batch().unwrap();
+ reader.read_exact(&mut block_data)?;
- // read the block that makes up the dictionary batch into a buffer
- let mut buf = vec![0; block.bodyLength() as usize];
- reader.seek(SeekFrom::Start(
- block.offset() as u64 + block.metaDataLength() as u64,
- ))?;
- reader.read_exact(&mut buf)?;
+ let message = ipc::root_as_message(&block_data[..]).map_err(|err| {
+ ArrowError::IoError(format!(
+ "Unable to get root as message: {:?}",
+ err
+ ))
+ })?;
- read_dictionary(&buf, batch, &schema, &mut dictionaries_by_field)?;
- }
- t => {
- return Err(ArrowError::IoError(format!(
- "Expecting DictionaryBatch in dictionary blocks, found {:?}.",
- t
- )));
+ match message.header_type() {
+ ipc::MessageHeader::DictionaryBatch => {
+ let batch = message.header_as_dictionary_batch().unwrap();
+
+ // read the block that makes up the dictionary batch into a buffer
+ let mut buf = vec![0; block.bodyLength() as usize];
+ reader.seek(SeekFrom::Start(
+ block.offset() as u64 + block.metaDataLength() as u64,
+ ))?;
+ reader.read_exact(&mut buf)?;
+
+ read_dictionary(
+ &buf,
+ batch,
+ &schema,
+ &mut dictionaries_by_field,
+ )?;
+ }
+ t => {
+ return Err(ArrowError::IoError(format!(
+ "Expecting DictionaryBatch in dictionary blocks, found {:?}.",
+ t
+ )));
+ }
}
- };
+ }
}
let projection = match projection {
Some(projection_indices) => {