You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by vi...@apache.org on 2022/05/03 16:14:19 UTC

[arrow-rs] branch master updated: Do not assume dictionaries exists in footer (#1631)

This is an automated email from the ASF dual-hosted git repository.

viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 8f24c45fb Do not assume dictionaries exists in footer (#1631)
8f24c45fb is described below

commit 8f24c45fbf944486b2f9fd921eeede0725b4316b
Author: Peter C. Jentsch <pj...@uwaterloo.ca>
AuthorDate: Tue May 3 09:14:13 2022 -0700

    Do not assume dictionaries exists in footer (#1631)
    
    * do not assume footer exists, fixes issue #1335
    
    * fix cargo fmt and clippy errors
---
 arrow/src/ipc/reader.rs | 74 ++++++++++++++++++++++++++++---------------------
 1 file changed, 42 insertions(+), 32 deletions(-)

diff --git a/arrow/src/ipc/reader.rs b/arrow/src/ipc/reader.rs
index f3e46e27f..4b050229d 100644
--- a/arrow/src/ipc/reader.rs
+++ b/arrow/src/ipc/reader.rs
@@ -651,43 +651,53 @@ impl<R: Read + Seek> FileReader<R> {
 
         // Create an array of optional dictionary value arrays, one per field.
         let mut dictionaries_by_field = vec![None; schema.all_fields().len()];
-        for block in footer.dictionaries().unwrap() {
-            // read length from end of offset
-            let mut message_size: [u8; 4] = [0; 4];
-            reader.seek(SeekFrom::Start(block.offset() as u64))?;
-            reader.read_exact(&mut message_size)?;
-            if message_size == CONTINUATION_MARKER {
+        if let Some(dictionaries) = footer.dictionaries() {
+            for block in dictionaries {
+                // read length from end of offset
+                let mut message_size: [u8; 4] = [0; 4];
+                reader.seek(SeekFrom::Start(block.offset() as u64))?;
                 reader.read_exact(&mut message_size)?;
-            }
-            let footer_len = i32::from_le_bytes(message_size);
-            let mut block_data = vec![0; footer_len as usize];
-
-            reader.read_exact(&mut block_data)?;
-
-            let message = ipc::root_as_message(&block_data[..]).map_err(|err| {
-                ArrowError::IoError(format!("Unable to get root as message: {:?}", err))
-            })?;
+                if message_size == CONTINUATION_MARKER {
+                    reader.read_exact(&mut message_size)?;
+                }
+                let footer_len = i32::from_le_bytes(message_size);
+                let mut block_data = vec![0; footer_len as usize];
 
-            match message.header_type() {
-                ipc::MessageHeader::DictionaryBatch => {
-                    let batch = message.header_as_dictionary_batch().unwrap();
+                reader.read_exact(&mut block_data)?;
 
-                    // read the block that makes up the dictionary batch into a buffer
-                    let mut buf = vec![0; block.bodyLength() as usize];
-                    reader.seek(SeekFrom::Start(
-                        block.offset() as u64 + block.metaDataLength() as u64,
-                    ))?;
-                    reader.read_exact(&mut buf)?;
+                let message = ipc::root_as_message(&block_data[..]).map_err(|err| {
+                    ArrowError::IoError(format!(
+                        "Unable to get root as message: {:?}",
+                        err
+                    ))
+                })?;
 
-                    read_dictionary(&buf, batch, &schema, &mut dictionaries_by_field)?;
-                }
-                t => {
-                    return Err(ArrowError::IoError(format!(
-                        "Expecting DictionaryBatch in dictionary blocks, found {:?}.",
-                        t
-                    )));
+                match message.header_type() {
+                    ipc::MessageHeader::DictionaryBatch => {
+                        let batch = message.header_as_dictionary_batch().unwrap();
+
+                        // read the block that makes up the dictionary batch into a buffer
+                        let mut buf = vec![0; block.bodyLength() as usize];
+                        reader.seek(SeekFrom::Start(
+                            block.offset() as u64 + block.metaDataLength() as u64,
+                        ))?;
+                        reader.read_exact(&mut buf)?;
+
+                        read_dictionary(
+                            &buf,
+                            batch,
+                            &schema,
+                            &mut dictionaries_by_field,
+                        )?;
+                    }
+                    t => {
+                        return Err(ArrowError::IoError(format!(
+                            "Expecting DictionaryBatch in dictionary blocks, found {:?}.",
+                            t
+                        )));
+                    }
                 }
-            };
+            }
         }
         let projection = match projection {
             Some(projection_indices) => {