You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ne...@apache.org on 2020/09/25 17:13:46 UTC

[arrow] 01/01: ARROW-10095: [Rust] Update rust-parquet-arrow-writer branch's encode_arrow_schema with ipc changes

This is an automated email from the ASF dual-hosted git repository.

nevime pushed a commit to branch rust-parquet-arrow-writer
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit 3fb8bfa60f82b20b89965df6f67acf08f98fc431
Author: Carol (Nichols || Goulding) <ca...@gmail.com>
AuthorDate: Fri Sep 25 17:54:11 2020 +0200

    ARROW-10095: [Rust] Update rust-parquet-arrow-writer branch's encode_arrow_schema with ipc changes
    
    Note that this PR is deliberately filed against the rust-parquet-arrow-writer branch, not master!!
    
    Hi! 👋 I'm looking to help out with the rust-parquet-arrow-writer branch. I just pulled it down and it wasn't compiling, because in commit 75f804efbfe367175fef5a2238d9cd2d30ed3afe `schema_to_bytes` was changed to take `IpcWriteOptions` and to return `EncodedData`. This updates `encode_arrow_schema` to use those changes, which should get this branch compiling and passing tests again.
    
    I'm kind of guessing which JIRA ticket this should be associated with; honestly I think this commit can just be squashed with https://github.com/apache/arrow/commit/8f0ed91469f2e569472edaa3b69ffde051088555 next time this branch gets rebased.
    
    Please let me know if I should change anything; I'm happy to!
    
    Closes #8274 from carols10cents/update-with-ipc-changes
    
    Authored-by: Carol (Nichols || Goulding) <ca...@gmail.com>
    Signed-off-by: Neville Dipale <ne...@gmail.com>
---
 rust/parquet/src/arrow/arrow_writer.rs | 2 +-
 rust/parquet/src/arrow/schema.rs       | 8 +++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/rust/parquet/src/arrow/arrow_writer.rs b/rust/parquet/src/arrow/arrow_writer.rs
index 1ca8d50..e0ad207 100644
--- a/rust/parquet/src/arrow/arrow_writer.rs
+++ b/rust/parquet/src/arrow/arrow_writer.rs
@@ -22,7 +22,7 @@ use std::rc::Rc;
 use arrow::array as arrow_array;
 use arrow::datatypes::{DataType as ArrowDataType, SchemaRef};
 use arrow::record_batch::RecordBatch;
-use arrow_array::Array;
+use arrow_array::{Array, PrimitiveArrayOps};
 
 use super::schema::add_encoded_arrow_schema_to_metadata;
 use crate::column::writer::ColumnWriter;
diff --git a/rust/parquet/src/arrow/schema.rs b/rust/parquet/src/arrow/schema.rs
index d4cfe1f..d5a0ff9 100644
--- a/rust/parquet/src/arrow/schema.rs
+++ b/rust/parquet/src/arrow/schema.rs
@@ -27,6 +27,7 @@ use std::collections::{HashMap, HashSet};
 use std::rc::Rc;
 
 use arrow::datatypes::{DataType, DateUnit, Field, Schema, TimeUnit};
+use arrow::ipc::writer;
 
 use crate::basic::{LogicalType, Repetition, Type as PhysicalType};
 use crate::errors::{ParquetError::ArrowError, Result};
@@ -120,15 +121,16 @@ fn get_arrow_schema_from_metadata(encoded_meta: &str) -> Option<Schema> {
 
 /// Encodes the Arrow schema into the IPC format, and base64 encodes it
 fn encode_arrow_schema(schema: &Schema) -> String {
-    let mut serialized_schema = arrow::ipc::writer::schema_to_bytes(&schema);
+    let options = writer::IpcWriteOptions::default();
+    let mut serialized_schema = arrow::ipc::writer::schema_to_bytes(&schema, &options);
 
     // manually prepending the length to the schema as arrow uses the legacy IPC format
     // TODO: change after addressing ARROW-9777
-    let schema_len = serialized_schema.len();
+    let schema_len = serialized_schema.ipc_message.len();
     let mut len_prefix_schema = Vec::with_capacity(schema_len + 8);
     len_prefix_schema.append(&mut vec![255u8, 255, 255, 255]);
     len_prefix_schema.append((schema_len as u32).to_le_bytes().to_vec().as_mut());
-    len_prefix_schema.append(&mut serialized_schema);
+    len_prefix_schema.append(&mut serialized_schema.ipc_message);
 
     base64::encode(&len_prefix_schema)
 }