You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ks...@apache.org on 2019/04/20 09:10:42 UTC
[arrow] branch master updated: ARROW-5187: [Rust] Add ability to
convert StructArray to RecordBatch
This is an automated email from the ASF dual-hosted git repository.
kszucs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new b38936f ARROW-5187: [Rust] Add ability to convert StructArray to RecordBatch
b38936f is described below
commit b38936f08bcfe1594f7e2b2a199920186a3f1fbe
Author: Neville Dipale <ne...@gmail.com>
AuthorDate: Sat Apr 20 11:10:23 2019 +0200
ARROW-5187: [Rust] Add ability to convert StructArray to RecordBatch
The CPP version (http://arrow.apache.org/docs/python/generated/pyarrow.StructArray.html?highlight=flatten#pyarrow.StructArray.flatten) returns `Vec<ArrayRef>`, so I'm not sure whether we should do the same here.
My justification for returning a `RecordBatch` is that we have the added convenience of not losing the schema of the columns.
I'm using this for reading rows from a SQL database into Arrow data.
Author: Neville Dipale <ne...@gmail.com>
Closes #4178 from nevi-me/ARROW-5187 and squashes the following commits:
7afbcfecb <Neville Dipale> make struct_array -> record_batch a conversion trait impl
418bfe1e2 <Neville Dipale> ARROW-5187: Add StructArray::flatten
---
rust/arrow/src/record_batch.rs | 51 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 51 insertions(+)
diff --git a/rust/arrow/src/record_batch.rs b/rust/arrow/src/record_batch.rs
index 62f93b8..b166951 100644
--- a/rust/arrow/src/record_batch.rs
+++ b/rust/arrow/src/record_batch.rs
@@ -95,6 +95,24 @@ impl RecordBatch {
}
}
+impl From<&StructArray> for RecordBatch {
+ /// Create a record batch from a struct array.
+ ///
+ /// This currently does not flatten any nested struct types
+ fn from(struct_array: &StructArray) -> Self {
+ if let DataType::Struct(fields) = struct_array.data_type() {
+ let schema = Schema::new(fields.clone()); // the schema is built from the struct's own fields
+ let columns = struct_array.boxed_fields.clone(); // the columns are a clone of the struct array's `boxed_fields`
+ RecordBatch {
+ schema: Arc::new(schema),
+ columns,
+ }
+ } else {
+ unreachable!("unable to get datatype as struct") // presumably a StructArray always reports DataType::Struct, so this should never run
+ }
+ }
+}
+
unsafe impl Send for RecordBatch {}
unsafe impl Sync for RecordBatch {}
@@ -161,4 +179,37 @@ mod tests {
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)]);
assert!(!batch.is_ok());
}
+
+ #[test]
+ fn create_record_batch_from_struct_array() {
+ let boolean_data = ArrayData::builder(DataType::Boolean)
+ .len(4)
+ .add_buffer(Buffer::from([12_u8])) // 12 == 0b0000_1100; compared below against [false, false, true, true]
+ .build();
+ let int_data = ArrayData::builder(DataType::Int32)
+ .len(4)
+ .add_buffer(Buffer::from([42, 28, 19, 31].to_byte_slice()))
+ .build();
+ let struct_array = StructArray::from(vec![
+ (
+ Field::new("b", DataType::Boolean, false),
+ Arc::new(BooleanArray::from(vec![false, false, true, true]))
+ as Arc<Array>,
+ ),
+ (
+ Field::new("c", DataType::Int32, false),
+ Arc::new(Int32Array::from(vec![42, 28, 19, 31])),
+ ),
+ ]);
+
+ let batch = RecordBatch::from(&struct_array); // the conversion under test
+ assert_eq!(2, batch.num_columns());
+ assert_eq!(4, batch.num_rows());
+ assert_eq!(
+ struct_array.data_type(),
+ &DataType::Struct(batch.schema().fields().to_vec())
+ );
+ assert_eq!(batch.column(0).data(), boolean_data);
+ assert_eq!(batch.column(1).data(), int_data);
+ }
}