You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ks...@apache.org on 2018/11/20 12:54:04 UTC

[arrow] branch master updated: ARROW-3787: [Rust] Implement From<ListArray> for BinaryArray

This is an automated email from the ASF dual-hosted git repository.

kszucs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new dedaee6  ARROW-3787: [Rust] Implement From<ListArray> for BinaryArray
dedaee6 is described below

commit dedaee62e67b1b126585bec04fd03534809f31f4
Author: Paddy Horan <pa...@hotmail.com>
AuthorDate: Tue Nov 20 13:53:43 2018 +0100

    ARROW-3787: [Rust] Implement From<ListArray> for BinaryArray
    
    Author: Paddy Horan <pa...@hotmail.com>
    
    Closes #2957 from paddyhoran/ARROW-3787 and squashes the following commits:
    
    27251e7f <Paddy Horan> Fixed broken test
    0faa0772 <Paddy Horan> Merge remote-tracking branch 'my-arrow/ARROW-3787' into ARROW-3787
    d2e09b5c <Paddy Horan> Added additional test
    1e288753 <Paddy Horan> Addressed review
    87b0537e <Paddy Horan> Implemented `From<ListArray>` for `BinaryArray`
    3a07e62b <Paddy Horan> Offsets should be the first buffer in BinaryArray
---
 rust/src/array.rs        | 106 +++++++++++++++++++++++++++++++++++++++++++++++
 rust/src/record_batch.rs |   2 +-
 2 files changed, 107 insertions(+), 1 deletion(-)

diff --git a/rust/src/array.rs b/rust/src/array.rs
index 9157897..ff3e785 100644
--- a/rust/src/array.rs
+++ b/rust/src/array.rs
@@ -585,6 +585,29 @@ impl<'a> From<Vec<&'a str>> for BinaryArray {
     }
 }
 
+/// Creates a `BinaryArray` from `List<u8>` array
+impl From<ListArray> for BinaryArray {
+    fn from(v: ListArray) -> Self {
+        assert_eq!(
+            v.data().child_data()[0].child_data().len(),
+            0,
+            "BinaryArray can only be created from list array of u8 values (i.e. List<PrimitiveArray<u8>>)."
+        );
+        assert_eq!(
+            v.data().child_data()[0].data_type(),
+            &DataType::UInt8,
+            "BinaryArray can only be created from List<u8> arrays, mismatched data types."
+        );
+
+        let data = ArrayData::builder(DataType::Utf8)
+            .len(v.len())
+            .add_buffer(v.data().buffers()[0].clone())
+            .add_buffer(v.data().child_data()[0].buffers()[0].clone())
+            .build();
+        Self::from(data)
+    }
+}
+
 impl Array for BinaryArray {
     fn as_any(&self) -> &Any {
         self
@@ -988,6 +1011,89 @@ mod tests {
     }
 
     #[test]
+    fn test_binary_array_from_list_array() {
+        let values: [u8; 12] = [
+            b'h', b'e', b'l', b'l', b'o', b'p', b'a', b'r', b'q', b'u', b'e', b't',
+        ];
+        let values_data = ArrayData::builder(DataType::UInt8)
+            .len(12)
+            .add_buffer(Buffer::from(&values[..]))
+            .build();
+        let offsets: [i32; 4] = [0, 5, 5, 12];
+
+        // Array data: ["hello", "", "parquet"]
+        let array_data1 = ArrayData::builder(DataType::Utf8)
+            .len(3)
+            .add_buffer(Buffer::from(offsets.to_byte_slice()))
+            .add_buffer(Buffer::from(&values[..]))
+            .build();
+        let binary_array1 = BinaryArray::from(array_data1);
+
+        let array_data2 = ArrayData::builder(DataType::Utf8)
+            .len(3)
+            .add_buffer(Buffer::from(offsets.to_byte_slice()))
+            .add_child_data(values_data)
+            .build();
+        let list_array = ListArray::from(array_data2);
+        let binary_array2 = BinaryArray::from(list_array);
+
+        assert_eq!(2, binary_array2.data().buffers().len());
+        assert_eq!(0, binary_array2.data().child_data().len());
+
+        assert_eq!(binary_array1.len(), binary_array2.len());
+        assert_eq!(binary_array1.null_count(), binary_array2.null_count());
+        for i in 0..binary_array1.len() {
+            assert_eq!(binary_array1.get_value(i), binary_array2.get_value(i));
+            assert_eq!(binary_array1.get_string(i), binary_array2.get_string(i));
+            assert_eq!(binary_array1.value_offset(i), binary_array2.value_offset(i));
+            assert_eq!(binary_array1.value_length(i), binary_array2.value_length(i));
+        }
+    }
+
+    #[test]
+    #[should_panic(
+        expected = "BinaryArray can only be created from List<u8> arrays, mismatched data types."
+    )]
+    fn test_binary_array_from_incorrect_list_array_type() {
+        let values: [u32; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
+        let values_data = ArrayData::builder(DataType::UInt32)
+            .len(12)
+            .add_buffer(Buffer::from(values[..].to_byte_slice()))
+            .build();
+        let offsets: [i32; 4] = [0, 5, 5, 12];
+
+        let array_data = ArrayData::builder(DataType::Utf8)
+            .len(3)
+            .add_buffer(Buffer::from(offsets.to_byte_slice()))
+            .add_child_data(values_data)
+            .build();
+        let list_array = ListArray::from(array_data);
+        BinaryArray::from(list_array);
+    }
+
+    #[test]
+    #[should_panic(
+        expected = "BinaryArray can only be created from list array of u8 values (i.e. List<PrimitiveArray<u8>>)."
+    )]
+    fn test_binary_array_from_incorrect_list_array() {
+        let values: [u32; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
+        let values_data = ArrayData::builder(DataType::UInt32)
+            .len(12)
+            .add_buffer(Buffer::from(values[..].to_byte_slice()))
+            .add_child_data(ArrayData::builder(DataType::Boolean).build())
+            .build();
+        let offsets: [i32; 4] = [0, 5, 5, 12];
+
+        let array_data = ArrayData::builder(DataType::Utf8)
+            .len(3)
+            .add_buffer(Buffer::from(offsets.to_byte_slice()))
+            .add_child_data(values_data)
+            .build();
+        let list_array = ListArray::from(array_data);
+        BinaryArray::from(list_array);
+    }
+
+    #[test]
     #[should_panic(expected = "BinaryArray out of bounds access")]
     fn test_binary_array_get_value_index_out_of_bound() {
         let values: [u8; 12] = [
diff --git a/rust/src/record_batch.rs b/rust/src/record_batch.rs
index 6819b63..fe1f39f 100644
--- a/rust/src/record_batch.rs
+++ b/rust/src/record_batch.rs
@@ -88,8 +88,8 @@ mod tests {
         let offset_data = vec![0, 1, 2, 3, 4, 5, 6];
         let array_data = ArrayData::builder(DataType::Utf8)
             .len(5)
-            .add_buffer(Buffer::from(v.to_byte_slice()))
             .add_buffer(Buffer::from(offset_data.to_byte_slice()))
+            .add_buffer(Buffer::from(v.to_byte_slice()))
             .build();
         let b = BinaryArray::from(array_data);