You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/01/15 05:26:00 UTC

[GitHub] [arrow] jorgecarleitao commented on a change in pull request #9114: ARROW-11149: [Rust] DF Support List/LargeList/FixedSizeList in create_batch_empty

jorgecarleitao commented on a change in pull request #9114:
URL: https://github.com/apache/arrow/pull/9114#discussion_r557868676



##########
File path: rust/arrow/src/array/array_list.rs
##########
@@ -295,6 +299,264 @@ impl fmt::Debug for FixedSizeListArray {
     }
 }
 
+macro_rules! build_empty_list_array_with_primitive_items {
+    ($item_type:ident, $offset_type:ident) => {{
+        let values_builder = PrimitiveBuilder::<$item_type>::new(0);
+        let mut builder =
+            GenericListBuilder::<$offset_type, PrimitiveBuilder<$item_type>>::new(
+                values_builder,
+            );
+        let empty_list_array = builder.finish();
+        Ok(Arc::new(empty_list_array))
+    }};
+}
+
+macro_rules! build_empty_list_array_with_non_primitive_items {
+    ($type_builder:ident, $offset_type:ident) => {{
+        let values_builder = $type_builder::new(0);
+        let mut builder =
+            GenericListBuilder::<$offset_type, $type_builder>::new(values_builder);

Review comment:
       Note that for an empty list, we know that the offset buffer will be a single entry, `0`, and the values buffer will be an empty buffer (len = `0`). Therefore, this code could be simplified by just passing the buffers directly instead of using builders.
   

##########
File path: rust/arrow/src/error.rs
##########
@@ -90,6 +91,9 @@ impl From<serde_json::Error> for ArrowError {
 impl Display for ArrowError {
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
         match self {
+            ArrowError::Unsupported(source) => {

Review comment:
       What do you think of `Unimplemented` instead of `Unsupported`? Just to be consistent with the `unimplemented!` macro that rust already offers.

##########
File path: rust/arrow/src/array/array_list.rs
##########
@@ -295,6 +299,264 @@ impl fmt::Debug for FixedSizeListArray {
     }
 }
 
+macro_rules! build_empty_list_array_with_primitive_items {
+    ($item_type:ident, $offset_type:ident) => {{
+        let values_builder = PrimitiveBuilder::<$item_type>::new(0);
+        let mut builder =
+            GenericListBuilder::<$offset_type, PrimitiveBuilder<$item_type>>::new(
+                values_builder,
+            );
+        let empty_list_array = builder.finish();
+        Ok(Arc::new(empty_list_array))
+    }};
+}
+
+macro_rules! build_empty_list_array_with_non_primitive_items {
+    ($type_builder:ident, $offset_type:ident) => {{
+        let values_builder = $type_builder::new(0);
+        let mut builder =
+            GenericListBuilder::<$offset_type, $type_builder>::new(values_builder);
+        let empty_list_array = builder.finish();
+        Ok(Arc::new(empty_list_array))
+    }};
+}
+
+pub fn build_empty_list_array<OffsetSize: OffsetSizeTrait>(
+    item_type: DataType,
+) -> Result<ArrayRef> {
+    match item_type {
+        DataType::UInt8 => {
+            build_empty_list_array_with_primitive_items!(UInt8Type, OffsetSize)
+        }
+        DataType::UInt16 => {
+            build_empty_list_array_with_primitive_items!(UInt16Type, OffsetSize)
+        }
+        DataType::UInt32 => {
+            build_empty_list_array_with_primitive_items!(UInt32Type, OffsetSize)
+        }
+        DataType::UInt64 => {
+            build_empty_list_array_with_primitive_items!(UInt64Type, OffsetSize)
+        }
+        DataType::Int8 => {
+            build_empty_list_array_with_primitive_items!(Int8Type, OffsetSize)
+        }
+        DataType::Int16 => {
+            build_empty_list_array_with_primitive_items!(Int16Type, OffsetSize)
+        }
+        DataType::Int32 => {
+            build_empty_list_array_with_primitive_items!(Int32Type, OffsetSize)
+        }
+        DataType::Int64 => {
+            build_empty_list_array_with_primitive_items!(Int64Type, OffsetSize)
+        }
+        DataType::Float32 => {
+            build_empty_list_array_with_primitive_items!(Float32Type, OffsetSize)
+        }
+        DataType::Float64 => {
+            build_empty_list_array_with_primitive_items!(Float64Type, OffsetSize)
+        }
+        DataType::Boolean => {
+            build_empty_list_array_with_non_primitive_items!(BooleanBuilder, OffsetSize)
+        }
+        DataType::Date32(_) => {
+            build_empty_list_array_with_primitive_items!(Date32Type, OffsetSize)
+        }
+        DataType::Date64(_) => {
+            build_empty_list_array_with_primitive_items!(Date64Type, OffsetSize)
+        }
+        DataType::Time32(TimeUnit::Second) => {
+            build_empty_list_array_with_primitive_items!(Time32SecondType, OffsetSize)
+        }
+        DataType::Time32(TimeUnit::Millisecond) => {
+            build_empty_list_array_with_primitive_items!(
+                Time32MillisecondType,
+                OffsetSize
+            )
+        }
+        DataType::Time64(TimeUnit::Microsecond) => {
+            build_empty_list_array_with_primitive_items!(
+                Time64MicrosecondType,
+                OffsetSize
+            )
+        }
+        DataType::Time64(TimeUnit::Nanosecond) => {
+            build_empty_list_array_with_primitive_items!(Time64NanosecondType, OffsetSize)
+        }
+        DataType::Duration(TimeUnit::Second) => {
+            build_empty_list_array_with_primitive_items!(DurationSecondType, OffsetSize)
+        }
+        DataType::Duration(TimeUnit::Millisecond) => {
+            build_empty_list_array_with_primitive_items!(
+                DurationMillisecondType,
+                OffsetSize
+            )
+        }
+        DataType::Duration(TimeUnit::Microsecond) => {
+            build_empty_list_array_with_primitive_items!(
+                DurationMicrosecondType,
+                OffsetSize
+            )
+        }
+        DataType::Duration(TimeUnit::Nanosecond) => {
+            build_empty_list_array_with_primitive_items!(
+                DurationNanosecondType,
+                OffsetSize
+            )
+        }
+        DataType::Timestamp(TimeUnit::Second, _) => {
+            build_empty_list_array_with_primitive_items!(TimestampSecondType, OffsetSize)
+        }
+        DataType::Timestamp(TimeUnit::Millisecond, _) => {
+            build_empty_list_array_with_primitive_items!(
+                TimestampMillisecondType,
+                OffsetSize
+            )
+        }
+        DataType::Timestamp(TimeUnit::Microsecond, _) => {
+            build_empty_list_array_with_primitive_items!(
+                TimestampMicrosecondType,
+                OffsetSize
+            )
+        }
+        DataType::Timestamp(TimeUnit::Nanosecond, _) => {
+            build_empty_list_array_with_primitive_items!(
+                TimestampNanosecondType,
+                OffsetSize
+            )
+        }
+        DataType::Utf8 => {
+            build_empty_list_array_with_non_primitive_items!(StringBuilder, OffsetSize)
+        }
+        DataType::Binary => {
+            build_empty_list_array_with_non_primitive_items!(BinaryBuilder, OffsetSize)
+        }
+        _ => Err(ArrowError::Unsupported(format!(
+            "GenericListBuilder of type List({:?}) is not supported",
+            item_type
+        ))),
+    }
+}
+
+macro_rules! build_empty_fixed_size_list_array_with_primitive_items {
+    ($item_type:ident) => {{
+        let values_builder = PrimitiveBuilder::<$item_type>::new(0);
+        let mut builder = FixedSizeListBuilder::new(values_builder, 0);
+        let empty_list_array = builder.finish();
+        Ok(Arc::new(empty_list_array))
+    }};
+}
+
+macro_rules! build_empty_fixed_size_list_array_with_non_primitive_items {
+    ($type_builder:ident) => {{
+        let values_builder = $type_builder::new(0);
+        let mut builder = FixedSizeListBuilder::new(values_builder, 0);
+        let empty_list_array = builder.finish();
+        Ok(Arc::new(empty_list_array))
+    }};
+}
+
+pub fn build_empty_fixed_size_list_array(item_type: DataType) -> Result<ArrayRef> {
+    match item_type {
+        DataType::UInt8 => {
+            build_empty_fixed_size_list_array_with_primitive_items!(UInt8Type)
+        }
+        DataType::UInt16 => {
+            build_empty_fixed_size_list_array_with_primitive_items!(UInt16Type)
+        }
+        DataType::UInt32 => {
+            build_empty_fixed_size_list_array_with_primitive_items!(UInt32Type)
+        }
+        DataType::UInt64 => {
+            build_empty_fixed_size_list_array_with_primitive_items!(UInt64Type)
+        }
+        DataType::Int8 => {
+            build_empty_fixed_size_list_array_with_primitive_items!(Int8Type)
+        }
+        DataType::Int16 => {
+            build_empty_fixed_size_list_array_with_primitive_items!(Int16Type)
+        }
+        DataType::Int32 => {
+            build_empty_fixed_size_list_array_with_primitive_items!(Int32Type)
+        }
+        DataType::Int64 => {
+            build_empty_fixed_size_list_array_with_primitive_items!(Int64Type)
+        }
+        DataType::Float32 => {
+            build_empty_fixed_size_list_array_with_primitive_items!(Float32Type)
+        }
+        DataType::Float64 => {
+            build_empty_fixed_size_list_array_with_primitive_items!(Float64Type)
+        }
+        DataType::Boolean => {
+            build_empty_fixed_size_list_array_with_non_primitive_items!(BooleanBuilder)
+        }
+        DataType::Date32(_) => {
+            build_empty_fixed_size_list_array_with_primitive_items!(Date32Type)
+        }
+        DataType::Date64(_) => {
+            build_empty_fixed_size_list_array_with_primitive_items!(Date64Type)
+        }
+        DataType::Time32(TimeUnit::Second) => {
+            build_empty_fixed_size_list_array_with_primitive_items!(Time32SecondType)
+        }
+        DataType::Time32(TimeUnit::Millisecond) => {
+            build_empty_fixed_size_list_array_with_primitive_items!(Time32MillisecondType)
+        }
+        DataType::Time64(TimeUnit::Microsecond) => {
+            build_empty_fixed_size_list_array_with_primitive_items!(Time64MicrosecondType)
+        }
+        DataType::Time64(TimeUnit::Nanosecond) => {
+            build_empty_fixed_size_list_array_with_primitive_items!(Time64NanosecondType)
+        }
+        DataType::Duration(TimeUnit::Second) => {
+            build_empty_fixed_size_list_array_with_primitive_items!(DurationSecondType)
+        }
+        DataType::Duration(TimeUnit::Millisecond) => {
+            build_empty_fixed_size_list_array_with_primitive_items!(
+                DurationMillisecondType
+            )
+        }
+        DataType::Duration(TimeUnit::Microsecond) => {
+            build_empty_fixed_size_list_array_with_primitive_items!(
+                DurationMicrosecondType
+            )
+        }
+        DataType::Duration(TimeUnit::Nanosecond) => {
+            build_empty_fixed_size_list_array_with_primitive_items!(
+                DurationNanosecondType
+            )
+        }
+        DataType::Timestamp(TimeUnit::Second, _) => {
+            build_empty_fixed_size_list_array_with_primitive_items!(TimestampSecondType)
+        }
+        DataType::Timestamp(TimeUnit::Millisecond, _) => {
+            build_empty_fixed_size_list_array_with_primitive_items!(
+                TimestampMillisecondType
+            )
+        }
+        DataType::Timestamp(TimeUnit::Microsecond, _) => {
+            build_empty_fixed_size_list_array_with_primitive_items!(
+                TimestampMicrosecondType
+            )
+        }
+        DataType::Timestamp(TimeUnit::Nanosecond, _) => {
+            build_empty_fixed_size_list_array_with_primitive_items!(
+                TimestampNanosecondType
+            )
+        }
+        DataType::Utf8 => {
+            build_empty_fixed_size_list_array_with_non_primitive_items!(StringBuilder)
+        }
+        DataType::Binary => {
+            build_empty_fixed_size_list_array_with_non_primitive_items!(BinaryBuilder)
+        }
+        _ => Err(ArrowError::Unsupported(format!(
+            "FixedSizeListBuilder of type FixedSizeList({:?}) is not supported",
+            item_type
+        ))),
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use crate::{

Review comment:
       Could you add a test just to verify?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org