You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/01/05 21:49:27 UTC

[GitHub] [arrow-datafusion] alamb commented on a change in pull request #1524: Remove one copy of ballista datatype serialization code

alamb commented on a change in pull request #1524:
URL: https://github.com/apache/arrow-datafusion/pull/1524#discussion_r779159639



##########
File path: ballista/rust/core/src/serde/logical_plan/to_proto.rs
##########
@@ -153,115 +153,7 @@ impl TryInto<DataType> for &protobuf::ArrowType {
                 "Protobuf deserialization error: ArrowType missing required field 'data_type'",
             )
         })?;
-        Ok(match pb_arrow_type {
-            protobuf::arrow_type::ArrowTypeEnum::None(_) => DataType::Null,
-            protobuf::arrow_type::ArrowTypeEnum::Bool(_) => DataType::Boolean,
-            protobuf::arrow_type::ArrowTypeEnum::Uint8(_) => DataType::UInt8,
-            protobuf::arrow_type::ArrowTypeEnum::Int8(_) => DataType::Int8,
-            protobuf::arrow_type::ArrowTypeEnum::Uint16(_) => DataType::UInt16,
-            protobuf::arrow_type::ArrowTypeEnum::Int16(_) => DataType::Int16,
-            protobuf::arrow_type::ArrowTypeEnum::Uint32(_) => DataType::UInt32,
-            protobuf::arrow_type::ArrowTypeEnum::Int32(_) => DataType::Int32,
-            protobuf::arrow_type::ArrowTypeEnum::Uint64(_) => DataType::UInt64,
-            protobuf::arrow_type::ArrowTypeEnum::Int64(_) => DataType::Int64,
-            protobuf::arrow_type::ArrowTypeEnum::Float16(_) => DataType::Float16,
-            protobuf::arrow_type::ArrowTypeEnum::Float32(_) => DataType::Float32,
-            protobuf::arrow_type::ArrowTypeEnum::Float64(_) => DataType::Float64,
-            protobuf::arrow_type::ArrowTypeEnum::Utf8(_) => DataType::Utf8,
-            protobuf::arrow_type::ArrowTypeEnum::LargeUtf8(_) => DataType::LargeUtf8,
-            protobuf::arrow_type::ArrowTypeEnum::Binary(_) => DataType::Binary,
-            protobuf::arrow_type::ArrowTypeEnum::FixedSizeBinary(size) => {
-                DataType::FixedSizeBinary(*size)
-            }
-            protobuf::arrow_type::ArrowTypeEnum::LargeBinary(_) => DataType::LargeBinary,
-            protobuf::arrow_type::ArrowTypeEnum::Date32(_) => DataType::Date32,
-            protobuf::arrow_type::ArrowTypeEnum::Date64(_) => DataType::Date64,
-            protobuf::arrow_type::ArrowTypeEnum::Duration(time_unit_i32) => {
-                DataType::Duration(protobuf::TimeUnit::from_i32_to_arrow(*time_unit_i32)?)
-            }
-            protobuf::arrow_type::ArrowTypeEnum::Timestamp(timestamp) => {
-                DataType::Timestamp(
-                    protobuf::TimeUnit::from_i32_to_arrow(timestamp.time_unit)?,
-                    match timestamp.timezone.is_empty() {
-                        true => None,
-                        false => Some(timestamp.timezone.to_owned()),
-                    },
-                )
-            }
-            protobuf::arrow_type::ArrowTypeEnum::Time32(time_unit_i32) => {
-                DataType::Time32(protobuf::TimeUnit::from_i32_to_arrow(*time_unit_i32)?)
-            }
-            protobuf::arrow_type::ArrowTypeEnum::Time64(time_unit_i32) => {
-                DataType::Time64(protobuf::TimeUnit::from_i32_to_arrow(*time_unit_i32)?)
-            }
-            protobuf::arrow_type::ArrowTypeEnum::Interval(interval_unit_i32) => {
-                DataType::Interval(protobuf::IntervalUnit::from_i32_to_arrow(
-                    *interval_unit_i32,
-                )?)
-            }
-            protobuf::arrow_type::ArrowTypeEnum::Decimal(protobuf::Decimal {
-                whole,
-                fractional,
-            }) => DataType::Decimal(*whole as usize, *fractional as usize),
-            protobuf::arrow_type::ArrowTypeEnum::List(boxed_list) => {
-                let field_ref = boxed_list
-                    .field_type
-                    .as_ref()
-                    .ok_or_else(|| proto_error("Protobuf deserialization error: List message was missing required field 'field_type'"))?
-                    .as_ref();
-                DataType::List(Box::new(field_ref.try_into()?))
-            }
-            protobuf::arrow_type::ArrowTypeEnum::LargeList(boxed_list) => {
-                let field_ref = boxed_list
-                    .field_type
-                    .as_ref()
-                    .ok_or_else(|| proto_error("Protobuf deserialization error: List message was missing required field 'field_type'"))?
-                    .as_ref();
-                DataType::LargeList(Box::new(field_ref.try_into()?))
-            }
-            protobuf::arrow_type::ArrowTypeEnum::FixedSizeList(boxed_list) => {
-                let fsl_ref = boxed_list.as_ref();
-                let pb_fieldtype = fsl_ref
-                    .field_type
-                    .as_ref()
-                    .ok_or_else(|| proto_error("Protobuf deserialization error: FixedSizeList message was missing required field 'field_type'"))?;
-                DataType::FixedSizeList(
-                    Box::new(pb_fieldtype.as_ref().try_into()?),
-                    fsl_ref.list_size,
-                )
-            }
-            protobuf::arrow_type::ArrowTypeEnum::Struct(struct_type) => {
-                let fields = struct_type
-                    .sub_field_types
-                    .iter()
-                    .map(|field| field.try_into())
-                    .collect::<Result<Vec<_>, _>>()?;
-                DataType::Struct(fields)
-            }
-            protobuf::arrow_type::ArrowTypeEnum::Union(union) => {
-                let union_types = union
-                    .union_types
-                    .iter()
-                    .map(|field| field.try_into())
-                    .collect::<Result<Vec<_>, _>>()?;
-                DataType::Union(union_types)
-            }
-            protobuf::arrow_type::ArrowTypeEnum::Dictionary(boxed_dict) => {
-                let dict_ref = boxed_dict.as_ref();
-                let pb_key = dict_ref
-                    .key
-                    .as_ref()
-                    .ok_or_else(|| proto_error("Protobuf deserialization error: Dictionary message was missing required field 'key'"))?;
-                let pb_value = dict_ref
-                    .value
-                    .as_ref()
-                    .ok_or_else(|| proto_error("Protobuf deserialization error: Dictionary message was missing required field 'value'"))?;
-                DataType::Dictionary(
-                    Box::new(pb_key.as_ref().try_into()?),
-                    Box::new(pb_value.as_ref().try_into()?),
-                )
-            }
-        })
+        pb_arrow_type.try_into()

Review comment:
       In case you are interested, the other copy of this logic lives in ballista/rust/core/src/serde/mod.rs: https://github.com/apache/arrow-datafusion/blob/master/ballista/rust/core/src/serde/mod.rs#L161-L267




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org