You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/10/18 11:18:32 UTC

[GitHub] [arrow] alamb commented on a change in pull request #8485: ARROW-10002: [Rust] Remove trait specialization from arrow crate

alamb commented on a change in pull request #8485:
URL: https://github.com/apache/arrow/pull/8485#discussion_r507094172



##########
File path: rust/arrow/src/array/array.rs
##########
@@ -562,105 +617,36 @@ where
     ///
     /// `Date32` and `Date64` return UTC midnight as they do not have time resolution
     pub fn value_as_time(&self, i: usize) -> Option<NaiveTime> {
-        match self.data_type() {
-            DataType::Time32(unit) => {
-                // safe to immediately cast to u32 as `self.value(i)` is positive i32
-                let v = i64::from(self.value(i)) as u32;
-                match unit {
-                    TimeUnit::Second => {
-                        Some(NaiveTime::from_num_seconds_from_midnight(v, 0))
-                    }
-                    TimeUnit::Millisecond => {
-                        Some(NaiveTime::from_num_seconds_from_midnight(
-                            // extract seconds from milliseconds
-                            v / MILLISECONDS as u32,
-                            // discard extracted seconds and convert milliseconds to
-                            // nanoseconds
-                            v % MILLISECONDS as u32 * MICROSECONDS as u32,
-                        ))
-                    }
-                    _ => None,
-                }
-            }
-            DataType::Time64(unit) => {
-                let v = i64::from(self.value(i));
-                match unit {
-                    TimeUnit::Microsecond => {
-                        Some(NaiveTime::from_num_seconds_from_midnight(
-                            // extract seconds from microseconds
-                            (v / MICROSECONDS) as u32,
-                            // discard extracted seconds and convert microseconds to
-                            // nanoseconds
-                            (v % MICROSECONDS * MILLISECONDS) as u32,
-                        ))
-                    }
-                    TimeUnit::Nanosecond => {
-                        Some(NaiveTime::from_num_seconds_from_midnight(
-                            // extract seconds from nanoseconds
-                            (v / NANOSECONDS) as u32,
-                            // discard extracted seconds
-                            (v % NANOSECONDS) as u32,
-                        ))
-                    }
-                    _ => None,
-                }
-            }
-            DataType::Timestamp(_, _) => {
-                self.value_as_datetime(i).map(|datetime| datetime.time())
-            }
-            DataType::Date32(_) | DataType::Date64(_) => {
-                Some(NaiveTime::from_hms(0, 0, 0))
-            }
-            DataType::Interval(_) => None,
-            _ => None,
-        }
+        as_time::<T>(i64::from(self.value(i)))
     }
 }
 
 impl<T: ArrowPrimitiveType> fmt::Debug for PrimitiveArray<T> {
-    default fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "PrimitiveArray<{:?}>\n[\n", T::get_data_type())?;
-        print_long_array(self, f, |array, index, f| {
-            fmt::Debug::fmt(&array.value(index), f)
-        })?;
-        write!(f, "]")
-    }
-}
-
-impl<T: ArrowNumericType> fmt::Debug for PrimitiveArray<T> {
-    default fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "PrimitiveArray<{:?}>\n[\n", T::get_data_type())?;
-        print_long_array(self, f, |array, index, f| {
-            fmt::Debug::fmt(&array.value(index), f)
-        })?;
-        write!(f, "]")
-    }
-}
-
-impl<T: ArrowNumericType + ArrowTemporalType> fmt::Debug for PrimitiveArray<T>
-where
-    i64: std::convert::From<T::Native>,
-{
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "PrimitiveArray<{:?}>\n[\n", T::get_data_type())?;
-        print_long_array(self, f, |array, index, f| match T::get_data_type() {
+        write!(f, "PrimitiveArray<{:?}>\n[\n", T::DATA_TYPE)?;
+        print_long_array(self, f, |array, index, f| match T::DATA_TYPE {
             DataType::Date32(_) | DataType::Date64(_) => {
-                match array.value_as_date(index) {

Review comment:
       so the primary difference here is that all types get converted to i64 and then converted back to the appropriate (potentially smaller) native type by the code (and the compiler is presumably smart enough to make it all happen efficiently)
   
   And by structuring the code in that way, we only need a single type parameter (i64->T rather than all combinations of input and output types?)

##########
File path: rust/arrow/src/array/builder.rs
##########
@@ -253,8 +253,18 @@ pub trait BufferBuilderTrait<T: ArrowPrimitiveType> {
 }
 
 impl<T: ArrowPrimitiveType> BufferBuilderTrait<T> for BufferBuilder<T> {
-    default fn new(capacity: usize) -> Self {
-        let buffer = MutableBuffer::new(capacity * mem::size_of::<T::Native>());
+    #[inline]
+    fn new(capacity: usize) -> Self {
+        let buffer = if T::DATA_TYPE == DataType::Boolean {

Review comment:
       I think using an `if` for specialization rather than the compiler is totally legit




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org