You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "tustvold (via GitHub)" <gi...@apache.org> on 2023/02/01 19:16:20 UTC

[GitHub] [arrow-rs] tustvold commented on a diff in pull request #3647: Lazy array display (#3638)

tustvold commented on code in PR #3647:
URL: https://github.com/apache/arrow-rs/pull/3647#discussion_r1093634643


##########
arrow-cast/src/display.rs:
##########
@@ -19,50 +19,301 @@
 //! purposes. See the `pretty` crate for additional functions for
 //! record batch pretty printing.
 
-use std::fmt::Write;
+use std::fmt::{Debug, Display, Formatter};
 use std::sync::Arc;
 
+use arrow_array::temporal_conversions::*;
 use arrow_array::timezone::Tz;
 use arrow_array::types::*;
 use arrow_array::*;
 use arrow_buffer::ArrowNativeType;
 use arrow_schema::*;
-use chrono::prelude::SecondsFormat;
+use chrono::{DateTime, NaiveDate, TimeZone, Utc};
 
-macro_rules! make_string {
-    ($array_type:ty, $column: ident, $row: ident) => {{
-        let array = $column.as_any().downcast_ref::<$array_type>().unwrap();
+/// Options for formatting arrays
+#[derive(Debug, Clone, Default)]
+pub struct FormatOptions {
+    safe: bool,
+}
 
-        Ok(array.value($row).to_string())
-    }};
+impl FormatOptions {
+    /// If set to `true` any formatting errors will be written to the output
+    /// instead of being converted into a [`std::fmt::Error`]
+    pub fn with_display_error(mut self, safe: bool) -> Self {
+        self.safe = safe;
+        self
+    }
 }
 
-macro_rules! make_string_interval_year_month {
-    ($column: ident, $row: ident) => {{
-        let array = $column
-            .as_any()
-            .downcast_ref::<array::IntervalYearMonthArray>()
-            .unwrap();
+/// Implements [`Display`] for a specific array value
+pub struct ValueFormatter<'a> {
+    idx: usize,
+    formatter: &'a ArrayFormatter<'a>,
+}
+
+impl<'a> Display for ValueFormatter<'a> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self.formatter.format.fmt(self.idx, f) {
+            Ok(()) => Ok(()),
+            Err(FormatError::Arrow(e)) if self.formatter.safe => {
+                write!(f, "ERROR: {}", e)
+            }
+            Err(_) => Err(std::fmt::Error),
+        }
+    }
+}
+
+/// A string formatter for an [`Array`]
+pub struct ArrayFormatter<'a> {
+    format: Box<dyn DisplayIndex + 'a>,
+    safe: bool,
+}
+
+impl<'a> ArrayFormatter<'a> {
+    /// Returns an [`ArrayFormatter`] that can be used to format `array`
+    ///
+    /// This returns an error if an array of the given data type cannot be formatted
+    pub fn try_new(
+        array: &'a dyn Array,
+        options: &FormatOptions,
+    ) -> Result<Self, ArrowError> {
+        let format = downcast_primitive_array! {
+            array => Box::new(ArrayFormat::try_new(array)?) as _,
+            _ => todo!()
+        };
+
+        Ok(Self {
+            format,
+            safe: options.safe,
+        })
+    }
+
+    /// Returns a [`ValueFormatter`] that implements [`Display`] for
+    /// the value of the array at `idx`
+    pub fn value(&self, idx: usize) -> ValueFormatter<'_> {
+        ValueFormatter {
+            formatter: self,
+            idx,
+        }
+    }
+}
+
+/// Either an [`ArrowError`] or [`std::fmt::Error`]
+enum FormatError {
+    Format(std::fmt::Error),
+    Arrow(ArrowError),
+}
+
+type FormatResult = Result<(), FormatError>;
+
+impl From<std::fmt::Error> for FormatError {
+    fn from(value: std::fmt::Error) -> Self {
+        Self::Format(value)
+    }
+}
+
+impl From<ArrowError> for FormatError {
+    fn from(value: ArrowError) -> Self {
+        Self::Arrow(value)
+    }
+}
+
+/// [`Display`] but accepting an index
+trait DisplayIndex {
+    fn fmt(&self, idx: usize, f: &mut Formatter<'_>) -> FormatResult;
+}
+
+/// [`DisplayIndex`] with additional state
+trait DisplayIndexState {
+    type State;
+
+    fn prepare(&self) -> Result<Self::State, ArrowError>;
+
+    fn fmt(&self, state: &Self::State, idx: usize, f: &mut Formatter<'_>)
+        -> FormatResult;
+}
+
+impl<T: DisplayIndex> DisplayIndexState for T {
+    type State = ();
+
+    fn prepare(&self) -> Result<Self::State, ArrowError> {
+        Ok(())
+    }
+
+    fn fmt(&self, _: &Self::State, idx: usize, f: &mut Formatter<'_>) -> FormatResult {
+        DisplayIndex::fmt(self, idx, f)
+    }
+}
+
+struct ArrayFormat<F: DisplayIndexState> {
+    state: F::State,
+    array: F,
+}
+
+impl<F: DisplayIndexState> ArrayFormat<F> {
+    fn try_new(array: F) -> Result<Self, ArrowError> {
+        let state = array.prepare()?;
+        Ok(Self { state, array })
+    }
+}
+
+impl<F: DisplayIndexState + Array> DisplayIndex for ArrayFormat<F> {
+    fn fmt(&self, idx: usize, f: &mut Formatter<'_>) -> FormatResult {
+        if self.array.is_null(idx) {
+            return Ok(());
+        }
+        DisplayIndexState::fmt(&self.array, &self.state, idx, f)
+    }
+}
+
+macro_rules! primitive_display {
+    ($($t:ty),+) => {
+        $(impl<'a> DisplayIndex for &'a PrimitiveArray<$t>
+        {
+            fn fmt(&self, idx: usize, f: &mut Formatter<'_>) -> FormatResult {
+                write!(f, "{}", self.value(idx))?;
+                Ok(())
+            }
+        })+
+    };
+}
+
+primitive_display!(Int8Type, Int16Type, Int32Type, Int64Type);
+primitive_display!(UInt8Type, UInt16Type, UInt32Type, UInt64Type);
+primitive_display!(Float16Type, Float32Type, Float64Type);
+
+macro_rules! decimal_display {
+    ($($t:ty),+) => {
+        $(impl<'a> DisplayIndexState for &'a PrimitiveArray<$t> {
+            type State = (u8, i8);
+
+            fn prepare(&self) -> Result<Self::State, ArrowError> {
+                Ok((self.precision(), self.scale()))
+            }
+
+            fn fmt(&self, s: &Self::State, idx: usize, f: &mut Formatter<'_>) -> FormatResult {
+                write!(f, "{}", <$t>::format_decimal(self.values()[idx], s.0, s.1))?;
+                Ok(())
+            }
+        })+
+    };
+}
+
+decimal_display!(Decimal128Type, Decimal256Type);
+
+macro_rules! timestamp_display {
+    ($($t:ty),+) => {
+        $(impl<'a> DisplayIndexState for &'a PrimitiveArray<$t> {
+            type State = Option<Tz>;

Review Comment:
   This is the motivation for the "deviation" from the proposal in https://github.com/apache/arrow-rs/issues/3638: we can parse the timezone once and then reuse it for formatting.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org