You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2021/06/19 11:56:45 UTC

[arrow-rs] branch cherry_pick_fb451125 created (now d812a5a)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a change to branch cherry_pick_fb451125
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git.


      at d812a5a  Add Decimal to CsvWriter and improve debug display (#406)

This branch includes the following new commits:

     new d812a5a  Add Decimal to CsvWriter and improve debug display (#406)

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


[arrow-rs] 01/01: Add Decimal to CsvWriter and improve debug display (#406)

Posted by al...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch cherry_pick_fb451125
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git

commit d812a5ab14225b124f33e0389d4f6c9bd92601c8
Author: Ádám Lippai <ad...@rigo.sk>
AuthorDate: Sun Jun 13 02:20:08 2021 +0200

    Add Decimal to CsvWriter and improve debug display (#406)
    
    * Add Decimal to CsvWriter and improve debug display
    
    * Measure CSV writer instead of file and data creation
    
    * Re-use decimal formatting
---
 arrow/src/array/array_binary.rs | 36 ++++++++++++++++++++++++------------
 arrow/src/csv/writer.rs         | 23 ++++++++++++++++-------
 arrow/src/util/display.rs       | 27 ++++++++++++---------------
 3 files changed, 52 insertions(+), 34 deletions(-)

diff --git a/arrow/src/array/array_binary.rs b/arrow/src/array/array_binary.rs
index 0cb4db4..0b374db 100644
--- a/arrow/src/array/array_binary.rs
+++ b/arrow/src/array/array_binary.rs
@@ -666,6 +666,17 @@ impl DecimalArray {
         self.length * i as i32
     }
 
+    #[inline]
+    pub fn value_as_string(&self, row: usize) -> String {
+        let decimal_string = self.value(row).to_string();
+        if self.scale == 0 {
+            decimal_string
+        } else {
+            let splits = decimal_string.split_at(decimal_string.len() - self.scale);
+            format!("{}.{}", splits.0, splits.1)
+        }
+    }
+
     pub fn from_fixed_size_list_array(
         v: FixedSizeListArray,
         precision: usize,
@@ -729,7 +740,9 @@ impl fmt::Debug for DecimalArray {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         write!(f, "DecimalArray<{}, {}>\n[\n", self.precision, self.scale)?;
         print_long_array(self, f, |array, index, f| {
-            fmt::Debug::fmt(&array.value(index), f)
+            let formatted_decimal = array.value_as_string(index);
+
+            write!(f, "{}", formatted_decimal)
         })?;
         write!(f, "]")
     }
@@ -758,7 +771,7 @@ impl Array for DecimalArray {
 #[cfg(test)]
 mod tests {
     use crate::{
-        array::{LargeListArray, ListArray},
+        array::{DecimalBuilder, LargeListArray, ListArray},
         datatypes::Field,
     };
 
@@ -1163,17 +1176,16 @@ mod tests {
 
     #[test]
     fn test_decimal_array_fmt_debug() {
-        let values: [u8; 32] = [
-            192, 219, 180, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 36, 75, 238, 253,
-            255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-        ];
-        let array_data = ArrayData::builder(DataType::Decimal(23, 6))
-            .len(2)
-            .add_buffer(Buffer::from(&values[..]))
-            .build();
-        let arr = DecimalArray::from(array_data);
+        let values: Vec<i128> = vec![8887000000, -8887000000];
+        let mut decimal_builder = DecimalBuilder::new(3, 23, 6);
+
+        values.iter().for_each(|&value| {
+            decimal_builder.append_value(value).unwrap();
+        });
+        decimal_builder.append_null().unwrap();
+        let arr = decimal_builder.finish();
         assert_eq!(
-            "DecimalArray<23, 6>\n[\n  8887000000,\n  -8887000000,\n]",
+            "DecimalArray<23, 6>\n[\n  8887.000000,\n  -8887.000000,\n  null,\n]",
             format!("{:?}", arr)
         );
     }
diff --git a/arrow/src/csv/writer.rs b/arrow/src/csv/writer.rs
index f2f4ce8..b94036c 100644
--- a/arrow/src/csv/writer.rs
+++ b/arrow/src/csv/writer.rs
@@ -72,6 +72,7 @@ use std::io::Write;
 use crate::datatypes::*;
 use crate::error::{ArrowError, Result};
 use crate::record_batch::RecordBatch;
+use crate::util::display::make_string_from_decimal;
 use crate::{array::*, util::serialization::lexical_to_string};
 const DEFAULT_DATE_FORMAT: &str = "%F";
 const DEFAULT_TIME_FORMAT: &str = "%T";
@@ -244,6 +245,7 @@ impl<W: Write> Writer<W> {
                     };
                     format!("{}", datetime.format(&self.timestamp_format))
                 }
+                DataType::Decimal(..) => make_string_from_decimal(col, row_index)?,
                 t => {
                     // List and Struct arrays not supported by the writer, any
                     // other type needs to be implemented
@@ -568,6 +570,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo
             Field::new("c4", DataType::Boolean, true),
             Field::new("c5", DataType::Timestamp(TimeUnit::Millisecond, None), true),
             Field::new("c6", DataType::Time32(TimeUnit::Second), false),
+            Field::new("c7", DataType::Decimal(6, 2), false),
         ]);
 
         let c1 = StringArray::from(vec![
@@ -587,6 +590,11 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo
             None,
         );
         let c6 = Time32SecondArray::from(vec![1234, 24680, 85563]);
+        let mut c7_builder = DecimalBuilder::new(5, 6, 2);
+        c7_builder.append_value(12345_i128).unwrap();
+        c7_builder.append_value(-12345_i128).unwrap();
+        c7_builder.append_null().unwrap();
+        let c7 = c7_builder.finish();
 
         let batch = RecordBatch::try_new(
             Arc::new(schema),
@@ -597,6 +605,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo
                 Arc::new(c4),
                 Arc::new(c5),
                 Arc::new(c6),
+                Arc::new(c7),
             ],
         )
         .unwrap();
@@ -608,13 +617,13 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo
             writer.write(batch).unwrap();
         }
 
-        let left = "c1,c2,c3,c4,c5,c6
-Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34
-consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20
-sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03
-Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34
-consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20
-sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03\n";
+        let left = "c1,c2,c3,c4,c5,c6,c7
+Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34,123.45
+consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20,-123.45
+sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,
+Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34,123.45
+consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20,-123.45
+sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,\n";
         let right = writer.writer.into_inner().map(|s| s.to_string());
         assert_eq!(Some(left.to_string()), right.ok());
     }
diff --git a/arrow/src/util/display.rs b/arrow/src/util/display.rs
index 61f549a..999e424 100644
--- a/arrow/src/util/display.rs
+++ b/arrow/src/util/display.rs
@@ -19,6 +19,8 @@
 //! purposes. See the `pretty` crate for additional functions for
 //! record batch pretty printing.
 
+use std::sync::Arc;
+
 use crate::array::Array;
 use crate::datatypes::{
     ArrowNativeType, ArrowPrimitiveType, DataType, Int16Type, Int32Type, Int64Type,
@@ -192,18 +194,15 @@ macro_rules! make_string_from_list {
     }};
 }
 
-macro_rules! make_string_from_decimal {
-    ($array_type: ty, $column: ident, $row: ident, $scale: ident) => {{
-        let array = $column.as_any().downcast_ref::<$array_type>().unwrap();
-        let decimal_string = array.value($row).to_string();
-        let formatted_decimal = if *$scale == 0 {
-            decimal_string
-        } else {
-            let splits = decimal_string.split_at(decimal_string.len() - *$scale);
-            format!("{}.{}", splits.0, splits.1)
-        };
-        Ok(formatted_decimal)
-    }};
+#[inline(always)]
+pub fn make_string_from_decimal(column: &Arc<dyn Array>, row: usize) -> Result<String> {
+    let array = column
+        .as_any()
+        .downcast_ref::<array::DecimalArray>()
+        .unwrap();
+
+    let formatted_decimal = array.value_as_string(row);
+    Ok(formatted_decimal)
 }
 
 /// Get the value at the given row in an array as a String.
@@ -231,9 +230,7 @@ pub fn array_value_to_string(column: &array::ArrayRef, row: usize) -> Result<Str
         DataType::Float16 => make_string!(array::Float32Array, column, row),
         DataType::Float32 => make_string!(array::Float32Array, column, row),
         DataType::Float64 => make_string!(array::Float64Array, column, row),
-        DataType::Decimal(_, scale) => {
-            make_string_from_decimal!(array::DecimalArray, column, row, scale)
-        }
+        DataType::Decimal(..) => make_string_from_decimal(column, row),
         DataType::Timestamp(unit, _) if *unit == TimeUnit::Second => {
             make_string_datetime!(array::TimestampSecondArray, column, row)
         }