You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by su...@apache.org on 2019/04/20 03:12:04 UTC

[arrow] branch master updated: ARROW-5189: [Rust] [Parquet] Format / display individual fields within a parquet row

This is an automated email from the ASF dual-hosted git repository.

sunchao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 271599e  ARROW-5189: [Rust] [Parquet] Format / display individual fields within a parquet row
271599e is described below

commit 271599ee6d8348e718d280d3bac9f4972a73e998
Author: Fabio B. Silva <fa...@gmail.com>
AuthorDate: Fri Apr 19 20:11:51 2019 -0700

    ARROW-5189: [Rust] [Parquet] Format / display individual fields within a parquet row
    
    Hi,
    
    I'm working on a simple cli app to sample and analyze parquet files.
    I couldn't find a simple way to get a string representation of each column within a `Row`.
    
    All `Field`s in a row already implement `fmt::Display`, but there is no way to format individual fields,
    since `Row#fields` is not exposed by the API, which I assume is by design.
    
    Having a way to format individual fields seems like a common problem,
    so I came up with a `RowFormatter` which provides a way to access the field as a `fmt::Display`.
    
    ```rust
    use parquet::record::RowFormatter;
    
    // ...
    
    let row = make_row(vec![
        ("id".to_string(), Field::Int(5)),
        ("name".to_string(), Field::Str("abc".to_string()))
    ]);
    
    println!("row id   : {}", row.fmt(0));
    println!("row name : {}", row.fmt(1));
    ```
    
    I'm just getting started with Rust, so please let me know if I can do anything better here.
    
    Author: Fabio B. Silva <fa...@gmail.com>
    
    Closes #4174 from FabioBatSilva/row-fmt and squashes the following commits:
    
    2cae53af <Fabio B. Silva> fmt fixes
    ef94b5de <Fabio B. Silva> Simplify RowFormatter#fmt by removing Result
    ceb91194 <Fabio B. Silva> Add RowFormatter to pub api
    df522752 <Fabio B. Silva>  format / display fields within a Row
---
 rust/parquet/src/record/api.rs | 95 ++++++++++++++++++++++++++++++++++++++++++
 rust/parquet/src/record/mod.rs |  4 +-
 2 files changed, 98 insertions(+), 1 deletion(-)

diff --git a/rust/parquet/src/record/api.rs b/rust/parquet/src/record/api.rs
index 87f88b3..9c76a0c 100644
--- a/rust/parquet/src/record/api.rs
+++ b/rust/parquet/src/record/api.rs
@@ -74,6 +74,11 @@ pub trait RowAccessor {
     fn get_map(&self, i: usize) -> Result<&Map>;
 }
 
+/// Trait for formatting fields within a Row.
+pub trait RowFormatter {
+    fn fmt(&self, i: usize) -> &fmt::Display;
+}
+
 /// Macro to generate type-safe get_xxx methods for primitive types,
 /// e.g. `get_bool`, `get_short`.
 macro_rules! row_primitive_accessor {
@@ -102,6 +107,13 @@ macro_rules! row_complex_accessor {
   }
 }
 
+impl RowFormatter for Row {
+    /// Get Display reference for a given field.
+    fn fmt(&self, i: usize) -> &fmt::Display {
+        &self.fields[i].1
+    }
+}
+
 impl RowAccessor for Row {
     row_primitive_accessor!(get_bool, Bool, bool);
 
@@ -1124,6 +1136,89 @@ mod tests {
     }
 
     #[test]
+    fn test_row_primitive_field_fmt() {
+        // Primitives types
+        let row = make_row(vec![
+            ("00".to_string(), Field::Null),
+            ("01".to_string(), Field::Bool(false)),
+            ("02".to_string(), Field::Byte(3)),
+            ("03".to_string(), Field::Short(4)),
+            ("04".to_string(), Field::Int(5)),
+            ("05".to_string(), Field::Long(6)),
+            ("06".to_string(), Field::UByte(7)),
+            ("07".to_string(), Field::UShort(8)),
+            ("08".to_string(), Field::UInt(9)),
+            ("09".to_string(), Field::ULong(10)),
+            ("10".to_string(), Field::Float(11.1)),
+            ("11".to_string(), Field::Double(12.1)),
+            ("12".to_string(), Field::Str("abc".to_string())),
+            (
+                "13".to_string(),
+                Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
+            ),
+            ("14".to_string(), Field::Date(14611)),
+            ("15".to_string(), Field::Timestamp(1262391174000)),
+            ("16".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
+        ]);
+
+        assert_eq!("null", format!("{}", row.fmt(0)));
+        assert_eq!("false", format!("{}", row.fmt(1)));
+        assert_eq!("3", format!("{}", row.fmt(2)));
+        assert_eq!("4", format!("{}", row.fmt(3)));
+        assert_eq!("5", format!("{}", row.fmt(4)));
+        assert_eq!("6", format!("{}", row.fmt(5)));
+        assert_eq!("7", format!("{}", row.fmt(6)));
+        assert_eq!("8", format!("{}", row.fmt(7)));
+        assert_eq!("9", format!("{}", row.fmt(8)));
+        assert_eq!("10", format!("{}", row.fmt(9)));
+        assert_eq!("11.1", format!("{}", row.fmt(10)));
+        assert_eq!("12.1", format!("{}", row.fmt(11)));
+        assert_eq!("\"abc\"", format!("{}", row.fmt(12)));
+        assert_eq!("[1, 2, 3, 4, 5]", format!("{}", row.fmt(13)));
+        assert_eq!(convert_date_to_string(14611), format!("{}", row.fmt(14)));
+        assert_eq!(
+            convert_timestamp_to_string(1262391174000),
+            format!("{}", row.fmt(15))
+        );
+        assert_eq!("0.04", format!("{}", row.fmt(16)));
+    }
+
+    #[test]
+    fn test_row_complex_field_fmt() {
+        // Complex types
+        let row = make_row(vec![
+            (
+                "00".to_string(),
+                Field::Group(make_row(vec![
+                    ("x".to_string(), Field::Null),
+                    ("Y".to_string(), Field::Int(2)),
+                ])),
+            ),
+            (
+                "01".to_string(),
+                Field::ListInternal(make_list(vec![
+                    Field::Int(2),
+                    Field::Int(1),
+                    Field::Null,
+                    Field::Int(12),
+                ])),
+            ),
+            (
+                "02".to_string(),
+                Field::MapInternal(make_map(vec![
+                    (Field::Int(1), Field::Float(1.2)),
+                    (Field::Int(2), Field::Float(4.5)),
+                    (Field::Int(3), Field::Float(2.3)),
+                ])),
+            ),
+        ]);
+
+        assert_eq!("{x: null, Y: 2}", format!("{}", row.fmt(0)));
+        assert_eq!("[2, 1, null, 12]", format!("{}", row.fmt(1)));
+        assert_eq!("{1 -> 1.2, 2 -> 4.5, 3 -> 2.3}", format!("{}", row.fmt(2)));
+    }
+
+    #[test]
     fn test_row_primitive_accessors() {
         // primitives
         let row = make_row(vec![
diff --git a/rust/parquet/src/record/mod.rs b/rust/parquet/src/record/mod.rs
index 0dba8a7..4427ada 100644
--- a/rust/parquet/src/record/mod.rs
+++ b/rust/parquet/src/record/mod.rs
@@ -21,4 +21,6 @@ mod api;
 pub mod reader;
 mod triplet;
 
-pub use self::api::{List, ListAccessor, Map, MapAccessor, Row, RowAccessor};
+pub use self::api::{
+    List, ListAccessor, Map, MapAccessor, Row, RowAccessor, RowFormatter,
+};