You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by su...@apache.org on 2019/04/20 03:12:04 UTC
[arrow] branch master updated: ARROW-5189: [Rust] [Parquet] Format
/ display individual fields within a parquet row
This is an automated email from the ASF dual-hosted git repository.
sunchao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 271599e ARROW-5189: [Rust] [Parquet] Format / display individual fields within a parquet row
271599e is described below
commit 271599ee6d8348e718d280d3bac9f4972a73e998
Author: Fabio B. Silva <fa...@gmail.com>
AuthorDate: Fri Apr 19 20:11:51 2019 -0700
ARROW-5189: [Rust] [Parquet] Format / display individual fields within a parquet row
Hi,
I'm working on a simple cli app to sample and analyze parquet files.
I couldn't find a simple way to get a string representation of each column within a `Row`.
All `Field`s in a row already implement `fmt::Display`, but there is no way to format individual fields,
since `Row#fields` is not exposed by the API, which I assume is by design.
Having a way to format individual fields seems like a common problem,
so I came up with a `RowFormatter` which provides a way to access the field as a `fmt::Display`.
```rust
use parquet::record::RowFormatter;
// ...
let row = make_row(vec![
("id".to_string(), Field::Int(5)),
("name".to_string(), Field::Str("abc".to_string()))
]);
println!("row id : {}", row.fmt(0).unwrap());
println!("row name : {}", row.fmt(1).unwrap());
```
I'm just getting started with Rust, so please let me know if I can do anything better here.
Author: Fabio B. Silva <fa...@gmail.com>
Closes #4174 from FabioBatSilva/row-fmt and squashes the following commits:
2cae53af <Fabio B. Silva> fmt fixes
ef94b5de <Fabio B. Silva> Simplify RowFormatter#fmt by removing Result
ceb91194 <Fabio B. Silva> Add RowFormatter to pub api
df522752 <Fabio B. Silva> format / display fields within a Row
---
rust/parquet/src/record/api.rs | 95 ++++++++++++++++++++++++++++++++++++++++++
rust/parquet/src/record/mod.rs | 4 +-
2 files changed, 98 insertions(+), 1 deletion(-)
diff --git a/rust/parquet/src/record/api.rs b/rust/parquet/src/record/api.rs
index 87f88b3..9c76a0c 100644
--- a/rust/parquet/src/record/api.rs
+++ b/rust/parquet/src/record/api.rs
@@ -74,6 +74,11 @@ pub trait RowAccessor {
fn get_map(&self, i: usize) -> Result<&Map>;
}
+/// Trait for formatting fields within a Row.
+pub trait RowFormatter {
+ fn fmt(&self, i: usize) -> &fmt::Display;
+}
+
/// Macro to generate type-safe get_xxx methods for primitive types,
/// e.g. `get_bool`, `get_short`.
macro_rules! row_primitive_accessor {
@@ -102,6 +107,13 @@ macro_rules! row_complex_accessor {
}
}
+impl RowFormatter for Row {
+ /// Get Display reference for a given field.
+ fn fmt(&self, i: usize) -> &fmt::Display {
+ &self.fields[i].1
+ }
+}
+
impl RowAccessor for Row {
row_primitive_accessor!(get_bool, Bool, bool);
@@ -1124,6 +1136,89 @@ mod tests {
}
#[test]
+ fn test_row_primitive_field_fmt() {
+ // Primitive types
+ let row = make_row(vec![
+ ("00".to_string(), Field::Null),
+ ("01".to_string(), Field::Bool(false)),
+ ("02".to_string(), Field::Byte(3)),
+ ("03".to_string(), Field::Short(4)),
+ ("04".to_string(), Field::Int(5)),
+ ("05".to_string(), Field::Long(6)),
+ ("06".to_string(), Field::UByte(7)),
+ ("07".to_string(), Field::UShort(8)),
+ ("08".to_string(), Field::UInt(9)),
+ ("09".to_string(), Field::ULong(10)),
+ ("10".to_string(), Field::Float(11.1)),
+ ("11".to_string(), Field::Double(12.1)),
+ ("12".to_string(), Field::Str("abc".to_string())),
+ (
+ "13".to_string(),
+ Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
+ ),
+ ("14".to_string(), Field::Date(14611)),
+ ("15".to_string(), Field::Timestamp(1262391174000)),
+ ("16".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
+ ]);
+
+ assert_eq!("null", format!("{}", row.fmt(0)));
+ assert_eq!("false", format!("{}", row.fmt(1)));
+ assert_eq!("3", format!("{}", row.fmt(2)));
+ assert_eq!("4", format!("{}", row.fmt(3)));
+ assert_eq!("5", format!("{}", row.fmt(4)));
+ assert_eq!("6", format!("{}", row.fmt(5)));
+ assert_eq!("7", format!("{}", row.fmt(6)));
+ assert_eq!("8", format!("{}", row.fmt(7)));
+ assert_eq!("9", format!("{}", row.fmt(8)));
+ assert_eq!("10", format!("{}", row.fmt(9)));
+ assert_eq!("11.1", format!("{}", row.fmt(10)));
+ assert_eq!("12.1", format!("{}", row.fmt(11)));
+ assert_eq!("\"abc\"", format!("{}", row.fmt(12)));
+ assert_eq!("[1, 2, 3, 4, 5]", format!("{}", row.fmt(13)));
+ assert_eq!(convert_date_to_string(14611), format!("{}", row.fmt(14)));
+ assert_eq!(
+ convert_timestamp_to_string(1262391174000),
+ format!("{}", row.fmt(15))
+ );
+ assert_eq!("0.04", format!("{}", row.fmt(16)));
+ }
+
+ #[test]
+ fn test_row_complex_field_fmt() {
+ // Complex types
+ let row = make_row(vec![
+ (
+ "00".to_string(),
+ Field::Group(make_row(vec![
+ ("x".to_string(), Field::Null),
+ ("Y".to_string(), Field::Int(2)),
+ ])),
+ ),
+ (
+ "01".to_string(),
+ Field::ListInternal(make_list(vec![
+ Field::Int(2),
+ Field::Int(1),
+ Field::Null,
+ Field::Int(12),
+ ])),
+ ),
+ (
+ "02".to_string(),
+ Field::MapInternal(make_map(vec![
+ (Field::Int(1), Field::Float(1.2)),
+ (Field::Int(2), Field::Float(4.5)),
+ (Field::Int(3), Field::Float(2.3)),
+ ])),
+ ),
+ ]);
+
+ assert_eq!("{x: null, Y: 2}", format!("{}", row.fmt(0)));
+ assert_eq!("[2, 1, null, 12]", format!("{}", row.fmt(1)));
+ assert_eq!("{1 -> 1.2, 2 -> 4.5, 3 -> 2.3}", format!("{}", row.fmt(2)));
+ }
+
+ #[test]
fn test_row_primitive_accessors() {
// primitives
let row = make_row(vec![
diff --git a/rust/parquet/src/record/mod.rs b/rust/parquet/src/record/mod.rs
index 0dba8a7..4427ada 100644
--- a/rust/parquet/src/record/mod.rs
+++ b/rust/parquet/src/record/mod.rs
@@ -21,4 +21,6 @@ mod api;
pub mod reader;
mod triplet;
-pub use self::api::{List, ListAccessor, Map, MapAccessor, Row, RowAccessor};
+pub use self::api::{
+ List, ListAccessor, Map, MapAccessor, Row, RowAccessor, RowFormatter,
+};