You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/07/27 04:26:41 UTC

[arrow] branch master updated: ARROW-2918: [C++] Improve formatting of Struct pretty prints

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 76033f4  ARROW-2918: [C++] Improve formatting of Struct pretty prints
76033f4 is described below

commit 76033f481fb6946184dbf99c064206eb9889a867
Author: Korn, Uwe <Uw...@blue-yonder.com>
AuthorDate: Fri Jul 27 00:26:33 2018 -0400

    ARROW-2918: [C++] Improve formatting of Struct pretty prints
    
    cc @kszucs
    
    Author: Korn, Uwe <Uw...@blue-yonder.com>
    
    Closes #2327 from xhochy/ARROW-2918 and squashes the following commits:
    
    22b9a313 <Korn, Uwe> ARROW-2918:  Improve formatting of Struct pretty prints
---
 c_glib/test/test-orc-file-reader.rb | 166 +++++++++++++++---------------
 cpp/src/arrow/pretty_print-test.cc  | 198 ++++++++++++++++++++++++++++++++----
 cpp/src/arrow/pretty_print.cc       |   9 +-
 ruby/red-arrow/test/test-orc.rb     |  83 ++++++++-------
 4 files changed, 308 insertions(+), 148 deletions(-)

diff --git a/c_glib/test/test-orc-file-reader.rb b/c_glib/test/test-orc-file-reader.rb
index 714ee5a..5f65520 100644
--- a/c_glib/test/test-orc-file-reader.rb
+++ b/c_glib/test/test-orc-file-reader.rb
@@ -79,29 +79,28 @@ map: list<item: struct<key: string, value: struct<int1: int32, string1: string>>
                        "list<item: struct<int1: int32, string1: string>>>",
                        [
                          <<-STRUCT.chomp
-
--- is_valid:
-all not null
--- child 0 type: list<item: struct<int1: int32, string1: string>> values:   [
-
-    -- is_valid:
-all not null
-    -- child 0 type: int32 values:       [
+-- is_valid: all not null
+-- child 0 type: list<item: struct<int1: int32, string1: string>>
+  [
+    -- is_valid: all not null
+    -- child 0 type: int32
+      [
         1,
         2
       ]
-    -- child 1 type: string values:       [
+    -- child 1 type: string
+      [
         "bye",
         "sigh"
       ],
-
-    -- is_valid:
-all not null
-    -- child 0 type: int32 values:       [
+    -- is_valid: all not null
+    -- child 0 type: int32
+      [
         1,
         2
       ]
-    -- child 1 type: string values:       [
+    -- child 1 type: string
+      [
         "bye",
         "sigh"
       ]
@@ -114,26 +113,26 @@ all not null
                        [
                          <<-LIST.chomp
 [
-
-  -- is_valid:
-all not null
-  -- child 0 type: int32 values:     [
+  -- is_valid: all not null
+  -- child 0 type: int32
+    [
       3,
       4
     ]
-  -- child 1 type: string values:     [
+  -- child 1 type: string
+    [
       "good",
       "bad"
     ],
-
-  -- is_valid:
-all not null
-  -- child 0 type: int32 values:     [
+  -- is_valid: all not null
+  -- child 0 type: int32
+    [
       100000000,
       -100000,
       1234
     ]
-  -- child 1 type: string values:     [
+  -- child 1 type: string
+    [
       "cat",
       "in",
       "hat"
@@ -149,30 +148,30 @@ all not null
                        [
                          <<-MAP.chomp
 [
-
-  -- is_valid:
-all not null
-  -- child 0 type: string values:     []
-  -- child 1 type: struct<int1: int32, string1: string> values: 
-    -- is_valid:
-all not null
-    -- child 0 type: int32 values:       []
-    -- child 1 type: string values:       [],
-
-  -- is_valid:
-all not null
-  -- child 0 type: string values:     [
+  -- is_valid: all not null
+  -- child 0 type: string
+    []
+  -- child 1 type: struct<int1: int32, string1: string>
+    -- is_valid: all not null
+    -- child 0 type: int32
+      []
+    -- child 1 type: string
+      [],
+  -- is_valid: all not null
+  -- child 0 type: string
+    [
       "chani",
       "mauddib"
     ]
-  -- child 1 type: struct<int1: int32, string1: string> values: 
-    -- is_valid:
-all not null
-    -- child 0 type: int32 values:       [
+  -- child 1 type: struct<int1: int32, string1: string>
+    -- is_valid: all not null
+    -- child 0 type: int32
+      [
         5,
         1
       ]
-    -- child 1 type: string values:       [
+    -- child 1 type: string
+      [
         "chani",
         "mauddib"
       ]
@@ -231,29 +230,28 @@ all not null
                        "struct<list: " +
                        "list<item: struct<int1: int32, string1: string>>>",
                        <<-STRUCT.chomp
-
--- is_valid:
-all not null
--- child 0 type: list<item: struct<int1: int32, string1: string>> values:   [
-
-    -- is_valid:
-all not null
-    -- child 0 type: int32 values:       [
+-- is_valid: all not null
+-- child 0 type: list<item: struct<int1: int32, string1: string>>
+  [
+    -- is_valid: all not null
+    -- child 0 type: int32
+      [
         1,
         2
       ]
-    -- child 1 type: string values:       [
+    -- child 1 type: string
+      [
         "bye",
         "sigh"
       ],
-
-    -- is_valid:
-all not null
-    -- child 0 type: int32 values:       [
+    -- is_valid: all not null
+    -- child 0 type: int32
+      [
         1,
         2
       ]
-    -- child 1 type: string values:       [
+    -- child 1 type: string
+      [
         "bye",
         "sigh"
       ]
@@ -264,26 +262,26 @@ all not null
                        "list: list<item: struct<int1: int32, string1: string>>",
                        <<-LIST.chomp
 [
-
-  -- is_valid:
-all not null
-  -- child 0 type: int32 values:     [
+  -- is_valid: all not null
+  -- child 0 type: int32
+    [
       3,
       4
     ]
-  -- child 1 type: string values:     [
+  -- child 1 type: string
+    [
       "good",
       "bad"
     ],
-
-  -- is_valid:
-all not null
-  -- child 0 type: int32 values:     [
+  -- is_valid: all not null
+  -- child 0 type: int32
+    [
       100000000,
       -100000,
       1234
     ]
-  -- child 1 type: string values:     [
+  -- child 1 type: string
+    [
       "cat",
       "in",
       "hat"
@@ -297,30 +295,30 @@ all not null
                        "struct<int1: int32, string1: string>>>",
                        <<-MAP.chomp
 [
-
-  -- is_valid:
-all not null
-  -- child 0 type: string values:     []
-  -- child 1 type: struct<int1: int32, string1: string> values: 
-    -- is_valid:
-all not null
-    -- child 0 type: int32 values:       []
-    -- child 1 type: string values:       [],
-
-  -- is_valid:
-all not null
-  -- child 0 type: string values:     [
+  -- is_valid: all not null
+  -- child 0 type: string
+    []
+  -- child 1 type: struct<int1: int32, string1: string>
+    -- is_valid: all not null
+    -- child 0 type: int32
+      []
+    -- child 1 type: string
+      [],
+  -- is_valid: all not null
+  -- child 0 type: string
+    [
       "chani",
       "mauddib"
     ]
-  -- child 1 type: struct<int1: int32, string1: string> values: 
-    -- is_valid:
-all not null
-    -- child 0 type: int32 values:       [
+  -- child 1 type: struct<int1: int32, string1: string>
+    -- is_valid: all not null
+    -- child 0 type: int32
+      [
         5,
         1
       ]
-    -- child 1 type: string values:       [
+    -- child 1 type: string
+      [
         "chani",
         "mauddib"
       ]
diff --git a/cpp/src/arrow/pretty_print-test.cc b/cpp/src/arrow/pretty_print-test.cc
index b7fef06..9724b8e 100644
--- a/cpp/src/arrow/pretty_print-test.cc
+++ b/cpp/src/arrow/pretty_print-test.cc
@@ -85,32 +85,161 @@ TEST_F(TestPrettyPrint, PrimitiveType) {
   std::vector<bool> is_valid = {true, true, false, true, false};
 
   std::vector<int32_t> values = {0, 1, 2, 3, 4};
-  static const char* expected = "[\n  0,\n  1,\n  null,\n  3,\n  null\n]";
+  static const char* expected = R"expected([
+  0,
+  1,
+  null,
+  3,
+  null
+])expected";
   CheckPrimitive<Int32Type, int32_t>({0, 10}, is_valid, values, expected);
 
-  static const char* expected_na = "[\n  0,\n  1,\n  NA,\n  3,\n  NA\n]";
+  static const char* expected_na = R"expected([
+  0,
+  1,
+  NA,
+  3,
+  NA
+])expected";
   CheckPrimitive<Int32Type, int32_t>({0, 10, 2, "NA"}, is_valid, values, expected_na,
                                      false);
 
-  static const char* ex_in2 = "  [\n    0,\n    1,\n    null,\n    3,\n    null\n  ]";
+  static const char* ex_in2 = R"expected(  [
+    0,
+    1,
+    null,
+    3,
+    null
+  ])expected";
   CheckPrimitive<Int32Type, int32_t>({2, 10}, is_valid, values, ex_in2);
-  static const char* ex_in2_w2 = "  [\n    0,\n    1,\n    ...\n    3,\n    null\n  ]";
+  static const char* ex_in2_w2 = R"expected(  [
+    0,
+    1,
+    ...
+    3,
+    null
+  ])expected";
   CheckPrimitive<Int32Type, int32_t>({2, 2}, is_valid, values, ex_in2_w2);
 
   std::vector<double> values2 = {0., 1., 2., 3., 4.};
-  static const char* ex2 = "[\n  0,\n  1,\n  null,\n  3,\n  null\n]";
+  static const char* ex2 = R"expected([
+  0,
+  1,
+  null,
+  3,
+  null
+])expected";
   CheckPrimitive<DoubleType, double>({0, 10}, is_valid, values2, ex2);
-  static const char* ex2_in2 = "  [\n    0,\n    1,\n    null,\n    3,\n    null\n  ]";
+  static const char* ex2_in2 = R"expected(  [
+    0,
+    1,
+    null,
+    3,
+    null
+  ])expected";
   CheckPrimitive<DoubleType, double>({2, 10}, is_valid, values2, ex2_in2);
 
   std::vector<std::string> values3 = {"foo", "bar", "", "baz", ""};
-  static const char* ex3 = "[\n  \"foo\",\n  \"bar\",\n  null,\n  \"baz\",\n  null\n]";
+  static const char* ex3 = R"expected([
+  "foo",
+  "bar",
+  null,
+  "baz",
+  null
+])expected";
   CheckPrimitive<StringType, std::string>({0, 10}, is_valid, values3, ex3);
-  static const char* ex3_in2 =
-      "  [\n    \"foo\",\n    \"bar\",\n    null,\n    \"baz\",\n    null\n  ]";
+  static const char* ex3_in2 = R"expected(  [
+    "foo",
+    "bar",
+    null,
+    "baz",
+    null
+  ])expected";
   CheckPrimitive<StringType, std::string>({2, 10}, is_valid, values3, ex3_in2);
 }
 
+TEST_F(TestPrettyPrint, StructTypeBasic) {
+  auto simple_1 = field("one", int32());
+  auto simple_2 = field("two", int32());
+  auto simple_struct = struct_({simple_1, simple_2});
+
+  auto int_builder_1 = std::make_shared<Int32Builder>();
+  auto int_builder_2 = std::make_shared<Int32Builder>();
+  StructBuilder builder(simple_struct, default_memory_pool(),
+                        {int_builder_1, int_builder_2});
+  ASSERT_OK(builder.Append());
+  ASSERT_OK(int_builder_1->Append(11));
+  ASSERT_OK(int_builder_2->Append(22));
+
+  std::shared_ptr<Array> array;
+  ASSERT_OK(builder.Finish(&array));
+
+  static const char* ex = R"expected(-- is_valid: all not null
+-- child 0 type: int32
+  [
+    11
+  ]
+-- child 1 type: int32
+  [
+    22
+  ])expected";
+  CheckStream(*array, {0, 10}, ex);
+
+  static const char* ex_2 = R"expected(  -- is_valid: all not null
+  -- child 0 type: int32
+    [
+      11
+    ]
+  -- child 1 type: int32
+    [
+      22
+    ])expected";
+  CheckStream(*array, {2, 10}, ex_2);
+}
+
+TEST_F(TestPrettyPrint, StructTypeAdvanced) {
+  auto simple_1 = field("one", int32());
+  auto simple_2 = field("two", int32());
+  auto simple_struct = struct_({simple_1, simple_2});
+
+  auto int_builder_1 = std::make_shared<Int32Builder>();
+  auto int_builder_2 = std::make_shared<Int32Builder>();
+  StructBuilder builder(simple_struct, default_memory_pool(),
+                        {int_builder_1, int_builder_2});
+  ASSERT_OK(builder.Append());
+  ASSERT_OK(int_builder_1->Append(11));
+  ASSERT_OK(int_builder_2->Append(22));
+  ASSERT_OK(builder.AppendNull());
+  ASSERT_OK(int_builder_1->AppendNull());
+  ASSERT_OK(int_builder_2->AppendNull());
+  ASSERT_OK(builder.Append());
+  ASSERT_OK(int_builder_1->AppendNull());
+  ASSERT_OK(int_builder_2->Append(33));
+
+  std::shared_ptr<Array> array;
+  ASSERT_OK(builder.Finish(&array));
+
+  static const char* ex = R"expected(-- is_valid:
+  [
+    true,
+    false,
+    true
+  ]
+-- child 0 type: int32
+  [
+    11,
+    null,
+    null
+  ]
+-- child 1 type: int32
+  [
+    22,
+    null,
+    33
+  ])expected";
+  CheckStream(*array, {0, 10}, ex);
+}
+
 TEST_F(TestPrettyPrint, BinaryType) {
   std::vector<bool> is_valid = {true, true, false, true, false};
   std::vector<std::string> values = {"foo", "bar", "", "baz", ""};
@@ -140,17 +269,50 @@ TEST_F(TestPrettyPrint, ListType) {
 
   std::shared_ptr<Array> array;
   ASSERT_OK(list_builder.Finish(&array));
-  static const char* ex =
-      "[\n  [\n    null\n  ],\n  [],\n  null,\n  [\n    4,\n    6,\n    7\n  ],\n  [\n   "
-      " "
-      "2,\n    3\n  ]\n]";
+  static const char* ex = R"expected([
+  [
+    null
+  ],
+  [],
+  null,
+  [
+    4,
+    6,
+    7
+  ],
+  [
+    2,
+    3
+  ]
+])expected";
   CheckArray(*array, {0, 10}, ex);
-  static const char* ex_2 =
-      "  [\n    [\n      null\n    ],\n    [],\n    null,\n    [\n      4,\n      6,\n   "
-      "   "
-      "7\n    ],\n    [\n      2,\n      3\n    ]\n  ]";
+  static const char* ex_2 = R"expected(  [
+    [
+      null
+    ],
+    [],
+    null,
+    [
+      4,
+      6,
+      7
+    ],
+    [
+      2,
+      3
+    ]
+  ])expected";
   CheckArray(*array, {2, 10}, ex_2);
-  static const char* ex_3 = "[\n  [\n    null\n  ],\n  ...\n  [\n    2,\n    3\n  ]\n]";
+  static const char* ex_3 = R"expected([
+  [
+    null
+  ],
+  ...
+  [
+    2,
+    3
+  ]
+])expected";
   CheckStream(*array, {0, 1}, ex_3);
 }
 
diff --git a/cpp/src/arrow/pretty_print.cc b/cpp/src/arrow/pretty_print.cc
index b8c3f7d..141b0a4 100644
--- a/cpp/src/arrow/pretty_print.cc
+++ b/cpp/src/arrow/pretty_print.cc
@@ -251,7 +251,7 @@ class ArrayPrinter : public PrettyPrinter {
     for (size_t i = 0; i < fields.size(); ++i) {
       Newline();
       std::stringstream ss;
-      ss << "-- child " << i << " type: " << fields[i]->type()->ToString() << " values: ";
+      ss << "-- child " << i << " type: " << fields[i]->type()->ToString() << "\n";
       Write(ss.str());
 
       std::shared_ptr<Array> field = fields[i];
@@ -321,15 +321,16 @@ class ArrayPrinter : public PrettyPrinter {
 };
 
 Status ArrayPrinter::WriteValidityBitmap(const Array& array) {
-  Newline();
-  Write("-- is_valid:\n");
+  Indent();
+  Write("-- is_valid:");
 
   if (array.null_count() > 0) {
+    Newline();
     BooleanArray is_valid(array.length(), array.null_bitmap(), nullptr, 0,
                           array.offset());
     return PrettyPrint(is_valid, indent_ + indent_size_, sink_);
   } else {
-    Write("all not null");
+    Write(" all not null");
     return Status::OK();
   }
 }
diff --git a/ruby/red-arrow/test/test-orc.rb b/ruby/red-arrow/test/test-orc.rb
index 9f31449..e534e07 100644
--- a/ruby/red-arrow/test/test-orc.rb
+++ b/ruby/red-arrow/test/test-orc.rb
@@ -55,29 +55,28 @@ class ORCTest < Test::Unit::TestCase
                        "list<item: struct<int1: int32, string1: string>>>",
                        [
                          <<-STRUCT.chomp
-
--- is_valid:
-all not null
--- child 0 type: list<item: struct<int1: int32, string1: string>> values:   [
-
-    -- is_valid:
-all not null
-    -- child 0 type: int32 values:       [
+-- is_valid: all not null
+-- child 0 type: list<item: struct<int1: int32, string1: string>>
+  [
+    -- is_valid: all not null
+    -- child 0 type: int32
+      [
         1,
         2
       ]
-    -- child 1 type: string values:       [
+    -- child 1 type: string
+      [
         "bye",
         "sigh"
       ],
-
-    -- is_valid:
-all not null
-    -- child 0 type: int32 values:       [
+    -- is_valid: all not null
+    -- child 0 type: int32
+      [
         1,
         2
       ]
-    -- child 1 type: string values:       [
+    -- child 1 type: string
+      [
         "bye",
         "sigh"
       ]
@@ -90,26 +89,26 @@ all not null
                        [
                          <<-LIST.chomp
 [
-
-  -- is_valid:
-all not null
-  -- child 0 type: int32 values:     [
+  -- is_valid: all not null
+  -- child 0 type: int32
+    [
       3,
       4
     ]
-  -- child 1 type: string values:     [
+  -- child 1 type: string
+    [
       "good",
       "bad"
     ],
-
-  -- is_valid:
-all not null
-  -- child 0 type: int32 values:     [
+  -- is_valid: all not null
+  -- child 0 type: int32
+    [
       100000000,
       -100000,
       1234
     ]
-  -- child 1 type: string values:     [
+  -- child 1 type: string
+    [
       "cat",
       "in",
       "hat"
@@ -125,30 +124,30 @@ all not null
                        [
                          <<-MAP.chomp
 [
-
-  -- is_valid:
-all not null
-  -- child 0 type: string values:     []
-  -- child 1 type: struct<int1: int32, string1: string> values: 
-    -- is_valid:
-all not null
-    -- child 0 type: int32 values:       []
-    -- child 1 type: string values:       [],
-
-  -- is_valid:
-all not null
-  -- child 0 type: string values:     [
+  -- is_valid: all not null
+  -- child 0 type: string
+    []
+  -- child 1 type: struct<int1: int32, string1: string>
+    -- is_valid: all not null
+    -- child 0 type: int32
+      []
+    -- child 1 type: string
+      [],
+  -- is_valid: all not null
+  -- child 0 type: string
+    [
       "chani",
       "mauddib"
     ]
-  -- child 1 type: struct<int1: int32, string1: string> values: 
-    -- is_valid:
-all not null
-    -- child 0 type: int32 values:       [
+  -- child 1 type: struct<int1: int32, string1: string>
+    -- is_valid: all not null
+    -- child 0 type: int32
+      [
         5,
         1
       ]
-    -- child 1 type: string values:       [
+    -- child 1 type: string
+      [
         "chani",
         "mauddib"
       ]