You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2022/02/24 17:23:52 UTC
[arrow] branch master updated: ARROW-14798: [C++][Python][R] Add container window to PrettyPrintOptions
This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new ae1ce19 ARROW-14798: [C++][Python][R] Add container window to PrettyPrintOptions
ae1ce19 is described below
commit ae1ce197306eecd5b887577784eb204f3488a688
Author: Will Jones <wi...@gmail.com>
AuthorDate: Thu Feb 24 18:21:55 2022 +0100
ARROW-14798: [C++][Python][R] Add container window to PrettyPrintOptions
# Summary
This PR makes a few changes to PrettyPrinting to make output shorter, particularly for ChunkedArray and ListArray types.
* Introduces a `container_window` argument to `PrettyPrintOptions`, which controls the window for ChunkedArray and ListArray separately from other types.
* Modified `PrettyPrinter` to pass down `ChildOptions()` to recursive calls. The main effect of this is that `skip_new_lines` is now passed down to children of StructArrays. It also makes sure that `window` and `container_window` are passed down to children.
* Modified `ChunkedArray` printer to always put new lines between sub-arrays of StructArray.
* Added missing comma in `ChunkedArray` print output after ellipsis.
* Changed `MapArray` printer to only indent if being printed on multiple lines.
These changes affect the C++, Python, and R implementations.
## Example
Here's a little test snippet:
```python
from random import sample, choice
import pyarrow as pa
arr_int = pa.array(range(50))
tree_parts = ["roots", "trunk", "crown", "seeds"]
arr_list = pa.array([sample(tree_parts, k=choice(range(len(tree_parts)))) for _ in range(50)])
arr_struct = pa.StructArray.from_arrays([arr_int, arr_list], names=['int_nested', 'list_nested'])
arr_map = pa.array(
[
[(part, choice(range(10))) for part in sample(tree_parts, k=choice(range(len(tree_parts))))]
for _ in range(50)
],
type=pa.map_(pa.utf8(), pa.int64())
)
table = pa.table({
'int': pa.chunked_array([arr_int] * 10),
'list': pa.chunked_array([arr_list] * 10),
'struct': pa.chunked_array([arr_struct] * 10),
'map': pa.chunked_array([arr_map] * 10),
})
print(table)
```
<details>
<summary>
Output Before
</summary>
```
pyarrow.Table
int: int64
list: list<item: string>
child 0, item: string
struct: struct<int_nested: int64, list_nested: list<item: string>>
child 0, int_nested: int64
child 1, list_nested: list<item: string>
child 0, item: string
map: map<string, int64>
child 0, entries: struct<key: string not null, value: int64> not null
child 0, key: string not null
child 1, value: int64
----
int: [[0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49],[0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49],[0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49],[0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49],[0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49],[0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49],[0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49],[0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49],[0,1,2,3,4,5,6,7,8,9,...,40,41,42,4 [...]
list: [[["roots","trunk"],["trunk","crown","roots"],["crown","seeds"],["trunk"],[],["crown"],["seeds","crown"],["seeds","roots","trunk"],["roots"],["crown"],...,["trunk","seeds","crown"],["roots","crown","trunk"],["roots"],["crown","trunk","roots"],["crown"],["crown"],["trunk"],["seeds","crown","roots"],[],["trunk","roots"]],[["roots","trunk"],["trunk","crown","roots"],["crown","seeds"],["trunk"],[],["crown"],["seeds","crown"],["seeds","roots","trunk"],["roots"],["crown"],...,["trunk" [...]
struct: [ -- is_valid: all not null -- child 0 type: int64
[
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
...
40,
41,
42,
43,
44,
45,
46,
47,
48,
49
] -- child 1 type: list<item: string>
[
[
"roots",
"trunk"
],
[
"trunk",
"crown",
"roots"
],
[
"crown",
"seeds"
],
[
"trunk"
],
[],
[
"crown"
],
[
"seeds",
"crown"
],
[
"seeds",
"roots",
"trunk"
],
[
"roots"
],
[
"crown"
],
...
[
"trunk",
"seeds",
"crown"
],
[
"roots",
"crown",
"trunk"
],
[
"roots"
],
[
"crown",
"trunk",
"roots"
],
[
"crown"
],
[
"crown"
],
[
"trunk"
],
[
"seeds",
"crown",
"roots"
],
[],
[
"trunk",
"roots"
]
], -- is_valid: all not null -- child 0 type: int64
[
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
...
40,
41,
42,
43,
44,
45,
46,
47,
48,
49
] -- child 1 type: list<item: string>
[
[
"roots",
"trunk"
],
[
"trunk",
"crown",
"roots"
],
[
"crown",
"seeds"
],
[
"trunk"
],
[],
[
"crown"
],
[
"seeds",
"crown"
],
[
"seeds",
"roots",
"trunk"
],
[
"roots"
],
[
"crown"
],
...
[
"trunk",
"seeds",
"crown"
],
[
"roots",
"crown",
"trunk"
],
[
"roots"
],
[
"crown",
"trunk",
"roots"
],
[
"crown"
],
[
"crown"
],
[
"trunk"
],
[
"seeds",
"crown",
"roots"
],
[],
[
"trunk",
"roots"
]
], -- is_valid: all not null -- child 0 type: int64
[
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
...
40,
41,
42,
43,
44,
45,
46,
47,
48,
49
] -- child 1 type: list<item: string>
[
[
"roots",
"trunk"
],
[
"trunk",
"crown",
"roots"
],
[
"crown",
"seeds"
],
[
"trunk"
],
[],
[
"crown"
],
[
"seeds",
"crown"
],
[
"seeds",
"roots",
"trunk"
],
[
"roots"
],
[
"crown"
],
...
[
"trunk",
"seeds",
"crown"
],
[
"roots",
"crown",
"trunk"
],
[
"roots"
],
[
"crown",
"trunk",
"roots"
],
[
"crown"
],
[
"crown"
],
[
"trunk"
],
[
"seeds",
"crown",
"roots"
],
[],
[
"trunk",
"roots"
]
], -- is_valid: all not null -- child 0 type: int64
[
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
...
40,
41,
42,
43,
44,
45,
46,
47,
48,
49
] -- child 1 type: list<item: string>
[
[
"roots",
"trunk"
],
[
"trunk",
"crown",
"roots"
],
[
"crown",
"seeds"
],
[
"trunk"
],
[],
[
"crown"
],
[
"seeds",
"crown"
],
[
"seeds",
"roots",
"trunk"
],
[
"roots"
],
[
"crown"
],
...
[
"trunk",
"seeds",
"crown"
],
[
"roots",
"crown",
"trunk"
],
[
"roots"
],
[
"crown",
"trunk",
"roots"
],
[
"crown"
],
[
"crown"
],
[
"trunk"
],
[
"seeds",
"crown",
"roots"
],
[],
[
"trunk",
"roots"
]
], -- is_valid: all not null -- child 0 type: int64
[
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
...
40,
41,
42,
43,
44,
45,
46,
47,
48,
49
] -- child 1 type: list<item: string>
[
[
"roots",
"trunk"
],
[
"trunk",
"crown",
"roots"
],
[
"crown",
"seeds"
],
[
"trunk"
],
[],
[
"crown"
],
[
"seeds",
"crown"
],
[
"seeds",
"roots",
"trunk"
],
[
"roots"
],
[
"crown"
],
...
[
"trunk",
"seeds",
"crown"
],
[
"roots",
"crown",
"trunk"
],
[
"roots"
],
[
"crown",
"trunk",
"roots"
],
[
"crown"
],
[
"crown"
],
[
"trunk"
],
[
"seeds",
"crown",
"roots"
],
[],
[
"trunk",
"roots"
]
], -- is_valid: all not null -- child 0 type: int64
[
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
...
40,
41,
42,
43,
44,
45,
46,
47,
48,
49
] -- child 1 type: list<item: string>
[
[
"roots",
"trunk"
],
[
"trunk",
"crown",
"roots"
],
[
"crown",
"seeds"
],
[
"trunk"
],
[],
[
"crown"
],
[
"seeds",
"crown"
],
[
"seeds",
"roots",
"trunk"
],
[
"roots"
],
[
"crown"
],
...
[
"trunk",
"seeds",
"crown"
],
[
"roots",
"crown",
"trunk"
],
[
"roots"
],
[
"crown",
"trunk",
"roots"
],
[
"crown"
],
[
"crown"
],
[
"trunk"
],
[
"seeds",
"crown",
"roots"
],
[],
[
"trunk",
"roots"
]
], -- is_valid: all not null -- child 0 type: int64
[
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
...
40,
41,
42,
43,
44,
45,
46,
47,
48,
49
] -- child 1 type: list<item: string>
[
[
"roots",
"trunk"
],
[
"trunk",
"crown",
"roots"
],
[
"crown",
"seeds"
],
[
"trunk"
],
[],
[
"crown"
],
[
"seeds",
"crown"
],
[
"seeds",
"roots",
"trunk"
],
[
"roots"
],
[
"crown"
],
...
[
"trunk",
"seeds",
"crown"
],
[
"roots",
"crown",
"trunk"
],
[
"roots"
],
[
"crown",
"trunk",
"roots"
],
[
"crown"
],
[
"crown"
],
[
"trunk"
],
[
"seeds",
"crown",
"roots"
],
[],
[
"trunk",
"roots"
]
], -- is_valid: all not null -- child 0 type: int64
[
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
...
40,
41,
42,
43,
44,
45,
46,
47,
48,
49
] -- child 1 type: list<item: string>
[
[
"roots",
"trunk"
],
[
"trunk",
"crown",
"roots"
],
[
"crown",
"seeds"
],
[
"trunk"
],
[],
[
"crown"
],
[
"seeds",
"crown"
],
[
"seeds",
"roots",
"trunk"
],
[
"roots"
],
[
"crown"
],
...
[
"trunk",
"seeds",
"crown"
],
[
"roots",
"crown",
"trunk"
],
[
"roots"
],
[
"crown",
"trunk",
"roots"
],
[
"crown"
],
[
"crown"
],
[
"trunk"
],
[
"seeds",
"crown",
"roots"
],
[],
[
"trunk",
"roots"
]
], -- is_valid: all not null -- child 0 type: int64
[
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
...
40,
41,
42,
43,
44,
45,
46,
47,
48,
49
] -- child 1 type: list<item: string>
[
[
"roots",
"trunk"
],
[
"trunk",
"crown",
"roots"
],
[
"crown",
"seeds"
],
[
"trunk"
],
[],
[
"crown"
],
[
"seeds",
"crown"
],
[
"seeds",
"roots",
"trunk"
],
[
"roots"
],
[
"crown"
],
...
[
"trunk",
"seeds",
"crown"
],
[
"roots",
"crown",
"trunk"
],
[
"roots"
],
[
"crown",
"trunk",
"roots"
],
[
"crown"
],
[
"crown"
],
[
"trunk"
],
[
"seeds",
"crown",
"roots"
],
[],
[
"trunk",
"roots"
]
], -- is_valid: all not null -- child 0 type: int64
[
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
...
40,
41,
42,
43,
44,
45,
46,
47,
48,
49
] -- child 1 type: list<item: string>
[
[
"roots",
"trunk"
],
[
"trunk",
"crown",
"roots"
],
[
"crown",
"seeds"
],
[
"trunk"
],
[],
[
"crown"
],
[
"seeds",
"crown"
],
[
"seeds",
"roots",
"trunk"
],
[
"roots"
],
[
"crown"
],
...
[
"trunk",
"seeds",
"crown"
],
[
"roots",
"crown",
"trunk"
],
[
"roots"
],
[
"crown",
"trunk",
"roots"
],
[
"crown"
],
[
"crown"
],
[
"trunk"
],
[
"seeds",
"crown",
"roots"
],
[],
[
"trunk",
"roots"
]
]]
map: [[ keys:["crown"]values:[4], keys:["seeds"]values:[7], keys:["trunk"]values:[7], keys:["roots","trunk","crown"]values:[4,8,0], keys:["crown","trunk","roots"]values:[3,6,8], keys:["crown","trunk","seeds"]values:[9,3,2], keys:["crown","seeds","roots"]values:[1,3,8], keys:["trunk","seeds"]values:[3,1], keys:[]values:[], keys:["roots","seeds","trunk"]values:[0,8,2],..., keys:[]values:[], keys:["trunk","crown","roots"]values:[7,2,8], keys:["seeds [...]
```
</details>
<details open>
<summary>
Output after
</summary>
```
pyarrow.Table
int: int64
list: list<item: string>
child 0, item: string
struct: struct<int_nested: int64, list_nested: list<item: string>>
child 0, int_nested: int64
child 1, list_nested: list<item: string>
child 0, item: string
map: map<string, int64>
child 0, entries: struct<key: string not null, value: int64> not null
child 0, key: string not null
child 1, value: int64
----
int: [[0,1,2,3,4,...,45,46,47,48,49],[0,1,2,3,4,...,45,46,47,48,49],...,[0,1,2,3,4,...,45,46,47,48,49],[0,1,2,3,4,...,45,46,47,48,49]]
list: [[["crown","trunk","roots"],["roots","seeds"],...,[],["crown"]],[["crown","trunk","roots"],["roots","seeds"],...,[],["crown"]],...,[["crown","trunk","roots"],["roots","seeds"],...,[],["crown"]],[["crown","trunk","roots"],["roots","seeds"],...,[],["crown"]]]
struct: [
-- is_valid: all not null
-- child 0 type: int64
[0,1,2,3,4,...,45,46,47,48,49]
-- child 1 type: list<item: string>
[["crown","trunk","roots"],["roots","seeds"],...,[],["crown"]],
-- is_valid: all not null
-- child 0 type: int64
[0,1,2,3,4,...,45,46,47,48,49]
-- child 1 type: list<item: string>
[["crown","trunk","roots"],["roots","seeds"],...,[],["crown"]],
...,
-- is_valid: all not null
-- child 0 type: int64
[0,1,2,3,4,...,45,46,47,48,49]
-- child 1 type: list<item: string>
[["crown","trunk","roots"],["roots","seeds"],...,[],["crown"]],
-- is_valid: all not null
-- child 0 type: int64
[0,1,2,3,4,...,45,46,47,48,49]
-- child 1 type: list<item: string>
[["crown","trunk","roots"],["roots","seeds"],...,[],["crown"]]]
map: [[keys:["trunk"]values:[2],keys:["seeds","roots"]values:[2,4],keys:["trunk","crown"]values:[2,7],keys:["trunk","crown","roots"]values:[8,8,0],keys:[]values:[],...,keys:["trunk","roots"]values:[2,8],keys:["trunk","crown"]values:[6,9],keys:[]values:[],keys:["seeds","trunk"]values:[9,6],keys:["crown","roots","trunk"]values:[0,3,9]],[keys:["trunk"]values:[2],keys:["seeds","roots"]values:[2,4],keys:["trunk","crown"]values:[2,7],keys:["trunk","crown","roots"]values:[8,8,0],keys:[]value [...]
```
</details>
Closes #12091 from wjones127/ARROW-14798-repr-child-limit
Lead-authored-by: Will Jones <wi...@gmail.com>
Co-authored-by: Antoine Pitrou <pi...@free.fr>
Signed-off-by: Antoine Pitrou <an...@python.org>
---
cpp/src/arrow/pretty_print.cc | 49 +++++++++-------
cpp/src/arrow/pretty_print.h | 25 +++++----
cpp/src/arrow/pretty_print_test.cc | 105 ++++++++++++++++++++++++++++++++---
python/pyarrow/array.pxi | 12 ++--
python/pyarrow/includes/libarrow.pxd | 1 +
python/pyarrow/table.pxi | 12 +++-
python/pyarrow/tests/test_table.py | 4 +-
7 files changed, 161 insertions(+), 47 deletions(-)
diff --git a/cpp/src/arrow/pretty_print.cc b/cpp/src/arrow/pretty_print.cc
index 37135ed..01d7f21 100644
--- a/cpp/src/arrow/pretty_print.cc
+++ b/cpp/src/arrow/pretty_print.cc
@@ -66,9 +66,13 @@ class PrettyPrinter {
void CloseArray(const Array& array);
void Flush() { (*sink_) << std::flush; }
- PrettyPrintOptions ChildOptions() const {
+ PrettyPrintOptions ChildOptions(bool increment_indent = false) const {
PrettyPrintOptions child_options = options_;
- child_options.indent = indent_;
+ if (increment_indent) {
+ child_options.indent = indent_ + child_options.indent_size;
+ } else {
+ child_options.indent = indent_;
+ }
return child_options;
}
@@ -134,18 +138,19 @@ class ArrayPrinter : public PrettyPrinter {
private:
template <typename FormatFunction>
Status WriteValues(const Array& array, FormatFunction&& func,
- bool indent_non_null_values = true) {
+ bool indent_non_null_values = true, bool is_container = false) {
// `indent_non_null_values` should be false if `FormatFunction` applies
// indentation itself.
+ int window = is_container ? options_.container_window : options_.window;
for (int64_t i = 0; i < array.length(); ++i) {
const bool is_last = (i == array.length() - 1);
- if ((i >= options_.window) && (i < (array.length() - options_.window))) {
+ if ((i >= window) && (i < (array.length() - window))) {
IndentAfterNewline();
(*sink_) << "...";
if (!is_last && options_.skip_new_lines) {
(*sink_) << ",";
}
- i = array.length() - options_.window - 1;
+ i = array.length() - window - 1;
} else if (array.IsNull(i)) {
IndentAfterNewline();
(*sink_) << options_.null_rep;
@@ -187,7 +192,7 @@ class ArrayPrinter : public PrettyPrinter {
Status PrintChildren(const std::vector<std::shared_ptr<Array>>& fields, int64_t offset,
int64_t length) {
for (size_t i = 0; i < fields.size(); ++i) {
- Newline();
+ Write("\n"); // Always want newline before child array description
Indent();
std::stringstream ss;
ss << "-- child " << i << " type: " << fields[i]->type()->ToString() << "\n";
@@ -197,7 +202,8 @@ class ArrayPrinter : public PrettyPrinter {
if (offset != 0) {
field = field->Slice(offset, length);
}
- RETURN_NOT_OK(PrettyPrint(*field, indent_ + options_.indent_size, sink_));
+ // Indent();
+ RETURN_NOT_OK(PrettyPrint(*field, ChildOptions(true), sink_));
}
return Status::OK();
}
@@ -256,7 +262,8 @@ class ArrayPrinter : public PrettyPrinter {
return values_printer.Print(
*values->Slice(array.value_offset(i), array.value_length(i)));
},
- /*indent_non_null_values=*/false);
+ /*indent_non_null_values=*/false,
+ /*is_container=*/true);
}
Status WriteDataValues(const MapArray& array) {
@@ -268,7 +275,7 @@ class ArrayPrinter : public PrettyPrinter {
return WriteValues(
array,
[&](int64_t i) {
- Indent();
+ IndentAfterNewline();
(*sink_) << "keys:";
Newline();
RETURN_NOT_OK(values_printer.Print(
@@ -334,7 +341,7 @@ class ArrayPrinter : public PrettyPrinter {
Indent();
Write("-- type_ids: ");
UInt8Array type_codes(array.length(), array.type_codes(), nullptr, 0, array.offset());
- RETURN_NOT_OK(PrettyPrint(type_codes, indent_ + options_.indent_size, sink_));
+ RETURN_NOT_OK(PrettyPrint(type_codes, ChildOptions(true), sink_));
if (array.mode() == UnionMode::DENSE) {
Newline();
@@ -343,7 +350,7 @@ class ArrayPrinter : public PrettyPrinter {
Int32Array value_offsets(
array.length(), checked_cast<const DenseUnionArray&>(array).value_offsets(),
nullptr, 0, array.offset());
- RETURN_NOT_OK(PrettyPrint(value_offsets, indent_ + options_.indent_size, sink_));
+ RETURN_NOT_OK(PrettyPrint(value_offsets, ChildOptions(true), sink_));
}
// Print the children without any offset, because the type ids are absolute
@@ -359,13 +366,12 @@ class ArrayPrinter : public PrettyPrinter {
Newline();
Indent();
Write("-- dictionary:\n");
- RETURN_NOT_OK(
- PrettyPrint(*array.dictionary(), indent_ + options_.indent_size, sink_));
+ RETURN_NOT_OK(PrettyPrint(*array.dictionary(), ChildOptions(true), sink_));
Newline();
Indent();
Write("-- indices:\n");
- return PrettyPrint(*array.indices(), indent_ + options_.indent_size, sink_);
+ return PrettyPrint(*array.indices(), ChildOptions(true), sink_);
}
Status Print(const Array& array) {
@@ -384,7 +390,7 @@ Status ArrayPrinter::WriteValidityBitmap(const Array& array) {
Indent();
BooleanArray is_valid(array.length(), array.null_bitmap(), nullptr, 0,
array.offset());
- return PrettyPrint(is_valid, indent_ + options_.indent_size, sink_);
+ return PrettyPrint(is_valid, ChildOptions(true), sink_);
} else {
Write(" all not null");
return Status::OK();
@@ -418,13 +424,16 @@ Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& op
std::ostream* sink) {
int num_chunks = chunked_arr.num_chunks();
int indent = options.indent;
- int window = options.window;
+ int window = options.container_window;
+ // Struct fields are always on new line
+ bool skip_new_lines =
+ options.skip_new_lines && (chunked_arr.type()->id() != Type::STRUCT);
for (int i = 0; i < indent; ++i) {
(*sink) << " ";
}
(*sink) << "[";
- if (!options.skip_new_lines) {
+ if (!skip_new_lines) {
*sink << "\n";
}
bool skip_comma = true;
@@ -433,7 +442,7 @@ Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& op
skip_comma = false;
} else {
(*sink) << ",";
- if (!options.skip_new_lines) {
+ if (!skip_new_lines) {
*sink << "\n";
}
}
@@ -441,8 +450,8 @@ Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& op
for (int i = 0; i < indent; ++i) {
(*sink) << " ";
}
- (*sink) << "...";
- if (!options.skip_new_lines) {
+ (*sink) << "...,";
+ if (!skip_new_lines) {
*sink << "\n";
}
i = num_chunks - window - 1;
diff --git a/cpp/src/arrow/pretty_print.h b/cpp/src/arrow/pretty_print.h
index 1bc086a..5d22fd5 100644
--- a/cpp/src/arrow/pretty_print.h
+++ b/cpp/src/arrow/pretty_print.h
@@ -35,16 +35,17 @@ class Table;
struct PrettyPrintOptions {
PrettyPrintOptions() = default;
- PrettyPrintOptions(int indent_arg, // NOLINT runtime/explicit
- int window_arg = 10, int indent_size_arg = 2,
- std::string null_rep_arg = "null", bool skip_new_lines_arg = false,
- bool truncate_metadata_arg = true)
- : indent(indent_arg),
- indent_size(indent_size_arg),
- window(window_arg),
- null_rep(std::move(null_rep_arg)),
- skip_new_lines(skip_new_lines_arg),
- truncate_metadata(truncate_metadata_arg) {}
+ PrettyPrintOptions(int indent, // NOLINT runtime/explicit
+ int window = 10, int indent_size = 2, std::string null_rep = "null",
+ bool skip_new_lines = false, bool truncate_metadata = true,
+ int container_window = 2)
+ : indent(indent),
+ indent_size(indent_size),
+ window(window),
+ container_window(container_window),
+ null_rep(std::move(null_rep)),
+ skip_new_lines(skip_new_lines),
+ truncate_metadata(truncate_metadata) {}
static PrettyPrintOptions Defaults() { return PrettyPrintOptions(); }
@@ -57,6 +58,10 @@ struct PrettyPrintOptions {
/// Maximum number of elements to show at the beginning and at the end.
int window = 10;
+ /// Maximum number of elements to show at the beginning and at the end, for elements
+ /// that are containers (that is, list in ListArray and chunks in ChunkedArray)
+ int container_window = 2;
+
/// String to use for representing a null value, defaults to "null"
std::string null_rep = "null";
diff --git a/cpp/src/arrow/pretty_print_test.cc b/cpp/src/arrow/pretty_print_test.cc
index 7b47a05..bebbc6e 100644
--- a/cpp/src/arrow/pretty_print_test.cc
+++ b/cpp/src/arrow/pretty_print_test.cc
@@ -642,6 +642,24 @@ TEST_F(TestPrettyPrint, StructTypeAdvanced) {
CheckStream(*array, {0, 10}, ex);
}
+TEST_F(TestPrettyPrint, StructTypeNoNewLines) {
+ // Struct types will at least have new lines for arrays
+ auto simple_1 = field("one", int32());
+ auto simple_2 = field("two", int32());
+ auto simple_struct = struct_({simple_1, simple_2});
+
+ auto array = ArrayFromJSON(simple_struct, "[[11, 22], null, [null, 33]]");
+ auto options = PrettyPrintOptions();
+ options.skip_new_lines = true;
+
+ static const char* ex = R"expected(-- is_valid:[true,false,true]
+-- child 0 type: int32
+[11,0,null]
+-- child 1 type: int32
+[22,0,33])expected";
+ CheckStream(*array, options, ex);
+}
+
TEST_F(TestPrettyPrint, BinaryType) {
std::vector<bool> is_valid = {true, true, false, true, true, true};
std::vector<std::string> values = {"foo", "bar", "", "baz", "", "\xff"};
@@ -715,17 +733,46 @@ TEST_F(TestPrettyPrint, ListType) {
3
]
])expected";
+ static const char* ex_4 = R"expected([
+ [
+ null
+ ],
+ [],
+ ...
+ [
+ 4,
+ 6,
+ 7
+ ],
+ [
+ 2,
+ 3
+ ]
+])expected";
auto array = ArrayFromJSON(list_type, "[[null], [], null, [4, 6, 7], [2, 3]]");
- CheckArray(*array, {0, 10}, ex);
- CheckArray(*array, {2, 10}, ex_2);
- CheckStream(*array, {0, 1}, ex_3);
+ auto make_options = [](int indent, int window, int container_window) {
+ auto options = PrettyPrintOptions(indent, window);
+ options.container_window = container_window;
+ return options;
+ };
+ CheckStream(*array, make_options(/*indent=*/0, /*window=*/10, /*container_window=*/5),
+ ex);
+ CheckStream(*array, make_options(/*indent=*/2, /*window=*/10, /*container_window=*/5),
+ ex_2);
+ CheckStream(*array, make_options(/*indent=*/0, /*window=*/10, /*container_window=*/1),
+ ex_3);
+ CheckArray(*array, {0, 10}, ex_4);
list_type = large_list(int64());
array = ArrayFromJSON(list_type, "[[null], [], null, [4, 6, 7], [2, 3]]");
- CheckArray(*array, {0, 10}, ex);
- CheckArray(*array, {2, 10}, ex_2);
- CheckStream(*array, {0, 1}, ex_3);
+ CheckStream(*array, make_options(/*indent=*/0, /*window=*/10, /*container_window=*/5),
+ ex);
+ CheckStream(*array, make_options(/*indent=*/2, /*window=*/10, /*container_window=*/5),
+ ex_2);
+ CheckStream(*array, make_options(/*indent=*/0, /*window=*/10, /*container_window=*/1),
+ ex_3);
+ CheckArray(*array, {0, 10}, ex_4);
}
TEST_F(TestPrettyPrint, ListTypeNoNewlines) {
@@ -736,10 +783,12 @@ TEST_F(TestPrettyPrint, ListTypeNoNewlines) {
PrettyPrintOptions options{};
options.skip_new_lines = true;
options.null_rep = "NA";
+ options.container_window = 10;
CheckArray(*empty_array, options, "[]", false);
CheckArray(*array, options, "[[NA],[],NA,[4,5,6,7,8],[2,3]]", false);
options.window = 2;
+ options.container_window = 2;
CheckArray(*empty_array, options, "[]", false);
CheckArray(*array, options, "[[NA],[],...,[4,5,...,7,8],[2,3]]", false);
}
@@ -779,6 +828,14 @@ TEST_F(TestPrettyPrint, MapType) {
[]
])expected";
CheckArray(*array, {0, 10}, ex);
+
+ PrettyPrintOptions options{};
+ options.skip_new_lines = true;
+
+ static const char* ex_flat =
+ R"expected([keys:["joe","mark"]values:[0,null],null,)expected"
+ R"expected(keys:["cap"]values:[8],keys:[]values:[]])expected";
+ CheckArray(*array, options, ex_flat, false);
}
TEST_F(TestPrettyPrint, FixedSizeListType) {
@@ -797,7 +854,7 @@ TEST_F(TestPrettyPrint, FixedSizeListType) {
3,
null
],
- null,
+ ...
[
4,
6,
@@ -809,7 +866,39 @@ TEST_F(TestPrettyPrint, FixedSizeListType) {
5
]
])expected");
- CheckStream(*array, {0, 1}, R"expected([
+
+ auto make_options = [](int indent, int window, int container_window) {
+ auto options = PrettyPrintOptions(indent, window);
+ options.container_window = container_window;
+ return options;
+ };
+ CheckStream(*array, make_options(/*indent=*/0, /*window=*/1, /*container_window=*/3),
+ R"expected([
+ [
+ null,
+ ...
+ 1
+ ],
+ [
+ 2,
+ ...
+ null
+ ],
+ null,
+ [
+ 4,
+ ...
+ 7
+ ],
+ [
+ 8,
+ ...
+ 5
+ ]
+])expected");
+
+ CheckStream(*array, make_options(/*indent=*/0, /*window=*/1, /*container_window=*/1),
+ R"expected([
[
null,
...
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 0db3fb2..6964ea4 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -1038,7 +1038,7 @@ cdef class Array(_PandasConvertible):
return '{0}\n{1}'.format(type_format, str(self))
def to_string(self, *, int indent=2, int top_level_indent=0, int window=10,
- c_bool skip_new_lines=False):
+ int container_window=2, c_bool skip_new_lines=False):
"""
Render a "pretty-printed" string representation of the Array.
@@ -1051,9 +1051,13 @@ cdef class Array(_PandasConvertible):
How much to indent right the entire content of the array,
by default ``0``.
window : int
- How many items to preview at the begin and end
- of the array when the arrays is bigger than the window.
- The other elements will be ellipsed.
+ How many primitive items to preview at the begin and end
+ of the array when the array is bigger than the window.
+ The other items will be ellipsed.
+ container_window : int
+ How many container items (such as a list in a list array)
+ to preview at the begin and end of the array when the array
+ is bigger than the window.
skip_new_lines : bool
If the array should be rendered as a single line of text
or if each element should be on its own line.
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 9a149dd..9469fd5 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -496,6 +496,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
int indent
int indent_size
int window
+ int container_window
c_string null_rep
c_bool skip_new_lines
c_bool truncate_metadata
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 6b21d25..26db4e2 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -63,7 +63,7 @@ cdef class ChunkedArray(_PandasConvertible):
type_format = object.__repr__(self)
return '{0}\n{1}'.format(type_format, str(self))
- def to_string(self, *, int indent=0, int window=10,
+ def to_string(self, *, int indent=0, int window=5, int container_window=2,
c_bool skip_new_lines=False):
"""
Render a "pretty-printed" string representation of the ChunkedArray
@@ -74,9 +74,14 @@ cdef class ChunkedArray(_PandasConvertible):
How much to indent right the content of the array,
by default ``0``.
window : int
- How many items to preview at the begin and end
- of the array when the arrays is bigger than the window.
+ How many items to preview within each chunk at the begin and end
+ of the chunk when the chunk is bigger than the window.
The other elements will be ellipsed.
+ container_window : int
+ How many chunks to preview at the begin and end
+ of the array when the array is bigger than the window.
+ The other elements will be ellipsed.
+ This setting also applies to list columns.
skip_new_lines : bool
If the array should be rendered as a single line of text
or if each element should be on its own line.
@@ -88,6 +93,7 @@ cdef class ChunkedArray(_PandasConvertible):
with nogil:
options = PrettyPrintOptions(indent, window)
options.skip_new_lines = skip_new_lines
+ options.container_window = container_window
check_status(
PrettyPrint(
deref(self.chunked_array),
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index 4555bed..756be0f 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -1801,8 +1801,8 @@ def test_table_repr_to_string_ellipsis():
c0: int16
c1: int32
----
-c0: [[1,2,3,4,1,2,3,4,1,2,...,3,4,1,2,3,4,1,2,3,4]]
-c1: [[10,20,30,40,10,20,30,40,10,20,...,30,40,10,20,30,40,10,20,30,40]]"""
+c0: [[1,2,3,4,1,...,4,1,2,3,4]]
+c1: [[10,20,30,40,10,...,40,10,20,30,40]]"""
def test_table_function_unicode_schema():