You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2020/04/02 15:29:02 UTC
[arrow] branch master updated: ARROW-7904: [C++][Python] Revamp
metadata display, change show_metadata to verbose_metadata
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 8d243d0 ARROW-7904: [C++][Python] Revamp metadata display, change show_metadata to verbose_metadata
8d243d0 is described below
commit 8d243d0759d6e4401bc39ea3195351a3b59e5c47
Author: Wes McKinney <we...@apache.org>
AuthorDate: Thu Apr 2 10:28:40 2020 -0500
ARROW-7904: [C++][Python] Revamp metadata display, change show_metadata to verbose_metadata
This is another attempt to present the information without overwhelming the reader when there is a large binary metadata blob. By default, only the metadata keys are shown, like so:
```
foo: int32 not null, metadata.keys: ['key1']
bar: string, metadata.keys: ['key3']
-- schema.metadata.keys: ['key2']
```
Another option is to show values truncated to 50 chars (or to 80 chars less the length of the key and other whitespace chars).
Closes #6577 from wesm/ARROW-7904
Lead-authored-by: Wes McKinney <we...@apache.org>
Co-authored-by: Benjamin Kietzman <be...@gmail.com>
Signed-off-by: Wes McKinney <we...@apache.org>
---
cpp/src/arrow/pretty_print.cc | 111 ++++++++++++++++++++---------------
cpp/src/arrow/pretty_print.h | 31 ++++++----
cpp/src/arrow/pretty_print_test.cc | 79 +++++++++++++++++++++----
python/pyarrow/array.pxi | 9 ++-
python/pyarrow/includes/libarrow.pxd | 7 ++-
python/pyarrow/table.pxi | 33 +++++++++--
python/pyarrow/tests/test_schema.py | 75 ++++++++++++++++++++---
python/pyarrow/tests/test_table.py | 39 +++++++++++-
python/pyarrow/types.pxi | 24 +++++---
9 files changed, 313 insertions(+), 95 deletions(-)
diff --git a/cpp/src/arrow/pretty_print.cc b/cpp/src/arrow/pretty_print.cc
index 2f52eff..79a87ea 100644
--- a/cpp/src/arrow/pretty_print.cc
+++ b/cpp/src/arrow/pretty_print.cc
@@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.
+#include <algorithm>
#include <chrono>
#include <cstddef>
#include <cstdint>
@@ -46,11 +47,7 @@ using internal::checked_cast;
class PrettyPrinter {
public:
PrettyPrinter(const PrettyPrintOptions& options, std::ostream* sink)
- : indent_(options.indent),
- indent_size_(options.indent_size),
- window_(options.window),
- skip_new_lines_(options.skip_new_lines),
- sink_(sink) {}
+ : options_(options), indent_(options.indent), sink_(sink) {}
void Write(const char* data);
void Write(const std::string& data);
@@ -64,10 +61,8 @@ class PrettyPrinter {
void Flush() { (*sink_) << std::flush; }
protected:
+ const PrettyPrintOptions& options_;
int indent_;
- int indent_size_;
- int window_;
- bool skip_new_lines_;
std::ostream* sink_;
};
@@ -76,13 +71,13 @@ void PrettyPrinter::OpenArray(const Array& array) {
(*sink_) << "[";
if (array.length() > 0) {
(*sink_) << "\n";
- indent_ += indent_size_;
+ indent_ += options_.indent_size;
}
}
void PrettyPrinter::CloseArray(const Array& array) {
if (array.length() > 0) {
- indent_ -= indent_size_;
+ indent_ -= options_.indent_size;
Indent();
}
(*sink_) << "]";
@@ -102,7 +97,7 @@ void PrettyPrinter::WriteIndented(const std::string& data) {
}
void PrettyPrinter::Newline() {
- if (skip_new_lines_) {
+ if (options_.skip_new_lines) {
return;
}
(*sink_) << "\n";
@@ -118,7 +113,7 @@ void PrettyPrinter::Indent() {
class ArrayPrinter : public PrettyPrinter {
public:
ArrayPrinter(const PrettyPrintOptions& options, std::ostream* sink)
- : PrettyPrinter(options, sink), null_rep_(options.null_rep) {}
+ : PrettyPrinter(options, sink) {}
template <typename FormatFunction>
void WriteValues(const Array& array, FormatFunction&& func) {
@@ -130,12 +125,12 @@ class ArrayPrinter : public PrettyPrinter {
(*sink_) << ",\n";
}
Indent();
- if ((i >= window_) && (i < (array.length() - window_))) {
+ if ((i >= options_.window) && (i < (array.length() - options_.window))) {
(*sink_) << "...\n";
- i = array.length() - window_ - 1;
+ i = array.length() - options_.window - 1;
skip_comma = true;
} else if (array.IsNull(i)) {
- (*sink_) << null_rep_;
+ (*sink_) << options_.null_rep;
} else {
func(i);
}
@@ -239,18 +234,18 @@ class ArrayPrinter : public PrettyPrinter {
} else {
(*sink_) << ",\n";
}
- if ((i >= window_) && (i < (array.length() - window_))) {
+ if ((i >= options_.window) && (i < (array.length() - options_.window))) {
Indent();
(*sink_) << "...\n";
- i = array.length() - window_ - 1;
+ i = array.length() - options_.window - 1;
skip_comma = true;
} else if (array.IsNull(i)) {
Indent();
- (*sink_) << null_rep_;
+ (*sink_) << options_.null_rep;
} else {
std::shared_ptr<Array> slice =
array.values()->Slice(array.value_offset(i), array.value_length(i));
- RETURN_NOT_OK(PrettyPrint(*slice, {indent_, window_}, sink_));
+ RETURN_NOT_OK(PrettyPrint(*slice, {indent_, options_.window}, sink_));
}
}
(*sink_) << "\n";
@@ -265,26 +260,26 @@ class ArrayPrinter : public PrettyPrinter {
} else {
(*sink_) << ",\n";
}
- if ((i >= window_) && (i < (array.length() - window_))) {
+ if ((i >= options_.window) && (i < (array.length() - options_.window))) {
Indent();
(*sink_) << "...\n";
- i = array.length() - window_ - 1;
+ i = array.length() - options_.window - 1;
skip_comma = true;
} else if (array.IsNull(i)) {
Indent();
- (*sink_) << null_rep_;
+ (*sink_) << options_.null_rep;
} else {
Indent();
(*sink_) << "keys:\n";
auto keys_slice =
array.keys()->Slice(array.value_offset(i), array.value_length(i));
- RETURN_NOT_OK(PrettyPrint(*keys_slice, {indent_, window_}, sink_));
+ RETURN_NOT_OK(PrettyPrint(*keys_slice, {indent_, options_.window}, sink_));
(*sink_) << "\n";
Indent();
(*sink_) << "values:\n";
auto values_slice =
array.items()->Slice(array.value_offset(i), array.value_length(i));
- RETURN_NOT_OK(PrettyPrint(*values_slice, {indent_, window_}, sink_));
+ RETURN_NOT_OK(PrettyPrint(*values_slice, {indent_, options_.window}, sink_));
}
}
(*sink_) << "\n";
@@ -332,7 +327,7 @@ class ArrayPrinter : public PrettyPrinter {
field = field->Slice(offset, length);
}
- RETURN_NOT_OK(PrettyPrint(*field, indent_ + indent_size_, sink_));
+ RETURN_NOT_OK(PrettyPrint(*field, indent_ + options_.indent_size, sink_));
}
return Status::OK();
}
@@ -353,14 +348,14 @@ class ArrayPrinter : public PrettyPrinter {
Newline();
Write("-- type_ids: ");
UInt8Array type_codes(array.length(), array.type_codes(), nullptr, 0, array.offset());
- RETURN_NOT_OK(PrettyPrint(type_codes, indent_ + indent_size_, sink_));
+ RETURN_NOT_OK(PrettyPrint(type_codes, indent_ + options_.indent_size, sink_));
if (array.mode() == UnionMode::DENSE) {
Newline();
Write("-- value_offsets: ");
Int32Array value_offsets(array.length(), array.value_offsets(), nullptr, 0,
array.offset());
- RETURN_NOT_OK(PrettyPrint(value_offsets, indent_ + indent_size_, sink_));
+ RETURN_NOT_OK(PrettyPrint(value_offsets, indent_ + options_.indent_size, sink_));
}
// Print the children without any offset, because the type ids are absolute
@@ -375,11 +370,12 @@ class ArrayPrinter : public PrettyPrinter {
Status Visit(const DictionaryArray& array) {
Newline();
Write("-- dictionary:\n");
- RETURN_NOT_OK(PrettyPrint(*array.dictionary(), indent_ + indent_size_, sink_));
+ RETURN_NOT_OK(
+ PrettyPrint(*array.dictionary(), indent_ + options_.indent_size, sink_));
Newline();
Write("-- indices:\n");
- return PrettyPrint(*array.indices(), indent_ + indent_size_, sink_);
+ return PrettyPrint(*array.indices(), indent_ + options_.indent_size, sink_);
}
Status Print(const Array& array) {
@@ -417,7 +413,6 @@ class ArrayPrinter : public PrettyPrinter {
}
static arrow_vendored::date::sys_days epoch_;
- std::string null_rep_;
};
arrow_vendored::date::sys_days ArrayPrinter::epoch_ =
@@ -431,7 +426,7 @@ Status ArrayPrinter::WriteValidityBitmap(const Array& array) {
Newline();
BooleanArray is_valid(array.length(), array.null_bitmap(), nullptr, 0,
array.offset());
- return PrettyPrint(is_valid, indent_ + indent_size_, sink_);
+ return PrettyPrint(is_valid, indent_ + options_.indent_size, sink_);
} else {
Write(" all not null");
return Status::OK();
@@ -562,20 +557,41 @@ class SchemaPrinter : public PrettyPrinter {
public:
SchemaPrinter(const Schema& schema, const PrettyPrintOptions& options,
std::ostream* sink)
- : PrettyPrinter(options, sink),
- schema_(schema),
- show_metadata_(options.show_metadata) {}
+ : PrettyPrinter(options, sink), schema_(schema) {}
Status PrintType(const DataType& type, bool nullable);
Status PrintField(const Field& field);
- void PrintMetadata(const KeyValueMetadata& metadata) {
+ void PrintVerboseMetadata(const KeyValueMetadata& metadata) {
+ for (int64_t i = 0; i < metadata.size(); ++i) {
+ Newline();
+ Write(metadata.key(i) + ": '" + metadata.value(i) + "'");
+ }
+ }
+
+ void PrintTruncatedMetadata(const KeyValueMetadata& metadata) {
+ for (int64_t i = 0; i < metadata.size(); ++i) {
+ Newline();
+ size_t size = metadata.value(i).size();
+ size_t truncated_size = std::max<size_t>(10, 70 - metadata.key(i).size() - indent_);
+ if (size <= truncated_size) {
+ Write(metadata.key(i) + ": '" + metadata.value(i) + "'");
+ continue;
+ }
+
+ Write(metadata.key(i) + ": '" + metadata.value(i).substr(0, truncated_size) +
+ "' + " + std::to_string(size - truncated_size));
+ }
+ }
+
+ void PrintMetadata(const std::string& metadata_type, const KeyValueMetadata& metadata) {
if (metadata.size() > 0) {
Newline();
- Write("-- metadata --");
- for (int64_t i = 0; i < metadata.size(); ++i) {
- Newline();
- Write(metadata.key(i) + ": " + metadata.value(i));
+ Write(metadata_type);
+ if (options_.truncate_metadata) {
+ PrintTruncatedMetadata(metadata);
+ } else {
+ PrintVerboseMetadata(metadata);
}
}
}
@@ -590,8 +606,8 @@ class SchemaPrinter : public PrettyPrinter {
RETURN_NOT_OK(PrintField(*schema_.field(i)));
}
- if (show_metadata_ && schema_.metadata()) {
- PrintMetadata(*schema_.metadata());
+ if (options_.show_schema_metadata && schema_.metadata() != nullptr) {
+ PrintMetadata("-- schema metadata --", *schema_.metadata());
}
Flush();
return Status::OK();
@@ -599,7 +615,6 @@ class SchemaPrinter : public PrettyPrinter {
private:
const Schema& schema_;
- bool show_metadata_;
};
Status SchemaPrinter::PrintType(const DataType& type, bool nullable) {
@@ -613,10 +628,10 @@ Status SchemaPrinter::PrintType(const DataType& type, bool nullable) {
std::stringstream ss;
ss << "child " << i << ", ";
- indent_ += indent_size_;
+ indent_ += options_.indent_size;
WriteIndented(ss.str());
RETURN_NOT_OK(PrintField(*type.child(i)));
- indent_ -= indent_size_;
+ indent_ -= options_.indent_size;
}
return Status::OK();
}
@@ -626,10 +641,10 @@ Status SchemaPrinter::PrintField(const Field& field) {
Write(": ");
RETURN_NOT_OK(PrintType(*field.type(), field.nullable()));
- if (show_metadata_ && field.metadata()) {
- indent_ += indent_size_;
- PrintMetadata(*field.metadata());
- indent_ -= indent_size_;
+ if (options_.show_field_metadata && field.metadata() != nullptr) {
+ indent_ += options_.indent_size;
+ PrintMetadata("-- field metadata --", *field.metadata());
+ indent_ -= options_.indent_size;
}
return Status::OK();
}
diff --git a/cpp/src/arrow/pretty_print.h b/cpp/src/arrow/pretty_print.h
index 2ea6568..9d2c72c 100644
--- a/cpp/src/arrow/pretty_print.h
+++ b/cpp/src/arrow/pretty_print.h
@@ -32,33 +32,44 @@ class Status;
class Table;
struct PrettyPrintOptions {
- PrettyPrintOptions(int indent_arg = 0, int window_arg = 10, int indent_size_arg = 2,
+ PrettyPrintOptions() = default;
+
+ PrettyPrintOptions(int indent_arg, int window_arg = 10, int indent_size_arg = 2,
std::string null_rep_arg = "null", bool skip_new_lines_arg = false,
- bool show_metadata = false)
+ bool truncate_metadata_arg = true)
: indent(indent_arg),
indent_size(indent_size_arg),
window(window_arg),
null_rep(null_rep_arg),
skip_new_lines(skip_new_lines_arg),
- show_metadata(show_metadata) {}
+ truncate_metadata(truncate_metadata_arg) {}
+
+ static PrettyPrintOptions Defaults() { return PrettyPrintOptions(); }
/// Number of spaces to shift entire formatted object to the right
- int indent;
+ int indent = 0;
/// Size of internal indents
- int indent_size;
+ int indent_size = 2;
/// Maximum number of elements to show at the beginning and at the end.
- int window;
+ int window = 10;
/// String to use for representing a null value, defaults to "null"
- std::string null_rep;
+ std::string null_rep = "null";
/// Skip new lines between elements, defaults to false
- bool skip_new_lines;
+ bool skip_new_lines = false;
+
+ /// Limit display of each KeyValueMetadata key/value pair to a single line at
+ /// 80 character width
+ bool truncate_metadata = true;
+
+ /// If true, display field metadata when pretty-printing a Schema
+ bool show_field_metadata = true;
- /// Show Schema and Field-level KeyValueMetadata
- bool show_metadata;
+ /// If true, display schema metadata when pretty-printing a Schema
+ bool show_schema_metadata = true;
};
/// \brief Print human-readable representation of RecordBatch
diff --git a/cpp/src/arrow/pretty_print_test.cc b/cpp/src/arrow/pretty_print_test.cc
index 3495de0..6124b8f 100644
--- a/cpp/src/arrow/pretty_print_test.cc
+++ b/cpp/src/arrow/pretty_print_test.cc
@@ -652,24 +652,79 @@ five: list<item: int32 not null>
TEST_F(TestPrettyPrint, SchemaWithMetadata) {
// ARROW-7063
- auto metadata1 = key_value_metadata({"foo"}, {"bar1"});
- auto metadata2 = key_value_metadata({"foo"}, {"bar2"});
- auto metadata3 = key_value_metadata({"foo"}, {"bar3"});
+ auto metadata1 = key_value_metadata({"foo1"}, {"bar1"});
+ auto metadata2 = key_value_metadata({"foo2"}, {"bar2"});
+ auto metadata3 = key_value_metadata(
+ {"foo3", "lorem"},
+ {"bar3",
+ R"(Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla accumsan vel
+ turpis et mollis. Aliquam tincidunt arcu id tortor blandit blandit. Donec
+ eget leo quis lectus scelerisque varius. Class aptent taciti sociosqu ad
+ litora torquent per conubia nostra, per inceptos himenaeos. Praesent
+ faucibus, diam eu volutpat iaculis, tellus est porta ligula, a efficitur
+ turpis nulla facilisis quam. Aliquam vitae lorem erat. Proin a dolor ac libero
+ dignissim mollis vitae eu mauris. Quisque posuere tellus vitae massa
+ pellentesque sagittis. Aenean feugiat, diam ac dignissim fermentum, lorem
+ sapien commodo massa, vel volutpat orci nisi eu justo. Nulla non blandit
+ sapien. Quisque pretium vestibulum urna eu vehicula.)"});
auto my_schema = schema(
{field("one", int32(), true, metadata1), field("two", utf8(), false, metadata2)},
metadata3);
- static const char* expected = R"expected(one: int32
- -- metadata --
- foo: bar1
-two: string not null
- -- metadata --
- foo: bar2
--- metadata --
-foo: bar3)expected";
PrettyPrintOptions options;
- options.show_metadata = true;
+ static const char* expected = R"(one: int32
+ -- field metadata --
+ foo1: 'bar1'
+two: string not null
+ -- field metadata --
+ foo2: 'bar2'
+-- schema metadata --
+foo3: 'bar3'
+lorem: 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla ac' + 737)";
Check(*my_schema, options, expected);
+
+ static const char* expected_verbose = R"(one: int32
+ -- field metadata --
+ foo1: 'bar1'
+two: string not null
+ -- field metadata --
+ foo2: 'bar2'
+-- schema metadata --
+foo3: 'bar3'
+lorem: 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla accumsan vel
+ turpis et mollis. Aliquam tincidunt arcu id tortor blandit blandit. Donec
+ eget leo quis lectus scelerisque varius. Class aptent taciti sociosqu ad
+ litora torquent per conubia nostra, per inceptos himenaeos. Praesent
+ faucibus, diam eu volutpat iaculis, tellus est porta ligula, a efficitur
+ turpis nulla facilisis quam. Aliquam vitae lorem erat. Proin a dolor ac libero
+ dignissim mollis vitae eu mauris. Quisque posuere tellus vitae massa
+ pellentesque sagittis. Aenean feugiat, diam ac dignissim fermentum, lorem
+ sapien commodo massa, vel volutpat orci nisi eu justo. Nulla non blandit
+ sapien. Quisque pretium vestibulum urna eu vehicula.')";
+ options.truncate_metadata = false;
+ Check(*my_schema, options, expected_verbose);
+
+ // Metadata that exactly fits
+ auto metadata4 =
+ key_value_metadata({"key"}, {("valuexxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+ "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")});
+ my_schema = schema({field("f0", int32())}, metadata4);
+ static const char* expected_fits = R"(f0: int32
+-- schema metadata --
+key: 'valuexxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')";
+ options.truncate_metadata = false;
+ Check(*my_schema, options, expected_fits);
+
+ // A large key
+ auto metadata5 = key_value_metadata({"0123456789012345678901234567890123456789"},
+ {("valuexxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+ "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")});
+ my_schema = schema({field("f0", int32())}, metadata5);
+ static const char* expected_big_key = R"(f0: int32
+-- schema metadata --
+0123456789012345678901234567890123456789: 'valuexxxxxxxxxxxxxxxxxxxxxxxxx' + 40)";
+ options.truncate_metadata = true;
+ Check(*my_schema, options, expected_big_key);
}
TEST_F(TestPrettyPrint, SchemaIndentation) {
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index d0dfbca..ced55b5 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -876,7 +876,7 @@ cdef class Array(_PandasConvertible):
type_format = object.__repr__(self)
return '{0}\n{1}'.format(type_format, str(self))
- def format(self, int indent=0, int window=10):
+ def to_string(self, int indent=0, int window=10):
cdef:
c_string result
@@ -891,8 +891,13 @@ cdef class Array(_PandasConvertible):
return frombytes(result)
+ def format(self, **kwargs):
+ import warnings
+ warnings.warn('Array.format is deprecated, use Array.to_string')
+ return self.to_string(**kwargs)
+
def __str__(self):
- return self.format()
+ return self.to_string()
def equals(Array self, Array other):
return self.ap.Equals(deref(other.ap))
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 382db6d..3b6dfb5 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -412,7 +412,12 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
int window
c_string null_rep
c_bool skip_new_lines
- c_bool show_metadata
+ c_bool truncate_metadata
+ c_bool show_field_metadata
+ c_bool show_schema_metadata
+
+ @staticmethod
+ PrettyPrintOptions Defaults()
CStatus PrettyPrint(const CArray& schema,
const PrettyPrintOptions& options,
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index d70a9be..f6a8b48 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -63,7 +63,10 @@ cdef class ChunkedArray(_PandasConvertible):
type_format = object.__repr__(self)
return '{0}\n{1}'.format(type_format, str(self))
- def format(self, int indent=0, int window=10):
+ def to_string(self, int indent=0, int window=10):
+ """
+ Render a "pretty-printed" string representation of the ChunkedArray
+ """
cdef:
c_string result
@@ -78,8 +81,14 @@ cdef class ChunkedArray(_PandasConvertible):
return frombytes(result)
+ def format(self, **kwargs):
+ import warnings
+ warnings.warn('ChunkedArray.format is deprecated, '
+ 'use ChunkedArray.to_string')
+ return self.to_string(**kwargs)
+
def __str__(self):
- return self.format()
+ return self.to_string()
def validate(self, *, full=False):
"""
@@ -542,8 +551,16 @@ cdef class RecordBatch(_PandasConvertible):
except TypeError:
return NotImplemented
+ def to_string(self, show_metadata=False):
+ # Use less verbose schema output.
+ schema_as_string = self.schema.to_string(
+ show_field_metadata=show_metadata,
+ show_schema_metadata=show_metadata
+ )
+ return 'pyarrow.{}\n{}'.format(type(self).__name__, schema_as_string)
+
def __repr__(self):
- return 'pyarrow.{}\n{}'.format(type(self).__name__, str(self.schema))
+ return self.to_string()
def validate(self, *, full=False):
"""
@@ -1013,11 +1030,19 @@ cdef class Table(_PandasConvertible):
raise TypeError("Do not call Table's constructor directly, use one of "
"the `Table.from_*` functions instead.")
+ def to_string(self, show_metadata=False):
+ # Use less verbose schema output.
+ schema_as_string = self.schema.to_string(
+ show_field_metadata=show_metadata,
+ show_schema_metadata=show_metadata
+ )
+ return 'pyarrow.{}\n{}'.format(type(self).__name__, schema_as_string)
+
def __repr__(self):
if self.table == NULL:
raise ValueError("Table's internal pointer is NULL, do not use "
"any methods or attributes on this object")
- return 'pyarrow.{}\n{}'.format(type(self).__name__, str(self.schema))
+ return self.to_string()
cdef void init(self, const shared_ptr[CTable]& table):
self.sp_table = table
diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py
index e601021..d2b1bd2 100644
--- a/python/pyarrow/tests/test_schema.py
+++ b/python/pyarrow/tests/test_schema.py
@@ -245,17 +245,78 @@ baz: list<item: int8>
def test_schema_to_string_with_metadata():
+ lorem = """\
+Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla accumsan vel
+turpis et mollis. Aliquam tincidunt arcu id tortor blandit blandit. Donec
+eget leo quis lectus scelerisque varius. Class aptent taciti sociosqu ad
+litora torquent per conubia nostra, per inceptos himenaeos. Praesent
+faucibus, diam eu volutpat iaculis, tellus est porta ligula, a efficitur
+turpis nulla facilisis quam. Aliquam vitae lorem erat. Proin a dolor ac libero
+dignissim mollis vitae eu mauris. Quisque posuere tellus vitae massa
+pellentesque sagittis. Aenean feugiat, diam ac dignissim fermentum, lorem
+sapien commodo massa, vel volutpat orci nisi eu justo. Nulla non blandit
+sapien. Quisque pretium vestibulum urna eu vehicula."""
# ARROW-7063
my_schema = pa.schema([pa.field("foo", "int32", False,
- metadata={"key1": "value1"})],
- metadata={"key2": "value2"})
+ metadata={"key1": "value1"}),
+ pa.field("bar", "string", True,
+ metadata={"key3": "value3"})],
+ metadata={"key2": "value2",
+ "lorem": lorem})
- assert my_schema.to_string(show_metadata=True) == """\
+ assert my_schema.to_string() == """\
foo: int32 not null
- -- metadata --
- key1: value1
--- metadata --
-key2: value2"""
+ -- field metadata --
+ key1: 'value1'
+bar: string
+ -- field metadata --
+ key3: 'value3'
+-- schema metadata --
+key2: 'value2'
+lorem: '""" + lorem[:65] + "' + " + str(len(lorem) - 65)
+
+ # Metadata that exactly fits
+ result = pa.schema([('f0', 'int32')],
+ metadata={'key': 'value' + 'x' * 62}).to_string()
+ assert result == """\
+f0: int32
+-- schema metadata --
+key: 'valuexxxxxxxxxxxxxxxxxxxxxxxxxxxxx\
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'"""
+
+ assert my_schema.to_string(truncate_metadata=False) == """\
+foo: int32 not null
+ -- field metadata --
+ key1: 'value1'
+bar: string
+ -- field metadata --
+ key3: 'value3'
+-- schema metadata --
+key2: 'value2'
+lorem: '{}'""".format(lorem)
+
+ assert my_schema.to_string(truncate_metadata=False,
+ show_field_metadata=False) == """\
+foo: int32 not null
+bar: string
+-- schema metadata --
+key2: 'value2'
+lorem: '{}'""".format(lorem)
+
+ assert my_schema.to_string(truncate_metadata=False,
+ show_schema_metadata=False) == """\
+foo: int32 not null
+ -- field metadata --
+ key1: 'value1'
+bar: string
+ -- field metadata --
+ key3: 'value3'"""
+
+ assert my_schema.to_string(truncate_metadata=False,
+ show_field_metadata=False,
+ show_schema_metadata=False) == """\
+foo: int32 not null
+bar: string"""
def test_schema_from_tuples():
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index ecd2ef5..c63852a 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -316,7 +316,8 @@ def test_recordbatch_basics():
batch[2]
# Schema passed explicitly
- schema = pa.schema([pa.field('c0', pa.int16()),
+ schema = pa.schema([pa.field('c0', pa.int16(),
+ metadata={'key': 'value'}),
pa.field('c1', pa.int32())],
metadata={b'foo': b'bar'})
batch = pa.record_batch(data, schema=schema)
@@ -324,9 +325,18 @@ def test_recordbatch_basics():
# schema as first positional argument
batch = pa.record_batch(data, schema)
assert batch.schema == schema
- assert (str(batch) == """pyarrow.RecordBatch
+ assert str(batch) == """pyarrow.RecordBatch
c0: int16
-c1: int32""")
+c1: int32"""
+
+ assert batch.to_string(show_metadata=True) == """\
+pyarrow.RecordBatch
+c0: int16
+ -- field metadata --
+ key: 'value'
+c1: int32
+-- schema metadata --
+foo: 'bar'"""
def test_recordbatch_equals():
@@ -1311,6 +1321,29 @@ def test_factory_functions_invalid_input():
pa.record_batch("invalid input")
+def test_table_repr_to_string():
+ # Schema passed explicitly
+ schema = pa.schema([pa.field('c0', pa.int16(),
+ metadata={'key': 'value'}),
+ pa.field('c1', pa.int32())],
+ metadata={b'foo': b'bar'})
+
+ tab = pa.table([pa.array([1, 2, 3, 4], type='int16'),
+ pa.array([1, 2, 3, 4], type='int32')], schema=schema)
+ assert str(tab) == """pyarrow.Table
+c0: int16
+c1: int32"""
+
+ assert tab.to_string(show_metadata=True) == """\
+pyarrow.Table
+c0: int16
+ -- field metadata --
+ key: 'value'
+c1: int32
+-- schema metadata --
+foo: 'bar'"""
+
+
def test_table_function_unicode_schema():
col_a = "äääh"
col_b = "öööf"
diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi
index 7af853e..fbb3393 100644
--- a/python/pyarrow/types.pxi
+++ b/python/pyarrow/types.pxi
@@ -1390,15 +1390,20 @@ cdef class Schema:
new_schema = self.schema.RemoveMetadata()
return pyarrow_wrap_schema(new_schema)
- def to_string(self, bint show_metadata=False):
+ def to_string(self, truncate_metadata=True, show_field_metadata=True,
+ show_schema_metadata=True):
"""
Return human-readable representation of Schema
Parameters
----------
- show_metadata : boolean, default False
- If True, and there is non-empty metadata, it will be printed after
- the column names and types
+ truncate_metadata : boolean, default True
+ Limit metadata key/value display to a single line of ~80 characters
+ or less
+ show_field_metadata : boolean, default True
+ Display Field-level KeyValueMetadata
+ show_schema_metadata : boolean, default True
+ Display Schema-level KeyValueMetadata
Returns
-------
@@ -1406,11 +1411,14 @@ cdef class Schema:
"""
cdef:
c_string result
- PrettyPrintOptions options
+ PrettyPrintOptions options = PrettyPrintOptions.Defaults()
+
+ options.indent = 0
+ options.truncate_metadata = truncate_metadata
+ options.show_field_metadata = show_field_metadata
+ options.show_schema_metadata = show_schema_metadata
with nogil:
- options.indent = 0
- options.show_metadata = show_metadata
check_status(
PrettyPrint(
deref(self.schema),
@@ -1443,7 +1451,7 @@ cdef class Schema:
return pyarrow_wrap_schema(result)
def __str__(self):
- return self.to_string(show_metadata=False)
+ return self.to_string()
def __repr__(self):
return self.__str__()