You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/05/24 13:58:47 UTC
[arrow] branch master updated: ARROW-653: [Python / C++] Add
debugging function to print an array's buffer contents in hexadecimal
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 863b926 ARROW-653: [Python / C++] Add debugging function to print an array's buffer contents in hexadecimal
863b926 is described below
commit 863b926b617d417161e7c8d0046263005cd5f603
Author: Anatoly Myachev <an...@intel.com>
AuthorDate: Fri May 24 08:58:37 2019 -0500
ARROW-653: [Python / C++] Add debugging function to print an array's buffer contents in hexadecimal
Author: Anatoly Myachev <an...@intel.com>
Closes #4310 from anmyachev/print_func and squashes the following commits:
d3236d7be <Anatoly Myachev> added doc-strings
fea82b9e8 <Anatoly Myachev> added a byte > 128
ebdf126cf <Anatoly Myachev> changed: 'e' -> 'E' in expected hex string
c4bf288b9 <Anatoly Myachev> added test 'ToHexString' func in c++ part
0f472357d <Anatoly Myachev> removed unnecessary headers
dc9b58921 <Anatoly Myachev> added 'test_buffer_hex' in 'python/pyarrow/tests/test_io.py'
a2de6b21d <Anatoly Myachev> using HexEncode; renamed 'ToHexString' -> 'hex' in pyarrow.Buffer
7af18471e <Anatoly Myachev> renamed function 'print' -> 'ToHexString'; this func returns std::string now
7d945eb55 <Anatoly Myachev> added 'print' method to Buffer class
---
cpp/src/arrow/buffer-test.cc | 11 +++++++++++
cpp/src/arrow/buffer.cc | 5 +++++
cpp/src/arrow/buffer.h | 4 ++++
python/pyarrow/includes/libarrow.pxd | 1 +
python/pyarrow/io.pxi | 10 ++++++++++
python/pyarrow/tests/test_io.py | 9 +++++++++
6 files changed, 40 insertions(+)
diff --git a/cpp/src/arrow/buffer-test.cc b/cpp/src/arrow/buffer-test.cc
index 9b0530e..6dc18f6 100644
--- a/cpp/src/arrow/buffer-test.cc
+++ b/cpp/src/arrow/buffer-test.cc
@@ -136,6 +136,17 @@ TEST(TestBuffer, Copy) {
ASSERT_EQ(0, memcmp(out->data() + out->size(), zeros.data(), zeros.size()));
}
+TEST(TestBuffer, ToHexString) {
+  // Input covers a control character (\a == 0x07) and a byte above 0x7F (0xA9).
+  // The bytes live in a std::string because std::basic_string<uint8_t> needs a
+  // std::char_traits<uint8_t> specialization that the standard does not provide.
+  const std::string data_str = "\a0hex string\xa9";
+
+  auto data = reinterpret_cast<const uint8_t*>(data_str.data());
+
+  // Explicit cast: std::string::size() is unsigned, Buffer sizes are int64_t.
+  Buffer buf(data, static_cast<int64_t>(data_str.size()));
+
+  // Expect two uppercase hex digits per input byte, in order.
+  ASSERT_EQ(buf.ToHexString(), std::string("073068657820737472696E67A9"));
+}
+
TEST(TestBuffer, SliceBuffer) {
std::string data_str = "some data to slice";
diff --git a/cpp/src/arrow/buffer.cc b/cpp/src/arrow/buffer.cc
index e93333e..589d93d 100644
--- a/cpp/src/arrow/buffer.cc
+++ b/cpp/src/arrow/buffer.cc
@@ -25,6 +25,7 @@
#include "arrow/status.h"
#include "arrow/util/bit-util.h"
#include "arrow/util/logging.h"
+#include "arrow/util/string.h"
namespace arrow {
@@ -48,6 +49,10 @@ Status Buffer::Copy(const int64_t start, const int64_t nbytes,
return Copy(start, nbytes, default_memory_pool(), out);
}
+std::string Buffer::ToHexString() {  // hex-encode the size() bytes starting at data()
+ return HexEncode(data(), static_cast<size_t>(size()));  // size() is a signed int64_t
+}
+
bool Buffer::Equals(const Buffer& other, const int64_t nbytes) const {
return this == &other || (size_ >= nbytes && other.size_ >= nbytes &&
(data_ == other.data_ ||
diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h
index 07b2f09..cd3032d 100644
--- a/cpp/src/arrow/buffer.h
+++ b/cpp/src/arrow/buffer.h
@@ -88,6 +88,10 @@ class ARROW_EXPORT Buffer {
bool is_mutable() const { return is_mutable_; }
+ /// \brief Construct a new std::string with a hexadecimal representation of the buffer.
+ /// \return two uppercase hex digits per byte, without prefix or separators
+ std::string ToHexString();
+
/// Return true if both buffers are the same size and contain the same bytes
/// up to the number of compared bytes
bool Equals(const Buffer& other, int64_t nbytes) const;
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 57dd965..6656e73 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -210,6 +210,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
int64_t size()
shared_ptr[CBuffer] parent()
c_bool is_mutable() const
+ c_string ToHexString()
c_bool Equals(const CBuffer& other)
shared_ptr[CBuffer] SliceBuffer(const shared_ptr[CBuffer]& buffer,
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index f542b24..b9817d3 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -868,6 +868,16 @@ cdef class Buffer:
"""
return <uintptr_t> self.buffer.get().data()
+ def hex(self):
+ """
+ Compute hexadecimal representation of the buffer.
+
+ Returns
+ -------
+ bytes
+     Two uppercase hexadecimal digits for each byte of the buffer.
+ """
+ return self.buffer.get().ToHexString()
+
@property
def is_mutable(self):
"""
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index 5f6ff44..1c87078 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -324,6 +324,15 @@ def test_buffer_invalid():
pa.py_buffer(None)
+@pytest.mark.parametrize('val, expected_hex_buffer',
+ [(b'check', b'636865636B'),
+ (b'\a0', b'0730'),
+ (b'', b'')])
+def test_buffer_hex(val, expected_hex_buffer):
+ buf = pa.py_buffer(val)
+ assert buf.hex() == expected_hex_buffer
+
+
def test_buffer_to_numpy():
# Make sure creating a numpy array from an arrow buffer works
byte_array = bytearray(20)