You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/05/24 13:58:47 UTC

[arrow] branch master updated: ARROW-653: [Python / C++] Add debugging function to print an array's buffer contents in hexadecimal

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 863b926  ARROW-653: [Python / C++] Add debugging function to print an array's buffer contents in hexadecimal
863b926 is described below

commit 863b926b617d417161e7c8d0046263005cd5f603
Author: Anatoly Myachev <an...@intel.com>
AuthorDate: Fri May 24 08:58:37 2019 -0500

    ARROW-653: [Python / C++] Add debugging function to print an array's buffer contents in hexadecimal
    
    Author: Anatoly Myachev <an...@intel.com>
    
    Closes #4310 from anmyachev/print_func and squashes the following commits:
    
    d3236d7be <Anatoly Myachev> added doc-strings
    fea82b9e8 <Anatoly Myachev> added a byte > 128
    ebdf126cf <Anatoly Myachev> changed: 'e' -> 'E' in expected hex string
    c4bf288b9 <Anatoly Myachev> added test 'ToHexString' func in c++ part
    0f472357d <Anatoly Myachev> removed unnecessary headers
    dc9b58921 <Anatoly Myachev> added 'test_buffer_hex' in 'python/pyarrow/tests/test_io.py'
    a2de6b21d <Anatoly Myachev> using HexEncode; renamed 'ToHexString' -> 'hex' in pyarrow.Buffer
    7af18471e <Anatoly Myachev> renamed function 'print' -> 'ToHexString'; this func returns std::string now
    7d945eb55 <Anatoly Myachev> added 'print' method to Buffer class
---
 cpp/src/arrow/buffer-test.cc         | 11 +++++++++++
 cpp/src/arrow/buffer.cc              |  5 +++++
 cpp/src/arrow/buffer.h               |  4 ++++
 python/pyarrow/includes/libarrow.pxd |  1 +
 python/pyarrow/io.pxi                | 10 ++++++++++
 python/pyarrow/tests/test_io.py      |  9 +++++++++
 6 files changed, 40 insertions(+)

diff --git a/cpp/src/arrow/buffer-test.cc b/cpp/src/arrow/buffer-test.cc
index 9b0530e..6dc18f6 100644
--- a/cpp/src/arrow/buffer-test.cc
+++ b/cpp/src/arrow/buffer-test.cc
@@ -136,6 +136,17 @@ TEST(TestBuffer, Copy) {
   ASSERT_EQ(0, memcmp(out->data() + out->size(), zeros.data(), zeros.size()));
 }
 
+TEST(TestBuffer, ToHexString) {
+  const uint8_t data_array[] = "\a0hex string\xa9";
+  std::basic_string<uint8_t> data_str = data_array;
+
+  auto data = reinterpret_cast<const uint8_t*>(data_str.c_str());
+
+  Buffer buf(data, data_str.size());
+
+  ASSERT_EQ(buf.ToHexString(), std::string("073068657820737472696E67A9"));
+}
+
 TEST(TestBuffer, SliceBuffer) {
   std::string data_str = "some data to slice";
 
diff --git a/cpp/src/arrow/buffer.cc b/cpp/src/arrow/buffer.cc
index e93333e..589d93d 100644
--- a/cpp/src/arrow/buffer.cc
+++ b/cpp/src/arrow/buffer.cc
@@ -25,6 +25,7 @@
 #include "arrow/status.h"
 #include "arrow/util/bit-util.h"
 #include "arrow/util/logging.h"
+#include "arrow/util/string.h"
 
 namespace arrow {
 
@@ -48,6 +49,10 @@ Status Buffer::Copy(const int64_t start, const int64_t nbytes,
   return Copy(start, nbytes, default_memory_pool(), out);
 }
 
+std::string Buffer::ToHexString() {
+  return HexEncode(data(), static_cast<size_t>(size()));
+}
+
 bool Buffer::Equals(const Buffer& other, const int64_t nbytes) const {
   return this == &other || (size_ >= nbytes && other.size_ >= nbytes &&
                             (data_ == other.data_ ||
diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h
index 07b2f09..cd3032d 100644
--- a/cpp/src/arrow/buffer.h
+++ b/cpp/src/arrow/buffer.h
@@ -88,6 +88,10 @@ class ARROW_EXPORT Buffer {
 
   bool is_mutable() const { return is_mutable_; }
 
+  /// \brief Return the buffer's bytes as a hexadecimal string (two digits per byte).
+  /// \return std::string of uppercase hex digits, e.g. "0730" for the bytes 0x07 0x30
+  std::string ToHexString();
+
   /// Return true if both buffers are the same size and contain the same bytes
   /// up to the number of compared bytes
   bool Equals(const Buffer& other, int64_t nbytes) const;
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 57dd965..6656e73 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -210,6 +210,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
         int64_t size()
         shared_ptr[CBuffer] parent()
         c_bool is_mutable() const
+        c_string ToHexString()
         c_bool Equals(const CBuffer& other)
 
     shared_ptr[CBuffer] SliceBuffer(const shared_ptr[CBuffer]& buffer,
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index f542b24..b9817d3 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -868,6 +868,16 @@ cdef class Buffer:
         """
         return <uintptr_t> self.buffer.get().data()
 
+    def hex(self):
+        """
+        Compute the hexadecimal representation of the buffer.
+
+        Returns
+        -------
+        : bytes
+        """
+        return self.buffer.get().ToHexString()
+
     @property
     def is_mutable(self):
         """
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index 5f6ff44..1c87078 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -324,6 +324,15 @@ def test_buffer_invalid():
         pa.py_buffer(None)
 
 
+@pytest.mark.parametrize('val, expected_hex_buffer',
+                         [(b'check', b'636865636B'),
+                          (b'\a0', b'0730'),
+                          (b'', b'')])
+def test_buffer_hex(val, expected_hex_buffer):
+    buf = pa.py_buffer(val)
+    assert buf.hex() == expected_hex_buffer
+
+
 def test_buffer_to_numpy():
     # Make sure creating a numpy array from an arrow buffer works
     byte_array = bytearray(20)