You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/03/08 20:18:47 UTC

[arrow] branch master updated: ARROW-4699: [C++] remove json chunker's requirement of null terminated buffers

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 4afd2ee  ARROW-4699: [C++] remove json chunker's requirement of null terminated buffers
4afd2ee is described below

commit 4afd2eec89de5b9d07d291cab8f55bc3c8439e41
Author: Benjamin Kietzman <be...@gmail.com>
AuthorDate: Fri Mar 8 14:18:39 2019 -0600

    ARROW-4699: [C++] remove json chunker's requirement of null terminated buffers
    
    Re-enable json chunker test. [This was failing valgrind](https://travis-ci.org/apache/arrow/jobs/494039471#L2140) but that is not currently happening locally
    
    Author: Benjamin Kietzman <be...@gmail.com>
    
    Closes #3775 from bkietz/ARROW-4699-dont-require-null-terminated-buffers and squashes the following commits:
    
    1a9d72c2 <Benjamin Kietzman> don't use in-situ parsing for test's PrettyPrint
    bb642754 <Benjamin Kietzman> enable chunker-test, remove null terminated reqmt
---
 cpp/src/arrow/json/CMakeLists.txt  | 3 +--
 cpp/src/arrow/json/chunker-test.cc | 7 ++++---
 cpp/src/arrow/json/chunker.cc      | 5 ++++-
 cpp/src/arrow/json/chunker.h       | 2 +-
 4 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/cpp/src/arrow/json/CMakeLists.txt b/cpp/src/arrow/json/CMakeLists.txt
index a244b8c..a5ceb04 100644
--- a/cpp/src/arrow/json/CMakeLists.txt
+++ b/cpp/src/arrow/json/CMakeLists.txt
@@ -17,8 +17,7 @@
 
 add_arrow_test(parser-test PREFIX "arrow-json")
 
-# TODO(wesm): ARROW-694 this fails valgrind
-# add_arrow_test(chunker-test PREFIX "arrow-json")
+add_arrow_test(chunker-test PREFIX "arrow-json")
 
 add_arrow_benchmark(parser-benchmark PREFIX "arrow-json")
 
diff --git a/cpp/src/arrow/json/chunker-test.cc b/cpp/src/arrow/json/chunker-test.cc
index 36e2953..88fbc31 100644
--- a/cpp/src/arrow/json/chunker-test.cc
+++ b/cpp/src/arrow/json/chunker-test.cc
@@ -49,9 +49,9 @@ std::string join(Lines&& lines, std::string delimiter) {
   return joined;
 }
 
-std::string PrettyPrint(std::string one_line) {
+std::string PrettyPrint(string_view one_line) {
   rapidjson::Document document;
-  document.ParseInsitu(const_cast<char*>(one_line.data()));
+  document.Parse(one_line.data());
   rapidjson::StringBuffer sb;
   rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(sb);
   document.Accept(writer);
@@ -172,7 +172,8 @@ TEST(ChunkerTest, PrettyPrinted) {
 
 TEST(ChunkerTest, SingleLine) {
   auto chunker = MakeChunker(true);
-  AssertChunking(*chunker, join(lines(), ""), object_count);
+  auto single_line = join(lines(), "");
+  AssertChunking(*chunker, single_line, object_count);
 }
 
 TEST_P(BaseChunkerTest, Straddling) {
diff --git a/cpp/src/arrow/json/chunker.cc b/cpp/src/arrow/json/chunker.cc
index 02daa06..7b992df 100644
--- a/cpp/src/arrow/json/chunker.cc
+++ b/cpp/src/arrow/json/chunker.cc
@@ -167,7 +167,10 @@ class ParsingChunker : public Chunker {
     }
     std::size_t total_length = 0;
     for (auto consumed = block;; consumed = block.substr(total_length)) {
-      auto length = ConsumeWholeObject(rapidjson::StringStream(consumed.data()));
+      using rapidjson::MemoryStream;
+      MemoryStream ms(consumed.data(), consumed.size());
+      using InputStream = rapidjson::EncodedInputStream<rapidjson::UTF8<>, MemoryStream>;
+      auto length = ConsumeWholeObject(InputStream(ms));
       if (length == string_view::npos || length == 0) {
         // found incomplete object or consumed is empty
         break;
diff --git a/cpp/src/arrow/json/chunker.h b/cpp/src/arrow/json/chunker.h
index 1e71dd4..a74abf2 100644
--- a/cpp/src/arrow/json/chunker.h
+++ b/cpp/src/arrow/json/chunker.h
@@ -40,7 +40,7 @@ class ARROW_EXPORT Chunker {
   virtual ~Chunker() = default;
 
   /// \brief Carve up a chunk in a block of data to contain only whole objects
-  /// \param[in] block json data to be chunked, must end with '\0'
+  /// \param[in] block json data to be chunked
   /// \param[out] chunked subrange of block containing whole json objects
   virtual Status Process(util::string_view block, util::string_view* chunked) = 0;