You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/03/08 20:18:47 UTC
[arrow] branch master updated: ARROW-4699: [C++] remove json
chunker's requirement of null terminated buffers
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 4afd2ee ARROW-4699: [C++] remove json chunker's requirement of null terminated buffers
4afd2ee is described below
commit 4afd2eec89de5b9d07d291cab8f55bc3c8439e41
Author: Benjamin Kietzman <be...@gmail.com>
AuthorDate: Fri Mar 8 14:18:39 2019 -0600
ARROW-4699: [C++] remove json chunker's requirement of null terminated buffers
Re-enable json chunker test. [This was failing valgrind](https://travis-ci.org/apache/arrow/jobs/494039471#L2140) but that is not currently happening locally
Author: Benjamin Kietzman <be...@gmail.com>
Closes #3775 from bkietz/ARROW-4699-dont-require-null-terminated-buffers and squashes the following commits:
1a9d72c2 <Benjamin Kietzman> don't use in-situ parsing for test's PrettyPrint
bb642754 <Benjamin Kietzman> enable chunker-test, remove null terminated reqmt
---
cpp/src/arrow/json/CMakeLists.txt | 3 +--
cpp/src/arrow/json/chunker-test.cc | 7 ++++---
cpp/src/arrow/json/chunker.cc | 5 ++++-
cpp/src/arrow/json/chunker.h | 2 +-
4 files changed, 10 insertions(+), 7 deletions(-)
diff --git a/cpp/src/arrow/json/CMakeLists.txt b/cpp/src/arrow/json/CMakeLists.txt
index a244b8c..a5ceb04 100644
--- a/cpp/src/arrow/json/CMakeLists.txt
+++ b/cpp/src/arrow/json/CMakeLists.txt
@@ -17,8 +17,7 @@
add_arrow_test(parser-test PREFIX "arrow-json")
-# TODO(wesm): ARROW-694 this fails valgrind
-# add_arrow_test(chunker-test PREFIX "arrow-json")
+add_arrow_test(chunker-test PREFIX "arrow-json")
add_arrow_benchmark(parser-benchmark PREFIX "arrow-json")
diff --git a/cpp/src/arrow/json/chunker-test.cc b/cpp/src/arrow/json/chunker-test.cc
index 36e2953..88fbc31 100644
--- a/cpp/src/arrow/json/chunker-test.cc
+++ b/cpp/src/arrow/json/chunker-test.cc
@@ -49,9 +49,9 @@ std::string join(Lines&& lines, std::string delimiter) {
return joined;
}
-std::string PrettyPrint(std::string one_line) {
+std::string PrettyPrint(string_view one_line) {
rapidjson::Document document;
- document.ParseInsitu(const_cast<char*>(one_line.data()));
+ document.Parse(one_line.data());
rapidjson::StringBuffer sb;
rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(sb);
document.Accept(writer);
@@ -172,7 +172,8 @@ TEST(ChunkerTest, PrettyPrinted) {
TEST(ChunkerTest, SingleLine) {
auto chunker = MakeChunker(true);
- AssertChunking(*chunker, join(lines(), ""), object_count);
+ auto single_line = join(lines(), "");
+ AssertChunking(*chunker, single_line, object_count);
}
TEST_P(BaseChunkerTest, Straddling) {
diff --git a/cpp/src/arrow/json/chunker.cc b/cpp/src/arrow/json/chunker.cc
index 02daa06..7b992df 100644
--- a/cpp/src/arrow/json/chunker.cc
+++ b/cpp/src/arrow/json/chunker.cc
@@ -167,7 +167,10 @@ class ParsingChunker : public Chunker {
}
std::size_t total_length = 0;
for (auto consumed = block;; consumed = block.substr(total_length)) {
- auto length = ConsumeWholeObject(rapidjson::StringStream(consumed.data()));
+ using rapidjson::MemoryStream;
+ MemoryStream ms(consumed.data(), consumed.size());
+ using InputStream = rapidjson::EncodedInputStream<rapidjson::UTF8<>, MemoryStream>;
+ auto length = ConsumeWholeObject(InputStream(ms));
if (length == string_view::npos || length == 0) {
// found incomplete object or consumed is empty
break;
diff --git a/cpp/src/arrow/json/chunker.h b/cpp/src/arrow/json/chunker.h
index 1e71dd4..a74abf2 100644
--- a/cpp/src/arrow/json/chunker.h
+++ b/cpp/src/arrow/json/chunker.h
@@ -40,7 +40,7 @@ class ARROW_EXPORT Chunker {
virtual ~Chunker() = default;
/// \brief Carve up a chunk in a block of data to contain only whole objects
- /// \param[in] block json data to be chunked, must end with '\0'
+ /// \param[in] block json data to be chunked
/// \param[out] chunked subrange of block containing whole json objects
virtual Status Process(util::string_view block, util::string_view* chunked) = 0;