You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by to...@apache.org on 2018/02/03 01:09:30 UTC

[4/4] kudu git commit: jsonwriter: do some internal buffering in front of ostringstream

jsonwriter: do some internal buffering in front of ostringstream

JsonWriter outputs into a std::ostringstream, and the RapidJSON
interface is such that we always append one character at a time.
ostringstream::put(char) is not inlined, so this is very slow.

This patch adds a small buffer in front of the ostringstream. We flush
the buffer on the end of any JSON array or object.

The patch adds a simple benchmark to show the improvement:

Before:
I0131 20:26:09.437477 16201 jsonwriter-test.cc:187] Throughput: 87.0644MB/sec

After:
I0131 20:24:17.889117 15929 jsonwriter-test.cc:187] Throughput: 154.192MB/sec

The remaining perf issues seem to be due to our use of protobuf
reflection.

Change-Id: Ib45d58187cf80ecfe2f5a25e40d7042e4a27619d
Reviewed-on: http://gerrit.cloudera.org:8080/9181
Tested-by: Kudu Jenkins
Reviewed-by: Will Berkeley <wd...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/2bff29b5
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/2bff29b5
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/2bff29b5

Branch: refs/heads/master
Commit: 2bff29b5949825515dfa827a98973a9dbdc32b86
Parents: afae5c7
Author: Todd Lipcon <to...@apache.org>
Authored: Wed Jan 31 20:24:26 2018 -0800
Committer: Todd Lipcon <to...@apache.org>
Committed: Sat Feb 3 01:08:49 2018 +0000

----------------------------------------------------------------------
 src/kudu/util/jsonwriter-test.cc | 72 ++++++++++++++++++++++++++---------
 src/kudu/util/jsonwriter.cc      | 26 +++++++++++--
 2 files changed, 77 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/2bff29b5/src/kudu/util/jsonwriter-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/jsonwriter-test.cc b/src/kudu/util/jsonwriter-test.cc
index 9dc7e69..608d9c0 100644
--- a/src/kudu/util/jsonwriter-test.cc
+++ b/src/kudu/util/jsonwriter-test.cc
@@ -15,19 +15,48 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <ostream>
+#include <stdint.h>
+#include <string>
+
 #include <gflags/gflags.h>
+#include <glog/logging.h>
 #include <gtest/gtest.h>
 
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/util/jsonwriter.h"
 #include "kudu/util/jsonwriter_test.pb.h"
+#include "kudu/util/stopwatch.h"
 #include "kudu/util/test_util.h"
 
 using jsonwriter_test::TestAllTypes;
 
 namespace kudu {
 
-class TestJsonWriter : public KuduTest {};
+class TestJsonWriter : public KuduTest {
+ protected:
+  TestAllTypes MakeAllTypesPB() {
+    TestAllTypes pb;
+    pb.set_optional_int32(1);
+    pb.set_optional_int64(2);
+    pb.set_optional_uint32(3);
+    pb.set_optional_uint64(4);
+    pb.set_optional_sint32(5);
+    pb.set_optional_sint64(6);
+    pb.set_optional_fixed32(7);
+    pb.set_optional_fixed64(8);
+    pb.set_optional_sfixed32(9);
+    pb.set_optional_sfixed64(10);
+    pb.set_optional_float(11);
+    pb.set_optional_double(12);
+    pb.set_optional_bool(true);
+    pb.set_optional_string("hello world");
+    pb.set_optional_redacted_string("secret!");
+    pb.set_optional_nested_enum(TestAllTypes::FOO);
+    return pb;
+  }
+
+};
 
 TEST_F(TestJsonWriter, TestPBEmpty) {
   TestAllTypes pb;
@@ -36,23 +65,8 @@ TEST_F(TestJsonWriter, TestPBEmpty) {
 
 TEST_F(TestJsonWriter, TestPBAllFieldTypes) {
   ASSERT_NE("", gflags::SetCommandLineOption("redact", "log"));
-  TestAllTypes pb;
-  pb.set_optional_int32(1);
-  pb.set_optional_int64(2);
-  pb.set_optional_uint32(3);
-  pb.set_optional_uint64(4);
-  pb.set_optional_sint32(5);
-  pb.set_optional_sint64(6);
-  pb.set_optional_fixed32(7);
-  pb.set_optional_fixed64(8);
-  pb.set_optional_sfixed32(9);
-  pb.set_optional_sfixed64(10);
-  pb.set_optional_float(11);
-  pb.set_optional_double(12);
-  pb.set_optional_bool(true);
-  pb.set_optional_string("hello world");
-  pb.set_optional_redacted_string("secret!");
-  pb.set_optional_nested_enum(TestAllTypes::FOO);
+  TestAllTypes pb = MakeAllTypesPB();
+
   ASSERT_EQ("{\n"
             "    \"optional_int32\": 1,\n"
             "    \"optional_int64\": 2,\n"
@@ -156,4 +170,26 @@ TEST_F(TestJsonWriter, TestPBNestedMessage) {
             JsonWriter::ToJson(pb, JsonWriter::COMPACT));
 }
 
+TEST_F(TestJsonWriter, Benchmark) {
+  TestAllTypes pb = MakeAllTypesPB();
+
+  int64_t total_len = 0;
+  Stopwatch sw;
+  sw.start();
+  while (sw.elapsed().wall_seconds() < 5) {
+    std::ostringstream str;
+    JsonWriter jw(&str, JsonWriter::COMPACT);
+    jw.StartArray();
+    for (int i = 0; i < 10000; i++) {
+      jw.Protobuf(pb);
+    }
+    jw.EndArray();
+    total_len += str.str().size();
+  }
+  sw.stop();
+  double mbps = total_len / 1024.0 / 1024.0 / sw.elapsed().user_cpu_seconds();
+  LOG(INFO) << "Throughput: " << mbps << "MB/sec";
+}
+
+
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/2bff29b5/src/kudu/util/jsonwriter.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/jsonwriter.cc b/src/kudu/util/jsonwriter.cc
index d2f1f65..f194d5b 100644
--- a/src/kudu/util/jsonwriter.cc
+++ b/src/kudu/util/jsonwriter.cc
@@ -31,6 +31,7 @@
 #include <rapidjson/rapidjson.h>
 
 #include "kudu/gutil/port.h"
+#include "kudu/util/faststring.h"
 #include "kudu/util/logging.h"
 #include "kudu/util/pb_util.pb.h"
 
@@ -49,8 +50,13 @@ namespace kudu {
 class UTF8StringStreamBuffer {
  public:
   explicit UTF8StringStreamBuffer(std::ostringstream* out);
+  ~UTF8StringStreamBuffer();
   void Put(rapidjson::UTF8<>::Ch c);
+
+  void Flush();
+
  private:
+  faststring buf_;
   std::ostringstream* out_;
 };
 
@@ -285,9 +291,17 @@ string JsonWriter::ToJson(const Message& pb, Mode mode) {
 UTF8StringStreamBuffer::UTF8StringStreamBuffer(std::ostringstream* out)
   : out_(DCHECK_NOTNULL(out)) {
 }
+UTF8StringStreamBuffer::~UTF8StringStreamBuffer() {
+  DCHECK_EQ(buf_.size(), 0) << "Forgot to flush!";
+}
 
 void UTF8StringStreamBuffer::Put(rapidjson::UTF8<>::Ch c) {
-  out_->put(c);
+  buf_.push_back(c);
+}
+
+void UTF8StringStreamBuffer::Flush() {
+  out_->write(reinterpret_cast<char*>(buf_.data()), buf_.size());
+  buf_.clear();
 }
 
 //
@@ -322,10 +336,16 @@ void JsonWriterImpl<T>::String(const string& str) { writer_.String(str.c_str(),
 template<class T>
 void JsonWriterImpl<T>::StartObject() { writer_.StartObject(); }
 template<class T>
-void JsonWriterImpl<T>::EndObject() { writer_.EndObject(); }
+void JsonWriterImpl<T>::EndObject() {
+  writer_.EndObject();
+  stream_.Flush();
+}
 template<class T>
 void JsonWriterImpl<T>::StartArray() { writer_.StartArray(); }
 template<class T>
-void JsonWriterImpl<T>::EndArray() { writer_.EndArray(); }
+void JsonWriterImpl<T>::EndArray() {
+  writer_.EndArray();
+  stream_.Flush();
+}
 
 } // namespace kudu