You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2018/07/25 03:24:20 UTC

[04/10] mesos git commit: Improved performance of jsonify by integrating with rapidjson.

Improved performance of jsonify by integrating with rapidjson.

This reduces the time needed for the client to finish receiving a
master's /state response by 50% in the `StateQuery` benchmark:

                    min    q1    q3   max
baseline           6.52  6.76  7.33  8.26
rapidjson w/ SIMD  3.48  3.54  4.12  4.4
rapidjson          3.29  3.32  3.65  3.85

SIMD is left disabled for now since it showed slightly slower
results.

Review: https://reviews.apache.org/r/67988


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/c23bb29a
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/c23bb29a
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/c23bb29a

Branch: refs/heads/master
Commit: c23bb29aa83daedb58288cb3276a565744a2b757
Parents: 135545f
Author: Benjamin Mahler <bm...@apache.org>
Authored: Thu Jul 19 14:35:01 2018 -0700
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Tue Jul 24 18:33:22 2018 -0700

----------------------------------------------------------------------
 3rdparty/stout/include/stout/jsonify.hpp | 358 ++++++++++----------------
 1 file changed, 140 insertions(+), 218 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/c23bb29a/3rdparty/stout/include/stout/jsonify.hpp
----------------------------------------------------------------------
diff --git a/3rdparty/stout/include/stout/jsonify.hpp b/3rdparty/stout/include/stout/jsonify.hpp
index 2314980..e5402b7 100644
--- a/3rdparty/stout/include/stout/jsonify.hpp
+++ b/3rdparty/stout/include/stout/jsonify.hpp
@@ -13,15 +13,32 @@
 #ifndef __STOUT_JSONIFY__
 #define __STOUT_JSONIFY__
 
-#ifndef __WINDOWS__
-#include <locale.h>
-#endif // __WINDOWS__
+#define RAPIDJSON_HAS_STDSTRING 1
+
+// TODO(bmahler): Consider enabling UTF-8 validation when writing
+// json. Prior to the introduction of rapidjson, we performed no
+// validation, so we maintain this status quo for now.
+//
+// #define RAPIDJSON_WRITE_DEFAULT_FLAGS 1 // kWriteValidateEncodingFlag
+
+// TODO(bmahler): Consider enabling SIMD for rapidjson. Unfortunately,
+// it showed slightly slower results on the serialization path when
+// benchmarked, so I've left it disabled.
+//
+// #if defined(__SSE4_2__)
+// #  define RAPIDJSON_SSE42
+// #elif defined(__SSE2__)
+// #  define RAPIDJSON_SSE2
+// #elif defined(_MSC_VER) // Turn on SSE4.2 for VC
+// #  define RAPIDJSON_SSE42
+// #endif
+
+#include <rapidjson/stringbuffer.h>
+#include <rapidjson/writer.h>
 
-#include <clocale>
 #include <cstddef>
 #include <functional>
 #include <ostream>
-#include <sstream>
 #include <string>
 #include <type_traits>
 #include <utility>
@@ -57,67 +74,6 @@ JSON::Proxy jsonify(const T&);
 
 namespace JSON {
 
-namespace internal {
-
-/**
- * This object changes the current thread's locale to the default "C"
- * locale for number printing purposes. This prevents, for example,
- * commas from appearing in printed numbers instead of decimal points.
- *
- * NOTE: This object should only be used to guard synchronous code.
- * If multiple blocks of code need to enforce the default locale,
- * each block should utilize this object.
- */
-// TODO(josephw): Consider pulling this helper into a separate header.
-class ClassicLocale
-{
-#ifdef __WINDOWS__
-public:
-  ClassicLocale()
-  {
-    // We will only change the locale for this thread
-    // and save the previous state of the thread's locale.
-    original_per_thread_ = _configthreadlocale(_ENABLE_PER_THREAD_LOCALE);
-
-    // NOTE: We must make a copy of the return value as it points
-    // to global or shared memory. Future calls to `setlocale` will
-    // invalidate the memory location.
-    original_locale_ = setlocale(LC_NUMERIC, "C");
-  }
-
-  ~ClassicLocale()
-  {
-    setlocale(LC_NUMERIC, original_locale_.c_str());
-    _configthreadlocale(original_per_thread_);
-  }
-
-private:
-  int original_per_thread_;
-  std::string original_locale_;
-#else
-public:
-  ClassicLocale()
-  {
-    c_locale_ = newlocale(LC_NUMERIC_MASK, "C", nullptr);
-    original_locale_ = uselocale(c_locale_);
-  }
-
-  ~ClassicLocale()
-  {
-    uselocale(original_locale_);
-    CHECK(c_locale_ != 0);
-    freelocale(c_locale_);
-  }
-
-private:
-  locale_t original_locale_;
-  locale_t c_locale_;
-#endif // __WINDOWS__
-};
-
-} // namespace internal {
-
-
 // The result of `jsonify`. This is a light-weight proxy object that can either
 // be implicitly converted to a `std::string`, or directly inserted into an
 // output stream.
@@ -131,16 +87,17 @@ class Proxy
 public:
   operator std::string() &&
   {
-    // Needed to set C locale and therefore creating proper JSON output.
-    internal::ClassicLocale guard;
+    rapidjson::StringBuffer buffer;
+    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+
+    write(&writer);
 
-    std::ostringstream stream;
-    stream << std::move(*this);
-    return stream.str();
+    return {buffer.GetString(), buffer.GetSize()};
   }
 
 private:
-  Proxy(std::function<void(std::ostream*)> write) : write_(std::move(write)) {}
+  Proxy(std::function<void(rapidjson::Writer<rapidjson::StringBuffer>*)> write)
+    : write(std::move(write)) {}
 
   // We declare copy/move constructors `private` to prevent statements that try
   // to "save" an instance of `Proxy` such as:
@@ -155,36 +112,37 @@ private:
   Proxy(const Proxy&) = default;
   Proxy(Proxy&&) = default;
 
-  std::function<void(std::ostream*)> write_;
-
   template <typename T>
   friend Proxy (::jsonify)(const T&);
 
   friend std::ostream& operator<<(std::ostream& stream, Proxy&& that);
+
+public:
+  // This is public in order to enable the `ObjectWriter` and `ArrayWriter`
+  // to continue writing to the same writer.
+  std::function<void(rapidjson::Writer<rapidjson::StringBuffer>*)> write;
 };
 
 
 inline std::ostream& operator<<(std::ostream& stream, Proxy&& that)
 {
-  // Needed to set C locale and therefore creating proper JSON output.
-  internal::ClassicLocale guard;
-
-  that.write_(&stream);
-  return stream;
+  return stream << std::string(std::move(that));
 }
 
 
-// The boolean writer. If `set` is not called at all, `false` is printed.
-// If `set` is called more than once, only the last value is printed out.
+// The boolean writer. If `set` is not called at all, a false value is
+// written. If `set` is called more than once, only the last value is
+// written.
 class BooleanWriter
 {
 public:
-  BooleanWriter(std::ostream* stream) : stream_(stream), value_(false) {}
+  BooleanWriter(rapidjson::Writer<rapidjson::StringBuffer>* writer)
+    : writer_(writer), value_(false) {}
 
   BooleanWriter(const BooleanWriter&) = delete;
   BooleanWriter(BooleanWriter&&) = delete;
 
-  ~BooleanWriter() { *stream_ << (value_ ? "true" : "false"); }
+  ~BooleanWriter() { CHECK(writer_->Bool(value_)); }
 
   BooleanWriter& operator=(const BooleanWriter&) = delete;
   BooleanWriter& operator=(BooleanWriter&&) = delete;
@@ -192,18 +150,18 @@ public:
   void set(bool value) { value_ = value; }
 
 private:
-  std::ostream* stream_;
+  rapidjson::Writer<rapidjson::StringBuffer>* writer_;
   bool value_;
 };
 
 
-// The number writer. If `set` is not called at all, `0` is printed.
-// If `set` is called more than once, only the last value is printed.
+// The number writer. If `set` is not called at all, `0` is written.
+// If `set` is called more than once, only the last value is written.
 class NumberWriter
 {
 public:
-  NumberWriter(std::ostream* stream)
-    : stream_(stream), type_(INT), int_(0) {}
+  NumberWriter(rapidjson::Writer<rapidjson::StringBuffer>* writer)
+    : writer_(writer), type_(INT), int_(0) {}
 
   NumberWriter(const NumberWriter&) = delete;
   NumberWriter(NumberWriter&&) = delete;
@@ -211,43 +169,9 @@ public:
   ~NumberWriter()
   {
     switch (type_) {
-      case INT: {
-        *stream_ << int_;
-        break;
-      }
-      case UINT: {
-        *stream_ << uint_;
-        break;
-      }
-      case DOUBLE: {
-        // Prints a floating point value, with the specified precision, see:
-        // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2006/n2005.pdf
-        // Additionally ensures that a decimal point is in the output.
-        char buffer[50]; // More than enough for the specified precision.
-        const int size = snprintf(
-            buffer,
-            sizeof(buffer),
-            "%#.*g",
-            std::numeric_limits<double>::digits10,
-            double_);
-
-        // Get rid of excess trailing zeroes before outputting.
-        // Otherwise, printing 1.0 would result in "1.00000000000000".
-        //
-        // NOTE: We intentionally do not use `strings::trim` here in order to
-        // avoid construction of temporary strings.
-        int back = size - 1;
-        for (; back > 0; --back) {
-          if (buffer[back] != '0') {
-            break;
-          }
-          buffer[back] = '\0';
-        }
-
-        // NOTE: valid JSON numbers cannot end with a '.'.
-        *stream_ << buffer << (buffer[back] == '.' ? "0" : "");
-        break;
-      }
+      case INT:    CHECK(writer_->Int64(int_));     break;
+      case UINT:   CHECK(writer_->Uint64(uint_));   break;
+      case DOUBLE: CHECK(writer_->Double(double_)); break;
     }
   }
 
@@ -311,7 +235,7 @@ public:
   }
 
 private:
-  std::ostream* stream_;
+  rapidjson::Writer<rapidjson::StringBuffer>* writer_;
 
   enum { INT, UINT, DOUBLE } type_;
 
@@ -324,109 +248,96 @@ private:
 };
 
 
-// The string writer. `append` is used to append a character or a string.
-// If `append` is not called at all, `""` is printed.
+// The string writer. `set` is used to write a string and must only
+// be called once. If `set` is not called at all, an empty JSON
+// string is written.
 class StringWriter
 {
 public:
-  StringWriter(std::ostream* stream) : stream_(stream) { *stream_ << '"'; }
+  StringWriter(rapidjson::Writer<rapidjson::StringBuffer>* writer)
+    : writer_(writer), empty_(true) {}
 
   StringWriter(const StringWriter&) = delete;
   StringWriter(StringWriter&&) = delete;
 
-  ~StringWriter() { *stream_ << '"'; }
+  ~StringWriter() { if (empty_) { CHECK(writer_->String("")); } }
 
   StringWriter& operator=(const StringWriter&) = delete;
   StringWriter& operator=(StringWriter&&) = delete;
 
-  void append(char c)
+  template <std::size_t N>
+  void set(const char (&value)[N])
   {
-    switch (c) {
-      case '"' : *stream_ << "\\\""; break;
-      case '\\': *stream_ << "\\\\"; break;
-      case '/' : *stream_ << "\\/"; break;
-      case '\b': *stream_ << "\\b"; break;
-      case '\f': *stream_ << "\\f"; break;
-      case '\n': *stream_ << "\\n"; break;
-      case '\r': *stream_ << "\\r"; break;
-      case '\t': *stream_ << "\\t"; break;
-      default: {
-        if (static_cast<unsigned char>(c) < 0x20 || c == 0x7f) {
-          char buffer[7];
-          snprintf(buffer, sizeof(buffer), "\\u%04x", c & 0xff);
-          stream_->write(buffer, sizeof(buffer) - 1);
-        } else {
-          *stream_ << c;
-        }
-        break;
-      }
-    }
-  }
+    empty_ = false;
 
-  template <std::size_t N>
-  void append(const char (&value)[N]) { append(value, N - 1); }
-  void append(const std::string& value) { append(value.data(), value.size()); }
+    // This check will fail if we enable write validation in rapidjson;
+    // we'll need to figure out a way to surface the error.
+    CHECK(writer_->String(value, N-1));
+  }
 
-private:
-  void append(const char* value, std::size_t size)
+  void set(const std::string& value)
   {
-    for (std::size_t i = 0; i < size; ++i) {
-      append(value[i]);
-    }
+    empty_ = false;
+
+    // This check will fail if we enable write validation in rapidjson;
+    // we'll need to figure out a way to surface the error.
+    CHECK(writer_->String(value));
   }
 
-  std::ostream* stream_;
+private:
+  rapidjson::Writer<rapidjson::StringBuffer>* writer_;
+  bool empty_;
 };
 
 
 // The array writer. `element(value)` is used to write a new element.
-// If `element` is not called at all, `[]` is printed.
+// If `element` is not called at all, an empty JSON array is written.
 class ArrayWriter
 {
 public:
-  ArrayWriter(std::ostream* stream) : stream_(stream), count_(0)
+  ArrayWriter(rapidjson::Writer<rapidjson::StringBuffer>* writer)
+    : writer_(writer)
   {
-    *stream_ << '[';
+    CHECK(writer_->StartArray());
   }
 
   ArrayWriter(const ArrayWriter&) = delete;
   ArrayWriter(ArrayWriter&&) = delete;
 
-  ~ArrayWriter() { *stream_ << ']'; }
+  ~ArrayWriter()
+  {
+    CHECK(writer_->EndArray());
+  }
 
   ArrayWriter& operator=(const ArrayWriter&) = delete;
   ArrayWriter& operator=(ArrayWriter&&) = delete;
 
   template <typename T>
-  void element(const T& value)
-  {
-    if (count_ > 0) {
-      *stream_ << ',';
-    }
-    *stream_ << jsonify(value);
-    ++count_;
-  }
+  void element(const T& value) { jsonify(value).write(writer_); }
 
 private:
-  std::ostream* stream_;
-  std::size_t count_;
+  rapidjson::Writer<rapidjson::StringBuffer>* writer_;
 };
 
 
 // The object writer. `field(key, value)` is used to write a new field.
-// If `field` is not called at all, `{}` is printed.
+// If `field` is not called at all, an empty JSON object is written.
 class ObjectWriter
 {
 public:
-  ObjectWriter(std::ostream* stream) : stream_(stream), count_(0)
+  ObjectWriter(rapidjson::Writer<rapidjson::StringBuffer>* writer)
+    : writer_(writer)
   {
-    *stream_ << '{';
+    CHECK(writer_->StartObject());
   }
 
   ObjectWriter(const ObjectWriter&) = delete;
   ObjectWriter(ObjectWriter&&) = delete;
 
-  ~ObjectWriter() { *stream_ << '}'; }
+  ~ObjectWriter()
+  {
+    CHECK(writer_->EndObject());
+  }
 
   ObjectWriter& operator=(const ObjectWriter&) = delete;
   ObjectWriter& operator=(ObjectWriter&&) = delete;
@@ -434,34 +345,37 @@ public:
   template <typename T>
   void field(const std::string& key, const T& value)
   {
-    if (count_ > 0) {
-      *stream_ << ',';
-    }
-    *stream_ << jsonify(key) << ':' << jsonify(value);
-    ++count_;
+    // This check will fail if we enable write validation in rapidjson;
+    // we'll need to figure out a way to surface the error.
+    //
+    // TODO(bmahler): The 1.1.0 release of rapidjson did not
+    // yet have the std::string overload for `Key`; avoid calling
+    // `c_str()` and `size()` when we upgrade beyond 1.1.0.
+    CHECK(writer_->Key(key.c_str(), key.size()));
+    jsonify(value).write(writer_);
   }
 
 private:
-  std::ostream* stream_;
-  std::size_t count_;
+  rapidjson::Writer<rapidjson::StringBuffer>* writer_;
 };
 
 
 class NullWriter
 {
 public:
-  NullWriter(std::ostream* stream) : stream_(stream) {}
+  NullWriter(rapidjson::Writer<rapidjson::StringBuffer>* writer)
+    : writer_(writer) {}
 
   NullWriter(const NullWriter&) = delete;
   NullWriter(NullWriter&&) = delete;
 
-  ~NullWriter() { *stream_ << "null"; }
+  ~NullWriter() { CHECK(writer_->Null()); }
 
   NullWriter& operator=(const NullWriter&) = delete;
   NullWriter& operator=(NullWriter&&) = delete;
 
 private:
-  std::ostream* stream_;
+  rapidjson::Writer<rapidjson::StringBuffer>* writer_;
 };
 
 
@@ -514,13 +428,13 @@ inline void json(NumberWriter* writer, double value) { writer->set(value); }
 template <std::size_t N>
 void json(StringWriter* writer, const char (&value)[N])
 {
-  writer->append(value);
+  writer->set(value);
 }
 
 
 inline void json(StringWriter* writer, const std::string& value)
 {
-  writer->append(value);
+  writer->set(value);
 }
 
 namespace internal {
@@ -637,38 +551,39 @@ void json(ObjectWriter* writer, const Dictionary& dictionary)
 //
 // The goal is to perform overload resolution based on the second parameter.
 // Since `WriterProxy` is convertible to any of the writers equivalently, we
-// force overload resolution of `json(WriterProxy(stream), value)` to depend
+// force overload resolution of `json(WriterProxy(writer), value)` to depend
 // only on the second parameter.
 class WriterProxy
 {
 public:
-  WriterProxy(std::ostream* stream) : stream_(stream) {}
+  WriterProxy(rapidjson::Writer<rapidjson::StringBuffer>* writer)
+    : writer_(writer) {}
 
   ~WriterProxy()
   {
     switch (type_) {
       case BOOLEAN_WRITER: {
-        writer_.boolean_writer.~BooleanWriter();
+        proxy_.boolean_writer.~BooleanWriter();
         break;
       }
       case NUMBER_WRITER: {
-        writer_.number_writer.~NumberWriter();
+        proxy_.number_writer.~NumberWriter();
         break;
       }
       case STRING_WRITER: {
-        writer_.string_writer.~StringWriter();
+        proxy_.string_writer.~StringWriter();
         break;
       }
       case ARRAY_WRITER: {
-        writer_.array_writer.~ArrayWriter();
+        proxy_.array_writer.~ArrayWriter();
         break;
       }
       case OBJECT_WRITER: {
-        writer_.object_writer.~ObjectWriter();
+        proxy_.object_writer.~ObjectWriter();
         break;
       }
       case NULL_WRITER: {
-        writer_.null_writer.~NullWriter();
+        proxy_.null_writer.~NullWriter();
         break;
       }
     }
@@ -676,44 +591,44 @@ public:
 
   operator BooleanWriter*() &&
   {
-    new (&writer_.boolean_writer) BooleanWriter(stream_);
+    new (&proxy_.boolean_writer) BooleanWriter(writer_);
     type_ = BOOLEAN_WRITER;
-    return &writer_.boolean_writer;
+    return &proxy_.boolean_writer;
   }
 
   operator NumberWriter*() &&
   {
-    new (&writer_.number_writer) NumberWriter(stream_);
+    new (&proxy_.number_writer) NumberWriter(writer_);
     type_ = NUMBER_WRITER;
-    return &writer_.number_writer;
+    return &proxy_.number_writer;
   }
 
   operator StringWriter*() &&
   {
-    new (&writer_.string_writer) StringWriter(stream_);
+    new (&proxy_.string_writer) StringWriter(writer_);
     type_ = STRING_WRITER;
-    return &writer_.string_writer;
+    return &proxy_.string_writer;
   }
 
   operator ArrayWriter*() &&
   {
-    new (&writer_.array_writer) ArrayWriter(stream_);
+    new (&proxy_.array_writer) ArrayWriter(writer_);
     type_ = ARRAY_WRITER;
-    return &writer_.array_writer;
+    return &proxy_.array_writer;
   }
 
   operator ObjectWriter*() &&
   {
-    new (&writer_.object_writer) ObjectWriter(stream_);
+    new (&proxy_.object_writer) ObjectWriter(writer_);
     type_ = OBJECT_WRITER;
-    return &writer_.object_writer;
+    return &proxy_.object_writer;
   }
 
   operator NullWriter*() &&
   {
-    new (&writer_.null_writer) NullWriter(stream_);
+    new (&proxy_.null_writer) NullWriter(writer_);
     type_ = NULL_WRITER;
-    return &writer_.null_writer;
+    return &proxy_.null_writer;
   }
 
 private:
@@ -739,11 +654,12 @@ private:
     NullWriter null_writer;
   };
 
-  std::ostream* stream_;
+  rapidjson::Writer<rapidjson::StringBuffer>* writer_;
   Type type_;
-  Writer writer_;
+  Writer proxy_;
 };
 
+
 namespace internal {
 
 // NOTE: The following overloads of `internal::jsonify` return a `std::function`
@@ -754,9 +670,13 @@ namespace internal {
 
 // Given an `F` which is a "write" function, we simply use it directly.
 template <typename F, typename = typename result_of<F(WriterProxy)>::type>
-std::function<void(std::ostream*)> jsonify(const F& write, Prefer)
+std::function<void(rapidjson::Writer<rapidjson::StringBuffer>*)> jsonify(
+    const F& write,
+    Prefer)
 {
-  return [&write](std::ostream* stream) { write(WriterProxy(stream)); };
+  return [&write](rapidjson::Writer<rapidjson::StringBuffer>* writer) {
+      write(WriterProxy(writer));
+  };
 }
 
 // Given a `T` which is not a "write" function itself, the default "write"
@@ -765,10 +685,12 @@ std::function<void(std::ostream*)> jsonify(const F& write, Prefer)
 // namespace as well, since `WriterProxy` is intentionally defined in the
 // `JSON` namespace.
 template <typename T>
-std::function<void(std::ostream*)> jsonify(const T& value, LessPrefer)
+std::function<void(rapidjson::Writer<rapidjson::StringBuffer>*)> jsonify(
+    const T& value,
+    LessPrefer)
 {
-  return [&value](std::ostream* stream) {
-    json(WriterProxy(stream), value);
+  return [&value](rapidjson::Writer<rapidjson::StringBuffer>* writer) {
+    json(WriterProxy(writer), value);
   };
 }