You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2018/07/25 03:24:20 UTC
[04/10] mesos git commit: Improved performance of jsonify by
integrating with rapidjson.
Improved performance of jsonify by integrating with rapidjson.
This reduces the time needed for the client to finish receiving a
master's /state response by 50% in the `StateQuery` benchmark:
                     min    q1    q3    max
baseline            6.52  6.76  7.33  8.26
rapidjson w/ SIMD   3.48  3.54  4.12  4.4
rapidjson           3.29  3.32  3.65  3.85
SIMD is left disabled for now since it showed slightly slower
results.
Review: https://reviews.apache.org/r/67988
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/c23bb29a
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/c23bb29a
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/c23bb29a
Branch: refs/heads/master
Commit: c23bb29aa83daedb58288cb3276a565744a2b757
Parents: 135545f
Author: Benjamin Mahler <bm...@apache.org>
Authored: Thu Jul 19 14:35:01 2018 -0700
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Tue Jul 24 18:33:22 2018 -0700
----------------------------------------------------------------------
3rdparty/stout/include/stout/jsonify.hpp | 358 ++++++++++----------------
1 file changed, 140 insertions(+), 218 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/c23bb29a/3rdparty/stout/include/stout/jsonify.hpp
----------------------------------------------------------------------
diff --git a/3rdparty/stout/include/stout/jsonify.hpp b/3rdparty/stout/include/stout/jsonify.hpp
index 2314980..e5402b7 100644
--- a/3rdparty/stout/include/stout/jsonify.hpp
+++ b/3rdparty/stout/include/stout/jsonify.hpp
@@ -13,15 +13,32 @@
#ifndef __STOUT_JSONIFY__
#define __STOUT_JSONIFY__
-#ifndef __WINDOWS__
-#include <locale.h>
-#endif // __WINDOWS__
+#define RAPIDJSON_HAS_STDSTRING 1
+
+// TODO(bmahler): Consider enabling UTF-8 validation when writing
+// json. Prior to the introduction of rapidjson, we performed no
+// validation, so we maintain this status quo for now.
+//
+// #define RAPIDJSON_WRITE_DEFAULT_FLAGS 1 // kWriteValidateEncodingFlag
+
+// TODO(bmahler): Consider enabling SIMD for rapidjson. Unfortunately,
+// it showed slightly slower results on the serialization path when
+// benchmarked, so I've left it disabled.
+//
+// #if defined(__SSE4_2__)
+// # define RAPIDJSON_SSE42
+// #elif defined(__SSE2__)
+// # define RAPIDJSON_SSE2
+// #elif defined(_MSC_VER) // Turn on SSE4.2 for VC
+// # define RAPIDJSON_SSE42
+// #endif
+
+#include <rapidjson/stringbuffer.h>
+#include <rapidjson/writer.h>
-#include <clocale>
#include <cstddef>
#include <functional>
#include <ostream>
-#include <sstream>
#include <string>
#include <type_traits>
#include <utility>
@@ -57,67 +74,6 @@ JSON::Proxy jsonify(const T&);
namespace JSON {
-namespace internal {
-
-/**
- * This object changes the current thread's locale to the default "C"
- * locale for number printing purposes. This prevents, for example,
- * commas from appearing in printed numbers instead of decimal points.
- *
- * NOTE: This object should only be used to guard synchronous code.
- * If multiple blocks of code need to enforce the default locale,
- * each block should utilize this object.
- */
-// TODO(josephw): Consider pulling this helper into a separate header.
-class ClassicLocale
-{
-#ifdef __WINDOWS__
-public:
- ClassicLocale()
- {
- // We will only change the locale for this thread
- // and save the previous state of the thread's locale.
- original_per_thread_ = _configthreadlocale(_ENABLE_PER_THREAD_LOCALE);
-
- // NOTE: We must make a copy of the return value as it points
- // to global or shared memory. Future calls to `setlocale` will
- // invalidate the memory location.
- original_locale_ = setlocale(LC_NUMERIC, "C");
- }
-
- ~ClassicLocale()
- {
- setlocale(LC_NUMERIC, original_locale_.c_str());
- _configthreadlocale(original_per_thread_);
- }
-
-private:
- int original_per_thread_;
- std::string original_locale_;
-#else
-public:
- ClassicLocale()
- {
- c_locale_ = newlocale(LC_NUMERIC_MASK, "C", nullptr);
- original_locale_ = uselocale(c_locale_);
- }
-
- ~ClassicLocale()
- {
- uselocale(original_locale_);
- CHECK(c_locale_ != 0);
- freelocale(c_locale_);
- }
-
-private:
- locale_t original_locale_;
- locale_t c_locale_;
-#endif // __WINDOWS__
-};
-
-} // namespace internal {
-
-
// The result of `jsonify`. This is a light-weight proxy object that can either
// be implicitly converted to a `std::string`, or directly inserted into an
// output stream.
@@ -131,16 +87,17 @@ class Proxy
public:
operator std::string() &&
{
- // Needed to set C locale and therefore creating proper JSON output.
- internal::ClassicLocale guard;
+ rapidjson::StringBuffer buffer;
+ rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+
+ write(&writer);
- std::ostringstream stream;
- stream << std::move(*this);
- return stream.str();
+ return {buffer.GetString(), buffer.GetSize()};
}
private:
- Proxy(std::function<void(std::ostream*)> write) : write_(std::move(write)) {}
+ Proxy(std::function<void(rapidjson::Writer<rapidjson::StringBuffer>*)> write)
+ : write(std::move(write)) {}
// We declare copy/move constructors `private` to prevent statements that try
// to "save" an instance of `Proxy` such as:
@@ -155,36 +112,37 @@ private:
Proxy(const Proxy&) = default;
Proxy(Proxy&&) = default;
- std::function<void(std::ostream*)> write_;
-
template <typename T>
friend Proxy (::jsonify)(const T&);
friend std::ostream& operator<<(std::ostream& stream, Proxy&& that);
+
+public:
+ // This is public in order to enable the `ObjectWriter` and `ArrayWriter`
+ // to continue writing to the same writer.
+ std::function<void(rapidjson::Writer<rapidjson::StringBuffer>*)> write;
};
inline std::ostream& operator<<(std::ostream& stream, Proxy&& that)
{
- // Needed to set C locale and therefore creating proper JSON output.
- internal::ClassicLocale guard;
-
- that.write_(&stream);
- return stream;
+ return stream << std::string(std::move(that));
}
-// The boolean writer. If `set` is not called at all, `false` is printed.
-// If `set` is called more than once, only the last value is printed out.
+// The boolean writer. If `set` is not called at all, a false value is
+// written. If `set` is called more than once, only the last value is
+// written.
class BooleanWriter
{
public:
- BooleanWriter(std::ostream* stream) : stream_(stream), value_(false) {}
+ BooleanWriter(rapidjson::Writer<rapidjson::StringBuffer>* writer)
+ : writer_(writer), value_(false) {}
BooleanWriter(const BooleanWriter&) = delete;
BooleanWriter(BooleanWriter&&) = delete;
- ~BooleanWriter() { *stream_ << (value_ ? "true" : "false"); }
+ ~BooleanWriter() { CHECK(writer_->Bool(value_)); }
BooleanWriter& operator=(const BooleanWriter&) = delete;
BooleanWriter& operator=(BooleanWriter&&) = delete;
@@ -192,18 +150,18 @@ public:
void set(bool value) { value_ = value; }
private:
- std::ostream* stream_;
+ rapidjson::Writer<rapidjson::StringBuffer>* writer_;
bool value_;
};
-// The number writer. If `set` is not called at all, `0` is printed.
-// If `set` is called more than once, only the last value is printed.
+// The number writer. If `set` is not called at all, `0` is written.
+// If `set` is called more than once, only the last value is written.
class NumberWriter
{
public:
- NumberWriter(std::ostream* stream)
- : stream_(stream), type_(INT), int_(0) {}
+ NumberWriter(rapidjson::Writer<rapidjson::StringBuffer>* writer)
+ : writer_(writer), type_(INT), int_(0) {}
NumberWriter(const NumberWriter&) = delete;
NumberWriter(NumberWriter&&) = delete;
@@ -211,43 +169,9 @@ public:
~NumberWriter()
{
switch (type_) {
- case INT: {
- *stream_ << int_;
- break;
- }
- case UINT: {
- *stream_ << uint_;
- break;
- }
- case DOUBLE: {
- // Prints a floating point value, with the specified precision, see:
- // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2006/n2005.pdf
- // Additionally ensures that a decimal point is in the output.
- char buffer[50]; // More than enough for the specified precision.
- const int size = snprintf(
- buffer,
- sizeof(buffer),
- "%#.*g",
- std::numeric_limits<double>::digits10,
- double_);
-
- // Get rid of excess trailing zeroes before outputting.
- // Otherwise, printing 1.0 would result in "1.00000000000000".
- //
- // NOTE: We intentionally do not use `strings::trim` here in order to
- // avoid construction of temporary strings.
- int back = size - 1;
- for (; back > 0; --back) {
- if (buffer[back] != '0') {
- break;
- }
- buffer[back] = '\0';
- }
-
- // NOTE: valid JSON numbers cannot end with a '.'.
- *stream_ << buffer << (buffer[back] == '.' ? "0" : "");
- break;
- }
+ case INT: CHECK(writer_->Int64(int_)); break;
+ case UINT: CHECK(writer_->Uint64(uint_)); break;
+ case DOUBLE: CHECK(writer_->Double(double_)); break;
}
}
@@ -311,7 +235,7 @@ public:
}
private:
- std::ostream* stream_;
+ rapidjson::Writer<rapidjson::StringBuffer>* writer_;
enum { INT, UINT, DOUBLE } type_;
@@ -324,109 +248,96 @@ private:
};
-// The string writer. `append` is used to append a character or a string.
-// If `append` is not called at all, `""` is printed.
+// The string writer. `set` is used to write a string and must only
+// be called once. If `set` is not called at all, an empty JSON
+// string is written.
class StringWriter
{
public:
- StringWriter(std::ostream* stream) : stream_(stream) { *stream_ << '"'; }
+ StringWriter(rapidjson::Writer<rapidjson::StringBuffer>* writer)
+ : writer_(writer), empty_(true) {}
StringWriter(const StringWriter&) = delete;
StringWriter(StringWriter&&) = delete;
- ~StringWriter() { *stream_ << '"'; }
+ ~StringWriter() { if (empty_) { CHECK(writer_->String("")); } }
StringWriter& operator=(const StringWriter&) = delete;
StringWriter& operator=(StringWriter&&) = delete;
- void append(char c)
+ template <std::size_t N>
+ void set(const char (&value)[N])
{
- switch (c) {
- case '"' : *stream_ << "\\\""; break;
- case '\\': *stream_ << "\\\\"; break;
- case '/' : *stream_ << "\\/"; break;
- case '\b': *stream_ << "\\b"; break;
- case '\f': *stream_ << "\\f"; break;
- case '\n': *stream_ << "\\n"; break;
- case '\r': *stream_ << "\\r"; break;
- case '\t': *stream_ << "\\t"; break;
- default: {
- if (static_cast<unsigned char>(c) < 0x20 || c == 0x7f) {
- char buffer[7];
- snprintf(buffer, sizeof(buffer), "\\u%04x", c & 0xff);
- stream_->write(buffer, sizeof(buffer) - 1);
- } else {
- *stream_ << c;
- }
- break;
- }
- }
- }
+ empty_ = false;
- template <std::size_t N>
- void append(const char (&value)[N]) { append(value, N - 1); }
- void append(const std::string& value) { append(value.data(), value.size()); }
+ // This check will fail if we enable write validation in rapidjson;
+ // we'll need to figure out a way to surface the error.
+ CHECK(writer_->String(value, N-1));
+ }
-private:
- void append(const char* value, std::size_t size)
+ void set(const std::string& value)
{
- for (std::size_t i = 0; i < size; ++i) {
- append(value[i]);
- }
+ empty_ = false;
+
+ // This check will fail if we enable write validation in rapidjson;
+ // we'll need to figure out a way to surface the error.
+ CHECK(writer_->String(value));
}
- std::ostream* stream_;
+private:
+ rapidjson::Writer<rapidjson::StringBuffer>* writer_;
+ bool empty_;
};
// The array writer. `element(value)` is used to write a new element.
-// If `element` is not called at all, `[]` is printed.
+// If `element` is not called at all, an empty JSON array is written.
class ArrayWriter
{
public:
- ArrayWriter(std::ostream* stream) : stream_(stream), count_(0)
+ ArrayWriter(rapidjson::Writer<rapidjson::StringBuffer>* writer)
+ : writer_(writer)
{
- *stream_ << '[';
+ CHECK(writer_->StartArray());
}
ArrayWriter(const ArrayWriter&) = delete;
ArrayWriter(ArrayWriter&&) = delete;
- ~ArrayWriter() { *stream_ << ']'; }
+ ~ArrayWriter()
+ {
+ CHECK(writer_->EndArray());
+ }
ArrayWriter& operator=(const ArrayWriter&) = delete;
ArrayWriter& operator=(ArrayWriter&&) = delete;
template <typename T>
- void element(const T& value)
- {
- if (count_ > 0) {
- *stream_ << ',';
- }
- *stream_ << jsonify(value);
- ++count_;
- }
+ void element(const T& value) { jsonify(value).write(writer_); }
private:
- std::ostream* stream_;
- std::size_t count_;
+ rapidjson::Writer<rapidjson::StringBuffer>* writer_;
};
// The object writer. `field(key, value)` is used to write a new field.
-// If `field` is not called at all, `{}` is printed.
+// If `field` is not called at all, an empty JSON object is written.
class ObjectWriter
{
public:
- ObjectWriter(std::ostream* stream) : stream_(stream), count_(0)
+ ObjectWriter(rapidjson::Writer<rapidjson::StringBuffer>* writer)
+ : writer_(writer)
{
- *stream_ << '{';
+ CHECK(writer_->StartObject());
}
ObjectWriter(const ObjectWriter&) = delete;
ObjectWriter(ObjectWriter&&) = delete;
- ~ObjectWriter() { *stream_ << '}'; }
+ ~ObjectWriter()
+ {
+ CHECK(writer_->EndObject());
+ }
ObjectWriter& operator=(const ObjectWriter&) = delete;
ObjectWriter& operator=(ObjectWriter&&) = delete;
@@ -434,34 +345,37 @@ public:
template <typename T>
void field(const std::string& key, const T& value)
{
- if (count_ > 0) {
- *stream_ << ',';
- }
- *stream_ << jsonify(key) << ':' << jsonify(value);
- ++count_;
+ // This check will fail if we enable write validation in rapidjson;
+ // we'll need to figure out a way to surface the error.
+ //
+ // TODO(bmahler): The 1.1.0 release of rapidjson did not
+ // yet have the std::string overload for `Key`; avoid calling
+ // `c_str()` and `size()` when we upgrade beyond 1.1.0.
+ CHECK(writer_->Key(key.c_str(), key.size()));
+ jsonify(value).write(writer_);
}
private:
- std::ostream* stream_;
- std::size_t count_;
+ rapidjson::Writer<rapidjson::StringBuffer>* writer_;
};
class NullWriter
{
public:
- NullWriter(std::ostream* stream) : stream_(stream) {}
+ NullWriter(rapidjson::Writer<rapidjson::StringBuffer>* writer)
+ : writer_(writer) {}
NullWriter(const NullWriter&) = delete;
NullWriter(NullWriter&&) = delete;
- ~NullWriter() { *stream_ << "null"; }
+ ~NullWriter() { CHECK(writer_->Null()); }
NullWriter& operator=(const NullWriter&) = delete;
NullWriter& operator=(NullWriter&&) = delete;
private:
- std::ostream* stream_;
+ rapidjson::Writer<rapidjson::StringBuffer>* writer_;
};
@@ -514,13 +428,13 @@ inline void json(NumberWriter* writer, double value) { writer->set(value); }
template <std::size_t N>
void json(StringWriter* writer, const char (&value)[N])
{
- writer->append(value);
+ writer->set(value);
}
inline void json(StringWriter* writer, const std::string& value)
{
- writer->append(value);
+ writer->set(value);
}
namespace internal {
@@ -637,38 +551,39 @@ void json(ObjectWriter* writer, const Dictionary& dictionary)
//
// The goal is to perform overload resolution based on the second parameter.
// Since `WriterProxy` is convertible to any of the writers equivalently, we
-// force overload resolution of `json(WriterProxy(stream), value)` to depend
+// force overload resolution of `json(WriterProxy(writer), value)` to depend
// only on the second parameter.
class WriterProxy
{
public:
- WriterProxy(std::ostream* stream) : stream_(stream) {}
+ WriterProxy(rapidjson::Writer<rapidjson::StringBuffer>* writer)
+ : writer_(writer) {}
~WriterProxy()
{
switch (type_) {
case BOOLEAN_WRITER: {
- writer_.boolean_writer.~BooleanWriter();
+ proxy_.boolean_writer.~BooleanWriter();
break;
}
case NUMBER_WRITER: {
- writer_.number_writer.~NumberWriter();
+ proxy_.number_writer.~NumberWriter();
break;
}
case STRING_WRITER: {
- writer_.string_writer.~StringWriter();
+ proxy_.string_writer.~StringWriter();
break;
}
case ARRAY_WRITER: {
- writer_.array_writer.~ArrayWriter();
+ proxy_.array_writer.~ArrayWriter();
break;
}
case OBJECT_WRITER: {
- writer_.object_writer.~ObjectWriter();
+ proxy_.object_writer.~ObjectWriter();
break;
}
case NULL_WRITER: {
- writer_.null_writer.~NullWriter();
+ proxy_.null_writer.~NullWriter();
break;
}
}
@@ -676,44 +591,44 @@ public:
operator BooleanWriter*() &&
{
- new (&writer_.boolean_writer) BooleanWriter(stream_);
+ new (&proxy_.boolean_writer) BooleanWriter(writer_);
type_ = BOOLEAN_WRITER;
- return &writer_.boolean_writer;
+ return &proxy_.boolean_writer;
}
operator NumberWriter*() &&
{
- new (&writer_.number_writer) NumberWriter(stream_);
+ new (&proxy_.number_writer) NumberWriter(writer_);
type_ = NUMBER_WRITER;
- return &writer_.number_writer;
+ return &proxy_.number_writer;
}
operator StringWriter*() &&
{
- new (&writer_.string_writer) StringWriter(stream_);
+ new (&proxy_.string_writer) StringWriter(writer_);
type_ = STRING_WRITER;
- return &writer_.string_writer;
+ return &proxy_.string_writer;
}
operator ArrayWriter*() &&
{
- new (&writer_.array_writer) ArrayWriter(stream_);
+ new (&proxy_.array_writer) ArrayWriter(writer_);
type_ = ARRAY_WRITER;
- return &writer_.array_writer;
+ return &proxy_.array_writer;
}
operator ObjectWriter*() &&
{
- new (&writer_.object_writer) ObjectWriter(stream_);
+ new (&proxy_.object_writer) ObjectWriter(writer_);
type_ = OBJECT_WRITER;
- return &writer_.object_writer;
+ return &proxy_.object_writer;
}
operator NullWriter*() &&
{
- new (&writer_.null_writer) NullWriter(stream_);
+ new (&proxy_.null_writer) NullWriter(writer_);
type_ = NULL_WRITER;
- return &writer_.null_writer;
+ return &proxy_.null_writer;
}
private:
@@ -739,11 +654,12 @@ private:
NullWriter null_writer;
};
- std::ostream* stream_;
+ rapidjson::Writer<rapidjson::StringBuffer>* writer_;
Type type_;
- Writer writer_;
+ Writer proxy_;
};
+
namespace internal {
// NOTE: The following overloads of `internal::jsonify` return a `std::function`
@@ -754,9 +670,13 @@ namespace internal {
// Given an `F` which is a "write" function, we simply use it directly.
template <typename F, typename = typename result_of<F(WriterProxy)>::type>
-std::function<void(std::ostream*)> jsonify(const F& write, Prefer)
+std::function<void(rapidjson::Writer<rapidjson::StringBuffer>*)> jsonify(
+ const F& write,
+ Prefer)
{
- return [&write](std::ostream* stream) { write(WriterProxy(stream)); };
+ return [&write](rapidjson::Writer<rapidjson::StringBuffer>* writer) {
+ write(WriterProxy(writer));
+ };
}
// Given a `T` which is not a "write" function itself, the default "write"
@@ -765,10 +685,12 @@ std::function<void(std::ostream*)> jsonify(const F& write, Prefer)
// namespace as well, since `WriterProxy` is intentionally defined in the
// `JSON` namespace.
template <typename T>
-std::function<void(std::ostream*)> jsonify(const T& value, LessPrefer)
+std::function<void(rapidjson::Writer<rapidjson::StringBuffer>*)> jsonify(
+ const T& value,
+ LessPrefer)
{
- return [&value](std::ostream* stream) {
- json(WriterProxy(stream), value);
+ return [&value](rapidjson::Writer<rapidjson::StringBuffer>* writer) {
+ json(WriterProxy(writer), value);
};
}