Posted to commits@impala.apache.org by ta...@apache.org on 2018/07/13 06:03:36 UTC

[24/51] [abbrv] impala git commit: IMPALA-7006: Add KRPC folders from kudu@334ecafd

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/debug/trace_event_impl.h
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/debug/trace_event_impl.h b/be/src/kudu/util/debug/trace_event_impl.h
new file mode 100644
index 0000000..2650e8b
--- /dev/null
+++ b/be/src/kudu/util/debug/trace_event_impl.h
@@ -0,0 +1,726 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef KUDU_UTIL_DEBUG_TRACE_EVENT_IMPL_H_
+#define KUDU_UTIL_DEBUG_TRACE_EVENT_IMPL_H_
+
+#include <cstddef>
+#include <cstdint>
+#include <iosfwd>
+#include <stack>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include <glog/logging.h>
+#include <gtest/gtest_prod.h>
+
+#include "kudu/gutil/atomicops.h"
+#include "kudu/gutil/bind_helpers.h"
+#include "kudu/gutil/callback.h"
+#include "kudu/gutil/gscoped_ptr.h"
+#include "kudu/gutil/macros.h"
+#include "kudu/gutil/ref_counted.h"
+#include "kudu/gutil/spinlock.h"
+#include "kudu/gutil/walltime.h"
+#include "kudu/util/mutex.h"
+
+// Older style trace macros with explicit id and extra data
+// Only these macros result in publishing data to ETW as currently implemented.
+#define TRACE_EVENT_BEGIN_ETW(name, id, extra) \
+    base::debug::TraceLog::AddTraceEventEtw( \
+        TRACE_EVENT_PHASE_BEGIN, \
+        name, reinterpret_cast<const void*>(id), extra)
+
+#define TRACE_EVENT_END_ETW(name, id, extra) \
+    base::debug::TraceLog::AddTraceEventEtw( \
+        TRACE_EVENT_PHASE_END, \
+        name, reinterpret_cast<const void*>(id), extra)
+
+#define TRACE_EVENT_INSTANT_ETW(name, id, extra) \
+    base::debug::TraceLog::AddTraceEventEtw( \
+        TRACE_EVENT_PHASE_INSTANT, \
+        name, reinterpret_cast<const void*>(id), extra)
+
+template <typename Type>
+class Singleton;
+
+#if defined(COMPILER_GCC)
+namespace BASE_HASH_NAMESPACE {
+template <>
+struct hash<kudu::Thread*> {
+  std::size_t operator()(kudu::Thread* value) const {
+    return reinterpret_cast<std::size_t>(value);
+  }
+};
+}  // BASE_HASH_NAMESPACE
+#endif
+
+namespace kudu {
+
+class RefCountedString;
+class Thread;
+
+namespace debug {
+
+// For any argument of type TRACE_VALUE_TYPE_CONVERTABLE the provided
+// class must implement this interface.
+class ConvertableToTraceFormat : public kudu::RefCountedThreadSafe<ConvertableToTraceFormat> {
+ public:
+  // Append the class info to the provided |out| string. The appended
+  // data must be a valid JSON object. Strings must be properly quoted and
+  // escaped. There is no processing applied to the content after it is
+  // appended.
+  virtual void AppendAsTraceFormat(std::string* out) const = 0;
+
+ protected:
+  virtual ~ConvertableToTraceFormat() {}
+
+ private:
+  friend class kudu::RefCountedThreadSafe<ConvertableToTraceFormat>;
+};
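+
+// A minimal sketch of an implementation (illustrative only; the class name is
+// hypothetical and not part of this header): a convertable argument that
+// serializes a single counter value as a JSON object.
+//
+//   class CounterSnapshot : public ConvertableToTraceFormat {
+//    public:
+//     explicit CounterSnapshot(int64_t value) : value_(value) {}
+//     void AppendAsTraceFormat(std::string* out) const override {
+//       out->append("{\"value\":");
+//       out->append(std::to_string(value_));
+//       out->append("}");
+//     }
+//    private:
+//     const int64_t value_;
+//   };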
+
+struct TraceEventHandle {
+  uint32_t chunk_seq;
+  uint16_t chunk_index;
+  uint16_t event_index;
+};
+
+const int kTraceMaxNumArgs = 2;
+
+class BASE_EXPORT TraceEvent {
+ public:
+  union TraceValue {
+    bool as_bool;
+    uint64_t as_uint;
+    long long as_int;
+    double as_double;
+    const void* as_pointer;
+    const char* as_string;
+  };
+
+  TraceEvent();
+  ~TraceEvent();
+
+  // We don't need to copy TraceEvent except when TraceEventBuffer is cloned.
+  // Use the explicit copy method to avoid accidental misuse of copying.
+  void CopyFrom(const TraceEvent& other);
+
+  void Initialize(
+      int thread_id,
+      MicrosecondsInt64 timestamp,
+      MicrosecondsInt64 thread_timestamp,
+      char phase,
+      const unsigned char* category_group_enabled,
+      const char* name,
+      uint64_t id,
+      int num_args,
+      const char** arg_names,
+      const unsigned char* arg_types,
+      const uint64_t* arg_values,
+      const scoped_refptr<ConvertableToTraceFormat>* convertable_values,
+      unsigned char flags);
+
+  void Reset();
+
+  void UpdateDuration(const MicrosecondsInt64& now, const MicrosecondsInt64& thread_now);
+
+  // Serialize event data to JSON
+  void AppendAsJSON(std::string* out) const;
+  void AppendPrettyPrinted(std::ostringstream* out) const;
+
+  static void AppendValueAsJSON(unsigned char type,
+                                TraceValue value,
+                                std::string* out);
+
+  MicrosecondsInt64 timestamp() const { return timestamp_; }
+  MicrosecondsInt64 thread_timestamp() const { return thread_timestamp_; }
+  char phase() const { return phase_; }
+  int thread_id() const { return thread_id_; }
+  MicrosecondsInt64 duration() const { return duration_; }
+  MicrosecondsInt64 thread_duration() const { return thread_duration_; }
+  uint64_t id() const { return id_; }
+  unsigned char flags() const { return flags_; }
+
+  // Exposed for unittesting:
+
+  const kudu::RefCountedString* parameter_copy_storage() const {
+    return parameter_copy_storage_.get();
+  }
+
+  const unsigned char* category_group_enabled() const {
+    return category_group_enabled_;
+  }
+
+  const char* name() const { return name_; }
+
+#if defined(OS_ANDROID)
+  void SendToATrace();
+#endif
+
+ private:
+  // Note: these are ordered by size (largest first) for optimal packing.
+  MicrosecondsInt64 timestamp_;
+  MicrosecondsInt64 thread_timestamp_;
+  MicrosecondsInt64 duration_;
+  MicrosecondsInt64 thread_duration_;
+  // id_ can be used to store phase-specific data.
+  uint64_t id_;
+  TraceValue arg_values_[kTraceMaxNumArgs];
+  const char* arg_names_[kTraceMaxNumArgs];
+  scoped_refptr<ConvertableToTraceFormat> convertable_values_[kTraceMaxNumArgs];
+  const unsigned char* category_group_enabled_;
+  const char* name_;
+  scoped_refptr<kudu::RefCountedString> parameter_copy_storage_;
+  int thread_id_;
+  char phase_;
+  unsigned char flags_;
+  unsigned char arg_types_[kTraceMaxNumArgs];
+
+  DISALLOW_COPY_AND_ASSIGN(TraceEvent);
+};
+
+// TraceBufferChunk is the basic unit of TraceBuffer.
+class BASE_EXPORT TraceBufferChunk {
+ public:
+  explicit TraceBufferChunk(uint32_t seq)
+      : next_free_(0),
+        seq_(seq) {
+  }
+
+  void Reset(uint32_t new_seq);
+  TraceEvent* AddTraceEvent(size_t* event_index);
+  bool IsFull() const { return next_free_ == kTraceBufferChunkSize; }
+
+  uint32_t seq() const { return seq_; }
+  size_t capacity() const { return kTraceBufferChunkSize; }
+  size_t size() const { return next_free_; }
+
+  TraceEvent* GetEventAt(size_t index) {
+    DCHECK(index < size());
+    return &chunk_[index];
+  }
+  const TraceEvent* GetEventAt(size_t index) const {
+    DCHECK(index < size());
+    return &chunk_[index];
+  }
+
+  gscoped_ptr<TraceBufferChunk> Clone() const;
+
+  static const size_t kTraceBufferChunkSize = 64;
+
+ private:
+  size_t next_free_;
+  TraceEvent chunk_[kTraceBufferChunkSize];
+  uint32_t seq_;
+};
+
+// TraceBuffer holds the events as they are collected.
+class BASE_EXPORT TraceBuffer {
+ public:
+  virtual ~TraceBuffer() {}
+
+  virtual gscoped_ptr<TraceBufferChunk> GetChunk(size_t *index) = 0;
+  virtual void ReturnChunk(size_t index,
+                           gscoped_ptr<TraceBufferChunk> chunk) = 0;
+
+  virtual bool IsFull() const = 0;
+  virtual size_t Size() const = 0;
+  virtual size_t Capacity() const = 0;
+  virtual TraceEvent* GetEventByHandle(TraceEventHandle handle) = 0;
+
+  // For iteration. Each TraceBuffer can only be iterated once.
+  virtual const TraceBufferChunk* NextChunk() = 0;
+
+  virtual gscoped_ptr<TraceBuffer> CloneForIteration() const = 0;
+};
+
+// TraceResultBuffer collects and converts trace fragments returned by TraceLog
+// to JSON output.
+class TraceResultBuffer {
+ public:
+  static std::string FlushTraceLogToString();
+  static std::string FlushTraceLogToStringButLeaveBufferIntact();
+
+ private:
+  TraceResultBuffer();
+  ~TraceResultBuffer();
+
+  static std::string DoFlush(bool leave_intact);
+
+  // Callback for TraceLog::Flush
+  void Collect(const scoped_refptr<RefCountedString>& s,
+               bool has_more_events);
+
+  bool first_;
+  std::string json_;
+};
+
+class BASE_EXPORT CategoryFilter {
+ public:
+  typedef std::vector<std::string> StringList;
+
+  // The default category filter, used when none is provided.
+  // Allows all categories through, except if they end in the suffix 'Debug' or
+  // 'Test'.
+  static const char* kDefaultCategoryFilterString;
+
+  // |filter_string| is a comma-delimited list of category wildcards.
+  // A category can have an optional '-' prefix to make it an excluded category.
+  // All the rules above still apply, so for example, having both included and
+  // excluded categories in the same list is not supported.
+  //
+  // Example: CategoryFilter("test_MyTest*");
+  // Example: CategoryFilter("test_MyTest*,test_OtherStuff");
+  // Example: CategoryFilter("-excluded_category1,-excluded_category2");
+  // Example: CategoryFilter("-*,webkit"); would disable everything but webkit.
+  // Example: CategoryFilter("-webkit"); would enable everything but webkit.
+  //
+  // Category filters can also be used to configure synthetic delays.
+  //
+  // Example: CategoryFilter("DELAY(gpu.PresentingFrame;16)"); would make swap
+  //          buffers always take at least 16 ms.
+  // Example: CategoryFilter("DELAY(gpu.PresentingFrame;16;oneshot)"); would
+  //          make swap buffers take at least 16 ms the first time it is
+  //          called.
+  // Example: CategoryFilter("DELAY(gpu.PresentingFrame;16;alternating)");
+  //          would make swap buffers take at least 16 ms every other time it
+  //          is called.
+  explicit CategoryFilter(const std::string& filter_string);
+
+  CategoryFilter(const CategoryFilter& cf);
+
+  ~CategoryFilter();
+
+  CategoryFilter& operator=(const CategoryFilter& rhs);
+
+  // Writes the string representation of the CategoryFilter. This is a comma
+  // separated string, similar in nature to the one used to determine
+  // enabled/disabled category patterns, except that here the order is
+  // otherwise arbitrary: included categories are written first, then excluded
+  // categories. Excluded categories are distinguished from included
+  // categories by the prefix '-'.
+  std::string ToString() const;
+
+  // Determines whether a category group would be enabled or
+  // disabled by this category filter.
+  bool IsCategoryGroupEnabled(const char* category_group) const;
+
+  // Return a list of the synthetic delays specified in this category filter.
+  const StringList& GetSyntheticDelayValues() const;
+
+  // Merges nested_filter with the current CategoryFilter
+  void Merge(const CategoryFilter& nested_filter);
+
+  // Clears both included/excluded pattern lists. This is equivalent to
+  // creating a CategoryFilter with an empty string through the constructor,
+  // i.e. CategoryFilter("").
+  //
+  // When using an empty filter, all categories are considered included as we
+  // are not excluding anything.
+  void Clear();
+
+ private:
+  FRIEND_TEST(TraceEventTestFixture, CategoryFilter);
+
+  static bool IsEmptyOrContainsLeadingOrTrailingWhitespace(
+      const std::string& str);
+
+  void Initialize(const std::string& filter_string);
+  void WriteString(const StringList& values,
+                   std::string* out,
+                   bool included) const;
+  void WriteString(const StringList& delays, std::string* out) const;
+  bool HasIncludedPatterns() const;
+
+  bool DoesCategoryGroupContainCategory(const char* category_group,
+                                        const char* category) const;
+
+  StringList included_;
+  StringList disabled_;
+  StringList excluded_;
+  StringList delays_;
+};
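+
+// A minimal usage sketch (illustrative only): build a filter that excludes
+// the "gpu" category and test category groups against it.
+//
+//   CategoryFilter filter("-gpu");
+//   bool enabled = filter.IsCategoryGroupEnabled("renderer,webkit");  // true
+//   bool gpu_enabled = filter.IsCategoryGroupEnabled("gpu");          // false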
+
+class TraceSamplingThread;
+
+class BASE_EXPORT TraceLog {
+ public:
+  enum Mode {
+    DISABLED = 0,
+    RECORDING_MODE,
+    MONITORING_MODE,
+  };
+
+  // Options determines how the trace buffer stores data.
+  enum Options {
+    // Record until the trace buffer is full.
+    RECORD_UNTIL_FULL = 1 << 0,
+
+    // Record until the user ends the trace. The trace buffer is a fixed size
+    // and we use it as a ring buffer during recording.
+    RECORD_CONTINUOUSLY = 1 << 1,
+
+    // Enable the sampling profiler in the recording mode.
+    ENABLE_SAMPLING = 1 << 2,
+
+    // Echo to console. Events are discarded.
+    ECHO_TO_CONSOLE = 1 << 3,
+  };
+
+  // The pointer returned from GetCategoryGroupEnabledInternal() points to a
+  // value with zero or more of the following bits. Used in this class only.
+  // The TRACE_EVENT macros should only use the value as a bool.
+  // These values must be in sync with macro values in TraceEvent.h in Blink.
+  enum CategoryGroupEnabledFlags {
+    // Category group enabled for the recording mode.
+    ENABLED_FOR_RECORDING = 1 << 0,
+    // Category group enabled for the monitoring mode.
+    ENABLED_FOR_MONITORING = 1 << 1,
+    // Category group enabled by SetEventCallbackEnabled().
+    ENABLED_FOR_EVENT_CALLBACK = 1 << 2,
+  };
+
+  static TraceLog* GetInstance();
+
+  // Get set of known category groups. This can change as new code paths are
+  // reached. The known category groups are inserted into |category_groups|.
+  void GetKnownCategoryGroups(std::vector<std::string>* category_groups);
+
+  // Retrieves a copy (for thread-safety) of the current CategoryFilter.
+  CategoryFilter GetCurrentCategoryFilter();
+
+  Options trace_options() const {
+    return static_cast<Options>(base::subtle::NoBarrier_Load(&trace_options_));
+  }
+
+  // Enables normal tracing (recording trace events in the trace buffer).
+  // See CategoryFilter comments for details on how to control what categories
+  // will be traced. If tracing has already been enabled, |category_filter| will
+  // be merged into the current category filter.
+  void SetEnabled(const CategoryFilter& category_filter,
+                  Mode mode, Options options);
+
+  // Disables normal tracing for all categories.
+  void SetDisabled();
+
+  bool IsEnabled() { return mode_ != DISABLED; }
+
+  // The number of times we have begun recording traces. If tracing is off,
+  // returns -1. If tracing is on, then it returns the number of times we have
+  // recorded a trace. By watching for this number to increment, you can
+  // passively discover when a new trace has begun. This is then used to
+  // implement the TRACE_EVENT_IS_NEW_TRACE() primitive.
+  int GetNumTracesRecorded();
+
+#if defined(OS_ANDROID)
+  void StartATrace();
+  void StopATrace();
+  void AddClockSyncMetadataEvent();
+#endif
+
+  // Enabled state listeners give a callback when tracing is enabled or
+  // disabled. This can be used to tie into other libraries' tracing systems
+  // on demand.
+  class EnabledStateObserver {
+   public:
+    // Called just after the tracing system becomes enabled, outside of the
+    // |lock_|. TraceLog::IsEnabled() is true at this point.
+    virtual void OnTraceLogEnabled() = 0;
+
+    // Called just after the tracing system disables, outside of the |lock_|.
+    // TraceLog::IsEnabled() is false at this point.
+    virtual void OnTraceLogDisabled() = 0;
+  };
+  void AddEnabledStateObserver(EnabledStateObserver* listener);
+  void RemoveEnabledStateObserver(EnabledStateObserver* listener);
+  bool HasEnabledStateObserver(EnabledStateObserver* listener) const;
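+
+  // A minimal observer sketch (illustrative only; the class name is
+  // hypothetical): log state transitions and register the observer with the
+  // singleton TraceLog.
+  //
+  //   class LoggingObserver : public TraceLog::EnabledStateObserver {
+  //    public:
+  //     void OnTraceLogEnabled() override { LOG(INFO) << "tracing on"; }
+  //     void OnTraceLogDisabled() override { LOG(INFO) << "tracing off"; }
+  //   };
+  //
+  //   static LoggingObserver observer;
+  //   TraceLog::GetInstance()->AddEnabledStateObserver(&observer);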
+
+  float GetBufferPercentFull() const;
+  bool BufferIsFull() const;
+
+  // Not using kudu::Callback because it is limited to 7 parameters.
+  // Also, using a primitive type allows directly passing the callback from WebCore.
+  // WARNING: It is possible for the previously set callback to be called
+  // after a call to SetEventCallbackEnabled() that replaces it, or after a
+  // call to SetEventCallbackDisabled() that disables the callback.
+  // This callback may be invoked on any thread.
+  // For TRACE_EVENT_PHASE_COMPLETE events, the client will still receive pairs
+  // of TRACE_EVENT_PHASE_BEGIN and TRACE_EVENT_PHASE_END events to keep the
+  // interface simple.
+  typedef void (*EventCallback)(MicrosecondsInt64 timestamp,
+                                char phase,
+                                const unsigned char* category_group_enabled,
+                                const char* name,
+                                uint64_t id,
+                                int num_args,
+                                const char* const arg_names[],
+                                const unsigned char arg_types[],
+                                const uint64_t arg_values[],
+                                unsigned char flags);
+
+  // Enable tracing for EventCallback.
+  void SetEventCallbackEnabled(const CategoryFilter& category_filter,
+                               EventCallback cb);
+  void SetEventCallbackDisabled();
+
+  // Flush all collected events to the given output callback. The callback will
+  // be called one or more times synchronously from
+  // the current thread with IPC-bite-size chunks. The string format is
+  // undefined. Use TraceResultBuffer to convert one or more trace strings to
+  // JSON. The callback can be null if the caller doesn't want any data.
+  // Due to the implementation of thread-local buffers, flush can't be
+  // done when tracing is enabled. If called when tracing is enabled, the
+  // callback will be called directly with (empty_string, false) to indicate
+  // the end of this unsuccessful flush.
+  typedef kudu::Callback<void(const scoped_refptr<kudu::RefCountedString>&,
+                              bool has_more_events)> OutputCallback;
+  void Flush(const OutputCallback& cb);
+  void FlushButLeaveBufferIntact(const OutputCallback& flush_output_callback);
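+
+  // A minimal end-to-end sketch (illustrative only): enable tracing for one
+  // category, run the traced code, disable tracing, and then convert the
+  // flushed fragments to JSON via TraceResultBuffer (declared above) instead
+  // of wiring up an OutputCallback by hand.
+  //
+  //   TraceLog* log = TraceLog::GetInstance();
+  //   log->SetEnabled(CategoryFilter("my_subsystem"),
+  //                   TraceLog::RECORDING_MODE,
+  //                   TraceLog::RECORD_UNTIL_FULL);
+  //   // ... run the code being traced ...
+  //   log->SetDisabled();
+  //   std::string json = TraceResultBuffer::FlushTraceLogToString();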
+
+  // Called by TRACE_EVENT* macros, don't call this directly.
+  // The name parameter is a category group for example:
+  // TRACE_EVENT0("renderer,webkit", "WebViewImpl::HandleInputEvent")
+  static const unsigned char* GetCategoryGroupEnabled(const char* name);
+  static const char* GetCategoryGroupName(
+      const unsigned char* category_group_enabled);
+
+  // Called by TRACE_EVENT* macros, don't call this directly.
+  // If |copy| is set, |name|, |arg_name1| and |arg_name2| will be deep copied
+  // into the event; see "Memory scoping note" and TRACE_EVENT_COPY_XXX above.
+  TraceEventHandle AddTraceEvent(
+      char phase,
+      const unsigned char* category_group_enabled,
+      const char* name,
+      uint64_t id,
+      int num_args,
+      const char** arg_names,
+      const unsigned char* arg_types,
+      const uint64_t* arg_values,
+      const scoped_refptr<ConvertableToTraceFormat>* convertable_values,
+      unsigned char flags);
+  TraceEventHandle AddTraceEventWithThreadIdAndTimestamp(
+      char phase,
+      const unsigned char* category_group_enabled,
+      const char* name,
+      uint64_t id,
+      int thread_id,
+      const MicrosecondsInt64& timestamp,
+      int num_args,
+      const char** arg_names,
+      const unsigned char* arg_types,
+      const uint64_t* arg_values,
+      const scoped_refptr<ConvertableToTraceFormat>* convertable_values,
+      unsigned char flags);
+  static void AddTraceEventEtw(char phase,
+                               const char* category_group,
+                               const void* id,
+                               const char* extra);
+  static void AddTraceEventEtw(char phase,
+                               const char* category_group,
+                               const void* id,
+                               const std::string& extra);
+
+  void UpdateTraceEventDuration(const unsigned char* category_group_enabled,
+                                const char* name,
+                                TraceEventHandle handle);
+
+  // For every matching event, the callback will be called.
+  typedef kudu::Callback<void()> WatchEventCallback;
+  void SetWatchEvent(const std::string& category_name,
+                     const std::string& event_name,
+                     const WatchEventCallback& callback);
+  // Cancel the watch event. If tracing is enabled, this may race with the
+  // watch event notification firing.
+  void CancelWatchEvent();
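+
+  // A minimal sketch (illustrative only; assumes kudu::Bind() from
+  // kudu/gutil/bind.h and a caller-defined free function): wake up a test
+  // whenever a specific event is logged.
+  //
+  //   void OnWatchedEventSeen();  // hypothetical, defined by the caller
+  //   TraceLog::GetInstance()->SetWatchEvent(
+  //       "my_subsystem", "InterestingEvent", kudu::Bind(&OnWatchedEventSeen));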
+
+  int process_id() const { return process_id_; }
+
+  // Allow tests to inspect TraceEvents.
+  size_t GetEventsSize() const { return logged_events_->Size(); }
+  TraceEvent* GetEventByHandle(TraceEventHandle handle);
+
+  void SetProcessID(int process_id);
+
+  // Process sort indices, if set, override the order in which a process
+  // appears relative to other processes in the trace viewer. Processes are
+  // sorted first on their sort index, ascending, then by their name, and
+  // then by tid.
+  void SetProcessSortIndex(int sort_index);
+
+  // Sets the name of the process.
+  void SetProcessName(const std::string& process_name);
+
+  // Processes can have labels in addition to their names. Use labels, for
+  // instance, to list out the web page titles that a process is handling.
+  void UpdateProcessLabel(int label_id, const std::string& current_label);
+  void RemoveProcessLabel(int label_id);
+
+  // Thread sort indices, if set, override the order in which a thread appears
+  // within its process in the trace viewer. Threads are sorted first on their
+  // sort index, ascending, then by their name, and then by tid.
+  void SetThreadSortIndex(int64_t tid, int sort_index);
+
+  // Allow setting an offset between the current MicrosecondsInt64 time and the time
+  // that should be reported.
+  void SetTimeOffset(MicrosecondsInt64 offset);
+
+  size_t GetObserverCountForTest() const;
+
+
+ private:
+  FRIEND_TEST(TraceEventTestFixture,
+              TraceBufferRingBufferGetReturnChunk);
+  FRIEND_TEST(TraceEventTestFixture,
+              TraceBufferRingBufferHalfIteration);
+  FRIEND_TEST(TraceEventTestFixture,
+              TraceBufferRingBufferFullIteration);
+
+  // This allows constructor and destructor to be private and usable only
+  // by the Singleton class.
+  friend class Singleton<TraceLog>;
+
+  // Enable/disable each category group based on the current mode_,
+  // category_filter_, event_callback_ and event_callback_category_filter_.
+  // Enable the category group in the enabled mode if category_filter_ matches
+  // the category group, or event_callback_ is not null and
+  // event_callback_category_filter_ matches the category group.
+  void UpdateCategoryGroupEnabledFlags();
+  void UpdateCategoryGroupEnabledFlag(int category_index);
+
+  // Configure synthetic delays based on the values set in the current
+  // category filter.
+  void UpdateSyntheticDelaysFromCategoryFilter();
+
+  struct PerThreadInfo;
+  class OptionalAutoLock;
+  class ThreadLocalEventBuffer;
+
+  TraceLog();
+  ~TraceLog();
+  const unsigned char* GetCategoryGroupEnabledInternal(const char* name);
+  void AddMetadataEventsWhileLocked();
+
+  TraceBuffer* trace_buffer() const { return logged_events_.get(); }
+  TraceBuffer* CreateTraceBuffer();
+
+  std::string EventToConsoleMessage(unsigned char phase,
+                                    const MicrosecondsInt64& timestamp,
+                                    TraceEvent* trace_event);
+
+  TraceEvent* AddEventToThreadSharedChunkWhileLocked(TraceEventHandle* handle,
+                                                     bool check_buffer_is_full);
+  void CheckIfBufferIsFullWhileLocked();
+  void SetDisabledWhileLocked();
+
+  TraceEvent* GetEventByHandleInternal(TraceEventHandle handle,
+                                       OptionalAutoLock* lock);
+
+  void ConvertTraceEventsToTraceFormat(gscoped_ptr<TraceBuffer> logged_events,
+                                       const OutputCallback& flush_output_callback);
+  void FinishFlush(int generation,
+                   const OutputCallback& flush_output_callback);
+
+  // Called when a thread which has registered trace events is about to exit.
+  void ThreadExiting();
+
+  // The static callback registered as a thread destructor.
+  static void ThreadExitingCB(void* arg);
+
+  int generation() const {
+    return static_cast<int>(base::subtle::NoBarrier_Load(&generation_));
+  }
+  bool CheckGeneration(int generation) const {
+    return generation == this->generation();
+  }
+  void UseNextTraceBuffer();
+
+  MicrosecondsInt64 OffsetNow() const {
+    return OffsetTimestamp(GetMonoTimeMicros());
+  }
+  MicrosecondsInt64 OffsetTimestamp(const MicrosecondsInt64& timestamp) const {
+    return timestamp - time_offset_;
+  }
+
+  // Create a new PerThreadInfo object for the current thread,
+  // and register it in the active_threads_ list.
+  PerThreadInfo* SetupThreadLocalBuffer();
+
+  // This lock protects TraceLog member accesses (except for members protected
+  // by thread_info_lock_) from arbitrary threads.
+  mutable base::SpinLock lock_;
+  // This lock protects accesses to thread_names_, thread_event_start_times_
+  // and thread_colors_.
+  base::SpinLock thread_info_lock_;
+  int locked_line_;
+  Mode mode_;
+  int num_traces_recorded_;
+  gscoped_ptr<TraceBuffer> logged_events_;
+  AtomicWord /* EventCallback */ event_callback_;
+  bool dispatching_to_observer_list_;
+  std::vector<EnabledStateObserver*> enabled_state_observer_list_;
+
+  std::string process_name_;
+  std::unordered_map<int, std::string> process_labels_;
+  int process_sort_index_;
+  std::unordered_map<int, int> thread_sort_indices_;
+  std::unordered_map<int, std::string> thread_names_;
+
+  // The following two maps are used only when ECHO_TO_CONSOLE.
+  std::unordered_map<int, std::stack<MicrosecondsInt64> > thread_event_start_times_;
+  std::unordered_map<std::string, int> thread_colors_;
+
+  // XORed with TraceID to make it unlikely to collide with other processes.
+  uint64_t process_id_hash_;
+
+  int process_id_;
+
+  MicrosecondsInt64 time_offset_;
+
+  // Allow tests to wake up when certain events occur.
+  WatchEventCallback watch_event_callback_;
+  AtomicWord /* const unsigned char* */ watch_category_;
+  std::string watch_event_name_;
+
+  AtomicWord /* Options */ trace_options_;
+
+  // Sampling thread handles.
+  gscoped_ptr<TraceSamplingThread> sampling_thread_;
+  scoped_refptr<kudu::Thread> sampling_thread_handle_;
+
+  CategoryFilter category_filter_;
+  CategoryFilter event_callback_category_filter_;
+
+  struct PerThreadInfo {
+    ThreadLocalEventBuffer* event_buffer_;
+    base::subtle::Atomic32 is_in_trace_event_;
+
+    // Atomically take the event_buffer_ member, setting it to NULL.
+    // Returns the old value of the member.
+    ThreadLocalEventBuffer* AtomicTakeBuffer();
+  };
+  static __thread PerThreadInfo* thread_local_info_;
+
+  Mutex active_threads_lock_;
+  // Map of thread ID -> PerThreadInfo
+  // Protected by active_threads_lock_.
+  typedef std::unordered_map<int64_t, PerThreadInfo*> ActiveThreadMap;
+  ActiveThreadMap active_threads_;
+
+  // For events which can't be added into the thread local buffer, e.g. events
+  // from threads without a message loop.
+  gscoped_ptr<TraceBufferChunk> thread_shared_chunk_;
+  size_t thread_shared_chunk_index_;
+
+  // The generation is incremented whenever tracing is enabled, and incremented
+  // again when the buffers are flushed. This ensures that trace events logged
+  // for a previous tracing session do not get accidentally flushed in the
+  // next tracing session.
+  AtomicWord generation_;
+
+  DISALLOW_COPY_AND_ASSIGN(TraceLog);
+};
+
+}  // namespace debug
+}  // namespace kudu
+
+#endif  // KUDU_UTIL_DEBUG_TRACE_EVENT_IMPL_H_

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/debug/trace_event_impl_constants.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/debug/trace_event_impl_constants.cc b/be/src/kudu/util/debug/trace_event_impl_constants.cc
new file mode 100644
index 0000000..bf45ed7
--- /dev/null
+++ b/be/src/kudu/util/debug/trace_event_impl_constants.cc
@@ -0,0 +1,14 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "kudu/util/debug/trace_event_impl.h"
+
+namespace kudu {
+namespace debug {
+
+// Enable everything but debug and test categories by default.
+const char* CategoryFilter::kDefaultCategoryFilterString = "-*Debug,-*Test";
+
+}  // namespace debug
+}  // namespace kudu

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/debug/trace_event_memory.h
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/debug/trace_event_memory.h b/be/src/kudu/util/debug/trace_event_memory.h
new file mode 100644
index 0000000..6d9cf8d
--- /dev/null
+++ b/be/src/kudu/util/debug/trace_event_memory.h
@@ -0,0 +1,28 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#ifndef KUDU_DEBUG_TRACE_EVENT_MEMORY_H
+#define KUDU_DEBUG_TRACE_EVENT_MEMORY_H
+
+// Stub for this part of Chromium tracing that we haven't yet
+// imported.
+// The Chromium code relies on a locally patched tcmalloc.
+// See 5bc71bae28ea03689dbf50fe6baa15b574319091 in the Chromium
+// repository.
+
+#define INTERNAL_TRACE_MEMORY(category_group, name)
+
+#endif /* KUDU_DEBUG_TRACE_EVENT_MEMORY_H */

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/debug/trace_event_synthetic_delay.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/debug/trace_event_synthetic_delay.cc b/be/src/kudu/util/debug/trace_event_synthetic_delay.cc
new file mode 100644
index 0000000..947ab88
--- /dev/null
+++ b/be/src/kudu/util/debug/trace_event_synthetic_delay.cc
@@ -0,0 +1,238 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "kudu/util/debug/trace_event_synthetic_delay.h"
+
+#include <cstring>
+#include <ostream>
+
+#include <glog/logging.h>
+
+#include "kudu/gutil/dynamic_annotations.h"
+#include "kudu/gutil/port.h"
+#include "kudu/gutil/singleton.h"
+
+namespace {
+const int kMaxSyntheticDelays = 32;
+}  // namespace
+
+namespace kudu {
+namespace debug {
+
+TraceEventSyntheticDelayClock::TraceEventSyntheticDelayClock() {}
+TraceEventSyntheticDelayClock::~TraceEventSyntheticDelayClock() {}
+
+class TraceEventSyntheticDelayRegistry : public TraceEventSyntheticDelayClock {
+ public:
+  static TraceEventSyntheticDelayRegistry* GetInstance();
+
+  TraceEventSyntheticDelay* GetOrCreateDelay(const char* name);
+  void ResetAllDelays();
+
+  // TraceEventSyntheticDelayClock implementation.
+  virtual MonoTime Now() OVERRIDE;
+
+ private:
+  TraceEventSyntheticDelayRegistry();
+
+  friend class Singleton<TraceEventSyntheticDelayRegistry>;
+
+  Mutex lock_;
+  TraceEventSyntheticDelay delays_[kMaxSyntheticDelays];
+  TraceEventSyntheticDelay dummy_delay_;
+  base::subtle::Atomic32 delay_count_;
+
+  DISALLOW_COPY_AND_ASSIGN(TraceEventSyntheticDelayRegistry);
+};
+
+TraceEventSyntheticDelay::TraceEventSyntheticDelay()
+    : mode_(STATIC), begin_count_(0), trigger_count_(0), clock_(nullptr) {}
+
+TraceEventSyntheticDelay::~TraceEventSyntheticDelay() {}
+
+TraceEventSyntheticDelay* TraceEventSyntheticDelay::Lookup(
+    const std::string& name) {
+  return TraceEventSyntheticDelayRegistry::GetInstance()->GetOrCreateDelay(
+      name.c_str());
+}
+
+void TraceEventSyntheticDelay::Initialize(
+    const std::string& name,
+    TraceEventSyntheticDelayClock* clock) {
+  name_ = name;
+  clock_ = clock;
+}
+
+void TraceEventSyntheticDelay::SetTargetDuration(const MonoDelta& target_duration) {
+  MutexLock lock(lock_);
+  target_duration_ = target_duration;
+  trigger_count_ = 0;
+  begin_count_ = 0;
+}
+
+void TraceEventSyntheticDelay::SetMode(Mode mode) {
+  MutexLock lock(lock_);
+  mode_ = mode;
+}
+
+void TraceEventSyntheticDelay::SetClock(TraceEventSyntheticDelayClock* clock) {
+  MutexLock lock(lock_);
+  clock_ = clock;
+}
+
+void TraceEventSyntheticDelay::Begin() {
+  // Note that we check for a non-zero target duration without locking to keep
+  // things quick for the common case when delays are disabled. Since the delay
+  // calculation is done with a lock held, it will always be correct. The only
+  // downside of this is that we may fail to apply some delays when the target
+  // duration changes.
+  ANNOTATE_BENIGN_RACE(&target_duration_, "Synthetic delay duration");
+  if (!target_duration_.Initialized())
+    return;
+
+  MonoTime start_time = clock_->Now();
+  {
+    MutexLock lock(lock_);
+    if (++begin_count_ != 1)
+      return;
+    end_time_ = CalculateEndTimeLocked(start_time);
+  }
+}
+
+void TraceEventSyntheticDelay::BeginParallel(MonoTime* out_end_time) {
+  // See note in Begin().
+  ANNOTATE_BENIGN_RACE(&target_duration_, "Synthetic delay duration");
+  if (!target_duration_.Initialized()) {
+    *out_end_time = MonoTime();
+    return;
+  }
+
+  MonoTime start_time = clock_->Now();
+  {
+    MutexLock lock(lock_);
+    *out_end_time = CalculateEndTimeLocked(start_time);
+  }
+}
+
+void TraceEventSyntheticDelay::End() {
+  // See note in Begin().
+  ANNOTATE_BENIGN_RACE(&target_duration_, "Synthetic delay duration");
+  if (!target_duration_.Initialized())
+    return;
+
+  MonoTime end_time;
+  {
+    MutexLock lock(lock_);
+    if (!begin_count_ || --begin_count_ != 0)
+      return;
+    end_time = end_time_;
+  }
+  if (end_time.Initialized())
+    ApplyDelay(end_time);
+}
+
+void TraceEventSyntheticDelay::EndParallel(const MonoTime& end_time) {
+  if (end_time.Initialized())
+    ApplyDelay(end_time);
+}
+
+MonoTime TraceEventSyntheticDelay::CalculateEndTimeLocked(
+    const MonoTime& start_time) {
+  if (mode_ == ONE_SHOT && trigger_count_++)
+    return MonoTime();
+  else if (mode_ == ALTERNATING && trigger_count_++ % 2)
+    return MonoTime();
+  return start_time + target_duration_;
+}
+
+void TraceEventSyntheticDelay::ApplyDelay(const MonoTime& end_time) {
+  TRACE_EVENT0("synthetic_delay", name_.c_str());
+  while (clock_->Now() < end_time) {
+    // Busy loop.
+  }
+}
+
+TraceEventSyntheticDelayRegistry*
+TraceEventSyntheticDelayRegistry::GetInstance() {
+  return Singleton<TraceEventSyntheticDelayRegistry>::get();
+}
+
+TraceEventSyntheticDelayRegistry::TraceEventSyntheticDelayRegistry()
+    : delay_count_(0) {}
+
+TraceEventSyntheticDelay* TraceEventSyntheticDelayRegistry::GetOrCreateDelay(
+    const char* name) {
+  // Try to find an existing delay first without locking to make the common case
+  // fast.
+  int delay_count = base::subtle::Acquire_Load(&delay_count_);
+  for (int i = 0; i < delay_count; ++i) {
+    if (!strcmp(name, delays_[i].name_.c_str()))
+      return &delays_[i];
+  }
+
+  MutexLock lock(lock_);
+  delay_count = base::subtle::Acquire_Load(&delay_count_);
+  for (int i = 0; i < delay_count; ++i) {
+    if (!strcmp(name, delays_[i].name_.c_str()))
+      return &delays_[i];
+  }
+
+  DCHECK(delay_count < kMaxSyntheticDelays)
+      << "must increase kMaxSyntheticDelays";
+  if (delay_count >= kMaxSyntheticDelays)
+    return &dummy_delay_;
+
+  delays_[delay_count].Initialize(std::string(name), this);
+  base::subtle::Release_Store(&delay_count_, delay_count + 1);
+  return &delays_[delay_count];
+}
+
+MonoTime TraceEventSyntheticDelayRegistry::Now() {
+  return MonoTime::Now();
+}
+
+void TraceEventSyntheticDelayRegistry::ResetAllDelays() {
+  MutexLock lock(lock_);
+  int delay_count = base::subtle::Acquire_Load(&delay_count_);
+  for (int i = 0; i < delay_count; ++i) {
+    delays_[i].SetTargetDuration(MonoDelta());
+    delays_[i].SetClock(this);
+  }
+}
+
+void ResetTraceEventSyntheticDelays() {
+  TraceEventSyntheticDelayRegistry::GetInstance()->ResetAllDelays();
+}
+
+}  // namespace debug
+}  // namespace kudu
+
+namespace trace_event_internal {
+
+ScopedSyntheticDelay::ScopedSyntheticDelay(const char* name,
+                                           AtomicWord* impl_ptr)
+    : delay_impl_(GetOrCreateDelay(name, impl_ptr)) {
+  delay_impl_->BeginParallel(&end_time_);
+}
+
+ScopedSyntheticDelay::~ScopedSyntheticDelay() {
+  delay_impl_->EndParallel(end_time_);
+}
+
+kudu::debug::TraceEventSyntheticDelay* GetOrCreateDelay(
+    const char* name,
+    AtomicWord* impl_ptr) {
+  kudu::debug::TraceEventSyntheticDelay* delay_impl =
+      reinterpret_cast<kudu::debug::TraceEventSyntheticDelay*>(
+          base::subtle::Acquire_Load(impl_ptr));
+  if (!delay_impl) {
+    delay_impl = kudu::debug::TraceEventSyntheticDelayRegistry::GetInstance()
+                     ->GetOrCreateDelay(name);
+    base::subtle::Release_Store(
+        impl_ptr, reinterpret_cast<AtomicWord>(delay_impl));
+  }
+  return delay_impl;
+}
+
+}  // namespace trace_event_internal

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/debug/trace_event_synthetic_delay.h
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/debug/trace_event_synthetic_delay.h b/be/src/kudu/util/debug/trace_event_synthetic_delay.h
new file mode 100644
index 0000000..5866814
--- /dev/null
+++ b/be/src/kudu/util/debug/trace_event_synthetic_delay.h
@@ -0,0 +1,166 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// The synthetic delay framework makes it possible to dynamically inject
+// arbitrary delays into different parts of the codebase. This can be used,
+// for instance, for testing various task scheduling algorithms.
+//
+// The delays are specified in terms of a target duration for a given block of
+// code. If the code executes faster than the duration, the thread is made to
+// sleep until the deadline is met.
+//
+// Code can be instrumented for delays with two sets of macros. First, for
+// delays that should apply within a scope, use the following macro:
+//
+//   TRACE_EVENT_SYNTHETIC_DELAY("cc.LayerTreeHost.DrawAndSwap");
+//
+// For delaying operations that span multiple scopes, use:
+//
+//   TRACE_EVENT_SYNTHETIC_DELAY_BEGIN("cc.Scheduler.BeginMainFrame");
+//   ...
+//   TRACE_EVENT_SYNTHETIC_DELAY_END("cc.Scheduler.BeginMainFrame");
+//
+// Here BEGIN establishes the start time for the delay and END executes the
+// delay based on the remaining time. If BEGIN is called multiple times in a
+// row, END should be called a corresponding number of times. Only the last
+// call to END will have an effect.
+//
+// Note that a single delay may begin on one thread and end on another. This
+// implies that a single delay cannot be applied in several threads at once.
+
+#ifndef KUDU_UTIL_DEBUG_TRACE_EVENT_SYNTHETIC_DELAY_H_
+#define KUDU_UTIL_DEBUG_TRACE_EVENT_SYNTHETIC_DELAY_H_
+
+#include <string>
+
+#include "kudu/gutil/atomicops.h"
+#include "kudu/gutil/macros.h"
+#include "kudu/util/debug/trace_event.h"
+#include "kudu/util/monotime.h"
+#include "kudu/util/mutex.h"
+
+// Apply a named delay in the current scope.
+#define TRACE_EVENT_SYNTHETIC_DELAY(name)                                     \
+  static AtomicWord INTERNAL_TRACE_EVENT_UID(impl_ptr) = 0;                   \
+  trace_event_internal::ScopedSyntheticDelay INTERNAL_TRACE_EVENT_UID(delay)( \
+      name, &INTERNAL_TRACE_EVENT_UID(impl_ptr));
+
+// Begin a named delay, establishing its timing start point. May be called
+// multiple times as long as the calls to TRACE_EVENT_SYNTHETIC_DELAY_END are
+// balanced. Only the first call records the timing start point.
+#define TRACE_EVENT_SYNTHETIC_DELAY_BEGIN(name)                          \
+  do {                                                                   \
+    static AtomicWord impl_ptr = 0;                                      \
+    trace_event_internal::GetOrCreateDelay(name, &impl_ptr)->Begin();    \
+  } while (false)
+
+// End a named delay. The delay is applied only if this call matches the
+// first corresponding call to TRACE_EVENT_SYNTHETIC_DELAY_BEGIN with the
+// same delay.
+#define TRACE_EVENT_SYNTHETIC_DELAY_END(name)                         \
+  do {                                                                \
+    static AtomicWord impl_ptr = 0;                                   \
+    trace_event_internal::GetOrCreateDelay(name, &impl_ptr)->End();   \
+  } while (false)
+
+namespace kudu {
+namespace debug {
+
+// Time source for computing delay durations. Used for testing.
+class TRACE_EVENT_API_CLASS_EXPORT TraceEventSyntheticDelayClock {
+ public:
+  TraceEventSyntheticDelayClock();
+  virtual ~TraceEventSyntheticDelayClock();
+  virtual MonoTime Now() = 0;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(TraceEventSyntheticDelayClock);
+};
+
+// Single delay point instance.
+class TRACE_EVENT_API_CLASS_EXPORT TraceEventSyntheticDelay {
+ public:
+  enum Mode {
+    STATIC,      // Apply the configured delay every time.
+    ONE_SHOT,    // Apply the configured delay just once.
+    ALTERNATING  // Apply the configured delay every other time.
+  };
+
+  // Returns an existing named delay instance or creates a new one with |name|.
+  static TraceEventSyntheticDelay* Lookup(const std::string& name);
+
+  void SetTargetDuration(const MonoDelta& target_duration);
+  void SetMode(Mode mode);
+  void SetClock(TraceEventSyntheticDelayClock* clock);
+
+  // Begin the delay, establishing its timing start point. May be called
+  // multiple times as long as the calls to End() are balanced. Only the first
+  // call records the timing start point.
+  void Begin();
+
+  // End the delay. The delay is applied only if this call matches the first
+  // corresponding call to Begin() with the same delay.
+  void End();
+
+  // Begin a parallel instance of the delay. Several parallel instances may be
+  // active simultaneously and will complete independently. The computed end
+  // time for the delay is stored in |out_end_time|, which should later be
+  // passed to EndParallel().
+  void BeginParallel(MonoTime* out_end_time);
+
+  // End a previously started parallel delay. |end_time| is the delay end point
+  // computed by BeginParallel().
+  void EndParallel(const MonoTime& end_time);
+
+ private:
+  TraceEventSyntheticDelay();
+  ~TraceEventSyntheticDelay();
+  friend class TraceEventSyntheticDelayRegistry;
+
+  void Initialize(const std::string& name,
+                  TraceEventSyntheticDelayClock* clock);
+  MonoTime CalculateEndTimeLocked(const MonoTime& start_time);
+  void ApplyDelay(const MonoTime& end_time);
+
+  Mutex lock_;
+  Mode mode_;
+  std::string name_;
+  int begin_count_;
+  int trigger_count_;
+  MonoTime end_time_;
+  MonoDelta target_duration_;
+  TraceEventSyntheticDelayClock* clock_;
+
+  DISALLOW_COPY_AND_ASSIGN(TraceEventSyntheticDelay);
+};
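+
+// A minimal configuration sketch (illustrative only): make the delay point
+// named "cc.LayerTreeHost.DrawAndSwap" (instrumented elsewhere with
+// TRACE_EVENT_SYNTHETIC_DELAY) take at least 16 ms the first time it runs.
+//
+//   TraceEventSyntheticDelay* delay =
+//       TraceEventSyntheticDelay::Lookup("cc.LayerTreeHost.DrawAndSwap");
+//   delay->SetTargetDuration(MonoDelta::FromMilliseconds(16));
+//   delay->SetMode(TraceEventSyntheticDelay::ONE_SHOT);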
+
+// Set the target durations of all registered synthetic delay points to zero.
+TRACE_EVENT_API_CLASS_EXPORT void ResetTraceEventSyntheticDelays();
+
+}  // namespace debug
+}  // namespace kudu
+
+namespace trace_event_internal {
+
+// Helper class for scoped delays. Do not use directly.
+class TRACE_EVENT_API_CLASS_EXPORT ScopedSyntheticDelay {
+ public:
+  explicit ScopedSyntheticDelay(const char* name,
+                                AtomicWord* impl_ptr);
+  ~ScopedSyntheticDelay();
+
+ private:
+  kudu::debug::TraceEventSyntheticDelay* delay_impl_;
+  kudu::MonoTime end_time_;
+
+  DISALLOW_COPY_AND_ASSIGN(ScopedSyntheticDelay);
+};
+
+// Helper for registering delays. Do not use directly.
+TRACE_EVENT_API_CLASS_EXPORT kudu::debug::TraceEventSyntheticDelay*
+    GetOrCreateDelay(const char* name, AtomicWord* impl_ptr);
+
+}  // namespace trace_event_internal
+
+#endif /* KUDU_UTIL_DEBUG_TRACE_EVENT_SYNTHETIC_DELAY_H_ */

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/debug/trace_logging.h
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/debug/trace_logging.h b/be/src/kudu/util/debug/trace_logging.h
new file mode 100644
index 0000000..1a3b39e
--- /dev/null
+++ b/be/src/kudu/util/debug/trace_logging.h
@@ -0,0 +1,132 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+// This header defines the following macros:
+//
+// VLOG_AND_TRACE(category, vlevel)
+//
+//   Write a log message to VLOG(vlevel) as well as the current
+//   trace event buffer as an "INSTANT" trace event type. If the
+//   given vlog level is not enabled, this will still result in a
+//   trace buffer entry.
+//
+//   The provided 'category' should be a trace event category, which
+//   allows the users to filter which trace events to enable.
+//   For example:
+//
+//    VLOG_AND_TRACE("my_subsystem", 1) << "This always shows up in trace buffers "
+//        << "but only shows up in the log if VLOG(1) level logging is enabled.";
+//
+//   For most VLOG(1) level log messages, it is reasonable to use this macro.
+//   Note that this macro has slightly more overhead than just using VLOG(1).
+//
+//   Note that, like VLOG(n), this macro avoids evaluating its arguments unless
+//   either trace recording or VLOG(n) is enabled. In the case that both are enabled,
+//   the arguments are only evaluated once.
+//
+//
+// LOG_AND_TRACE(category, severity)
+//
+//   Same as the above, but always logs at the given severity level in addition
+//   to writing to the trace buffer.
+
+#ifndef KUDU_DEBUG_TRACE_LOGGING_H
+#define KUDU_DEBUG_TRACE_LOGGING_H
+
+#include <glog/logging.h>
+#include <string>
+
+#include "kudu/gutil/macros.h"
+#include "kudu/util/debug/trace_event.h"
+
+// The inner workings of these macros are a bit arcane:
+// - We make use of the fact that a block can be embedded within a ternary expression.
+//   This allows us to determine whether the trace event is enabled before we decide
+//   to evaluate the arguments.
+// - We have to use google::LogMessageVoidify so that we can put 'void(0)' on one side
+//   of the ternary expression and the log stream on the other. This technique is
+//   cribbed from glog/logging.h.
+#define VLOG_AND_TRACE_INTERNAL(category, vlevel) \
+  kudu::debug::TraceGLog(__FILE__, __LINE__, category, google::GLOG_INFO, \
+                         /* send_to_log= */VLOG_IS_ON(vlevel)).stream()
+
+#define VLOG_AND_TRACE(category, vlevel)                              \
+  !( {                                                                \
+      bool enabled;                                                   \
+      TRACE_EVENT_CATEGORY_GROUP_ENABLED(category, &enabled);         \
+      enabled || VLOG_IS_ON(vlevel);                                  \
+    } ) ? static_cast<void>(0) :                                      \
+          google::LogMessageVoidify() & VLOG_AND_TRACE_INTERNAL(category, vlevel) // NOLINT(*)
+
+
+#define LOG_AND_TRACE(category, severity) \
+  kudu::debug::TraceGLog(__FILE__, __LINE__, category, \
+                        google::GLOG_ ## severity, /* send_to_log= */true).stream()
+
+namespace kudu {
+namespace debug {
+
+class TraceGLog {
+ public:
+  TraceGLog(const char* file, int line, const char* category,
+           google::LogSeverity severity, bool send_to_log)
+    : sink_(category),
+      google_msg_(file, line, severity, &sink_, send_to_log) {
+  }
+
+  std::ostream& stream() {
+    return google_msg_.stream();
+  }
+
+ private:
+  class TraceLogSink : public google::LogSink {
+   public:
+    explicit TraceLogSink(const char* category) : category_(category) {}
+    void send(google::LogSeverity severity, const char* full_filename,
+              const char* base_filename, int line,
+              const struct ::tm* tm_time, const char* message,
+              size_t message_len) override {
+      // Rather than calling TRACE_EVENT_INSTANT here, we have to do it from
+      // the destructor. This is because glog holds its internal mutex while
+      // calling send(). So, if we try to use TRACE_EVENT here, and --trace_to_console
+      // is enabled, then we'd end up calling back into glog when its lock is already
+      // held. glog isn't re-entrant, so that causes a crash.
+      //
+      // By just storing the string here, and then emitting the trace in the dtor,
+      // we defer the tracing until the google::LogMessage has destructed and the
+      // glog lock is available again.
+      str_ = ToString(severity, base_filename, line,
+                      tm_time, message, message_len);
+    }
+    virtual ~TraceLogSink() {
+      TRACE_EVENT_INSTANT1(category_, "vlog", TRACE_EVENT_SCOPE_THREAD,
+                           "msg", str_);
+    }
+
+   private:
+    const char* const category_;
+    std::string str_;
+  };
+
+  TraceLogSink sink_;
+  google::LogMessage google_msg_;
+};
+
+} // namespace debug
+} // namespace kudu
+#endif /* KUDU_DEBUG_TRACE_LOGGING_H */

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/debug/unwind_safeness.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/debug/unwind_safeness.cc b/be/src/kudu/util/debug/unwind_safeness.cc
new file mode 100644
index 0000000..c8e0adf
--- /dev/null
+++ b/be/src/kudu/util/debug/unwind_safeness.cc
@@ -0,0 +1,164 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Override various libdl functions which can race with libunwind.
+// The overridden versions set a threadlocal variable and our
+// stack-tracing code checks the threadlocal before calling into
+// libunwind.
+//
+// Based on public domain code by Aliaksey Kandratsenka at
+// https://github.com/alk/unwind_safeness_helper
+
+#include "kudu/util/debug/unwind_safeness.h"
+
+#include <dlfcn.h>
+#include <stddef.h>
+
+#include <ostream>
+
+#include <glog/logging.h>
+
+#define CALL_ORIG(func_name, ...) \
+  ((decltype(&func_name))g_orig_ ## func_name)(__VA_ARGS__)
+
+// Don't hook dl_iterate_phdr in TSAN builds since TSAN already instruments this
+// function and blocks signals while calling it. And skip it for macOS; it
+// doesn't exist there.
+#if !defined(THREAD_SANITIZER) && !defined(__APPLE__)
+#define HOOK_DL_ITERATE_PHDR 1
+#endif
+
+typedef int (*dl_iterate_phdr_cbtype)(struct dl_phdr_info *, size_t, void *);
+
+namespace {
+
+// Whether InitializeIfNecessary() has been called.
+bool g_initted;
+
+// The original versions of our wrapped functions.
+void* g_orig_dlopen;
+void* g_orig_dlclose;
+#ifdef HOOK_DL_ITERATE_PHDR
+void* g_orig_dl_iterate_phdr;
+#endif
+
+// The depth of calls into libdl.
+__thread int g_unsafeness_depth;
+
+// Scoped helper to track the recursion depth of calls into libdl
+struct ScopedBumpDepth {
+  ScopedBumpDepth() {
+    g_unsafeness_depth++;
+  }
+  ~ScopedBumpDepth() {
+    g_unsafeness_depth--;
+  }
+};
+
+void *dlsym_or_die(const char *sym) {
+  dlerror();
+  void* ret = dlsym(RTLD_NEXT, sym);
+  char* error = dlerror();
+  CHECK(!error) << "failed to find symbol " << sym << ": " << error;
+  return ret;
+}
+
+// Initialize the global variables which store the original references. This is
+// set up as a constructor so that we're guaranteed to call this before main()
+// while we are still single-threaded.
+//
+// NOTE: We _also_ explicitly call this from each of the wrappers, because
+// there are some cases where the constructors of dynamic libraries may call
+// dlopen, and there is no guarantee that our own constructor runs before
+// the constructors of other libraries.
+//
+// A couple examples of the above:
+//
+// 1) In ASAN builds, the sanitizer runtime ends up calling dl_iterate_phdr from its
+//    initialization.
+// 2) OpenSSL in FIPS mode calls dlopen() within its constructor.
+__attribute__((constructor))
+void InitIfNecessary() {
+  // Dynamic library initialization is always single-threaded, so there's no
+  // need for any synchronization here.
+  if (g_initted) return;
+
+  g_orig_dlopen = dlsym_or_die("dlopen");
+  g_orig_dlclose = dlsym_or_die("dlclose");
+#ifdef HOOK_DL_ITERATE_PHDR
+  // Failing to hook dl_iterate_phdr is non-fatal.
+  //
+  // In toolchains where the linker is passed --as-needed by default, a
+  // dynamically linked binary that doesn't directly reference any kudu_util
+  // symbols will omit a DT_NEEDED entry for kudu_util. Such a binary will
+  // no doubt emit a DT_NEEDED entry for libc, which means libc will wind up
+  // _before_ kudu_util in dlsym's search order. The net effect: the dlsym()
+  // call below will fail.
+  //
+  // All Ubuntu releases since Natty[1] behave in this way, except that many
+  // of them are also vulnerable to a glibc bug[2] that'll cause such a
+  // failure to go unreported by dlerror(). In newer releases, the failure
+  // is reported and dlsym_or_die() crashes the process.
+  //
+  // Given that the subset of affected binaries is small, and given that
+  // dynamic linkage isn't used in production anyway, we'll just treat the
+  // hook attempt as a best effort. Affected binaries that actually attempt
+  // to invoke dl_iterate_phdr will dereference a null pointer and crash, so
+  // if this ever becomes a problem, we'll know right away.
+  //
+  // 1. https://wiki.ubuntu.com/NattyNarwhal/ToolchainTransition
+  // 2. https://sourceware.org/bugzilla/show_bug.cgi?id=19509
+  g_orig_dl_iterate_phdr = dlsym(RTLD_NEXT, "dl_iterate_phdr");
+#endif
+  g_initted = true;
+}
+
+} // anonymous namespace
+
+namespace kudu {
+namespace debug {
+
+bool SafeToUnwindStack() {
+  return g_unsafeness_depth == 0;
+}
+
+} // namespace debug
+} // namespace kudu
+
+extern "C" {
+
+void *dlopen(const char *filename, int flag) { // NOLINT
+  InitIfNecessary();
+  ScopedBumpDepth d;
+  return CALL_ORIG(dlopen, filename, flag);
+}
+
+int dlclose(void *handle) { // NOLINT
+  InitIfNecessary();
+  ScopedBumpDepth d;
+  return CALL_ORIG(dlclose, handle);
+}
+
+#ifdef HOOK_DL_ITERATE_PHDR
+int dl_iterate_phdr(dl_iterate_phdr_cbtype callback, void *data) { // NOLINT
+  InitIfNecessary();
+  ScopedBumpDepth d;
+  return CALL_ORIG(dl_iterate_phdr, callback, data);
+}
+#endif
+
+}

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/debug/unwind_safeness.h
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/debug/unwind_safeness.h b/be/src/kudu/util/debug/unwind_safeness.h
new file mode 100644
index 0000000..4aab6f9
--- /dev/null
+++ b/be/src/kudu/util/debug/unwind_safeness.h
@@ -0,0 +1,29 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#pragma once
+
+namespace kudu {
+namespace debug {
+
+// Return true if it is currently safe to unwind the call stack.
+//
+// It's almost always safe unless we are in a signal handler context
+// inside a call into libdl.
+bool SafeToUnwindStack();
+
+} // namespace debug
+} // namespace kudu
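
[Editor's note] For context on how this header is meant to be consumed: a stack-collecting signal handler should consult SafeToUnwindStack() before unwinding, since the interrupted thread may be in the middle of a libdl call. A hedged sketch follows; the handler and the use of backtrace() as a stand-in unwinder are illustrative only, not the actual Kudu stack collector.

#include <csignal>
#include <execinfo.h>   // backtrace()/backtrace_symbols_fd(), stand-in unwinder

#include "kudu/util/debug/unwind_safeness.h"

// Hypothetical signal handler: only SafeToUnwindStack() is the real API above.
void DumpStackHandler(int /*signum*/) {
  if (!kudu::debug::SafeToUnwindStack()) {
    // The interrupted thread is inside dlopen()/dlclose()/dl_iterate_phdr();
    // unwinding now could observe loader state mid-update, so just bail out.
    return;
  }
  void* frames[64];
  int n = backtrace(frames, 64);
  backtrace_symbols_fd(frames, n, /* fd = */ 2);  // write the frames to stderr
}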

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/debug_ref_counted.h
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/debug_ref_counted.h b/be/src/kudu/util/debug_ref_counted.h
new file mode 100644
index 0000000..7c2deca
--- /dev/null
+++ b/be/src/kudu/util/debug_ref_counted.h
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef KUDU_UTIL_DEBUG_REF_COUNTED_H_
+#define KUDU_UTIL_DEBUG_REF_COUNTED_H_
+
+#include <glog/logging.h>
+
+#include "kudu/gutil/ref_counted.h"
+#include "kudu/util/debug-util.h"
+
+namespace kudu {
+
+// For use in debugging. Change a ref-counted class to inherit from this,
+// instead of RefCountedThreadSafe, and fill your logs with stack traces.
+template <class T, typename Traits = DefaultRefCountedThreadSafeTraits<T> >
+class DebugRefCountedThreadSafe : public RefCountedThreadSafe<T, Traits> {
+ public:
+  DebugRefCountedThreadSafe() {}
+
+  void AddRef() const {
+    RefCountedThreadSafe<T, Traits>::AddRef();
+    LOG(INFO) << "Incremented ref on " << this << ":\n" << GetStackTrace();
+  }
+
+  void Release() const {
+    LOG(INFO) << "Decrementing ref on " << this << ":\n" << GetStackTrace();
+    RefCountedThreadSafe<T, Traits>::Release();
+  }
+
+ protected:
+  ~DebugRefCountedThreadSafe() {}
+
+ private:
+  friend struct DefaultRefCountedThreadSafeTraits<T>;
+
+  DISALLOW_COPY_AND_ASSIGN(DebugRefCountedThreadSafe);
+};
+
+} // namespace kudu
+
+#endif // KUDU_UTIL_DEBUG_REF_COUNTED_H_
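
[Editor's note] As the header comment says, this class is a drop-in debugging substitute for RefCountedThreadSafe. A hedged sketch of switching a class over (MyTracked is a made-up class, not from the patch):

#include "kudu/gutil/ref_counted.h"
#include "kudu/util/debug_ref_counted.h"

// Hypothetical class: changing the base from RefCountedThreadSafe<MyTracked>
// to DebugRefCountedThreadSafe<MyTracked> makes every AddRef()/Release()
// (i.e. every scoped_refptr copy and destruction) log a stack trace.
class MyTracked : public kudu::DebugRefCountedThreadSafe<MyTracked> {
 public:
  int value() const { return 42; }
};

void Example() {
  scoped_refptr<MyTracked> a(new MyTracked());  // logs "Incremented ref on ..."
  scoped_refptr<MyTracked> b = a;               // logs a second increment
}  // both references released here, each logging "Decrementing ref on ..."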

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/decimal_util-test.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/decimal_util-test.cc b/be/src/kudu/util/decimal_util-test.cc
new file mode 100644
index 0000000..d7bfc35
--- /dev/null
+++ b/be/src/kudu/util/decimal_util-test.cc
@@ -0,0 +1,81 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <string>
+
+#include <gtest/gtest.h>
+
+#include "kudu/util/decimal_util.h"
+
+using std::string;
+
+namespace kudu {
+
+TEST(TestDecimalUtil, TestMaxUnscaledDecimal) {
+  ASSERT_EQ(9, MaxUnscaledDecimal(1));
+  ASSERT_EQ(99999, MaxUnscaledDecimal(5));
+  ASSERT_EQ(kMaxUnscaledDecimal32, MaxUnscaledDecimal(kMaxDecimal32Precision));
+  ASSERT_EQ(kMaxUnscaledDecimal64, MaxUnscaledDecimal(kMaxDecimal64Precision));
+  ASSERT_EQ(kMaxUnscaledDecimal128, MaxUnscaledDecimal(kMaxDecimal128Precision));
+}
+
+TEST(TestDecimalUtil, TestMinUnscaledDecimal) {
+  ASSERT_EQ(-9, MinUnscaledDecimal(1));
+  ASSERT_EQ(-99999, MinUnscaledDecimal(5));
+  ASSERT_EQ(kMinUnscaledDecimal32, MinUnscaledDecimal(kMaxDecimal32Precision));
+  ASSERT_EQ(kMinUnscaledDecimal64, MinUnscaledDecimal(kMaxDecimal64Precision));
+  ASSERT_EQ(kMinUnscaledDecimal128, MinUnscaledDecimal(kMaxDecimal128Precision));
+}
+
+TEST(TestDecimalUtil, TestToString) {
+  ASSERT_EQ("999999999",
+            DecimalToString(kMaxUnscaledDecimal32, kDefaultDecimalScale));
+  ASSERT_EQ("0.999999999",
+            DecimalToString(kMaxUnscaledDecimal32, kMaxDecimal32Precision));
+  ASSERT_EQ("-999999999",
+            DecimalToString(kMinUnscaledDecimal32, kDefaultDecimalScale));
+  ASSERT_EQ("-0.999999999",
+            DecimalToString(kMinUnscaledDecimal32, kMaxDecimal32Precision));
+
+  ASSERT_EQ("999999999999999999",
+            DecimalToString(kMaxUnscaledDecimal64, kDefaultDecimalScale));
+  ASSERT_EQ("0.999999999999999999",
+            DecimalToString(kMaxUnscaledDecimal64, kMaxDecimal64Precision));
+  ASSERT_EQ("-999999999999999999",
+            DecimalToString(kMinUnscaledDecimal64, kDefaultDecimalScale));
+  ASSERT_EQ("-0.999999999999999999",
+            DecimalToString(kMinUnscaledDecimal64, kMaxDecimal64Precision));
+
+  ASSERT_EQ("99999999999999999999999999999999999999",
+            DecimalToString(kMaxUnscaledDecimal128, kDefaultDecimalScale));
+  ASSERT_EQ("0.99999999999999999999999999999999999999",
+            DecimalToString(kMaxUnscaledDecimal128, kMaxDecimal128Precision));
+  ASSERT_EQ("-99999999999999999999999999999999999999",
+            DecimalToString(kMinUnscaledDecimal128, kDefaultDecimalScale));
+  ASSERT_EQ("-0.99999999999999999999999999999999999999",
+            DecimalToString(kMinUnscaledDecimal128, kMaxDecimal128Precision));
+
+  ASSERT_EQ("0", DecimalToString(0, 0));
+  ASSERT_EQ("12345", DecimalToString(12345, 0));
+  ASSERT_EQ("-12345", DecimalToString(-12345, 0));
+  ASSERT_EQ("123.45", DecimalToString(12345, 2));
+  ASSERT_EQ("-123.45", DecimalToString(-12345, 2));
+  ASSERT_EQ("0.00012345", DecimalToString(12345, 8));
+  ASSERT_EQ("-0.00012345", DecimalToString(-12345, 8));
+}
+
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/decimal_util.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/decimal_util.cc b/be/src/kudu/util/decimal_util.cc
new file mode 100644
index 0000000..0e04494
--- /dev/null
+++ b/be/src/kudu/util/decimal_util.cc
@@ -0,0 +1,89 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/util/decimal_util.h"
+
+#include <string>
+
+#include <glog/logging.h>
+
+#include "kudu/gutil/port.h"
+
+namespace kudu {
+
+using std::string;
+
+// Workaround for an ASAN build issue documented here:
+// https://bugs.llvm.org/show_bug.cgi?id=16404
+ATTRIBUTE_NO_SANITIZE_UNDEFINED
+int128_t MaxUnscaledDecimal(int8_t precision) {
+  DCHECK_GE(precision, kMinDecimalPrecision);
+  DCHECK_LE(precision, kMaxDecimalPrecision);
+  int128_t result = 1;
+  for (; precision > 0; precision--) {
+    result = result * 10;
+  }
+  return result - 1;
+}
+
+int128_t MinUnscaledDecimal(int8_t precision) {
+  return -MaxUnscaledDecimal(precision);
+}
+
+// Workaround for an ASAN build issue documented here:
+// https://bugs.llvm.org/show_bug.cgi?id=16404
+ATTRIBUTE_NO_SANITIZE_UNDEFINED
+string DecimalToString(int128_t d, int8_t scale) {
+  // 38 digits, 1 extra leading zero, decimal point,
+  // and sign are good for 128-bit or smaller decimals.
+  char local[41];
+  char *p = local + sizeof(local);
+  int128_t n = d < 0 ? -d : d;
+  int position = 0;
+  while (n) {
+    // Print the decimal in the scale position.
+    // No decimal is output when scale is 0.
+    if (scale != 0 && position == scale) {
+      *--p = '.';
+    }
+    // Emit the next (least significant) digit.
+    *--p = '0' + n % 10;
+    n /= 10;
+    position++;
+  }
+  // True if the value is between 1 and -1.
+  bool fractional = position <= scale;
+  // Pad with zeros up to the scale.
+  while (position < scale) {
+    *--p = '0';
+    position++;
+  }
+  // Add leading "0.".
+  if (fractional) {
+    if (d != 0) {
+      *--p = '.';
+    }
+    *--p = '0';
+  }
+  // Add sign for negative values.
+  if (d < 0) {
+    *--p = '-';
+  }
+  return string(p, local + sizeof(local));
+}
+
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/decimal_util.h
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/decimal_util.h b/be/src/kudu/util/decimal_util.h
new file mode 100644
index 0000000..a465412
--- /dev/null
+++ b/be/src/kudu/util/decimal_util.h
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <stdint.h>
+#include <string>
+
+#include "kudu/util/int128.h"
+
+namespace kudu {
+  // Maximum precision and absolute value of a Decimal that can be stored
+  // in 4 bytes.
+  static const int8_t kMaxDecimal32Precision = 9;
+  static const int32_t kMaxUnscaledDecimal32 = 999999999; // 9 9's
+  static const int32_t kMinUnscaledDecimal32 = -kMaxUnscaledDecimal32; // 9 9's
+
+  // Maximum precision and absolute value of a Decimal that can be
+  // stored in 8 bytes.
+  static const int8_t kMaxDecimal64Precision = 18;
+  static const int64_t kMaxUnscaledDecimal64 = 999999999999999999; // 18 9's
+  static const int64_t kMinUnscaledDecimal64 = -kMaxUnscaledDecimal64; // 18 9's
+
+  // Maximum precision and absolute value of a Decimal that can be
+  // stored in 16 bytes.
+  static const int8_t kMaxDecimal128Precision = 38;
+  // Hacky calculation because int128 literals are not supported.
+  static const int128_t kMaxUnscaledDecimal128 =
+      (((static_cast<int128_t>(999999999999999999) * 1000000000000000000) +
+          999999999999999999) * 100) + 99; // 38 9's
+  static const int128_t kMinUnscaledDecimal128 = -kMaxUnscaledDecimal128;
+
+  // Minimum and maximum precision for any Decimal.
+  static const int8_t kMinDecimalPrecision = 1;
+  static const int8_t kMaxDecimalPrecision = kMaxDecimal128Precision;
+  // Maximum absolute value for any Decimal.
+  static const int128_t kMaxUnscaledDecimal = kMaxUnscaledDecimal128;
+  static const int128_t kMinUnscaledDecimal = kMinUnscaledDecimal128;
+
+  // Minimum scale for any Decimal.
+  static const int8_t kMinDecimalScale = 0;
+  static const int8_t kDefaultDecimalScale = 0;
+  // The maximum scale is the Decimal's precision.
+
+  // Returns the maximum unscaled decimal value that can be stored
+  // based on the precision.
+  int128_t MaxUnscaledDecimal(int8_t precision);
+
+  // Returns the minimum unscaled decimal value that can be stored
+  // based on the precision.
+  int128_t MinUnscaledDecimal(int8_t precision);
+
+  std::string DecimalToString(int128_t value, int8_t scale);
+
+} // namespace kudu
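
[Editor's note] A hedged usage sketch tying these helpers together; RenderDecimal is a made-up name, and int128_t is the typedef pulled in via kudu/util/int128.h. It is not part of the patch.

#include <string>

#include <glog/logging.h>

#include "kudu/util/decimal_util.h"

namespace kudu {

// Hypothetical helper: render 'unscaled' as a DECIMAL(precision, scale)
// string, CHECK-failing if the value does not fit in the declared precision.
std::string RenderDecimal(int128_t unscaled, int8_t precision, int8_t scale) {
  CHECK_LE(unscaled, MaxUnscaledDecimal(precision));
  CHECK_GE(unscaled, MinUnscaledDecimal(precision));
  return DecimalToString(unscaled, scale);
}

} // namespace kudu

// e.g. kudu::RenderDecimal(12345, 9, 2)  returns "123.45"
//      kudu::RenderDecimal(-12345, 9, 8) returns "-0.00012345"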

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/easy_json-test.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/easy_json-test.cc b/be/src/kudu/util/easy_json-test.cc
new file mode 100644
index 0000000..7074512
--- /dev/null
+++ b/be/src/kudu/util/easy_json-test.cc
@@ -0,0 +1,106 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <string>
+
+#include <gtest/gtest.h>
+#include <rapidjson/document.h>
+#include <rapidjson/rapidjson.h>
+
+#include "kudu/gutil/integral_types.h"
+#include "kudu/util/easy_json.h"
+#include "kudu/util/test_util.h"
+
+using rapidjson::SizeType;
+using rapidjson::Value;
+using std::string;
+
+namespace kudu {
+
+class EasyJsonTest: public KuduTest {};
+
+TEST_F(EasyJsonTest, TestNull) {
+  EasyJson ej;
+  ASSERT_TRUE(ej.value().IsNull());
+}
+
+TEST_F(EasyJsonTest, TestBasic) {
+  EasyJson ej;
+  ej.SetObject();
+  ej.Set("1", true);
+  ej.Set("2", kint32min);
+  ej.Set("4", kint64min);
+  ej.Set("6", 1.0);
+  ej.Set("7", "string");
+
+  Value& v = ej.value();
+
+  ASSERT_EQ(v["1"].GetBool(), true);
+  ASSERT_EQ(v["2"].GetInt(), kint32min);
+  ASSERT_EQ(v["4"].GetInt64(), kint64min);
+  ASSERT_EQ(v["6"].GetDouble(), 1.0);
+  ASSERT_EQ(string(v["7"].GetString()), "string");
+}
+
+TEST_F(EasyJsonTest, TestNested) {
+  EasyJson ej;
+  ej.SetObject();
+  ej.Get("nested").SetObject();
+  ej.Get("nested").Set("nested_attr", true);
+  ASSERT_EQ(ej.value()["nested"]["nested_attr"].GetBool(), true);
+
+  ej.Get("nested_array").SetArray();
+  ej.Get("nested_array").PushBack(1);
+  ej.Get("nested_array").PushBack(2);
+  ASSERT_EQ(ej.value()["nested_array"][SizeType(0)].GetInt(), 1);
+  ASSERT_EQ(ej.value()["nested_array"][SizeType(1)].GetInt(), 2);
+}
+
+TEST_F(EasyJsonTest, TestCompactSyntax) {
+  EasyJson ej;
+  ej["nested"]["nested_attr"] = true;
+  ASSERT_EQ(ej.value()["nested"]["nested_attr"].GetBool(), true);
+
+  for (int i = 0; i < 2; i++) {
+    ej["nested_array"][i] = i + 1;
+  }
+  ASSERT_EQ(ej.value()["nested_array"][SizeType(0)].GetInt(), 1);
+  ASSERT_EQ(ej.value()["nested_array"][SizeType(1)].GetInt(), 2);
+}
+
+TEST_F(EasyJsonTest, TestComplexInitializer) {
+  EasyJson ej;
+  ej = EasyJson::kObject;
+  ASSERT_TRUE(ej.value().IsObject());
+
+  EasyJson nested_arr = ej.Set("nested_arr", EasyJson::kArray);
+  ASSERT_TRUE(nested_arr.value().IsArray());
+
+  EasyJson nested_obj = nested_arr.PushBack(EasyJson::kObject);
+  ASSERT_TRUE(ej["nested_arr"][0].value().IsObject());
+}
+
+TEST_F(EasyJsonTest, TestAllocatorLifetime) {
+  EasyJson* root = new EasyJson;
+  EasyJson child = (*root)["child"];
+  delete root;
+
+  child["child_attr"] = 1;
+  ASSERT_EQ(child.value()["child_attr"].GetInt(), 1);
+}
+
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/easy_json.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/easy_json.cc b/be/src/kudu/util/easy_json.cc
new file mode 100644
index 0000000..9057b50
--- /dev/null
+++ b/be/src/kudu/util/easy_json.cc
@@ -0,0 +1,212 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/util/easy_json.h"
+
+#include <ostream>
+#include <string>
+#include <utility>
+
+#include <glog/logging.h>
+#include <rapidjson/document.h>
+#include <rapidjson/rapidjson.h>
+#include <rapidjson/stringbuffer.h>
+#include <rapidjson/writer.h>
+
+using rapidjson::SizeType;
+using rapidjson::Value;
+using std::string;
+
+namespace kudu {
+
+EasyJson::EasyJson() : alloc_(new EasyJsonAllocator), value_(&alloc_->value()) {}
+
+EasyJson::EasyJson(EasyJson::ComplexTypeInitializer type)
+    : alloc_(new EasyJsonAllocator), value_(&alloc_->value()) {
+  if (type == kObject) {
+    value_->SetObject();
+  } else if (type == kArray) {
+    value_->SetArray();
+  }
+}
+
+EasyJson EasyJson::Get(const string& key) {
+  if (!value_->IsObject()) {
+    value_->SetObject();
+  }
+  if (!value_->HasMember(key.c_str())) {
+    Value key_val(key.c_str(), alloc_->allocator());
+    value_->AddMember(key_val, Value().SetNull(), alloc_->allocator());
+  }
+  return EasyJson(&(*value_)[key.c_str()], alloc_);
+}
+
+EasyJson EasyJson::Get(int index) {
+  if (!value_->IsArray()) {
+    value_->SetArray();
+  }
+  while (SizeType(index) >= value_->Size()) {
+    value_->PushBack(Value().SetNull(), alloc_->allocator());
+  }
+  return EasyJson(&(*value_)[index], alloc_);
+}
+
+EasyJson EasyJson::operator[](const string& key) {
+  return Get(key);
+}
+
+EasyJson EasyJson::operator[](int index) {
+  return Get(index);
+}
+
+EasyJson& EasyJson::operator=(const string& val) {
+  value_->SetString(val.c_str(), alloc_->allocator());
+  return *this;
+}
+template<typename T>
+EasyJson& EasyJson::operator=(T val) {
+  *value_ = val;
+  return *this;
+}
+template EasyJson& EasyJson::operator=<bool>(bool val);
+template EasyJson& EasyJson::operator=<int32_t>(int32_t val);
+template EasyJson& EasyJson::operator=<int64_t>(int64_t val);
+template EasyJson& EasyJson::operator=<uint32_t>(uint32_t val);
+template EasyJson& EasyJson::operator=<uint64_t>(uint64_t val);
+template EasyJson& EasyJson::operator=<double>(double val);
+template<> EasyJson& EasyJson::operator=<const char*>(const char* val) {
+  value_->SetString(val, alloc_->allocator());
+  return *this;
+}
+template<> EasyJson& EasyJson::operator=<EasyJson::ComplexTypeInitializer>(
+    EasyJson::ComplexTypeInitializer val) {
+  if (val == kObject) {
+    value_->SetObject();
+  } else if (val == kArray) {
+    value_->SetArray();
+  }
+  return (*this);
+}
+
+EasyJson& EasyJson::SetObject() {
+  if (!value_->IsObject()) {
+    value_->SetObject();
+  }
+  return *this;
+}
+
+EasyJson& EasyJson::SetArray() {
+  if (!value_->IsArray()) {
+    value_->SetArray();
+  }
+  return *this;
+}
+
+EasyJson EasyJson::Set(const string& key, const string& val) {
+  return (Get(key) = val);
+}
+template<typename T>
+EasyJson EasyJson::Set(const string& key, T val) {
+  return (Get(key) = val);
+}
+template EasyJson EasyJson::Set<bool>(const string& key, bool val);
+template EasyJson EasyJson::Set<int32_t>(const string& key, int32_t val);
+template EasyJson EasyJson::Set<int64_t>(const string& key, int64_t val);
+template EasyJson EasyJson::Set<uint32_t>(const string& key, uint32_t val);
+template EasyJson EasyJson::Set<uint64_t>(const string& key, uint64_t val);
+template EasyJson EasyJson::Set<double>(const string& key, double val);
+template EasyJson EasyJson::Set<const char*>(const string& key, const char* val);
+template EasyJson EasyJson::Set<EasyJson::ComplexTypeInitializer>(
+    const string& key,
+    EasyJson::ComplexTypeInitializer val);
+
+EasyJson EasyJson::Set(int index, const string& val) {
+  return (Get(index) = val);
+}
+template<typename T>
+EasyJson EasyJson::Set(int index, T val) {
+  return (Get(index) = val);
+}
+template EasyJson EasyJson::Set<bool>(int index, bool val);
+template EasyJson EasyJson::Set<int32_t>(int index, int32_t val);
+template EasyJson EasyJson::Set<int64_t>(int index, int64_t val);
+template EasyJson EasyJson::Set<uint32_t>(int index, uint32_t val);
+template EasyJson EasyJson::Set<uint64_t>(int index, uint64_t val);
+template EasyJson EasyJson::Set<double>(int index, double val);
+template EasyJson EasyJson::Set<const char*>(int index, const char* val);
+template EasyJson EasyJson::Set<EasyJson::ComplexTypeInitializer>(
+    int index,
+    EasyJson::ComplexTypeInitializer val);
+
+EasyJson EasyJson::PushBack(const string& val) {
+  if (!value_->IsArray()) {
+    value_->SetArray();
+  }
+  Value push_val(val.c_str(), alloc_->allocator());
+  value_->PushBack(push_val, alloc_->allocator());
+  return EasyJson(&(*value_)[value_->Size() - 1], alloc_);
+}
+template<typename T>
+EasyJson EasyJson::PushBack(T val) {
+  if (!value_->IsArray()) {
+    value_->SetArray();
+  }
+  value_->PushBack(val, alloc_->allocator());
+  return EasyJson(&(*value_)[value_->Size() - 1], alloc_);
+}
+template EasyJson EasyJson::PushBack<bool>(bool val);
+template EasyJson EasyJson::PushBack<int32_t>(int32_t val);
+template EasyJson EasyJson::PushBack<int64_t>(int64_t val);
+template EasyJson EasyJson::PushBack<uint32_t>(uint32_t val);
+template EasyJson EasyJson::PushBack<uint64_t>(uint64_t val);
+template EasyJson EasyJson::PushBack<double>(double val);
+template<> EasyJson EasyJson::PushBack<const char*>(const char* val) {
+  if (!value_->IsArray()) {
+    value_->SetArray();
+  }
+  Value push_val(val, alloc_->allocator());
+  value_->PushBack(push_val, alloc_->allocator());
+  return EasyJson(&(*value_)[value_->Size() - 1], alloc_);
+}
+template<> EasyJson EasyJson::PushBack<EasyJson::ComplexTypeInitializer>(
+    EasyJson::ComplexTypeInitializer val) {
+  if (!value_->IsArray()) {
+    value_->SetArray();
+  }
+  Value push_val;
+  if (val == kObject) {
+    push_val.SetObject();
+  } else if (val == kArray) {
+    push_val.SetArray();
+  } else {
+    LOG(FATAL) << "Unknown initializer type";
+  }
+  value_->PushBack(push_val, alloc_->allocator());
+  return EasyJson(&(*value_)[value_->Size() - 1], alloc_);
+}
+
+string EasyJson::ToString() const {
+  rapidjson::StringBuffer buffer;
+  rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+  value_->Accept(writer);
+  return buffer.GetString();
+}
+
+EasyJson::EasyJson(Value* value, scoped_refptr<EasyJsonAllocator> alloc)
+    : alloc_(std::move(alloc)), value_(value) {}
+
+} // namespace kudu
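
[Editor's note] A note on the structure above: the templated operator=/Set/PushBack members are defined in this .cc file and explicitly instantiated for a fixed list of value types, which keeps rapidjson details out of callers' headers. A minimal, hedged sketch of that pattern with made-up names (Append), collapsed into one file for readability:

#include <cstdint>
#include <iostream>

// --- the header would carry only the declaration ---
template <typename T>
void Append(T val);

// --- the .cc file carries the definition plus explicit instantiations ---
template <typename T>
void Append(T val) {
  std::cout << "appended " << val << std::endl;
}
// Only these instantiations are emitted; in a real header/.cc split, a caller
// using any other T compiles but fails to link. This mirrors the
// EasyJson::Set/PushBack/operator= instantiation lists above.
template void Append<bool>(bool);
template void Append<int64_t>(int64_t);
template void Append<double>(double);

int main() {
  Append<int64_t>(42);
  Append(3.14);
  return 0;
}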

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/easy_json.h
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/easy_json.h b/be/src/kudu/util/easy_json.h
new file mode 100644
index 0000000..bd0365a
--- /dev/null
+++ b/be/src/kudu/util/easy_json.h
@@ -0,0 +1,190 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#pragma once
+
+#include <string>
+
+#include <rapidjson/document.h>
+
+#include "kudu/gutil/ref_counted.h"
+
+namespace kudu {
+
+// A wrapper around rapidjson Value objects, to simplify usage.
+// Intended solely for building json objects, not for parsing.
+//
+// Simplifies code like this:
+//
+//   rapidjson::Document d;
+//   rapidjson::Value v;
+//   v.SetObject();
+//   rapidjson::Value list;
+//   list.SetArray();
+//   v.AddMember("list", list, d.GetAllocator());
+//   v["list"].PushBack(rapidjson::Value().SetString("element"), d.GetAllocator());
+//
+// To this:
+//
+//   EasyJson ej;
+//   ej["list"][0] = "element";
+//
+// Client code should build objects as demonstrated above,
+// then call EasyJson::value() to obtain a reference to the
+// built rapidjson Value.
+class EasyJson {
+ public:
+  // Used for initializing EasyJson's with complex types.
+  // For example:
+  //
+  //  EasyJson array;
+  //  EasyJson nested = array.PushBack(EasyJson::kObject);
+  //  nested["attr"] = "val";
+  //  // array = [ { "attr": "val" } ]
+  enum ComplexTypeInitializer {
+    kObject,
+    kArray
+  };
+
+  EasyJson();
+  // Initializes the EasyJson object with the given type.
+  explicit EasyJson(ComplexTypeInitializer type);
+  ~EasyJson() = default;
+
+  // Returns the child EasyJson associated with key.
+  //
+  // Note: this method can mutate the EasyJson object
+  // as follows:
+  //
+  // If this EasyJson's underlying Value is not an object
+  // (i.e. !this->value().IsObject()), then its Value is
+  // coerced to an object, overwriting the old Value.
+  // If the given key does not exist, a Null-valued
+  // EasyJson associated with key is created.
+  EasyJson Get(const std::string& key);
+
+  // Returns the child EasyJson at index.
+  //
+  // Note: this method can mutate the EasyJson object
+  // as follows:
+  //
+  // If this EasyJson's underlying Value is not an array
+  // (i.e. !this->value().IsArray()), then its Value is
+  // coerced to an array, overwriting the old Value.
+  // If index >= this->value().Size(), then the underlying
+  // array's size is increased to index + 1 (new indices
+  // are filled with Null values).
+  EasyJson Get(int index);
+
+  // Same as Get(key).
+  EasyJson operator[](const std::string& key);
+  // Same as Get(index).
+  EasyJson operator[](int index);
+
+  // Sets the underlying Value equal to val.
+  // Returns a reference to the object itself.
+  //
+  // 'val' can be a bool, int32_t, int64_t, uint32_t, uint64_t,
+  // double, char*, string, or ComplexTypeInitializer.
+  EasyJson& operator=(const std::string& val);
+  template<typename T>
+  EasyJson& operator=(T val);
+
+  // Sets the underlying Value to an object.
+  // Returns a reference to the object itself.
+  //
+  // i.e. after calling SetObject(),
+  // value().IsObject() == true
+  EasyJson& SetObject();
+  // Sets the underlying Value to an array.
+  // Returns a reference to the object itself.
+  //
+  // i.e. after calling SetArray(),
+  // value().IsArray() == true
+  EasyJson& SetArray();
+
+  // Associates val with key.
+  // Returns the child object.
+  //
+  // If this EasyJson's underlying Value is not an object
+  // (i.e. !this->value().IsObject()), then its Value is
+  // coerced to an object, overwriting the old Value.
+  // If the given key does not exist, a new child entry
+  // is created with the given value.
+  EasyJson Set(const std::string& key, const std::string& val);
+  template<typename T>
+  EasyJson Set(const std::string& key, T val);
+
+  // Stores val at index.
+  // Returns the child object.
+  //
+  // If this EasyJson's underlying Value is not an array
+  // (i.e. !this->value().IsArray()), then its Value is
+  // coerced to an array, overwriting the old Value.
+  // If index >= this->value().Size(), then the underlying
+  // array's size is increased to index + 1 (new indices
+  // are filled with Null values).
+  EasyJson Set(int index, const std::string& val);
+  template<typename T>
+  EasyJson Set(int index, T val);
+
+  // Appends val to the underlying array.
+  // Returns a reference to the new child object.
+  //
+  // If this EasyJson's underlying Value is not an array
+  // (i.e. !this->value().IsArray()), then its Value is
+  // coerced to an array, overwriting the old Value.
+  EasyJson PushBack(const std::string& val);
+  template<typename T>
+  EasyJson PushBack(T val);
+
+  // Returns a reference to the underlying Value.
+  rapidjson::Value& value() const { return *value_; }
+
+  // Returns a string representation of the underlying json.
+  std::string ToString() const;
+
+ private:
+  // One instance of EasyJsonAllocator is shared among a root
+  // EasyJson object and all of its descendants. The allocator
+  // owns the underlying rapidjson Value, and a rapidjson
+  // allocator (via a rapidjson::Document).
+  class EasyJsonAllocator : public RefCounted<EasyJsonAllocator> {
+   public:
+    rapidjson::Value& value() { return value_; }
+    rapidjson::Document::AllocatorType& allocator() { return value_.GetAllocator(); }
+   private:
+    friend class RefCounted<EasyJsonAllocator>;
+    ~EasyJsonAllocator() = default;
+
+    // The underlying rapidjson::Value object (Document is
+    // a subclass of Value that has its own allocator).
+    rapidjson::Document value_;
+  };
+
+  // Used to instantiate descendant objects.
+  EasyJson(rapidjson::Value* value, scoped_refptr<EasyJsonAllocator> alloc);
+
+  // One allocator is shared among an EasyJson object and
+  // all of its descendants.
+  scoped_refptr<EasyJsonAllocator> alloc_;
+
+  // A pointer to the underlying Value in the object
+  // tree owned by alloc_.
+  rapidjson::Value* value_;
+};
+
+} // namespace kudu
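
[Editor's note] Pulling the header's examples together, a hedged end-to-end sketch; the field names and values are arbitrary, and the printed member order is whatever rapidjson preserves (typically insertion order). Not part of the patch.

#include <iostream>
#include <string>

#include "kudu/util/easy_json.h"

int main() {
  kudu::EasyJson ej;
  // std::string overload; const char* and ComplexTypeInitializer also work
  // per the documentation above.
  ej["name"] = std::string("tablet-001");
  ej["healthy"] = true;
  for (int i = 0; i < 3; i++) {
    ej["replicas"][i] = i;   // coerces "replicas" to an array and grows it
  }
  // Prints roughly: {"name":"tablet-001","healthy":true,"replicas":[0,1,2]}
  std::cout << ej.ToString() << std::endl;
  return 0;
}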