You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by to...@apache.org on 2016/04/20 07:27:56 UTC

incubator-kudu git commit: KUDU-1410 (part 1). Add per-Trace counters

Repository: incubator-kudu
Updated Branches:
  refs/heads/master 832030a89 -> 6b6a697e4


KUDU-1410 (part 1). Add per-Trace counters

This adds a map of counters to each Trace object and provides a nice macro to
increment a given counter on the current trace.

The goal here is to make it easy to collect per-request metrics without plumbing
structs through the call stack or new sets of thread-locals. This mechanism
also makes it easy to hook up generic pieces of code such as threadpools and
locking primitives where plumbing a metrics structure through the call stack
is quite impractical.

To keep things fast, the map is keyed by const char* pointers which use pointer
comparison rather than string comparison - this avoids having to do any hashing
on the increment path. We could probably improve this further (eg using a
different more-optimized map type) if we start seeing it on a profile.

One wrinkle about using 'const char*' pointers is that it means the keys have
to live forever. The class provides a 'string interning' implementation which
allows strings generated at runtime to be used.  This is used in particular for
threadpool-specific counters, which we can bound as a relatively small set.

I also sprinkled various counters around the code that I thought might be
useful, including:

- counters for spinlock and mutex wait times
- counters for cpu-time, wall-time, and queue-time for each threadpool
- counters for disk reads, so it's easy to see if a request was
  blocked on seeky disk accesses
- counters for various pieces of the tablet code which have been
  responsible for slow RPCs in the past

The RPC layer is augmented slightly to log an INFO level message for
any request which takes longer than a second. This should be conservative
enough that we don't get a lot of spew, but still interesting to look
at when we have slowness.

A typical trace looks something like:

I0411 23:47:48.600158 23021 inbound_call.cc:230] Call
  kudu.tserver.TabletServerService.Write from 127.0.0.1:41490 (request call id 2029711) took 1125ms.
  Request Metrics:
{ "child_traces" : [ { "apply.queue_time_us" : 390,
        "cfile_cache_hit" : 5,
        "cfile_cache_hit_bytes" : 20510,
        "num_ops" : 1,
        "prepare.queue_time_us" : 8,
        "prepare.run_cpu_time_us" : 20,
        "prepare.run_wall_time_us" : 29
      } ] }

(pretty-printed using jsonlint -f -- the actual log is compact)

Change-Id: I86001778976f01b62342f63f98b62962bc74212f
Reviewed-on: http://gerrit.cloudera.org:8080/2794
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kudu/commit/6b6a697e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kudu/tree/6b6a697e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kudu/diff/6b6a697e

Branch: refs/heads/master
Commit: 6b6a697e4d7cf0327339911b30377766cfd75037
Parents: 832030a
Author: Todd Lipcon <to...@apache.org>
Authored: Mon Apr 11 23:52:05 2016 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Wed Apr 20 01:52:46 2016 +0000

----------------------------------------------------------------------
 src/kudu/cfile/cfile_reader.cc                  |  6 ++
 src/kudu/consensus/raft_consensus.cc            |  1 +
 src/kudu/consensus/raft_consensus_state.cc      |  2 +-
 src/kudu/fs/log_block_manager.cc                | 19 +++++
 src/kudu/rpc/inbound_call.cc                    |  8 ++-
 src/kudu/rpc/negotiation.cc                     |  2 +-
 src/kudu/rpc/rpc_context.cc                     |  8 +--
 src/kudu/tablet/deltafile.cc                    |  4 ++
 src/kudu/tablet/tablet.cc                       |  1 +
 src/kudu/tablet/tablet_peer.cc                  |  2 +-
 .../tablet/transactions/transaction_driver.cc   |  9 +++
 .../tablet/transactions/transaction_driver.h    |  1 +
 src/kudu/tserver/ts_tablet_manager.cc           |  2 +-
 src/kudu/util/CMakeLists.txt                    |  1 +
 src/kudu/util/mutex.cc                          | 22 ++++++
 src/kudu/util/spinlock_profiling-test.cc        |  3 +-
 src/kudu/util/spinlock_profiling.cc             |  1 +
 src/kudu/util/stopwatch.h                       |  7 +-
 src/kudu/util/threadpool-test.cc                |  2 +-
 src/kudu/util/threadpool.cc                     | 47 +++++++++---
 src/kudu/util/threadpool.h                      | 27 +++++++
 src/kudu/util/trace-test.cc                     | 22 ++++--
 src/kudu/util/trace.cc                          | 40 +++++++++--
 src/kudu/util/trace.h                           | 52 ++++++++++++--
 src/kudu/util/trace_metrics.cc                  | 65 +++++++++++++++++
 src/kudu/util/trace_metrics.h                   | 75 ++++++++++++++++++++
 26 files changed, 394 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/6b6a697e/src/kudu/cfile/cfile_reader.cc
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/cfile_reader.cc b/src/kudu/cfile/cfile_reader.cc
index 03689c5..6c03d06 100644
--- a/src/kudu/cfile/cfile_reader.cc
+++ b/src/kudu/cfile/cfile_reader.cc
@@ -42,6 +42,7 @@
 #include "kudu/util/rle-encoding.h"
 #include "kudu/util/slice.h"
 #include "kudu/util/status.h"
+#include "kudu/util/trace.h"
 
 DEFINE_bool(cfile_lazy_open, true,
             "Allow lazily opening of cfiles");
@@ -125,6 +126,7 @@ Status CFileReader::ReadMagicAndLength(uint64_t offset, uint32_t *len) {
 
 Status CFileReader::InitOnce() {
   VLOG(1) << "Initializing CFile with ID " << block_->id().ToString();
+  TRACE_COUNTER_INCREMENT("cfile_init", 1);
 
   RETURN_NOT_OK(ReadAndParseHeader());
 
@@ -315,6 +317,8 @@ Status CFileReader::ReadBlock(const BlockPointer &ptr, CacheControl cache_contro
       Cache::EXPECT_IN_CACHE : Cache::NO_EXPECT_IN_CACHE;
   BlockCache* cache = BlockCache::GetSingleton();
   if (cache->Lookup(block_->id(), ptr.offset(), cache_behavior, &bc_handle)) {
+    TRACE_COUNTER_INCREMENT("cfile_cache_hit", 1);
+    TRACE_COUNTER_INCREMENT("cfile_cache_hit_bytes", ptr.size());
     *ret = BlockHandle::WithDataFromCache(&bc_handle);
     // Cache hit
     return Status::OK();
@@ -326,6 +330,8 @@ Status CFileReader::ReadBlock(const BlockPointer &ptr, CacheControl cache_contro
   // from the Linux cache).
   TRACE_EVENT1("io", "CFileReader::ReadBlock(cache miss)",
                "cfile", ToString());
+  TRACE_COUNTER_INCREMENT("cfile_cache_miss", 1);
+  TRACE_COUNTER_INCREMENT("cfile_cache_miss_bytes", ptr.size());
   Slice block;
 
   // If we are reading uncompressed data and plan to cache the result,

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/6b6a697e/src/kudu/consensus/raft_consensus.cc
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/raft_consensus.cc b/src/kudu/consensus/raft_consensus.cc
index d046a6d..7c5383a 100644
--- a/src/kudu/consensus/raft_consensus.cc
+++ b/src/kudu/consensus/raft_consensus.cc
@@ -170,6 +170,7 @@ scoped_refptr<RaftConsensus> RaftConsensus::Create(
 
   gscoped_ptr<ThreadPool> thread_pool;
   CHECK_OK(ThreadPoolBuilder(Substitute("$0-raft", options.tablet_id.substr(0, 6)))
+           .set_trace_metric_prefix("raft")
            .Build(&thread_pool));
 
   DCHECK(local_peer_pb.has_permanent_uuid());

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/6b6a697e/src/kudu/consensus/raft_consensus_state.cc
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/raft_consensus_state.cc b/src/kudu/consensus/raft_consensus_state.cc
index 240ae4f..c607b00 100644
--- a/src/kudu/consensus/raft_consensus_state.cc
+++ b/src/kudu/consensus/raft_consensus_state.cc
@@ -644,7 +644,7 @@ void ReplicaState::UpdateLastReceivedOpIdUnlocked(const OpId& op_id) {
   DCHECK_LE(OpIdCompare(last_received_op_id_, op_id), 0)
     << "Previously received OpId: " << last_received_op_id_.ShortDebugString()
     << ", updated OpId: " << op_id.ShortDebugString()
-    << ", Trace:" << std::endl << Trace::CurrentTrace()->DumpToString(true);
+    << ", Trace:" << std::endl << Trace::CurrentTrace()->DumpToString();
   last_received_op_id_ = op_id;
   last_received_op_id_current_leader_ = last_received_op_id_;
   next_index_ = op_id.index() + 1;

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/6b6a697e/src/kudu/fs/log_block_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/fs/log_block_manager.cc b/src/kudu/fs/log_block_manager.cc
index e207793..5b20fdb 100644
--- a/src/kudu/fs/log_block_manager.cc
+++ b/src/kudu/fs/log_block_manager.cc
@@ -39,6 +39,7 @@
 #include "kudu/util/random_util.h"
 #include "kudu/util/stopwatch.h"
 #include "kudu/util/threadpool.h"
+#include "kudu/util/trace.h"
 
 // TODO: How should this be configured? Should provide some guidance.
 DEFINE_uint64(log_container_max_size, 10LU * 1024 * 1024 * 1024,
@@ -984,7 +985,25 @@ Status LogReadableBlock::Read(uint64_t offset, size_t length,
                                       log_block_->offset(),
                                       log_block_->offset() + log_block_->length()));
   }
+
+  MicrosecondsInt64 start_time = GetMonoTimeMicros();
   RETURN_NOT_OK(container_->ReadData(read_offset, length, result, scratch));
+  MicrosecondsInt64 end_time = GetMonoTimeMicros();
+
+  int64_t dur = end_time - start_time;
+  TRACE_COUNTER_INCREMENT("lbm_read_time_us", dur);
+
+  const char* counter;
+  if (dur >= 100 * 1000) {
+    counter = "lbm_reads_gt_100_ms";
+  } else if (dur >= 10 * 1000) {
+    counter = "lbm_reads_10-100_ms";
+  } else if (dur >= 1000) {
+    counter = "lbm_reads_1-10_ms";
+  } else {
+    counter = "lbm_reads_lt_1ms";
+  }
+  TRACE_COUNTER_INCREMENT(counter, 1);
 
   if (container_->metrics()) {
     container_->metrics()->generic_metrics.total_bytes_read->IncrementBy(length);

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/6b6a697e/src/kudu/rpc/inbound_call.cc
----------------------------------------------------------------------
diff --git a/src/kudu/rpc/inbound_call.cc b/src/kudu/rpc/inbound_call.cc
index 4c5f75b..dac3eaa 100644
--- a/src/kudu/rpc/inbound_call.cc
+++ b/src/kudu/rpc/inbound_call.cc
@@ -17,6 +17,7 @@
 
 #include "kudu/rpc/inbound_call.h"
 
+#include <glog/stl_logging.h>
 #include <memory>
 
 #include "kudu/gutil/strings/substitute.h"
@@ -197,7 +198,7 @@ void InboundCall::DumpPB(const DumpRunningRpcsRequestPB& req,
                          RpcCallInProgressPB* resp) {
   resp->mutable_header()->CopyFrom(header_);
   if (req.include_traces() && trace_) {
-    resp->set_trace_buffer(trace_->DumpToString(true));
+    resp->set_trace_buffer(trace_->DumpToString());
   }
   resp->set_micros_elapsed(MonoTime::Now(MonoTime::FINE).GetDeltaSince(timing_.time_received)
                            .ToMicroseconds());
@@ -214,7 +215,7 @@ void InboundCall::LogTrace() const {
       // The traces may also be too large to fit in a log message.
       LOG(WARNING) << ToString() << " took " << total_time << "ms (client timeout "
                    << header_.timeout_millis() << ").";
-      std::string s = trace_->DumpToString(true);
+      std::string s = trace_->DumpToString();
       if (!s.empty()) {
         LOG(WARNING) << "Trace:\n" << s;
       }
@@ -225,6 +226,9 @@ void InboundCall::LogTrace() const {
   if (PREDICT_FALSE(FLAGS_rpc_dump_all_traces)) {
     LOG(INFO) << ToString() << " took " << total_time << "ms. Trace:";
     trace_->Dump(&LOG(INFO), true);
+  } else if (total_time > 1000) {
+    LOG(INFO) << ToString() << " took " << total_time << "ms. "
+              << "Request Metrics: " << trace_->MetricsAsJSON();
   }
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/6b6a697e/src/kudu/rpc/negotiation.cc
----------------------------------------------------------------------
diff --git a/src/kudu/rpc/negotiation.cc b/src/kudu/rpc/negotiation.cc
index e2cbb4a..921d3d7 100644
--- a/src/kudu/rpc/negotiation.cc
+++ b/src/kudu/rpc/negotiation.cc
@@ -225,7 +225,7 @@ void Negotiation::RunNegotiation(const scoped_refptr<Connection>& conn,
   bool is_bad = !s.ok() && !(s.IsNetworkError() && s.posix_code() == ECONNREFUSED);
 
   if (is_bad || FLAGS_rpc_trace_negotiation) {
-    string msg = Trace::CurrentTrace()->DumpToString(true);
+    string msg = Trace::CurrentTrace()->DumpToString();
     if (is_bad) {
       LOG(WARNING) << "Failed RPC negotiation. Trace:\n" << msg;
     } else {

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/6b6a697e/src/kudu/rpc/rpc_context.cc
----------------------------------------------------------------------
diff --git a/src/kudu/rpc/rpc_context.cc b/src/kudu/rpc/rpc_context.cc
index a3a870b..84a8a27 100644
--- a/src/kudu/rpc/rpc_context.cc
+++ b/src/kudu/rpc/rpc_context.cc
@@ -91,7 +91,7 @@ void RpcContext::RespondSuccess() {
           << call_->ToString() << ":" << std::endl << response_pb_->DebugString();
   TRACE_EVENT_ASYNC_END2("rpc_call", "RPC", this,
                          "response", TracePb(*response_pb_),
-                         "trace", trace()->DumpToString(true));
+                         "trace", trace()->DumpToString());
   call_->RespondSuccess(*response_pb_);
   delete this;
 }
@@ -102,7 +102,7 @@ void RpcContext::RespondFailure(const Status &status) {
           << call_->ToString() << ": " << status.ToString();
   TRACE_EVENT_ASYNC_END2("rpc_call", "RPC", this,
                          "status", status.ToString(),
-                         "trace", trace()->DumpToString(true));
+                         "trace", trace()->DumpToString());
   call_->RespondFailure(ErrorStatusPB::ERROR_APPLICATION,
                         status);
   delete this;
@@ -114,7 +114,7 @@ void RpcContext::RespondRpcFailure(ErrorStatusPB_RpcErrorCodePB err, const Statu
           << call_->ToString() << ": " << status.ToString();
   TRACE_EVENT_ASYNC_END2("rpc_call", "RPC", this,
                          "status", status.ToString(),
-                         "trace", trace()->DumpToString(true));
+                         "trace", trace()->DumpToString());
   call_->RespondFailure(err, status);
   delete this;
 }
@@ -129,7 +129,7 @@ void RpcContext::RespondApplicationError(int error_ext_id, const std::string& me
             << call_->ToString() << ":" << std::endl << err.DebugString();
     TRACE_EVENT_ASYNC_END2("rpc_call", "RPC", this,
                            "response", TracePb(app_error_pb),
-                           "trace", trace()->DumpToString(true));
+                           "trace", trace()->DumpToString());
   }
   call_->RespondApplicationError(error_ext_id, message, app_error_pb);
   delete this;

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/6b6a697e/src/kudu/tablet/deltafile.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/deltafile.cc b/src/kudu/tablet/deltafile.cc
index 5cf07d0..1b3cfef 100644
--- a/src/kudu/tablet/deltafile.cc
+++ b/src/kudu/tablet/deltafile.cc
@@ -35,6 +35,7 @@
 #include "kudu/util/flag_tags.h"
 #include "kudu/util/hexdump.h"
 #include "kudu/util/pb_util.h"
+#include "kudu/util/trace.h"
 
 DECLARE_bool(cfile_lazy_open);
 DEFINE_int32(deltafile_default_block_size, 32*1024,
@@ -261,6 +262,8 @@ Status DeltaFileReader::NewDeltaIterator(const Schema *projection,
   if (IsRelevantForSnapshot(snap)) {
     if (VLOG_IS_ON(2)) {
       if (!init_once_.initted()) {
+        TRACE_COUNTER_INCREMENT("delta_iterators_lazy_initted", 1);
+
         VLOG(2) << (delta_type_ == REDO ? "REDO" : "UNDO") << " delta " << ToString()
                 << "has no delta stats"
                 << ": can't cull for " << snap.ToString();
@@ -275,6 +278,7 @@ Status DeltaFileReader::NewDeltaIterator(const Schema *projection,
       }
     }
 
+    TRACE_COUNTER_INCREMENT("delta_iterators_relevant", 1);
     // Ugly cast, but it lets the iterator fully initialize the reader
     // during its first seek.
     *iterator = new DeltaFileIterator(

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/6b6a697e/src/kudu/tablet/tablet.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet.cc b/src/kudu/tablet/tablet.cc
index dcf33aa..7217c8a 100644
--- a/src/kudu/tablet/tablet.cc
+++ b/src/kudu/tablet/tablet.cc
@@ -314,6 +314,7 @@ Status Tablet::DecodeWriteOperations(const Schema* client_schema,
                              schema(),
                              tx_state->arena());
   RETURN_NOT_OK(dec.DecodeOperations(&ops));
+  TRACE_COUNTER_INCREMENT("num_ops", ops.size());
 
   // Create RowOp objects for each
   vector<RowOp*> row_ops;

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/6b6a697e/src/kudu/tablet/tablet_peer.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet_peer.cc b/src/kudu/tablet/tablet_peer.cc
index 56a6d18..dba7bdb 100644
--- a/src/kudu/tablet/tablet_peer.cc
+++ b/src/kudu/tablet/tablet_peer.cc
@@ -414,7 +414,7 @@ void TabletPeer::GetInFlightTransactions(Transaction::TraceType trace_type,
           MonoTime::Now(MonoTime::FINE).GetDeltaSince(driver->start_time()).ToMicroseconds();
       status_pb.set_running_for_micros(running_for_micros);
       if (trace_type == Transaction::TRACE_TXNS) {
-        status_pb.set_trace_buffer(driver->trace()->DumpToString(true));
+        status_pb.set_trace_buffer(driver->trace()->DumpToString());
       }
       out->push_back(status_pb);
     }

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/6b6a697e/src/kudu/tablet/transactions/transaction_driver.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/transactions/transaction_driver.cc b/src/kudu/tablet/transactions/transaction_driver.cc
index 3aa191a..6b9ff23 100644
--- a/src/kudu/tablet/transactions/transaction_driver.cc
+++ b/src/kudu/tablet/transactions/transaction_driver.cc
@@ -76,6 +76,7 @@ Status TransactionDriver::Init(gscoped_ptr<Transaction> transaction,
     op_id_copy_ = transaction_->state()->op_id();
     DCHECK(op_id_copy_.IsInitialized());
     replication_state_ = REPLICATING;
+    replication_start_time_ = MonoTime::Now(MonoTime::FINE);
   } else {
     DCHECK_EQ(type, consensus::LEADER);
     gscoped_ptr<ReplicateMsg> replicate_msg;
@@ -195,6 +196,7 @@ Status TransactionDriver::PrepareAndStart() {
       {
         boost::lock_guard<simple_spinlock> lock(lock_);
         replication_state_ = REPLICATING;
+        replication_start_time_ = MonoTime::Now(MonoTime::FINE);
       }
       Status s = consensus_->Replicate(mutable_state()->consensus_round());
 
@@ -265,6 +267,9 @@ void TransactionDriver::HandleFailure(const Status& s) {
 }
 
 void TransactionDriver::ReplicationFinished(const Status& status) {
+  MonoTime replication_finished_time = MonoTime::Now(MonoTime::FINE);
+
+  ADOPT_TRACE(trace());
   {
     boost::lock_guard<simple_spinlock> op_id_lock(opid_lock_);
     // TODO: it's a bit silly that we have three copies of the opid:
@@ -275,6 +280,7 @@ void TransactionDriver::ReplicationFinished(const Status& status) {
     mutable_state()->mutable_op_id()->CopyFrom(op_id_copy_);
   }
 
+  MonoDelta replication_duration;
   PrepareState prepare_state_copy;
   {
     boost::lock_guard<simple_spinlock> lock(lock_);
@@ -286,8 +292,11 @@ void TransactionDriver::ReplicationFinished(const Status& status) {
       transaction_status_ = status;
     }
     prepare_state_copy = prepare_state_;
+    replication_duration = replication_finished_time.GetDeltaSince(replication_start_time_);
   }
 
+  TRACE_COUNTER_INCREMENT("replication_time_us", replication_duration.ToMicroseconds());
+
   // If we have prepared and replicated, we're ready
   // to move ahead and apply this operation.
   // Note that if we set the state to REPLICATION_FAILED above,

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/6b6a697e/src/kudu/tablet/transactions/transaction_driver.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/transactions/transaction_driver.h b/src/kudu/tablet/transactions/transaction_driver.h
index 2deefb8..874fd55 100644
--- a/src/kudu/tablet/transactions/transaction_driver.h
+++ b/src/kudu/tablet/transactions/transaction_driver.h
@@ -243,6 +243,7 @@ class TransactionDriver : public RefCountedThreadSafe<TransactionDriver> {
   scoped_refptr<Trace> trace_;
 
   const MonoTime start_time_;
+  MonoTime replication_start_time_;
 
   ReplicationState replication_state_;
   PrepareState prepare_state_;

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/6b6a697e/src/kudu/tserver/ts_tablet_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tserver/ts_tablet_manager.cc b/src/kudu/tserver/ts_tablet_manager.cc
index 2a17b7e..ec6377d 100644
--- a/src/kudu/tserver/ts_tablet_manager.cc
+++ b/src/kudu/tserver/ts_tablet_manager.cc
@@ -689,7 +689,7 @@ void TSTabletManager::OpenTablet(const scoped_refptr<TabletMetadata>& meta,
     LOG(WARNING) << LogPrefix(tablet_id) << "Tablet startup took " << elapsed_ms << "ms";
     if (Trace::CurrentTrace()) {
       LOG(WARNING) << LogPrefix(tablet_id) << "Trace:" << std::endl
-                   << Trace::CurrentTrace()->DumpToString(true);
+                   << Trace::CurrentTrace()->DumpToString();
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/6b6a697e/src/kudu/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/util/CMakeLists.txt b/src/kudu/util/CMakeLists.txt
index c0749b1..0c972a6 100644
--- a/src/kudu/util/CMakeLists.txt
+++ b/src/kudu/util/CMakeLists.txt
@@ -158,6 +158,7 @@ set(UTIL_SRCS
   thread_restrictions.cc
   throttler.cc
   trace.cc
+  trace_metrics.cc
   user.cc
   url-coding.cc
   version_info.cc

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/6b6a697e/src/kudu/util/mutex.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/mutex.cc b/src/kudu/util/mutex.cc
index 5996daf..1fb45b2 100644
--- a/src/kudu/util/mutex.cc
+++ b/src/kudu/util/mutex.cc
@@ -21,8 +21,10 @@
 
 #include <glog/logging.h>
 
+#include "kudu/gutil/walltime.h"
 #include "kudu/util/debug-util.h"
 #include "kudu/util/env.h"
+#include "kudu/util/trace.h"
 
 namespace kudu {
 
@@ -68,7 +70,27 @@ bool Mutex::TryAcquire() {
 }
 
 void Mutex::Acquire() {
+  // Optimize for the case when mutexes are uncontended. If they
+  // are contended, we'll have to go to sleep anyway, so the extra
+  // cost of branch mispredictions is moot.
+  //
+  // TryAcquire() is implemented as a simple CompareAndSwap inside
+  // pthreads so this does not require a system call.
+  if (PREDICT_TRUE(TryAcquire())) {
+    return;
+  }
+
+  // If we weren't able to acquire the mutex immediately, then it's
+  // worth gathering timing information about the mutex acquisition.
+  MicrosecondsInt64 start_time = GetMonoTimeMicros();
   int rv = pthread_mutex_lock(&native_handle_);
+  MicrosecondsInt64 end_time = GetMonoTimeMicros();
+
+  int64_t wait_time = end_time - start_time;
+  if (wait_time > 0) {
+    TRACE_COUNTER_INCREMENT("mutex_wait_us", wait_time);
+  }
+
 #ifndef NDEBUG
   DCHECK_EQ(0, rv) << ". " << strerror(rv)
       << ". Owner tid: " << owning_tid_ << "; Self tid: " << Env::Default()->gettid()

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/6b6a697e/src/kudu/util/spinlock_profiling-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/spinlock_profiling-test.cc b/src/kudu/util/spinlock_profiling-test.cc
index 74227b2..370b087 100644
--- a/src/kudu/util/spinlock_profiling-test.cc
+++ b/src/kudu/util/spinlock_profiling-test.cc
@@ -48,8 +48,9 @@ TEST_F(SpinLockProfilingTest, TestSpinlockProfiling) {
     ADOPT_TRACE(t.get());
     gutil::SubmitSpinLockProfileData(&lock, 4000000);
   }
-  string result = t->DumpToString(true);
+  string result = t->DumpToString();
   LOG(INFO) << "trace: " << result;
+  ASSERT_STR_CONTAINS(result, "\"spinlock_wait_cycles\":4000000");
   // We can't assert more specifically because the CyclesPerSecond
   // on different machines might be different.
   ASSERT_STR_CONTAINS(result, "Waited ");

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/6b6a697e/src/kudu/util/spinlock_profiling.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/spinlock_profiling.cc b/src/kudu/util/spinlock_profiling.cc
index f4a375f..3f2642c 100644
--- a/src/kudu/util/spinlock_profiling.cc
+++ b/src/kudu/util/spinlock_profiling.cc
@@ -204,6 +204,7 @@ bool ContentionStacks::CollectSample(uint64_t* iterator, StackTrace* s, int64_t*
 
 
 void SubmitSpinLockProfileData(const void *contendedlock, int64 wait_cycles) {
+  TRACE_COUNTER_INCREMENT("spinlock_wait_cycles", wait_cycles);
   bool profiling_enabled = base::subtle::Acquire_Load(&g_profiling_enabled);
   bool long_wait_time = wait_cycles > FLAGS_lock_contention_trace_threshold_cycles;
   // Short circuit this function quickly in the common case.

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/6b6a697e/src/kudu/util/stopwatch.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/stopwatch.h b/src/kudu/util/stopwatch.h
index a08691a..c1289e1 100644
--- a/src/kudu/util/stopwatch.h
+++ b/src/kudu/util/stopwatch.h
@@ -138,11 +138,16 @@ struct CpuTimes {
 //
 // Wall clock time is based on a monotonic timer, so can be reliably used for
 // determining durations.
-// CPU time is based on the current thread's usage (not the whole process).
+// CPU time is based on either current thread's usage or the usage of the whole
+// process, depending on the value of 'Mode' passed to the constructor.
 //
 // The implementation relies on several syscalls, so should not be used for
 // hot paths, but is useful for timing anything on the granularity of seconds
 // or more.
+//
+// NOTE: the user time reported by this class is based on Linux scheduler ticks
+// and thus has low precision. Use GetThreadCpuTimeMicros() from walltime.h if
+// more accurate per-thread CPU usage timing is required.
 class Stopwatch {
  public:
 

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/6b6a697e/src/kudu/util/threadpool-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/threadpool-test.cc b/src/kudu/util/threadpool-test.cc
index cb03f86..b6bcd55 100644
--- a/src/kudu/util/threadpool-test.cc
+++ b/src/kudu/util/threadpool-test.cc
@@ -102,7 +102,7 @@ TEST(TestThreadPool, TestTracePropagation) {
     ASSERT_OK(thread_pool->SubmitFunc(&IssueTraceStatement));
   }
   thread_pool->Wait();
-  ASSERT_STR_CONTAINS(t->DumpToString(true), "hello from task");
+  ASSERT_STR_CONTAINS(t->DumpToString(), "hello from task");
 }
 
 TEST(TestThreadPool, TestSubmitAfterShutdown) {

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/6b6a697e/src/kudu/util/threadpool.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/threadpool.cc b/src/kudu/util/threadpool.cc
index 0f5011e..1588c8c 100644
--- a/src/kudu/util/threadpool.cc
+++ b/src/kudu/util/threadpool.cc
@@ -28,6 +28,7 @@
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/gutil/sysinfo.h"
 #include "kudu/util/metrics.h"
+#include "kudu/util/stopwatch.h"
 #include "kudu/util/thread.h"
 #include "kudu/util/trace.h"
 
@@ -62,6 +63,12 @@ ThreadPoolBuilder::ThreadPoolBuilder(std::string name)
       max_queue_size_(std::numeric_limits<int>::max()),
       idle_timeout_(MonoDelta::FromMilliseconds(500)) {}
 
+ThreadPoolBuilder& ThreadPoolBuilder::set_trace_metric_prefix(
+    const std::string& prefix) {
+  trace_metric_prefix_ = prefix;
+  return *this;
+}
+
 ThreadPoolBuilder& ThreadPoolBuilder::set_min_threads(int min_threads) {
   CHECK_GE(min_threads, 0);
   min_threads_ = min_threads;
@@ -108,6 +115,16 @@ ThreadPool::ThreadPool(const ThreadPoolBuilder& builder)
     num_threads_(0),
     active_threads_(0),
     queue_size_(0) {
+
+  string prefix = !builder.trace_metric_prefix_.empty() ?
+      builder.trace_metric_prefix_ : builder.name_;
+
+  queue_time_trace_metric_name_ = TraceMetrics::InternName(
+      prefix + ".queue_time_us");
+  run_wall_time_trace_metric_name_ = TraceMetrics::InternName(
+      prefix + ".run_wall_time_us");
+  run_cpu_time_trace_metric_name_ = TraceMetrics::InternName(
+      prefix + ".run_cpu_time_us");
 }
 
 ThreadPool::~ThreadPool() {
@@ -304,21 +321,35 @@ void ThreadPool::DispatchThread(bool permanent) {
 
     unique_lock.Unlock();
 
-    // Update metrics
-    if (queue_time_us_histogram_) {
-      MonoTime now(MonoTime::Now(MonoTime::FINE));
-      queue_time_us_histogram_->Increment(now.GetDeltaSince(entry.submit_time).ToMicroseconds());
-    }
-
-    ADOPT_TRACE(entry.trace);
     // Release the reference which was held by the queued item.
+    ADOPT_TRACE(entry.trace);
     if (entry.trace) {
       entry.trace->Release();
     }
+
+    // Update metrics
+    MonoTime now(MonoTime::Now(MonoTime::FINE));
+    int64_t queue_time_us = now.GetDeltaSince(entry.submit_time).ToMicroseconds();
+    TRACE_COUNTER_INCREMENT(queue_time_trace_metric_name_, queue_time_us);
+    if (queue_time_us_histogram_) {
+      queue_time_us_histogram_->Increment(queue_time_us);
+    }
+
     // Execute the task
     {
-      ScopedLatencyMetric m(run_time_us_histogram_.get());
+      MicrosecondsInt64 start_wall_us = GetMonoTimeMicros();
+      MicrosecondsInt64 start_cpu_us = GetThreadCpuTimeMicros();
+
       entry.runnable->Run();
+
+      int64_t wall_us = GetMonoTimeMicros() - start_wall_us;
+      int64_t cpu_us = GetThreadCpuTimeMicros() - start_cpu_us;
+
+      if (run_time_us_histogram_) {
+        run_time_us_histogram_->Increment(wall_us);
+      }
+      TRACE_COUNTER_INCREMENT(run_wall_time_trace_metric_name_, wall_us);
+      TRACE_COUNTER_INCREMENT(run_cpu_time_trace_metric_name_, cpu_us);
     }
     unique_lock.Lock();
 

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/6b6a697e/src/kudu/util/threadpool.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/threadpool.h b/src/kudu/util/threadpool.h
index 7b9c870..cfa12f4 100644
--- a/src/kudu/util/threadpool.h
+++ b/src/kudu/util/threadpool.h
@@ -55,6 +55,27 @@ class Runnable {
 //    choose a short name here.
 //    Required.
 //
+// trace_metric_prefix: used to prefix the names of TraceMetric counters.
+//    When a task on a thread pool has an associated trace, the thread pool
+//    implementation will increment TraceMetric counters to indicate the
+//    amount of time spent waiting in the queue as well as the amount of wall
+//    and CPU time spent executing. By default, these counters are prefixed
+//    with the name of the thread pool. For example, if the pool is named
+//    'apply', then counters such as 'apply.queue_time_us' will be
+//    incremented.
+//
+//    The TraceMetrics implementation relies on the number of distinct counter
+//    names being small. Thus, if the thread pool name itself is dynamically
+//    generated, the default behavior described above would result in an
+//    unbounded number of distinct cunter names. The 'trace_metric_prefix'
+//    setting can be used to override the prefix used in generating the trace
+//    metric names.
+//
+//    For example, the Raft thread pools are named "<tablet id>-raft" which
+//    has unbounded cardinality (a server may have thousands of different
+//    tablet IDs over its lifetime). In that case, setting the prefix to
+//    "raft" will avoid any issues.
+//
 // min_threads: Minimum number of threads we'll have at any time.
 //    Default: 0.
 //
@@ -75,6 +96,7 @@ class ThreadPoolBuilder {
 
   // Note: We violate the style guide by returning mutable references here
   // in order to provide traditional Builder pattern conveniences.
+  ThreadPoolBuilder& set_trace_metric_prefix(const std::string& prefix);
   ThreadPoolBuilder& set_min_threads(int min_threads);
   ThreadPoolBuilder& set_max_threads(int max_threads);
   ThreadPoolBuilder& set_max_queue_size(int max_queue_size);
@@ -92,6 +114,7 @@ class ThreadPoolBuilder {
  private:
   friend class ThreadPool;
   const std::string name_;
+  std::string trace_metric_prefix_;
   int min_threads_;
   int max_threads_;
   int max_queue_size_;
@@ -228,6 +251,10 @@ class ThreadPool {
   scoped_refptr<Histogram> queue_time_us_histogram_;
   scoped_refptr<Histogram> run_time_us_histogram_;
 
+  const char* queue_time_trace_metric_name_;
+  const char* run_wall_time_trace_metric_name_;
+  const char* run_cpu_time_trace_metric_name_;
+
   DISALLOW_COPY_AND_ASSIGN(ThreadPool);
 };
 

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/6b6a697e/src/kudu/util/trace-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/trace-test.cc b/src/kudu/util/trace-test.cc
index 1891dfc..753dda1 100644
--- a/src/kudu/util/trace-test.cc
+++ b/src/kudu/util/trace-test.cc
@@ -16,6 +16,7 @@
 // under the License.
 
 #include <gtest/gtest.h>
+#include <glog/stl_logging.h>
 #include <string>
 #include <rapidjson/document.h>
 #include <rapidjson/rapidjson.h>
@@ -59,7 +60,7 @@ TEST_F(TraceTest, TestBasic) {
   TRACE_TO(t, "hello $0, $1", "world", 12345);
   TRACE_TO(t, "goodbye $0, $1", "cruel world", 54321);
 
-  string result = XOutDigits(t->DumpToString(false));
+  string result = XOutDigits(t->DumpToString(Trace::NO_FLAGS));
   ASSERT_EQ("XXXX XX:XX:XX.XXXXXX trace-test.cc:XX] hello world, XXXXX\n"
             "XXXX XX:XX:XX.XXXXXX trace-test.cc:XX] goodbye cruel world, XXXXX\n",
             result);
@@ -82,9 +83,9 @@ TEST_F(TraceTest, TestAttach) {
   EXPECT_TRUE(Trace::CurrentTrace() == nullptr);
   TRACE("this goes nowhere");
 
-  EXPECT_EQ(XOutDigits(traceA->DumpToString(false)),
+  EXPECT_EQ(XOutDigits(traceA->DumpToString(Trace::NO_FLAGS)),
             "XXXX XX:XX:XX.XXXXXX trace-test.cc:XX] hello from traceA\n");
-  EXPECT_EQ(XOutDigits(traceB->DumpToString(false)),
+  EXPECT_EQ(XOutDigits(traceB->DumpToString(Trace::NO_FLAGS)),
             "XXXX XX:XX:XX.XXXXXX trace-test.cc:XX] hello from traceB\n");
 }
 
@@ -98,10 +99,10 @@ TEST_F(TraceTest, TestChildTrace) {
     ADOPT_TRACE(traceB.get());
     TRACE("hello from traceB");
   }
-  EXPECT_EQ(XOutDigits(traceA->DumpToString(false)),
+  EXPECT_EQ(XOutDigits(traceA->DumpToString(Trace::NO_FLAGS)),
             "XXXX XX:XX:XX.XXXXXX trace-test.cc:XX] hello from traceA\n"
             "Related trace:\n"
-            "XXXX XX:XX:XX.XXXXXX trace-test.cc:XX] hello from traceB\n");
+            "XXXX XX:XX:XX.XXXXXX trace-test.cc:XXX] hello from traceB\n");
 }
 
 static void GenerateTraceEvents(int thread_id,
@@ -821,5 +822,16 @@ TEST_F(TraceTest, TestVLogAndEchoToConsole) {
   tl->SetDisabled();
 }
 
+TEST_F(TraceTest, TestTraceMetrics) {
+  scoped_refptr<Trace> trace(new Trace);
+  trace->metrics()->Increment("foo", 10);
+  trace->metrics()->Increment("bar", 10);
+  for (int i = 0; i < 1000; i++) {
+    trace->metrics()->Increment("baz", i);
+  }
+  ASSERT_EQ("{\"foo\":10,\"bar\":10,\"baz\":499500}",
+            trace->MetricsAsJSON());
+}
+
 } // namespace debug
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/6b6a697e/src/kudu/util/trace.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/trace.cc b/src/kudu/util/trace.cc
index bc1085c..cde3830 100644
--- a/src/kudu/util/trace.cc
+++ b/src/kudu/util/trace.cc
@@ -27,6 +27,7 @@
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/gutil/walltime.h"
 #include "kudu/util/memory/arena.h"
+#include "kudu/util/jsonwriter.h"
 
 namespace kudu {
 
@@ -115,7 +116,7 @@ void Trace::AddEntry(TraceEntry* entry) {
   entries_tail_ = entry;
 }
 
-void Trace::Dump(std::ostream* out, bool include_time_deltas) const {
+void Trace::Dump(std::ostream* out, int flags) const {
   // Gather a copy of the list of entries under the lock. This is fast
   // enough that we aren't worried about stalling concurrent tracers
   // (whereas doing the logging itself while holding the lock might be
@@ -160,7 +161,7 @@ void Trace::Dump(std::ostream* out, bool include_time_deltas) const {
          << setw(2) << tm_time.tm_min   << ':'
          << setw(2) << tm_time.tm_sec   << '.'
          << setw(6) << usecs << ' ';
-    if (include_time_deltas) {
+    if (flags & INCLUDE_TIME_DELTAS) {
       out->fill(' ');
       *out << "(+" << setw(6) << usecs_since_prev << "us) ";
     }
@@ -173,19 +174,48 @@ void Trace::Dump(std::ostream* out, bool include_time_deltas) const {
 
   for (scoped_refptr<Trace> child_trace : child_traces) {
     *out << "Related trace:" << std::endl;
-    *out << child_trace->DumpToString(include_time_deltas);
+    *out << child_trace->DumpToString(flags & (~INCLUDE_METRICS));
+  }
+
+  if (flags & INCLUDE_METRICS) {
+    *out << "Metrics: " << MetricsAsJSON();
   }
 
   // Restore stream flags.
   out->flags(save_flags);
 }
 
-string Trace::DumpToString(bool include_time_deltas) const {
+string Trace::DumpToString(int flags) const {
+  std::stringstream s;
+  Dump(&s, flags);
+  return s.str();
+}
+
+string Trace::MetricsAsJSON() const {
   std::stringstream s;
-  Dump(&s, include_time_deltas);
+  JsonWriter jw(&s, JsonWriter::COMPACT);
+  MetricsToJSON(&jw);
   return s.str();
 }
 
+void Trace::MetricsToJSON(JsonWriter* jw) const {
+  jw->StartObject();
+  auto m = metrics_.Get();
+  for (const auto& e : m) {
+    jw->String(e.first);
+    jw->Int64(e.second);
+  }
+  if (!child_traces_.empty()) {
+    jw->String("child_traces");
+    jw->StartArray();
+    for (const auto& t : child_traces_) {
+      t->MetricsToJSON(jw);
+    }
+    jw->EndArray();
+  }
+  jw->EndObject();
+}
+
 void Trace::DumpCurrentTrace() {
   Trace* t = CurrentTrace();
   if (t == nullptr) {

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/6b6a697e/src/kudu/util/trace.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/trace.h b/src/kudu/util/trace.h
index ff44edc..99e1da7 100644
--- a/src/kudu/util/trace.h
+++ b/src/kudu/util/trace.h
@@ -28,6 +28,7 @@
 #include "kudu/gutil/ref_counted.h"
 #include "kudu/gutil/threading/thread_collision_warner.h"
 #include "kudu/util/locks.h"
+#include "kudu/util/trace_metrics.h"
 
 // Adopt a Trace on the current thread for the duration of the current
 // scope. The old current Trace is restored when the scope is exited.
@@ -52,9 +53,34 @@
 #define TRACE_TO(trace, format, substitutions...) \
   (trace)->SubstituteAndTrace(__FILE__, __LINE__, (format), ##substitutions)
 
+// Increment a counter associated with the current trace.
+//
+// Each trace contains a map of counters which can be used to keep
+// request-specific statistics. It is significantly faster to increment
+// a trace counter compared to logging a message. Additionally, having
+// slightly more structured information makes it easier to aggregate
+// and show information back to operators.
+//
+// NOTE: the 'counter_name' MUST be a string which stays alive forever.
+// Typically, this is a compile-time constant. If something other than
+// a constant is required, use TraceMetric::InternName() in order to
+// create a string which will last for the process lifetime. Of course,
+// these strings will never be cleaned up, so it's important to use this
+// judiciously.
+//
+// If no trace is active, this does nothing and does not evaluate its
+// parameters.
+#define TRACE_COUNTER_INCREMENT(counter_name, val) \
+  do { \
+    kudu::Trace* _trace = Trace::CurrentTrace(); \
+    if (_trace) { \
+      _trace->metrics()->Increment(counter_name, val); \
+    } \
+  } while (0);
 
 namespace kudu {
 
+class JsonWriter;
 class ThreadSafeArena;
 struct TraceEntry;
 
@@ -101,12 +127,22 @@ class Trace : public RefCountedThreadSafe<Trace> {
 
   // Dump the trace buffer to the given output stream.
   //
-  // If 'include_time_deltas' is true, calculates and prints the difference between
-  // successive trace messages.
-  void Dump(std::ostream* out, bool include_time_deltas) const;
+  enum {
+    NO_FLAGS = 0,
+
+    // If set, calculate and print the difference between successive trace messages.
+    INCLUDE_TIME_DELTAS = 1 << 0,
+    // If set, include a 'Metrics' line showing any attached trace metrics.
+    INCLUDE_METRICS =     1 << 1,
+
+    INCLUDE_ALL = INCLUDE_TIME_DELTAS | INCLUDE_METRICS
+  };
+  void Dump(std::ostream* out, int flags) const;
 
   // Dump the trace buffer as a string.
-  std::string DumpToString(bool include_time_deltas) const;
+  std::string DumpToString(int flags = INCLUDE_ALL) const;
+
+  std::string MetricsAsJSON() const;
 
   // Attaches the given trace which will get appended at the end when Dumping.
   void AddChildTrace(Trace* child_trace);
@@ -121,6 +157,10 @@ class Trace : public RefCountedThreadSafe<Trace> {
   // 'call kudu::Trace::DumpCurrentTrace();'.
   static void DumpCurrentTrace();
 
+  TraceMetrics* metrics() {
+    return &metrics_;
+  }
+
  private:
   friend class ScopedAdoptTrace;
   friend class RefCountedThreadSafe<Trace>;
@@ -138,6 +178,8 @@ class Trace : public RefCountedThreadSafe<Trace> {
   // Add the entry to the linked list of entries.
   void AddEntry(TraceEntry* entry);
 
+  void MetricsToJSON(JsonWriter* jw) const;
+
   gscoped_ptr<ThreadSafeArena> arena_;
 
   // Lock protecting the entries linked list.
@@ -149,6 +191,8 @@ class Trace : public RefCountedThreadSafe<Trace> {
 
   std::vector<scoped_refptr<Trace> > child_traces_;
 
+  TraceMetrics metrics_;
+
   DISALLOW_COPY_AND_ASSIGN(Trace);
 };
 

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/6b6a697e/src/kudu/util/trace_metrics.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/trace_metrics.cc b/src/kudu/util/trace_metrics.cc
new file mode 100644
index 0000000..81c4df5
--- /dev/null
+++ b/src/kudu/util/trace_metrics.cc
@@ -0,0 +1,65 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/util/trace_metrics.h"
+
+#include <algorithm>
+#include <ctype.h>
+#include <glog/stl_logging.h>
+#include <map>
+#include <string>
+
+#include "kudu/util/debug/leakcheck_disabler.h"
+
+using std::string;
+
+namespace kudu {
+
+namespace {
+
+static simple_spinlock g_intern_map_lock;
+typedef std::map<string, const char*> InternMap;
+static InternMap* g_intern_map;
+
+} // anonymous namespace
+
+const char* TraceMetrics::InternName(const string& name) {
+  DCHECK(std::all_of(name.begin(), name.end(), isprint))
+      << "not printable: " << name;
+
+  debug::ScopedLeakCheckDisabler no_leakcheck;
+  lock_guard<simple_spinlock> l(&g_intern_map_lock);
+  if (g_intern_map == nullptr) {
+    g_intern_map = new InternMap();
+  }
+
+  InternMap::iterator it = g_intern_map->find(name);
+  if (it != g_intern_map->end()) {
+    return it->second;
+  }
+
+  const char* dup = strdup(name.c_str());
+  (*g_intern_map)[name] = dup;
+
+  // We don't expect this map to grow large.
+  DCHECK_LT(g_intern_map->size(), 100) <<
+      "Too many interned strings: " << *g_intern_map;
+
+  return dup;
+}
+
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/6b6a697e/src/kudu/util/trace_metrics.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/trace_metrics.h b/src/kudu/util/trace_metrics.h
new file mode 100644
index 0000000..322cfd8
--- /dev/null
+++ b/src/kudu/util/trace_metrics.h
@@ -0,0 +1,75 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#pragma once
+
+#include "kudu/gutil/macros.h"
+#include "kudu/util/locks.h"
+
+#include <map>
+#include <string>
+
+namespace kudu {
+
+// A simple map of constant string names to integer counters.
+//
+// Typically, the TRACE_COUNTER_INCREMENT(...) macro defined in
+// trace.h is used to increment a counter within this map.
+//
+// This currently is just a thin wrapper around a spinlocked map,
+// but if it becomes noticeable in the CPU profile, various optimizations
+// are plausible.
+class TraceMetrics {
+ public:
+  TraceMetrics() {}
+  ~TraceMetrics() {}
+
+  // Internalize the given string by duplicating it into a process-wide
+  // pool. If this string has already been interned, returns a pointer
+  // to a previous instance. Otherwise, copies it into the pool.
+  //
+  // The resulting strings are purposefully leaked, so this should only
+  // be used in cases where the number of unique strings that will be
+  // passed is relatively low (i.e. not user-specified).
+  //
+  // Because 'name' is exposed back to operators, it must be a printable
+  // ASCII string.
+  static const char* InternName(const std::string& name);
+
+  // Increment the given counter.
+  void Increment(const char* name, int64_t amount);
+
+  // Return a copy of the current counter map.
+  std::map<const char*, int64_t> Get() const;
+
+ private:
+  mutable simple_spinlock lock_;
+  std::map<const char*, int64_t> counters_;
+
+  DISALLOW_COPY_AND_ASSIGN(TraceMetrics);
+};
+
+inline void TraceMetrics::Increment(const char* name, int64_t amount) {
+  lock_guard<simple_spinlock> l(&lock_);
+  counters_[name] += amount;
+}
+
+inline std::map<const char*, int64_t> TraceMetrics::Get() const {
+  lock_guard<simple_spinlock> l(&lock_);
+  return counters_;
+}
+
+} // namespace kudu