You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/07/13 06:03:31 UTC

[19/51] [abbrv] impala git commit: IMPALA-7006: Add KRPC folders from kudu@334ecafd

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/hdr_histogram.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/hdr_histogram.cc b/be/src/kudu/util/hdr_histogram.cc
new file mode 100644
index 0000000..4907444
--- /dev/null
+++ b/be/src/kudu/util/hdr_histogram.cc
@@ -0,0 +1,501 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+// Portions of these classes were ported from Java to C++ from the sources
+// available at https://github.com/HdrHistogram/HdrHistogram .
+//
+//   The code in this repository code was Written by Gil Tene, Michael Barker,
+//   and Matt Warren, and released to the public domain, as explained at
+//   http://creativecommons.org/publicdomain/zero/1.0/
+#include "kudu/util/hdr_histogram.h"
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <ostream>
+#include <string>
+
+#include <glog/logging.h>
+
+#include "kudu/gutil/atomicops.h"
+#include "kudu/gutil/bits.h"
+#include "kudu/gutil/strings/substitute.h"
+#include "kudu/util/status.h"
+
+using base::subtle::Atomic64;
+using base::subtle::NoBarrier_AtomicIncrement;
+using base::subtle::NoBarrier_Store;
+using base::subtle::NoBarrier_Load;
+using base::subtle::NoBarrier_CompareAndSwap;
+using strings::Substitute;
+
+namespace kudu {
+
+HdrHistogram::HdrHistogram(uint64_t highest_trackable_value, int num_significant_digits)
+  : highest_trackable_value_(highest_trackable_value),
+    num_significant_digits_(num_significant_digits),
+    counts_array_length_(0),
+    bucket_count_(0),
+    sub_bucket_count_(0),
+    sub_bucket_half_count_magnitude_(0),
+    sub_bucket_half_count_(0),
+    sub_bucket_mask_(0),
+    total_count_(0),
+    total_sum_(0),
+    min_value_(std::numeric_limits<Atomic64>::max()),
+    max_value_(0),
+    counts_(nullptr) {
+  Init();
+}
+
+HdrHistogram::HdrHistogram(const HdrHistogram& other)
+  : highest_trackable_value_(other.highest_trackable_value_),
+    num_significant_digits_(other.num_significant_digits_),
+    counts_array_length_(0),
+    bucket_count_(0),
+    sub_bucket_count_(0),
+    sub_bucket_half_count_magnitude_(0),
+    sub_bucket_half_count_(0),
+    sub_bucket_mask_(0),
+    total_count_(0),
+    total_sum_(0),
+    min_value_(std::numeric_limits<Atomic64>::max()),
+    max_value_(0),
+    counts_(nullptr) {
+  Init();
+
+  // Not a consistent snapshot but we try to roughly keep it close.
+  // Copy the sum and min first.
+  NoBarrier_Store(&total_sum_, NoBarrier_Load(&other.total_sum_));
+  NoBarrier_Store(&min_value_, NoBarrier_Load(&other.min_value_));
+
+  uint64_t total_copied_count = 0;
+  // Copy the counts in order of ascending magnitude.
+  for (int i = 0; i < counts_array_length_; i++) {
+    uint64_t count = NoBarrier_Load(&other.counts_[i]);
+    NoBarrier_Store(&counts_[i], count);
+    total_copied_count += count;
+  }
+  // Copy the max observed value last.
+  NoBarrier_Store(&max_value_, NoBarrier_Load(&other.max_value_));
+  // We must ensure the total is consistent with the copied counts.
+  NoBarrier_Store(&total_count_, total_copied_count);
+}
+
+bool HdrHistogram::IsValidHighestTrackableValue(uint64_t highest_trackable_value) {
+  return highest_trackable_value >= kMinHighestTrackableValue;
+}
+
+bool HdrHistogram::IsValidNumSignificantDigits(int num_significant_digits) {
+  return num_significant_digits >= kMinValidNumSignificantDigits &&
+         num_significant_digits <= kMaxValidNumSignificantDigits;
+}
+
+void HdrHistogram::Init() {
+  // Verify parameter validity
+  CHECK(IsValidHighestTrackableValue(highest_trackable_value_)) <<
+      Substitute("highest_trackable_value must be >= $0", kMinHighestTrackableValue);
+  CHECK(IsValidNumSignificantDigits(num_significant_digits_)) <<
+      Substitute("num_significant_digits must be between $0 and $1",
+          kMinValidNumSignificantDigits, kMaxValidNumSignificantDigits);
+
+  uint32_t largest_value_with_single_unit_resolution =
+      2 * static_cast<uint32_t>(pow(10.0, num_significant_digits_));
+
+  // We need to maintain power-of-two sub_bucket_count_ (for clean direct
+  // indexing) that is large enough to provide unit resolution to at least
+  // largest_value_with_single_unit_resolution. So figure out
+  // largest_value_with_single_unit_resolution's nearest power-of-two
+  // (rounded up), and use that:
+
+  // The sub-buckets take care of the precision.
+  // Each sub-bucket is sized to have enough bits for the requested
+  // 10^precision accuracy.
+  int sub_bucket_count_magnitude =
+      Bits::Log2Ceiling(largest_value_with_single_unit_resolution);
+  sub_bucket_half_count_magnitude_ =
+      (sub_bucket_count_magnitude >= 1) ? sub_bucket_count_magnitude - 1 : 0;
+
+  // sub_bucket_count_ is approx. 10^num_sig_digits (as a power of 2)
+  sub_bucket_count_ = pow(2.0, sub_bucket_half_count_magnitude_ + 1);
+  sub_bucket_mask_ = sub_bucket_count_ - 1;
+  sub_bucket_half_count_ = sub_bucket_count_ / 2;
+
+  // The buckets take care of the magnitude.
+  // Determine exponent range needed to support the trackable value with no
+  // overflow:
+  uint64_t trackable_value = sub_bucket_count_ - 1;
+  int buckets_needed = 1;
+  while (trackable_value < highest_trackable_value_) {
+    trackable_value <<= 1;
+    buckets_needed++;
+  }
+  bucket_count_ = buckets_needed;
+
+  counts_array_length_ = (bucket_count_ + 1) * sub_bucket_half_count_;
+  counts_.reset(new Atomic64[counts_array_length_]());  // value-initialized
+}
+
+void HdrHistogram::Increment(int64_t value) {
+  IncrementBy(value, 1);
+}
+
+void HdrHistogram::IncrementBy(int64_t value, int64_t count) {
+  DCHECK_GE(value, 0);
+  DCHECK_GE(count, 0);
+
+  // Dissect the value into bucket and sub-bucket parts, and derive index into
+  // counts array:
+  int bucket_index = BucketIndex(value);
+  int sub_bucket_index = SubBucketIndex(value, bucket_index);
+  int counts_index = CountsArrayIndex(bucket_index, sub_bucket_index);
+
+  // Increment bucket, total, and sum.
+  NoBarrier_AtomicIncrement(&counts_[counts_index], count);
+  NoBarrier_AtomicIncrement(&total_count_, count);
+  NoBarrier_AtomicIncrement(&total_sum_, value * count);
+
+  // Update min, if needed.
+  {
+    Atomic64 min_val;
+    while (PREDICT_FALSE(value < (min_val = MinValue()))) {
+      Atomic64 old_val = NoBarrier_CompareAndSwap(&min_value_, min_val, value);
+      if (PREDICT_TRUE(old_val == min_val)) break; // CAS success.
+    }
+  }
+
+  // Update max, if needed.
+  {
+    Atomic64 max_val;
+    while (PREDICT_FALSE(value > (max_val = MaxValue()))) {
+      Atomic64 old_val = NoBarrier_CompareAndSwap(&max_value_, max_val, value);
+      if (PREDICT_TRUE(old_val == max_val)) break; // CAS success.
+    }
+  }
+}
+
+void HdrHistogram::IncrementWithExpectedInterval(int64_t value,
+                                                 int64_t expected_interval_between_samples) {
+  Increment(value);
+  if (expected_interval_between_samples <= 0) {
+    return;
+  }
+  for (int64_t missing_value = value - expected_interval_between_samples;
+      missing_value >= expected_interval_between_samples;
+      missing_value -= expected_interval_between_samples) {
+    Increment(missing_value);
+  }
+}
+
+////////////////////////////////////
+
+int HdrHistogram::BucketIndex(uint64_t value) const {
+  if (PREDICT_FALSE(value > highest_trackable_value_)) {
+    value = highest_trackable_value_;
+  }
+  // Here we are calculating the power-of-2 magnitude of the value with a
+  // correction for precision in the first bucket.
+  // Smallest power of 2 containing value.
+  int pow2ceiling = Bits::Log2Ceiling64(value | sub_bucket_mask_);
+  return pow2ceiling - (sub_bucket_half_count_magnitude_ + 1);
+}
+
+int HdrHistogram::SubBucketIndex(uint64_t value, int bucket_index) const {
+  if (PREDICT_FALSE(value > highest_trackable_value_)) {
+    value = highest_trackable_value_;
+  }
+  // We hack off the magnitude and are left with only the relevant precision
+  // portion, which gives us a direct index into the sub-bucket. TODO: Right??
+  return static_cast<int>(value >> bucket_index);
+}
+
+int HdrHistogram::CountsArrayIndex(int bucket_index, int sub_bucket_index) const {
+  DCHECK(sub_bucket_index < sub_bucket_count_);
+  DCHECK(bucket_index < bucket_count_);
+  DCHECK(bucket_index == 0 || (sub_bucket_index >= sub_bucket_half_count_));
+  // Calculate the index for the first entry in the bucket:
+  // (The following is the equivalent of ((bucket_index + 1) * sub_bucket_half_count_) ):
+  int bucket_base_index = (bucket_index + 1) << sub_bucket_half_count_magnitude_;
+  // Calculate the offset in the bucket:
+  int offset_in_bucket = sub_bucket_index - sub_bucket_half_count_;
+  return bucket_base_index + offset_in_bucket;
+}
+
+uint64_t HdrHistogram::CountAt(int bucket_index, int sub_bucket_index) const {
+  return counts_[CountsArrayIndex(bucket_index, sub_bucket_index)];
+}
+
+uint64_t HdrHistogram::CountInBucketForValue(uint64_t value) const {
+  int bucket_index = BucketIndex(value);
+  int sub_bucket_index = SubBucketIndex(value, bucket_index);
+  return CountAt(bucket_index, sub_bucket_index);
+}
+
+uint64_t HdrHistogram::ValueFromIndex(int bucket_index, int sub_bucket_index) {
+  return static_cast<uint64_t>(sub_bucket_index) << bucket_index;
+}
+
+////////////////////////////////////
+
+uint64_t HdrHistogram::SizeOfEquivalentValueRange(uint64_t value) const {
+  int bucket_index = BucketIndex(value);
+  int sub_bucket_index = SubBucketIndex(value, bucket_index);
+  uint64_t distance_to_next_value =
+    (1 << ((sub_bucket_index >= sub_bucket_count_) ? (bucket_index + 1) : bucket_index));
+  return distance_to_next_value;
+}
+
+uint64_t HdrHistogram::LowestEquivalentValue(uint64_t value) const {
+  int bucket_index = BucketIndex(value);
+  int sub_bucket_index = SubBucketIndex(value, bucket_index);
+  uint64_t this_value_base_level = ValueFromIndex(bucket_index, sub_bucket_index);
+  return this_value_base_level;
+}
+
+uint64_t HdrHistogram::HighestEquivalentValue(uint64_t value) const {
+  return NextNonEquivalentValue(value) - 1;
+}
+
+uint64_t HdrHistogram::MedianEquivalentValue(uint64_t value) const {
+  return (LowestEquivalentValue(value) + (SizeOfEquivalentValueRange(value) >> 1));
+}
+
+uint64_t HdrHistogram::NextNonEquivalentValue(uint64_t value) const {
+  return LowestEquivalentValue(value) + SizeOfEquivalentValueRange(value);
+}
+
+bool HdrHistogram::ValuesAreEquivalent(uint64_t value1, uint64_t value2) const {
+  return (LowestEquivalentValue(value1) == LowestEquivalentValue(value2));
+}
+
+uint64_t HdrHistogram::MinValue() const {
+  if (PREDICT_FALSE(TotalCount() == 0)) return 0;
+  return NoBarrier_Load(&min_value_);
+}
+
+uint64_t HdrHistogram::MaxValue() const {
+  if (PREDICT_FALSE(TotalCount() == 0)) return 0;
+  return NoBarrier_Load(&max_value_);
+}
+
+double HdrHistogram::MeanValue() const {
+  uint64_t count = TotalCount();
+  if (PREDICT_FALSE(count == 0)) return 0.0;
+  return static_cast<double>(TotalSum()) / count;
+}
+
+uint64_t HdrHistogram::ValueAtPercentile(double percentile) const {
+  uint64_t count = TotalCount();
+  if (PREDICT_FALSE(count == 0)) return 0;
+
+  double requested_percentile = std::min(percentile, 100.0); // Truncate down to 100%
+  uint64_t count_at_percentile = static_cast<uint64_t>(
+      ((requested_percentile / 100.0) * count) + 0.5); // NOLINT(misc-incorrect-roundings)
+  // Make sure we at least reach the first recorded entry
+  count_at_percentile = std::max(count_at_percentile, static_cast<uint64_t>(1));
+
+  uint64_t total_to_current_iJ = 0;
+  for (int i = 0; i < bucket_count_; i++) {
+    int j = (i == 0) ? 0 : (sub_bucket_count_ / 2);
+    for (; j < sub_bucket_count_; j++) {
+      total_to_current_iJ += CountAt(i, j);
+      if (total_to_current_iJ >= count_at_percentile) {
+        uint64_t valueAtIndex = ValueFromIndex(i, j);
+        return valueAtIndex;
+      }
+    }
+  }
+
+  LOG(DFATAL) << "Fell through while iterating, likely concurrent modification of histogram";
+  return 0;
+}
+
+///////////////////////////////////////////////////////////////////////
+// AbstractHistogramIterator
+///////////////////////////////////////////////////////////////////////
+
+AbstractHistogramIterator::AbstractHistogramIterator(const HdrHistogram* histogram)
+  : histogram_(CHECK_NOTNULL(histogram)),
+    cur_iter_val_(),
+    histogram_total_count_(histogram_->TotalCount()),
+    current_bucket_index_(0),
+    current_sub_bucket_index_(0),
+    current_value_at_index_(0),
+    next_bucket_index_(0),
+    next_sub_bucket_index_(1),
+    next_value_at_index_(1),
+    prev_value_iterated_to_(0),
+    total_count_to_prev_index_(0),
+    total_count_to_current_index_(0),
+    total_value_to_current_index_(0),
+    count_at_this_value_(0),
+    fresh_sub_bucket_(true) {
+}
+
+bool AbstractHistogramIterator::HasNext() const {
+  return total_count_to_current_index_ < histogram_total_count_;
+}
+
+Status AbstractHistogramIterator::Next(HistogramIterationValue* value) {
+  if (histogram_->TotalCount() != histogram_total_count_) {
+    return Status::IllegalState("Concurrently modified histogram while traversing it");
+  }
+
+  // Move through the sub buckets and buckets until we hit the next reporting level:
+  while (!ExhaustedSubBuckets()) {
+    count_at_this_value_ =
+        histogram_->CountAt(current_bucket_index_, current_sub_bucket_index_);
+    if (fresh_sub_bucket_) { // Don't add unless we've incremented since last bucket...
+      total_count_to_current_index_ += count_at_this_value_;
+      total_value_to_current_index_ +=
+        count_at_this_value_ * histogram_->MedianEquivalentValue(current_value_at_index_);
+      fresh_sub_bucket_ = false;
+    }
+    if (ReachedIterationLevel()) {
+      uint64_t value_iterated_to = ValueIteratedTo();
+
+      // Update iterator value.
+      cur_iter_val_.value_iterated_to = value_iterated_to;
+      cur_iter_val_.value_iterated_from = prev_value_iterated_to_;
+      cur_iter_val_.count_at_value_iterated_to = count_at_this_value_;
+      cur_iter_val_.count_added_in_this_iteration_step =
+          (total_count_to_current_index_ - total_count_to_prev_index_);
+      cur_iter_val_.total_count_to_this_value = total_count_to_current_index_;
+      cur_iter_val_.total_value_to_this_value = total_value_to_current_index_;
+      cur_iter_val_.percentile =
+          ((100.0 * total_count_to_current_index_) / histogram_total_count_);
+      cur_iter_val_.percentile_level_iterated_to = PercentileIteratedTo();
+
+      prev_value_iterated_to_ = value_iterated_to;
+      total_count_to_prev_index_ = total_count_to_current_index_;
+      // Move the next percentile reporting level forward.
+      IncrementIterationLevel();
+
+      *value = cur_iter_val_;
+      return Status::OK();
+    }
+    IncrementSubBucket();
+  }
+  return Status::IllegalState("Histogram array index out of bounds while traversing");
+}
+
+double AbstractHistogramIterator::PercentileIteratedTo() const {
+  return (100.0 * static_cast<double>(total_count_to_current_index_)) / histogram_total_count_;
+}
+
+double AbstractHistogramIterator::PercentileIteratedFrom() const {
+  return (100.0 * static_cast<double>(total_count_to_prev_index_)) / histogram_total_count_;
+}
+
+uint64_t AbstractHistogramIterator::ValueIteratedTo() const {
+  return histogram_->HighestEquivalentValue(current_value_at_index_);
+}
+
+bool AbstractHistogramIterator::ExhaustedSubBuckets() const {
+  return (current_bucket_index_ >= histogram_->bucket_count_);
+}
+
+void AbstractHistogramIterator::IncrementSubBucket() {
+  fresh_sub_bucket_ = true;
+  // Take on the next index:
+  current_bucket_index_ = next_bucket_index_;
+  current_sub_bucket_index_ = next_sub_bucket_index_;
+  current_value_at_index_ = next_value_at_index_;
+  // Figure out the next next index:
+  next_sub_bucket_index_++;
+  if (next_sub_bucket_index_ >= histogram_->sub_bucket_count_) {
+    next_sub_bucket_index_ = histogram_->sub_bucket_half_count_;
+    next_bucket_index_++;
+  }
+  next_value_at_index_ = HdrHistogram::ValueFromIndex(next_bucket_index_, next_sub_bucket_index_);
+}
+
+///////////////////////////////////////////////////////////////////////
+// RecordedValuesIterator
+///////////////////////////////////////////////////////////////////////
+
+RecordedValuesIterator::RecordedValuesIterator(const HdrHistogram* histogram)
+  : AbstractHistogramIterator(histogram),
+    visited_sub_bucket_index_(-1),
+    visited_bucket_index_(-1) {
+}
+
+void RecordedValuesIterator::IncrementIterationLevel() {
+  visited_sub_bucket_index_ = current_sub_bucket_index_;
+  visited_bucket_index_ = current_bucket_index_;
+}
+
+bool RecordedValuesIterator::ReachedIterationLevel() const {
+  uint64_t current_ij_count =
+      histogram_->CountAt(current_bucket_index_, current_sub_bucket_index_);
+  return current_ij_count != 0 &&
+      ((visited_sub_bucket_index_ != current_sub_bucket_index_) ||
+       (visited_bucket_index_ != current_bucket_index_));
+}
+
+///////////////////////////////////////////////////////////////////////
+// PercentileIterator
+///////////////////////////////////////////////////////////////////////
+
+PercentileIterator::PercentileIterator(const HdrHistogram* histogram,
+                                       int percentile_ticks_per_half_distance)
+  : AbstractHistogramIterator(histogram),
+    percentile_ticks_per_half_distance_(percentile_ticks_per_half_distance),
+    percentile_level_to_iterate_to_(0.0),
+    percentile_level_to_iterate_from_(0.0),
+    reached_last_recorded_value_(false) {
+}
+
+bool PercentileIterator::HasNext() const {
+  if (AbstractHistogramIterator::HasNext()) {
+    return true;
+  }
+  // We want one additional last step to 100%
+  if (!reached_last_recorded_value_ && (histogram_total_count_ > 0)) {
+    const_cast<PercentileIterator*>(this)->percentile_level_to_iterate_to_ = 100.0;
+    const_cast<PercentileIterator*>(this)->reached_last_recorded_value_ = true;
+    return true;
+  }
+  return false;
+}
+
+double PercentileIterator::PercentileIteratedTo() const {
+  return percentile_level_to_iterate_to_;
+}
+
+
+double PercentileIterator::PercentileIteratedFrom() const {
+  return percentile_level_to_iterate_from_;
+}
+
+void PercentileIterator::IncrementIterationLevel() {
+  percentile_level_to_iterate_from_ = percentile_level_to_iterate_to_;
+  // TODO: Can this expression be simplified?
+  uint64_t percentile_reporting_ticks = percentile_ticks_per_half_distance_ *
+    static_cast<uint64_t>(pow(2.0,
+          static_cast<int>(log(100.0 / (100.0 - (percentile_level_to_iterate_to_))) / log(2)) + 1));
+  percentile_level_to_iterate_to_ += 100.0 / percentile_reporting_ticks;
+}
+
+bool PercentileIterator::ReachedIterationLevel() const {
+  if (count_at_this_value_ == 0) return false;
+  double current_percentile =
+      (100.0 * static_cast<double>(total_count_to_current_index_)) / histogram_total_count_;
+  return (current_percentile >= percentile_level_to_iterate_to_);
+}
+
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/hdr_histogram.h
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/hdr_histogram.h b/be/src/kudu/util/hdr_histogram.h
new file mode 100644
index 0000000..14b5e95
--- /dev/null
+++ b/be/src/kudu/util/hdr_histogram.h
@@ -0,0 +1,351 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#ifndef KUDU_UTIL_HDRHISTOGRAM_H_
+#define KUDU_UTIL_HDRHISTOGRAM_H_
+
+// C++ (TR1) port of HdrHistogram.
+//
+// Portions of these classes were ported from Java to C++ from the sources
+// available at https://github.com/HdrHistogram/HdrHistogram .
+//
+//   The code in this repository code was Written by Gil Tene, Michael Barker,
+//   and Matt Warren, and released to the public domain, as explained at
+//   http://creativecommons.org/publicdomain/zero/1.0/
+// ---------------------------------------------------------------------------
+//
+// A High Dynamic Range (HDR) Histogram
+//
+// HdrHistogram supports the recording and analyzing sampled data value counts
+// across a configurable integer value range with configurable value precision
+// within the range. Value precision is expressed as the number of significant
+// digits in the value recording, and provides control over value quantization
+// behavior across the value range and the subsequent value resolution at any
+// given level.
+//
+// For example, a Histogram could be configured to track the counts of observed
+// integer values between 0 and 3,600,000,000 while maintaining a value
+// precision of 3 significant digits across that range. Value quantization
+// within the range will thus be no larger than 1/1,000th (or 0.1%) of any
+// value. This example Histogram could be used to track and analyze the counts
+// of observed response times ranging between 1 microsecond and 1 hour in
+// magnitude, while maintaining a value resolution of 1 microsecond up to 1
+// millisecond, a resolution of 1 millisecond (or better) up to one second, and
+// a resolution of 1 second (or better) up to 1,000 seconds. At it's maximum
+// tracked value (1 hour), it would still maintain a resolution of 3.6 seconds
+// (or better).
+
+#include <stdint.h>
+
+#include "kudu/gutil/atomicops.h"
+#include "kudu/gutil/gscoped_ptr.h"
+#include "kudu/gutil/macros.h"
+#include "kudu/gutil/port.h"
+
+namespace kudu {
+
+class Status;
+
+// This implementation allows you to specify a range and accuracy (significant
+// digits) to support in an instance of a histogram. The class takes care of
+// the rest. At this time, only uint64_t values are supported.
+//
+// An HdrHistogram consists of a set of buckets, which bucket the magnitude of
+// a value stored, and a set of sub-buckets, which implement the tunable
+// precision of the storage. So if you specify 3 significant digits of
+// precision, then you will get about 10^3 sub-buckets (as a power of 2) for
+// each level of magnitude. Magnitude buckets are tracked in powers of 2.
+//
+// This class is thread-safe.
+class HdrHistogram {
+ public:
+  // Specify the highest trackable value so that the class has a bound on the
+  // number of buckets, and # of significant digits (in decimal) so that the
+  // class can determine the granularity of those buckets.
+  HdrHistogram(uint64_t highest_trackable_value, int num_significant_digits);
+
+  // Copy-construct a (non-consistent) snapshot of other.
+  explicit HdrHistogram(const HdrHistogram& other);
+
+  // Validate your params before trying to construct the object.
+  static bool IsValidHighestTrackableValue(uint64_t highest_trackable_value);
+  static bool IsValidNumSignificantDigits(int num_significant_digits);
+
+  // Record new data.
+  void Increment(int64_t value);
+  void IncrementBy(int64_t value, int64_t count);
+
+  // Record new data, correcting for "coordinated omission".
+  //
+  // See https://groups.google.com/d/msg/mechanical-sympathy/icNZJejUHfE/BfDekfBEs_sJ
+  // for more details.
+  void IncrementWithExpectedInterval(int64_t value,
+                                     int64_t expected_interval_between_samples);
+
+  // Fetch configuration params.
+  uint64_t highest_trackable_value() const { return highest_trackable_value_; }
+  int num_significant_digits() const { return num_significant_digits_; }
+
+  // Get indexes into histogram based on value.
+  int BucketIndex(uint64_t value) const;
+  int SubBucketIndex(uint64_t value, int bucket_index) const;
+
+  // Count of all events recorded.
+  uint64_t TotalCount() const { return base::subtle::NoBarrier_Load(&total_count_); }
+
+  // Sum of all events recorded.
+  uint64_t TotalSum() const { return base::subtle::NoBarrier_Load(&total_sum_); }
+
+  // Return number of items at index.
+  uint64_t CountAt(int bucket_index, int sub_bucket_index) const;
+
+  // Return count of values in bucket with values equivalent to value.
+  uint64_t CountInBucketForValue(uint64_t) const;
+
+  // Return representative value based on index.
+  static uint64_t ValueFromIndex(int bucket_index, int sub_bucket_index);
+
+  // Get the size (in value units) of the range of values that are equivalent
+  // to the given value within the histogram's resolution. Where "equivalent"
+  // means that value samples recorded for any two equivalent values are
+  // counted in a common total count.
+  uint64_t SizeOfEquivalentValueRange(uint64_t value) const;
+
+  // Get the lowest value that is equivalent to the given value within the
+  // histogram's resolution. Where "equivalent" means that value samples
+  // recorded for any two equivalent values are counted in a common total
+  // count.
+  uint64_t LowestEquivalentValue(uint64_t value) const;
+
+  // Get the highest value that is equivalent to the given value within the
+  // histogram's resolution.
+  uint64_t HighestEquivalentValue(uint64_t value) const;
+
+  // Get a value that lies in the middle (rounded up) of the range of values
+  // equivalent the given value.
+  uint64_t MedianEquivalentValue(uint64_t value) const;
+
+  // Get the next value that is not equivalent to the given value within the
+  // histogram's resolution.
+  uint64_t NextNonEquivalentValue(uint64_t value) const;
+
+  // Determine if two values are equivalent with the histogram's resolution.
+  bool ValuesAreEquivalent(uint64_t value1, uint64_t value2) const;
+
+  // Get the exact minimum value (may lie outside the histogram).
+  uint64_t MinValue() const;
+
+  // Get the exact maximum value (may lie outside the histogram).
+  uint64_t MaxValue() const;
+
+  // Get the exact mean value of all recorded values in the histogram.
+  double MeanValue() const;
+
+  // Get the value at a given percentile.
+  // This is a percentile in percents, i.e. 99.99 percentile.
+  uint64_t ValueAtPercentile(double percentile) const;
+
+  // Get the percentile at a given value
+  // TODO: implement
+  // double PercentileAtOrBelowValue(uint64_t value) const;
+
+  // Get the count of recorded values within a range of value levels.
+  // (inclusive to within the histogram's resolution)
+  // TODO: implement
+  //uint64_t CountBetweenValues(uint64_t low_value, uint64_t high_value) const;
+
+ private:
+  friend class AbstractHistogramIterator;
+
+  static const uint64_t kMinHighestTrackableValue = 2;
+  static const int kMinValidNumSignificantDigits = 1;
+  static const int kMaxValidNumSignificantDigits = 5;
+
+  void Init();
+  int CountsArrayIndex(int bucket_index, int sub_bucket_index) const;
+
+  uint64_t highest_trackable_value_;
+  int num_significant_digits_;
+  int counts_array_length_;
+  int bucket_count_;
+  int sub_bucket_count_;
+
+  // "Hot" fields in the write path.
+  uint8_t sub_bucket_half_count_magnitude_;
+  int sub_bucket_half_count_;
+  uint32_t sub_bucket_mask_;
+
+  // Also hot.
+  base::subtle::Atomic64 total_count_;
+  base::subtle::Atomic64 total_sum_;
+  base::subtle::Atomic64 min_value_;
+  base::subtle::Atomic64 max_value_;
+  gscoped_array<base::subtle::Atomic64> counts_;
+
+  HdrHistogram& operator=(const HdrHistogram& other); // Disable assignment operator.
+};
+
+// Value returned from iterators.
+struct HistogramIterationValue {
+  HistogramIterationValue()
+    : value_iterated_to(0),
+      value_iterated_from(0),
+      count_at_value_iterated_to(0),
+      count_added_in_this_iteration_step(0),
+      total_count_to_this_value(0),
+      total_value_to_this_value(0),
+      percentile(0.0),
+      percentile_level_iterated_to(0.0) {
+  }
+
+  void Reset() {
+    value_iterated_to = 0;
+    value_iterated_from = 0;
+    count_at_value_iterated_to = 0;
+    count_added_in_this_iteration_step = 0;
+    total_count_to_this_value = 0;
+    total_value_to_this_value = 0;
+    percentile = 0.0;
+    percentile_level_iterated_to = 0.0;
+  }
+
+  uint64_t value_iterated_to;
+  uint64_t value_iterated_from;
+  uint64_t count_at_value_iterated_to;
+  uint64_t count_added_in_this_iteration_step;
+  uint64_t total_count_to_this_value;
+  uint64_t total_value_to_this_value;
+  double percentile;
+  double percentile_level_iterated_to;
+};
+
+// Base class for iterating through histogram values.
+//
+// The underlying histogram must not be modified or destroyed while this class
+// is iterating over it.
+//
+// This class is not thread-safe.
+class AbstractHistogramIterator {
+ public:
+  // Create iterator with new histogram.
+  // The histogram must not be mutated while the iterator is in use.
+  explicit AbstractHistogramIterator(const HdrHistogram* histogram);
+  virtual ~AbstractHistogramIterator() {
+  }
+
+  // Returns true if the iteration has more elements.
+  virtual bool HasNext() const;
+
+  // Returns the next element in the iteration.
+  Status Next(HistogramIterationValue* value);
+
+  virtual double PercentileIteratedTo() const;
+  virtual double PercentileIteratedFrom() const;
+  uint64_t ValueIteratedTo() const;
+
+ protected:
+  // Implementations must override these methods.
+  virtual void IncrementIterationLevel() = 0;
+  virtual bool ReachedIterationLevel() const = 0;
+
+  const HdrHistogram* histogram_;
+  HistogramIterationValue cur_iter_val_;
+
+  uint64_t histogram_total_count_;
+
+  int current_bucket_index_;
+  int current_sub_bucket_index_;
+  uint64_t current_value_at_index_;
+
+  int next_bucket_index_;
+  int next_sub_bucket_index_;
+  uint64_t next_value_at_index_;
+
+  uint64_t prev_value_iterated_to_;
+  uint64_t total_count_to_prev_index_;
+
+  uint64_t total_count_to_current_index_;
+  uint64_t total_value_to_current_index_;
+
+  uint64_t count_at_this_value_;
+
+ private:
+  bool ExhaustedSubBuckets() const;
+  void IncrementSubBucket();
+
+  bool fresh_sub_bucket_;
+
+  DISALLOW_COPY_AND_ASSIGN(AbstractHistogramIterator);
+};
+
+// Used for iterating through all recorded histogram values using the finest
+// granularity steps supported by the underlying representation. The iteration
+// steps through all non-zero recorded value counts, and terminates when all
+// recorded histogram values are exhausted.
+//
+// The underlying histogram must not be modified or destroyed while this class
+// is iterating over it.
+//
+// This class is not thread-safe.
+class RecordedValuesIterator : public AbstractHistogramIterator {
+ public:
+  explicit RecordedValuesIterator(const HdrHistogram* histogram);
+
+ protected:
+  virtual void IncrementIterationLevel() OVERRIDE;
+  virtual bool ReachedIterationLevel() const OVERRIDE;
+
+ private:
+  int visited_sub_bucket_index_;
+  int visited_bucket_index_;
+
+  DISALLOW_COPY_AND_ASSIGN(RecordedValuesIterator);
+};
+
+// Used for iterating through histogram values according to percentile levels.
+// The iteration is performed in steps that start at 0% and reduce their
+// distance to 100% according to the percentileTicksPerHalfDistance parameter,
+// ultimately reaching 100% when all recorded histogram values are exhausted.
+//
+// The underlying histogram must not be modified or destroyed while this class
+// is iterating over it.
+//
+// This class is not thread-safe.
+class PercentileIterator : public AbstractHistogramIterator {
+ public:
+  // TODO: Explain percentile_ticks_per_half_distance.
+  PercentileIterator(const HdrHistogram* histogram,
+                     int percentile_ticks_per_half_distance);
+  virtual bool HasNext() const OVERRIDE;
+  virtual double PercentileIteratedTo() const OVERRIDE;
+  virtual double PercentileIteratedFrom() const OVERRIDE;
+
+ protected:
+  virtual void IncrementIterationLevel() OVERRIDE;
+  virtual bool ReachedIterationLevel() const OVERRIDE;
+
+ private:
+  int percentile_ticks_per_half_distance_;
+  double percentile_level_to_iterate_to_;
+  double percentile_level_to_iterate_from_;
+  bool reached_last_recorded_value_;
+
+  DISALLOW_COPY_AND_ASSIGN(PercentileIterator);
+};
+
+} // namespace kudu
+
+#endif // KUDU_UTIL_HDRHISTOGRAM_H_

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/hexdump.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/hexdump.cc b/be/src/kudu/util/hexdump.cc
new file mode 100644
index 0000000..ddecd9c
--- /dev/null
+++ b/be/src/kudu/util/hexdump.cc
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/util/hexdump.h"
+
+#include <algorithm>
+#include <cctype>
+#include <cstdint>
+#include <string>
+
+#include <glog/logging.h>
+
+#include "kudu/gutil/stringprintf.h"
+#include "kudu/util/logging.h"
+#include "kudu/util/slice.h"
+
+namespace kudu {
+
+std::string HexDump(const Slice &slice) {
+  if (KUDU_SHOULD_REDACT()) {
+    return kRedactionMessage;
+  }
+
+  std::string output;
+  output.reserve(slice.size() * 5);
+
+  const uint8_t *p = slice.data();
+
+  int rem = slice.size();
+  while (rem > 0) {
+    const uint8_t *line_p = p;
+    int line_len = std::min(rem, 16);
+    int line_rem = line_len;
+    StringAppendF(&output, "%06lx: ", line_p - slice.data());
+
+    while (line_rem >= 2) {
+      StringAppendF(&output, "%02x%02x ",
+                    p[0] & 0xff, p[1] & 0xff);
+      p += 2;
+      line_rem -= 2;
+    }
+
+    if (line_rem == 1) {
+      StringAppendF(&output, "%02x   ",
+                    p[0] & 0xff);
+      p += 1;
+      line_rem -= 1;
+    }
+    DCHECK_EQ(line_rem, 0);
+
+    int padding = (16 - line_len) / 2;
+
+    for (int i = 0; i < padding; i++) {
+      output.append("     ");
+    }
+
+    for (int i = 0; i < line_len; i++) {
+      char c = line_p[i];
+      if (isprint(c)) {
+        output.push_back(c);
+      } else {
+        output.push_back('.');
+      }
+    }
+
+    output.push_back('\n');
+    rem -= line_len;
+  }
+  return output;
+}
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/hexdump.h
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/hexdump.h b/be/src/kudu/util/hexdump.h
new file mode 100644
index 0000000..eacfad2
--- /dev/null
+++ b/be/src/kudu/util/hexdump.h
@@ -0,0 +1,34 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#ifndef KUDU_UTIL_HEXDUMP_H
+#define KUDU_UTIL_HEXDUMP_H
+
+#include <string>
+
+namespace kudu {
+
+class Slice;
+
+// Generate an 'xxd'-style hexdump of the given slice.  This should only be used
+// for debugging, as the format is subject to change and it has not been
+// implemented for speed.
+//
+// The returned string will be redacted if redaction is enabled.
+std::string HexDump(const Slice &slice);
+
+} // namespace kudu
+#endif

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/high_water_mark.h
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/high_water_mark.h b/be/src/kudu/util/high_water_mark.h
new file mode 100644
index 0000000..dfc30e4
--- /dev/null
+++ b/be/src/kudu/util/high_water_mark.h
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#ifndef KUDU_UTIL_HIGH_WATER_MARK_H
+#define KUDU_UTIL_HIGH_WATER_MARK_H
+
+#include "kudu/gutil/macros.h"
+#include "kudu/util/atomic.h"
+
+namespace kudu {
+
+// Lock-free integer that keeps track of the highest value seen.
+// Similar to Impala's RuntimeProfile::HighWaterMarkCounter.
+// HighWaterMark::max_value() returns the highest value seen;
+// HighWaterMark::current_value() returns the current value.
+class HighWaterMark {
+ public:
+  explicit HighWaterMark(int64_t initial_value)
+    : current_value_(initial_value),
+      max_value_(initial_value) {
+  }
+
+  // Return the current value.
+  int64_t current_value() const {
+    return current_value_.Load(kMemOrderNoBarrier);
+  }
+
+  // Return the max value.
+  int64_t max_value() const {
+    return max_value_.Load(kMemOrderNoBarrier);
+  }
+
+  // If current value + 'delta' is <= 'max', increment current value
+  // by 'delta' and return true; return false otherwise.
+  bool TryIncrementBy(int64_t delta, int64_t max) {
+    while (true) {
+      int64_t old_val = current_value();
+      int64_t new_val = old_val + delta;
+      if (new_val > max) {
+        return false;
+      }
+      if (PREDICT_TRUE(current_value_.CompareAndSet(old_val,
+                                                    new_val,
+                                                    kMemOrderNoBarrier))) {
+        UpdateMax(new_val);
+        return true;
+      }
+    }
+  }
+
+  void IncrementBy(int64_t amount) {
+    UpdateMax(current_value_.IncrementBy(amount, kMemOrderNoBarrier));
+  }
+
+  void set_value(int64_t v) {
+    current_value_.Store(v, kMemOrderNoBarrier);
+    UpdateMax(v);
+  }
+
+ private:
+  void UpdateMax(int64_t value) {
+    max_value_.StoreMax(value, kMemOrderNoBarrier);
+  }
+
+  AtomicInt<int64_t> current_value_;
+  AtomicInt<int64_t> max_value_;
+};
+
+} // namespace kudu
+#endif /* KUDU_UTIL_HIGH_WATER_MARK_H */
+
+

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/histogram.proto
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/histogram.proto b/be/src/kudu/util/histogram.proto
new file mode 100644
index 0000000..e4526e7
--- /dev/null
+++ b/be/src/kudu/util/histogram.proto
@@ -0,0 +1,48 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+syntax = "proto2";
+package kudu;
+
+option java_package = "org.apache.kudu";
+
+// Captures the state of an Histogram.
+message HistogramSnapshotPB {
+  required string type = 1;
+  required string name = 2;
+  optional string description = 3;
+  required string unit = 4;
+  optional string label = 19;
+
+  required uint64 max_trackable_value = 5;
+  required int32 num_significant_digits = 6;
+  required uint64 total_count = 7;
+  optional uint64 total_sum = 18;
+  required uint64 min = 8;
+  required double mean = 9;
+  required uint64 percentile_75 = 10;
+  required uint64 percentile_95 = 11;
+  required uint64 percentile_99 = 12;
+  required uint64 percentile_99_9 = 13;
+  required uint64 percentile_99_99 = 14;
+  required uint64 max = 15;
+  repeated uint64 values = 16 [packed = true];
+  repeated uint64 counts = 17 [packed = true];
+}
+
+message HistogramSnapshotsListPB {
+  repeated HistogramSnapshotPB histograms = 1;
+}

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/init.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/init.cc b/be/src/kudu/util/init.cc
new file mode 100644
index 0000000..bd97d79
--- /dev/null
+++ b/be/src/kudu/util/init.cc
@@ -0,0 +1,89 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/util/init.h"
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <cstdlib>
+#include <string>
+
+#include <glog/logging.h>
+
+#include "kudu/gutil/cpu.h"
+#include "kudu/gutil/strings/substitute.h"
+#include "kudu/util/status.h"
+
+using std::string;
+
+namespace kudu {
+
+Status BadCPUStatus(const base::CPU& cpu, const char* instruction_set) {
+  return Status::NotSupported(strings::Substitute(
+      "The CPU on this system ($0) does not support the $1 instruction "
+      "set which is required for running Kudu. If you are running inside a VM, "
+      "you may need to enable SSE4.2 pass-through.",
+      cpu.cpu_brand(), instruction_set));
+}
+
+bool IsFdOpen(int fd) {
+  return fcntl(fd, F_GETFL) != -1;
+}
+
+// Checks that the standard file descriptors are open when the process
+// starts.
+//
+// If these descriptors aren't open, we can run into serious issues:
+// we later might open some other files which end up reusing the same
+// file descriptor numbers as stderr, and then some library like glog
+// may decide to write a log message to what it thinks is stderr. That
+// would then overwrite one of our important data files and cause
+// corruption!
+void CheckStandardFds() {
+  if (!IsFdOpen(STDIN_FILENO) ||
+      !IsFdOpen(STDOUT_FILENO) ||
+      !IsFdOpen(STDERR_FILENO)) {
+    // We can't use LOG(FATAL) here because glog isn't initialized yet, and even if it
+    // were, it would try to write to stderr, which might end up writing the log message
+    // into some unexpected place. This is a rare enough issue that people can deal with
+    // the core dump.
+    abort();
+  }
+}
+
+Status CheckCPUFlags() {
+  base::CPU cpu;
+  if (!cpu.has_sse42()) {
+    return BadCPUStatus(cpu, "SSE4.2");
+  }
+
+  if (!cpu.has_ssse3()) {
+    return BadCPUStatus(cpu, "SSSE3");
+  }
+
+  return Status::OK();
+}
+
+void InitKuduOrDie() {
+  CheckStandardFds();
+  CHECK_OK(CheckCPUFlags());
+  // NOTE: this function is called before flags are parsed.
+  // Do not add anything in here which is flag-dependent.
+}
+
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/init.h
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/init.h b/be/src/kudu/util/init.h
new file mode 100644
index 0000000..84e36e1
--- /dev/null
+++ b/be/src/kudu/util/init.h
@@ -0,0 +1,33 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#ifndef KUDU_UTIL_INIT_H
+#define KUDU_UTIL_INIT_H
+
+#include "kudu/util/status.h"
+
+namespace kudu {
+
+// Return a NotSupported Status if the current CPU does not support the CPU flags
+// required for Kudu.
+Status CheckCPUFlags();
+
+// Initialize Kudu, checking that the platform we are running on is supported, etc.
+// Issues a FATAL log message if we fail to init.
+void InitKuduOrDie();
+
+} // namespace kudu
+#endif /* KUDU_UTIL_INIT_H */

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/inline_slice-test.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/inline_slice-test.cc b/be/src/kudu/util/inline_slice-test.cc
new file mode 100644
index 0000000..60a0005
--- /dev/null
+++ b/be/src/kudu/util/inline_slice-test.cc
@@ -0,0 +1,88 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstddef>
+#include <cstdint>
+#include <string>
+
+#include <gtest/gtest.h>
+
+#include "kudu/gutil/gscoped_ptr.h"
+#include "kudu/util/inline_slice.h"
+#include "kudu/util/memory/arena.h"
+#include "kudu/util/slice.h"
+
+namespace kudu {
+
+template<size_t N>
+static void TestRoundTrip(InlineSlice<N> *slice,
+                          Arena *arena,
+                          size_t test_size) {
+  gscoped_ptr<uint8_t[]> buf(new uint8_t[test_size]);
+  for (int i = 0; i < test_size; i++) {
+    buf[i] = i & 0xff;
+  }
+
+  Slice test_input(buf.get(), test_size);
+
+  slice->set(test_input, arena);
+  Slice ret = slice->as_slice();
+  ASSERT_TRUE(ret == test_input)
+    << "test_size  =" << test_size << "\n"
+    << "ret        = " << ret.ToDebugString() << "\n"
+    << "test_input = " << test_input.ToDebugString();
+
+  // If the data is small enough to fit inline, then
+  // the returned slice should point directly into the
+  // InlineSlice object.
+  if (test_size < N) {
+    ASSERT_EQ(reinterpret_cast<const uint8_t *>(slice) + 1,
+              ret.data());
+  }
+}
+
+// Sweep a variety of inputs for a given size of inline
+// data
+template<size_t N>
+static void DoTest() {
+  Arena arena(1024);
+
+  // Test a range of inputs both growing and shrinking
+  InlineSlice<N> my_slice;
+  ASSERT_EQ(N, sizeof(my_slice));
+
+  for (size_t to_test = 0; to_test < 1000; to_test++) {
+    TestRoundTrip(&my_slice, &arena, to_test);
+  }
+  for (size_t to_test = 1000; to_test > 0; to_test--) {
+    TestRoundTrip(&my_slice, &arena, to_test);
+  }
+}
+
+TEST(TestInlineSlice, Test8ByteInline) {
+  DoTest<8>();
+}
+
+TEST(TestInlineSlice, Test12ByteInline) {
+  DoTest<12>();
+}
+
+TEST(TestInlineSlice, Test16ByteInline) {
+  DoTest<16>();
+}
+
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/inline_slice.h
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/inline_slice.h b/be/src/kudu/util/inline_slice.h
new file mode 100644
index 0000000..248f5b1
--- /dev/null
+++ b/be/src/kudu/util/inline_slice.h
@@ -0,0 +1,181 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#ifndef KUDU_UTIL_INLINE_SLICE_H
+#define KUDU_UTIL_INLINE_SLICE_H
+
+#include "kudu/gutil/atomicops.h"
+#include "kudu/gutil/casts.h"
+#include "kudu/util/memory/arena.h"
+
+namespace kudu {
+
+#if __BYTE_ORDER != __LITTLE_ENDIAN
+#error This needs to be ported for big endian
+#endif
+
+// Class which represents short strings inline, and stores longer ones
+// by instead storing a pointer.
+//
+// Internal format:
+// The buffer must be at least as large as a pointer (eg 8 bytes for 64-bit).
+// Let ptr = bit-casting the first 8 bytes as a pointer:
+// If buf_[0] < 0xff:
+//   buf_[0] == length of stored data
+//   buf_[1..1 + buf_[0]] == inline data
+// If buf_[0] == 0xff:
+//   buf_[1..sizeof(uint8_t *)] == pointer to indirect data, minus the MSB.
+//   buf_[sizeof(uint8_t *)..] = unused
+//     TODO: we could store a prefix of the indirect data in this unused space
+//     in the future, which might be able to short-circuit some comparisons
+//
+// The indirect data which is pointed to is stored as a 4 byte length followed by
+// the actual data.
+//
+// This class relies on the fact that the most significant bit of any x86 pointer is
+// 0 (i.e pointers only use the bottom 48 bits)
+//
+// If ATOMIC is true, then this class has the semantics that readers will never see
+// invalid pointers, even in the case of concurrent access. However, they _may_ see
+// invalid *data*. That is to say, calling 'as_slice()' will always return a slice
+// which points to a valid memory region -- the memory region may contain garbage
+// but will not cause a segfault on access.
+//
+// These ATOMIC semantics may seem too loose to be useful, but can be used in
+// optimistic concurrency control schemes -- so long as accessing the slice doesn't
+// produce a segfault, it's OK to read bad data on a race because the higher-level
+// concurrency control will cause a retry.
+template<size_t STORAGE_SIZE, bool ATOMIC = false>
+class InlineSlice {
+ private:
+  enum {
+    kPointerByteWidth = sizeof(uintptr_t),
+    kPointerBitWidth = kPointerByteWidth * 8,
+    kMaxInlineData = STORAGE_SIZE - 1
+  };
+
+  static_assert(STORAGE_SIZE >= kPointerByteWidth,
+                "InlineSlice storage size must be greater than the width of a pointer");
+  static_assert(STORAGE_SIZE <= 256,
+                "InlineSlice storage size must be less than 256 bytes");
+ public:
+  InlineSlice() {
+  }
+
+  inline const Slice as_slice() const ATTRIBUTE_ALWAYS_INLINE {
+    DiscriminatedPointer dptr = LoadValue();
+
+    if (dptr.is_indirect()) {
+      const uint8_t *indir_data = reinterpret_cast<const uint8_t *>(dptr.pointer);
+      uint32_t len = *reinterpret_cast<const uint32_t *>(indir_data);
+      indir_data += sizeof(uint32_t);
+      return Slice(indir_data, static_cast<size_t>(len));
+    }
+    uint8_t len = dptr.discriminator;
+    DCHECK_LE(len, STORAGE_SIZE - 1);
+    return Slice(&buf_[1], len);
+  }
+
+  template<class ArenaType>
+  void set(const Slice &src, ArenaType *alloc_arena) {
+    set(src.data(), src.size(), alloc_arena);
+  }
+
+  template<class ArenaType>
+  void set(const uint8_t *src, size_t len,
+           ArenaType *alloc_arena) {
+    if (len <= kMaxInlineData) {
+      if (ATOMIC) {
+        // If atomic, we need to make sure that we store the discriminator
+        // before we copy in any data. Otherwise the data would overwrite
+        // part of a pointer and a reader might see an invalid address.
+        DiscriminatedPointer dptr;
+        dptr.discriminator = len;
+        dptr.pointer = 0; // will be overwritten
+        // "Acquire" ensures that the later memcpy doesn't reorder above the
+        // set of the discriminator bit.
+        base::subtle::Acquire_Store(reinterpret_cast<volatile AtomicWord *>(buf_),
+                                    bit_cast<uintptr_t>(dptr));
+      } else {
+        buf_[0] = len;
+      }
+      memcpy(&buf_[1], src, len);
+
+    } else {
+      // TODO: if already indirect and the current storage has enough space, just reuse that.
+
+      // Set up the pointed-to data before setting a pointer to it. This ensures that readers
+      // never see a pointer to an invalid region (i.e one without a proper length header).
+      void *in_arena = CHECK_NOTNULL(alloc_arena->AllocateBytes(len + sizeof(uint32_t)));
+      *reinterpret_cast<uint32_t *>(in_arena) = len;
+      memcpy(reinterpret_cast<uint8_t *>(in_arena) + sizeof(uint32_t), src, len);
+      set_ptr(in_arena);
+    }
+  }
+
+ private:
+  struct DiscriminatedPointer {
+    uint8_t discriminator : 8;
+    uintptr_t pointer : 54;
+
+    bool is_indirect() const {
+      return discriminator == 0xff;
+    }
+  };
+
+  DiscriminatedPointer LoadValue() const {
+    if (ATOMIC) {
+      // Load with "Acquire" semantics -- if we load a pointer, this ensures
+      // that we also see the pointed-to data.
+      uintptr_t ptr_val = base::subtle::Acquire_Load(
+        reinterpret_cast<volatile const AtomicWord *>(buf_));
+      return bit_cast<DiscriminatedPointer>(ptr_val);
+    } else {
+      DiscriminatedPointer ret;
+      memcpy(&ret, buf_, sizeof(ret));
+      return ret;
+    }
+  }
+
+  // Set the internal storage to be an indirect pointer to the given
+  // address.
+  void set_ptr(void *ptr) {
+    uintptr_t ptr_int = reinterpret_cast<uintptr_t>(ptr);
+    DCHECK_EQ(ptr_int >> (kPointerBitWidth - 8), 0) <<
+      "bad pointer (should have 0x00 MSB): " << ptr;
+
+    DiscriminatedPointer dptr;
+    dptr.discriminator = 0xff;
+    dptr.pointer = ptr_int;
+
+    if (ATOMIC) {
+      // Store with "Release" semantics -- this ensures that the pointed-to data
+      // is visible to any readers who see this pointer.
+      uintptr_t to_store = bit_cast<uintptr_t>(dptr);
+      base::subtle::Release_Store(reinterpret_cast<volatile AtomicWord *>(buf_),
+                                  to_store);
+    } else {
+      memcpy(&buf_[0], &dptr, sizeof(dptr));
+    }
+  }
+
+  uint8_t buf_[STORAGE_SIZE];
+
+} PACKED;
+
+} // namespace kudu
+
+#endif

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/int128-test.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/int128-test.cc b/be/src/kudu/util/int128-test.cc
new file mode 100644
index 0000000..cc1e174
--- /dev/null
+++ b/be/src/kudu/util/int128-test.cc
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstddef>
+#include <cstdint>
+#include <iosfwd>
+#include <string>
+
+#include <gtest/gtest.h>
+
+#include "kudu/gutil/macros.h"
+#include "kudu/util/int128.h"
+#include "kudu/util/int128_util.h"
+
+using std::string;
+
+namespace kudu {
+
+TEST(TestInt128, TestOstreamSigned) {
+  int128_t INTEGERS[] = {0, -1, 1, -1234567890,
+                         INT64_MIN, UINT64_MAX,
+                         INT128_MIN,
+                         INT128_MAX};
+  std::string STRINGS[] = {"0", "-1", "1", "-1234567890",
+                           "-9223372036854775808", "18446744073709551615",
+                           "-170141183460469231731687303715884105728",
+                           "170141183460469231731687303715884105727"};
+  for (size_t i = 0; i < arraysize(INTEGERS); i++) {
+    std::ostringstream ss;
+    ss << INTEGERS[i];
+    ASSERT_EQ(STRINGS[i], ss.str());
+  }
+}
+
+TEST(TestInt128, TestOstreamUnsigned) {
+  uint128_t INTEGERS[] = {0, 1, 1234567890,
+                          UINT128_MIN, UINT128_MAX};
+  string STRINGS[] = {"0", "1", "1234567890",
+                      "0", "340282366920938463463374607431768211455"};
+  for (size_t i = 0; i < arraysize(INTEGERS); i++) {
+    std::ostringstream ss;
+    ss << INTEGERS[i];
+    ASSERT_EQ(STRINGS[i], ss.str());
+  }
+}
+
+TEST(TestInt128, TestCasting) {
+  uint128_t mathToMax = (static_cast<uint128_t>(INT128_MAX) * 2) + 1;
+  ASSERT_EQ(UINT128_MAX, mathToMax);
+
+  uint128_t castToMax = static_cast<uint128_t>(-1);
+  ASSERT_EQ(UINT128_MAX, castToMax);
+}
+
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/int128.h
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/int128.h b/be/src/kudu/util/int128.h
new file mode 100644
index 0000000..ac35d08
--- /dev/null
+++ b/be/src/kudu/util/int128.h
@@ -0,0 +1,46 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// This file is the central location for defining the int128 type
+// used by Kudu. Though this file is small it ensures flexibility
+// as choices and standards around int128 change.
+#pragma once
+
+// __int128 is not supported before gcc 4.6
+#if defined(__clang__) || \
+  (defined(__GNUC__) && \
+  (__GNUC__ * 10000 + __GNUC_MINOR__ * 100) >= 40600)
+#define KUDU_INT128_SUPPORTED 1
+#else
+#define KUDU_INT128_SUPPORTED 0
+#endif
+
+#if KUDU_INT128_SUPPORTED
+namespace kudu {
+
+typedef unsigned __int128 uint128_t;
+typedef signed __int128 int128_t;
+
+// Note: We don't use numeric_limits because it can give incorrect
+// values for __int128 and unsigned __int128.
+static const uint128_t UINT128_MIN = (uint128_t) 0;
+static const uint128_t UINT128_MAX = ((uint128_t) -1);
+static const int128_t INT128_MAX = ((int128_t)(UINT128_MAX >> 1));
+static const int128_t INT128_MIN = (-INT128_MAX - 1);
+
+} // namespace kudu
+#endif

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/int128_util.h
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/int128_util.h b/be/src/kudu/util/int128_util.h
new file mode 100644
index 0000000..2d01de7
--- /dev/null
+++ b/be/src/kudu/util/int128_util.h
@@ -0,0 +1,39 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/util/int128.h"
+
+#include <iostream>
+#include <string>
+
+#include "kudu/gutil/strings/numbers.h"
+
+namespace std {
+
+// Support the << operator on int128_t and uint128_t types.
+//
+inline std::ostream& operator<<(std::ostream& os, const __int128& val) {
+  os << SimpleItoa(val);
+  return os;
+}
+inline std::ostream& operator<<(std::ostream& os, const unsigned __int128& val) {
+  os << SimpleItoa(val);
+  return os;
+}
+
+} // namespace std
+

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/interval_tree-inl.h
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/interval_tree-inl.h b/be/src/kudu/util/interval_tree-inl.h
new file mode 100644
index 0000000..7637317
--- /dev/null
+++ b/be/src/kudu/util/interval_tree-inl.h
@@ -0,0 +1,444 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#ifndef KUDU_UTIL_INTERVAL_TREE_INL_H
+#define KUDU_UTIL_INTERVAL_TREE_INL_H
+
+#include <algorithm>
+#include <vector>
+
+#include "kudu/util/interval_tree.h"
+
+namespace kudu {
+
+template<class Traits>
+IntervalTree<Traits>::IntervalTree(const IntervalVector &intervals)
+  : root_(NULL) {
+  if (!intervals.empty()) {
+    root_ = CreateNode(intervals);
+  }
+}
+
+template<class Traits>
+IntervalTree<Traits>::~IntervalTree() {
+  delete root_;
+}
+
+template<class Traits>
+template<class QueryPointType>
+void IntervalTree<Traits>::FindContainingPoint(const QueryPointType &query,
+                                               IntervalVector *results) const {
+  if (root_) {
+    root_->FindContainingPoint(query, results);
+  }
+}
+
+template<class Traits>
+template<class Callback, class QueryContainer>
+void IntervalTree<Traits>::ForEachIntervalContainingPoints(
+    const QueryContainer& queries,
+    const Callback& cb) const {
+  if (root_) {
+    root_->ForEachIntervalContainingPoints(queries.begin(), queries.end(), cb);
+  }
+}
+
+
+template<class Traits>
+void IntervalTree<Traits>::FindIntersectingInterval(const interval_type &query,
+                                                    IntervalVector *results) const {
+  if (root_) {
+    root_->FindIntersectingInterval(query, results);
+  }
+}
+
+template<class Traits>
+static bool LessThan(const typename Traits::point_type &a,
+                     const typename Traits::point_type &b) {
+  return Traits::compare(a, b) < 0;
+}
+
+// Select a split point which attempts to evenly divide 'in' into three groups:
+//  (a) those that are fully left of the split point
+//  (b) those that overlap the split point.
+//  (c) those that are fully right of the split point
+// These three groups are stored in the output parameters '*left', '*overlapping',
+// and '*right', respectively. The selected split point is stored in *split_point.
+//
+// For example, the input interval set:
+//
+//   |------1-------|         |-----2-----|
+//       |--3--|    |---4--|    |----5----|
+//                     |
+// Resulting split:    | Partition point
+//                     |
+//
+// *left: intervals 1 and 3
+// *overlapping: interval 4
+// *right: intervals 2 and 5
+template<class Traits>
+void IntervalTree<Traits>::Partition(const IntervalVector &in,
+                                     point_type *split_point,
+                                     IntervalVector *left,
+                                     IntervalVector *overlapping,
+                                     IntervalVector *right) {
+  CHECK(!in.empty());
+
+  // Pick a split point which is the median of all of the interval boundaries.
+  std::vector<point_type> endpoints;
+  endpoints.reserve(in.size() * 2);
+  for (const interval_type &interval : in) {
+    endpoints.push_back(Traits::get_left(interval));
+    endpoints.push_back(Traits::get_right(interval));
+  }
+  std::sort(endpoints.begin(), endpoints.end(), LessThan<Traits>);
+  *split_point = endpoints[endpoints.size() / 2];
+
+  // Partition into the groups based on the determined split point.
+  for (const interval_type &interval : in) {
+    if (Traits::compare(Traits::get_right(interval), *split_point) < 0) {
+      //                 | split point
+      // |------------|  |
+      //    interval
+      left->push_back(interval);
+    } else if (Traits::compare(Traits::get_left(interval), *split_point) > 0) {
+      //                 | split point
+      //                 |    |------------|
+      //                         interval
+      right->push_back(interval);
+    } else {
+      //                 | split point
+      //                 |
+      //          |------------|
+      //             interval
+      overlapping->push_back(interval);
+    }
+  }
+}
+
+template<class Traits>
+typename IntervalTree<Traits>::node_type *IntervalTree<Traits>::CreateNode(
+  const IntervalVector &intervals) {
+  IntervalVector left, right, overlap;
+  point_type split_point;
+
+  // First partition the input intervals and select a split point
+  Partition(intervals, &split_point, &left, &overlap, &right);
+
+  // Recursively subdivide the intervals which are fully left or fully
+  // right of the split point into subtree nodes.
+  node_type *left_node = !left.empty() ? CreateNode(left) : NULL;
+  node_type *right_node = !right.empty() ? CreateNode(right) : NULL;
+
+  return new node_type(split_point, left_node, overlap, right_node);
+}
+
+namespace interval_tree_internal {
+
+// Node in the interval tree.
+template<typename Traits>
+class ITNode {
+ private:
+  // Import types.
+  typedef std::vector<typename Traits::interval_type> IntervalVector;
+  typedef typename Traits::interval_type interval_type;
+  typedef typename Traits::point_type point_type;
+
+ public:
+  ITNode(point_type split_point,
+         ITNode<Traits> *left,
+         const IntervalVector &overlap,
+         ITNode<Traits> *right);
+  ~ITNode();
+
+  // See IntervalTree::FindContainingPoint(...)
+  template<class QueryPointType>
+  void FindContainingPoint(const QueryPointType &query,
+                           IntervalVector *results) const;
+
+  // See IntervalTree::ForEachIntervalContainingPoints().
+  // We use iterators here since as recursion progresses down the tree, we
+  // process sub-sequences of the original set of query points.
+  template<class Callback, class ItType>
+  void ForEachIntervalContainingPoints(ItType begin_queries,
+                                       ItType end_queries,
+                                       const Callback& cb) const;
+
+  // See IntervalTree::FindIntersectingInterval(...)
+  void FindIntersectingInterval(const interval_type &query,
+                                IntervalVector *results) const;
+
+ private:
+  // Comparators for sorting lists of intervals.
+  static bool SortByAscLeft(const interval_type &a, const interval_type &b);
+  static bool SortByDescRight(const interval_type &a, const interval_type &b);
+
+  // Partition point of this node.
+  point_type split_point_;
+
+  // Those nodes that overlap with split_point_, in ascending order by their left side.
+  IntervalVector overlapping_by_asc_left_;
+
+  // Those nodes that overlap with split_point_, in descending order by their right side.
+  IntervalVector overlapping_by_desc_right_;
+
+  // Tree node for intervals fully left of split_point_, or NULL.
+  ITNode *left_;
+
+  // Tree node for intervals fully right of split_point_, or NULL.
+  ITNode *right_;
+
+  DISALLOW_COPY_AND_ASSIGN(ITNode);
+};
+
+template<class Traits>
+bool ITNode<Traits>::SortByAscLeft(const interval_type &a, const interval_type &b) {
+  return Traits::compare(Traits::get_left(a), Traits::get_left(b)) < 0;
+}
+
+template<class Traits>
+bool ITNode<Traits>::SortByDescRight(const interval_type &a, const interval_type &b) {
+  return Traits::compare(Traits::get_right(a), Traits::get_right(b)) > 0;
+}
+
+template <class Traits>
+ITNode<Traits>::ITNode(typename Traits::point_type split_point,
+                       ITNode<Traits> *left, const IntervalVector &overlap,
+                       ITNode<Traits> *right)
+    : split_point_(std::move(split_point)), left_(left), right_(right) {
+  // Store two copies of the set of intervals which overlap the split point:
+  // 1) Sorted by ascending left boundary
+  overlapping_by_asc_left_.assign(overlap.begin(), overlap.end());
+  std::sort(overlapping_by_asc_left_.begin(), overlapping_by_asc_left_.end(), SortByAscLeft);
+  // 2) Sorted by descending right boundary
+  overlapping_by_desc_right_.assign(overlap.begin(), overlap.end());
+  std::sort(overlapping_by_desc_right_.begin(), overlapping_by_desc_right_.end(), SortByDescRight);
+}
+
+template<class Traits>
+ITNode<Traits>::~ITNode() {
+  if (left_) delete left_;
+  if (right_) delete right_;
+}
+
+template<class Traits>
+template<class Callback, class ItType>
+void ITNode<Traits>::ForEachIntervalContainingPoints(ItType begin_queries,
+                                                     ItType end_queries,
+                                                     const Callback& cb) const {
+  if (begin_queries == end_queries) return;
+
+  typedef decltype(*begin_queries) QueryPointType;
+  const auto& partitioner = [&](const QueryPointType& query_point) {
+    return Traits::compare(query_point, split_point_) < 0;
+  };
+
+  // Partition the query points into those less than the split_point_ and those greater
+  // than or equal to the split_point_. Because the input queries are already sorted, we
+  // can use 'std::partition_point' instead of 'std::partition'.
+  //
+  // The resulting 'partition_point' is the first query point in the second group.
+  //
+  // Complexity: O(log(number of query points))
+  DCHECK(std::is_partitioned(begin_queries, end_queries, partitioner));
+  auto partition_point = std::partition_point(begin_queries, end_queries, partitioner);
+
+  // Recurse left: any query points left of the split point may intersect
+  // with non-overlapping intervals fully-left of our split point.
+  if (left_ != NULL) {
+    left_->ForEachIntervalContainingPoints(begin_queries, partition_point, cb);
+  }
+
+  // Handle the query points < split_point
+  //
+  //      split_point_
+  //         |
+  //   [------]         \
+  //     [-------]       | overlapping_by_asc_left_
+  //       [--------]   /
+  // Q   Q      Q
+  // ^   ^      \___ not handled (right of split_point_)
+  // |   |
+  // \___\___ these points will be handled here
+  //
+
+  // Lower bound of query points still relevant.
+  auto rem_queries = begin_queries;
+  for (const interval_type &interval : overlapping_by_asc_left_) {
+    const auto& interval_left = Traits::get_left(interval);
+    // Find those query points which are right of the left side of the interval.
+    // 'first_match' here is the first query point >= interval_left.
+    // Complexity: O(log(num_queries))
+    //
+    // TODO(todd): The non-batched implementation is O(log(num_intervals) * num_queries)
+    // whereas this loop ends up O(num_intervals * log(num_queries)). So, for
+    // small numbers of queries this is not the fastest way to structure these loops.
+    auto first_match = std::partition_point(
+        rem_queries, partition_point,
+        [&](const QueryPointType& query_point) {
+          return Traits::compare(query_point, interval_left) < 0;
+        });
+    for (auto it = first_match; it != partition_point; ++it) {
+      cb(*it, interval);
+    }
+    // Since the intervals are sorted in ascending-left order, we can start
+    // the search for the next interval at the first match in this interval.
+    // (any query point which was left of the current interval will also be left
+    // of all future intervals).
+    rem_queries = std::move(first_match);
+  }
+
+  // Handle the query points >= split_point
+  //
+  //    split_point_
+  //       |
+  //     [--------]   \
+  //   [-------]       | overlapping_by_desc_right_
+  // [------]         /
+  //   Q   Q      Q
+  //   |    \______\___ these points will be handled here
+  //   |
+  //   \___ not handled (left of split_point_)
+
+  // Upper bound of query points still relevant.
+  rem_queries = end_queries;
+  for (const interval_type &interval : overlapping_by_desc_right_) {
+    const auto& interval_right = Traits::get_right(interval);
+    // Find the first query point which is > the right side of the interval.
+    auto first_non_match = std::partition_point(
+        partition_point, rem_queries,
+        [&](const QueryPointType& query_point) {
+          return Traits::compare(query_point, interval_right) <= 0;
+          });
+    for (auto it = partition_point; it != first_non_match; ++it) {
+      cb(*it, interval);
+    }
+    // Same logic as above: if a query point was fully right of 'interval',
+    // then it will be fully right of all following intervals because they are
+    // sorted by descending-right.
+    rem_queries = std::move(first_non_match);
+  }
+
+  if (right_ != NULL) {
+    while (partition_point != end_queries &&
+           Traits::compare(*partition_point, split_point_) == 0) {
+      ++partition_point;
+    }
+    right_->ForEachIntervalContainingPoints(partition_point, end_queries, cb);
+  }
+}
+
+template<class Traits>
+template<class QueryPointType>
+void ITNode<Traits>::FindContainingPoint(const QueryPointType &query,
+                                         IntervalVector *results) const {
+  int cmp = Traits::compare(query, split_point_);
+  if (cmp < 0) {
+    // None of the intervals in right_ may intersect this.
+    if (left_ != NULL) {
+      left_->FindContainingPoint(query, results);
+    }
+
+    // Any intervals which start before the query point and overlap the split point
+    // must therefore contain the query point.
+    auto p = std::partition_point(
+        overlapping_by_asc_left_.cbegin(), overlapping_by_asc_left_.cend(),
+        [&](const interval_type& interval) {
+          return Traits::compare(Traits::get_left(interval), query) <= 0;
+        });
+    results->insert(results->end(), overlapping_by_asc_left_.cbegin(), p);
+  } else if (cmp > 0) {
+    // None of the intervals in left_ may intersect this.
+    if (right_ != NULL) {
+      right_->FindContainingPoint(query, results);
+    }
+
+    // Any intervals which end after the query point and overlap the split point
+    // must therefore contain the query point.
+    auto p = std::partition_point(
+        overlapping_by_desc_right_.cbegin(), overlapping_by_desc_right_.cend(),
+        [&](const interval_type& interval) {
+          return Traits::compare(Traits::get_right(interval), query) >= 0;
+        });
+    results->insert(results->end(), overlapping_by_desc_right_.cbegin(), p);
+  } else {
+    DCHECK_EQ(cmp, 0);
+    // The query is exactly our split point -- in this case we've already got
+    // the computed list of overlapping intervals.
+    results->insert(results->end(), overlapping_by_asc_left_.begin(),
+                    overlapping_by_asc_left_.end());
+  }
+}
+
+template<class Traits>
+void ITNode<Traits>::FindIntersectingInterval(const interval_type &query,
+                                              IntervalVector *results) const {
+  if (Traits::compare(Traits::get_right(query), split_point_) < 0) {
+    // The interval is fully left of the split point. So, it may not overlap
+    // with any in 'right_'
+    if (left_ != NULL) {
+      left_->FindIntersectingInterval(query, results);
+    }
+
+    // Any intervals whose left edge is <= the query interval's right edge
+    // intersect the query interval. 'std::partition_point' returns the first
+    // such interval which does not meet that criterion, so we insert all
+    // up to that point.
+    auto first_greater = std::partition_point(
+        overlapping_by_asc_left_.cbegin(), overlapping_by_asc_left_.cend(),
+        [&](const interval_type& interval) {
+          return Traits::compare(Traits::get_left(interval), Traits::get_right(query)) <= 0;
+        });
+    results->insert(results->end(), overlapping_by_asc_left_.cbegin(), first_greater);
+  } else if (Traits::compare(Traits::get_left(query), split_point_) > 0) {
+    // The interval is fully right of the split point. So, it may not overlap
+    // with any in 'left_'.
+    if (right_ != NULL) {
+      right_->FindIntersectingInterval(query, results);
+    }
+
+    // Any intervals whose right edge is >= the query interval's left edge
+    // intersect the query interval. 'std::partition_point' returns the first
+    // such interval which does not meet that criterion, so we insert all
+    // up to that point.
+    auto first_lesser = std::partition_point(
+        overlapping_by_desc_right_.cbegin(), overlapping_by_desc_right_.cend(),
+        [&](const interval_type& interval) {
+          return Traits::compare(Traits::get_right(interval), Traits::get_left(query)) >= 0;
+        });
+    results->insert(results->end(), overlapping_by_desc_right_.cbegin(), first_lesser);
+  } else {
+    // The query interval contains the split point. Therefore all other intervals
+    // which also contain the split point are intersecting.
+    results->insert(results->end(), overlapping_by_asc_left_.begin(),
+                    overlapping_by_asc_left_.end());
+
+    // The query interval may _also_ intersect some in either child.
+    if (left_ != NULL) {
+      left_->FindIntersectingInterval(query, results);
+    }
+    if (right_ != NULL) {
+      right_->FindIntersectingInterval(query, results);
+    }
+  }
+}
+
+
+} // namespace interval_tree_internal
+
+} // namespace kudu
+
+#endif