Posted to commits@hbase.apache.org by li...@apache.org on 2013/04/10 20:19:32 UTC

svn commit: r1466607 - in /hbase/branches/0.89-fb/src: main/java/org/apache/hadoop/hbase/ main/java/org/apache/hadoop/hbase/io/hfile/ main/java/org/apache/hadoop/hbase/regionserver/ main/java/org/apache/hadoop/hbase/regionserver/metrics/ main/java/org/...

Author: liyin
Date: Wed Apr 10 18:19:32 2013
New Revision: 1466607

URL: http://svn.apache.org/r1466607
Log:
[HBASE-8307] Adding a Histogram utility which captures data and creates a histogram

Author: manukranthk

Summary: This diff adds a Histogram class that can be used to compute percentile estimates of metrics in HBase. We can use this utility to calculate P99/P95 values of various metrics, such as latency.
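
For reference, a minimal usage sketch of the new utility (class and method
names are taken from the code added below; the sample values are made up):

  Histogram hist = new Histogram(20 /* numBuckets */, 0.0, 1.0e9);
  hist.addValue(125000L);   // e.g. a pread latency, in nanoseconds
  hist.addValue(980000L);
  Double p99 = hist.getPercentileEstimate(99.0);
  hist.refresh(20);         // reset buckets, keeping the observed value range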

Test Plan: Unit Testing

Reviewers: liyintang, rshroff, adela

Reviewed By: liyintang

CC: hbase-eng@

Differential Revision: https://phabricator.fb.com/D715751

Task ID: 2117219

Added:
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/PercentileMetric.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Histogram.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/HistogramTest.java
Modified:
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/RegionServerMetrics.java

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java?rev=1466607&r1=1466606&r2=1466607&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java Wed Apr 10 18:19:32 2013
@@ -661,6 +661,22 @@ public final class HConstants {
   public static final String HBASE_REGION_ASSIGNMENT_LOADBALANCER_WAITTIME_MS
                                 = "hbase.master.assignment.load.balancer.waittime.ms";
   public static final int DEFAULT_HBASE_REGION_ASSIGNMENT_LOADBALANCER_WAITTIME_MS = 60000;
+  
+  /*
+   * This defines the number of buckets used for computing the histogram of
+   * pread latency.
+   */
+  public static final String PREAD_LATENCY_HISTOGRAM_NUM_BUCKETS =
+      "hbase.histogrambasedmetric.numbuckets.preadlatency";
+
+  /*
+   * This defines the number of buckets used for computing the histogram of
+   * pread latency during compaction.
+   */
+  public static final String PREAD_COMPACTION_LATENCY_HISTOGRAM_NUM_BUCKETS =
+      "hbase.histogrambasedmetric.numbuckets.preadcompactionlatency";
+  public static final String HISTOGRAM_BASED_METRICS_WINDOW =
+      "hbase.histogrambasedmetric.window";
 
   private HConstants() {
     // Can't be instantiated with this constructor.
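
The new keys can be set like any other HBase configuration value. A minimal
sketch, assuming the defaults added in PercentileMetric below (the values
shown are illustrative):

  Configuration conf = new Configuration();
  // number of buckets for the pread latency histogram
  conf.setInt("hbase.histogrambasedmetric.numbuckets.preadlatency", 20);
  // refresh window for the histogram based metrics, in seconds
  conf.setLong("hbase.histogrambasedmetric.window", 60);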

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java?rev=1466607&r1=1466606&r2=1466607&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java Wed Apr 10 18:19:32 2013
@@ -46,10 +46,12 @@ import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.KeyValueContext;
 import org.apache.hadoop.hbase.io.HbaseMapWritable;
 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
+import org.apache.hadoop.hbase.regionserver.metrics.PercentileMetric;
 import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics.SchemaAware;
 import org.apache.hadoop.hbase.util.BloomFilterWriter;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.Histogram;
 import org.apache.hadoop.io.RawComparator;
 import org.apache.hadoop.io.Writable;
 
@@ -170,7 +172,15 @@ public class HFile {
   //For measuring latency of pread during compactions
   static final AtomicInteger preadCompactionOps = new AtomicInteger();
   static final AtomicLong preadCompactionTimeNano = new AtomicLong();
-
+  // These histograms track latency outliers, measured in nanoseconds.
+  public static final Histogram preadHistogram = new Histogram(
+    PercentileMetric.HISTOGRAM_NUM_BUCKETS_DEFAULT,
+    PercentileMetric.HISTOGRAM_MINVALUE_DEFAULT,
+    PercentileMetric.HISTOGRAM_MAXVALUE_DEFAULT);
+  public static final Histogram preadCompactionHistogram = new Histogram(
+    PercentileMetric.HISTOGRAM_NUM_BUCKETS_DEFAULT,
+    PercentileMetric.HISTOGRAM_MINVALUE_DEFAULT,
+    PercentileMetric.HISTOGRAM_MAXVALUE_DEFAULT);
   /**
    * Get the number of positional read operations during compaction
    * and reset it to zero.

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java?rev=1466607&r1=1466606&r2=1466607&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java Wed Apr 10 18:19:32 2013
@@ -247,6 +247,7 @@ public class HFileReaderV1 extends Abstr
       long delta = System.nanoTime() - startTimeNs;
       HFile.preadTimeNano.addAndGet(delta);
       HFile.preadOps.incrementAndGet();
+      HFile.preadHistogram.addValue(delta);
       getSchemaMetrics().updateOnCacheMiss(BlockCategory.META, false, delta);
 
       // Cache the block
@@ -327,9 +328,11 @@ public class HFileReaderV1 extends Abstr
       long delta = System.nanoTime() - startTimeNs;
       if (isCompaction) {
         HFile.preadCompactionTimeNano.addAndGet(delta);
+        HFile.preadCompactionHistogram.addValue(delta);
         HFile.preadCompactionOps.incrementAndGet();
       } else {
         HFile.preadTimeNano.addAndGet(delta);
+        HFile.preadHistogram.addValue(delta);
         HFile.preadOps.incrementAndGet();
       }
       getSchemaMetrics().updateOnCacheMiss(BlockCategory.DATA, isCompaction,

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java?rev=1466607&r1=1466606&r2=1466607&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java Wed Apr 10 18:19:32 2013
@@ -224,6 +224,7 @@ public class HFileReaderV2 extends Abstr
 
       long deltaNs = System.nanoTime() - startTimeNs;
       HFile.preadTimeNano.addAndGet(deltaNs);
+      HFile.preadHistogram.addValue(deltaNs);
       HFile.preadOps.incrementAndGet();
       getSchemaMetrics().updateOnCacheMiss(BlockCategory.META, false,
           TimeUnit.NANOSECONDS.toMillis(deltaNs));
@@ -316,9 +317,11 @@ public class HFileReaderV2 extends Abstr
 
       if (isCompaction) {
         HFile.preadCompactionTimeNano.addAndGet(deltaNs);
+        HFile.preadCompactionHistogram.addValue(deltaNs);
         HFile.preadCompactionOps.incrementAndGet();
       } else {
         HFile.preadTimeNano.addAndGet(deltaNs);
+        HFile.preadHistogram.addValue(deltaNs);
         HFile.preadOps.incrementAndGet();
       }
       if (kvContext != null) {

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=1466607&r1=1466606&r2=1466607&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Wed Apr 10 18:19:32 2013
@@ -1050,7 +1050,7 @@ public class HRegionServer implements HR
       this.setNumHDFSQuorumReadThreads(parallelHDFSReadPoolSize);
       
       // Init in here rather than in constructor after thread name has been set
-      this.metrics = new RegionServerMetrics();
+      this.metrics = new RegionServerMetrics(this.conf);
       this.dynamicMetrics = RegionServerDynamicMetrics.newInstance(this);
       startServiceThreads();
       isOnline = true;

Added: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/PercentileMetric.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/PercentileMetric.java?rev=1466607&view=auto
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/PercentileMetric.java (added)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/PercentileMetric.java Wed Apr 10 18:19:32 2013
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver.metrics;
+import org.apache.hadoop.hbase.util.Histogram;
+import org.apache.hadoop.metrics.util.MetricsLongValue;
+import org.apache.hadoop.metrics.util.MetricsRegistry;
+
+/*
+ * Uses org.apache.hadoop.hbase.util.Histogram to maintain time-varying
+ * metrics. The Histogram class can provide approximate percentile estimates
+ * over a stream of data supplied to the PercentileMetric without actually
+ * storing the data.
+ */
+public class PercentileMetric extends MetricsLongValue {
+  public static final int HISTOGRAM_NUM_BUCKETS_DEFAULT = 20;
+  public static final double HISTOGRAM_MINVALUE_DEFAULT = 0.0;
+  public static final double HISTOGRAM_MAXVALUE_DEFAULT = 1000000000.0;
+  public static final double DEFAULT_PERCENTILE = 99.0;
+  public static final long DEFAULT_SAMPLE_WINDOW = 60;
+  public static final double P99 = 99.0;
+
+  private int numBuckets;
+  private double percentile;
+  private Histogram underlyingHistogram;
+
+  /*
+   * This constructor creates a PercentileMetric, which uses a Histogram
+   * to maintain the statistics of a metric stream.
+   */
+  public PercentileMetric(final String nam, final MetricsRegistry registry,
+    Histogram histogram) {
+    super(nam, registry);
+    underlyingHistogram = histogram;
+  }
+
+  /*
+   * Sets the histogram from which this metric reads its values.
+   */
+  public void setHistogram(final Histogram hist) {
+    this.underlyingHistogram = hist;
+  }
+
+  /*
+   * numBuckets : the number of buckets used to sample the data. The
+   * updateMetric and refresh calls run in O(numBuckets).
+   */
+  public void setNumBuckets(final int numBuckets) {
+    this.numBuckets = numBuckets;
+  }
+
+  /*
+   * percentile : the percentile of the metric stream that this metric
+   * reports. The value must be between 0 and 100; otherwise an
+   * exception is thrown.
+   */
+  public void setPercentile(final double prcntyl) {
+    this.percentile = prcntyl;
+  }
+
+  public double getValue() {
+    return this.get();
+  }
+
+  public void updateMetric() {
+    this.set(underlyingHistogram.getPercentileEstimate(percentile).longValue());
+  }
+
+  public void refresh() {
+    underlyingHistogram.refresh(this.numBuckets);
+  }
+}
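
The wiring mirrors what the RegionServerMetrics changes below do; a hedged
sketch (the metric name "myLatencyP99" is illustrative):

  MetricsRegistry registry = new MetricsRegistry();
  Histogram hist = new Histogram(
      PercentileMetric.HISTOGRAM_NUM_BUCKETS_DEFAULT,
      PercentileMetric.HISTOGRAM_MINVALUE_DEFAULT,
      PercentileMetric.HISTOGRAM_MAXVALUE_DEFAULT);
  PercentileMetric p99 = new PercentileMetric("myLatencyP99", registry, hist);
  p99.setNumBuckets(PercentileMetric.HISTOGRAM_NUM_BUCKETS_DEFAULT);
  p99.setPercentile(PercentileMetric.P99);
  // ... hist.addValue(...) on the measured path ...
  p99.updateMetric();  // throws UnsupportedOperationException if hist is empty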

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/RegionServerMetrics.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/RegionServerMetrics.java?rev=1466607&r1=1466606&r2=1466607&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/RegionServerMetrics.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/RegionServerMetrics.java Wed Apr 10 18:19:32 2013
@@ -21,6 +21,8 @@ package org.apache.hadoop.hbase.regionse
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.io.hfile.HFile;
 import org.apache.hadoop.hbase.metrics.HBaseInfo;
 import org.apache.hadoop.hbase.metrics.MetricsRate;
@@ -57,6 +59,7 @@ public class RegionServerMetrics impleme
   private final MetricsRecord metricsRecord;
   private long lastUpdate = System.currentTimeMillis();
   private long lastExtUpdate = System.currentTimeMillis();
+  private long lastHistUpdate = System.currentTimeMillis();
   private long extendedPeriod = 0;
   private static final int MB = 1024*1024;
   private MetricsRegistry registry = new MetricsRegistry();
@@ -173,6 +176,13 @@ public class RegionServerMetrics impleme
       new MetricsTimeVaryingRate("fsReadLatency", registry);
 
   /**
+   * filesystem p99 read latency outlier for positional read operations
+   */
+  public final PercentileMetric fsReadLatencyP99 =
+      new PercentileMetric("fsReadLatencyP99", registry,
+          HFile.preadHistogram);
+
+  /**
    * filesystem read latency for positional read operations during
    * compactions
    */
@@ -180,6 +190,14 @@ public class RegionServerMetrics impleme
       new MetricsTimeVaryingRate("fsCompactionReadLatency", registry);
 
   /**
+   * filesystem p99 read latency outlier for positional read operations during
+   * compactions
+   */
+  public final PercentileMetric fsCompactionReadLatencyP99 =
+      new PercentileMetric("fsReadCompactionLatencyP99", registry,
+          HFile.preadCompactionHistogram);
+
+  /**
    * filesystem write latency
    */
   public final MetricsTimeVaryingRate fsWriteLatency =
@@ -264,7 +282,12 @@ public class RegionServerMetrics impleme
   public final MetricsLongValue quorumReadsExecutedInCurThread =
       new MetricsLongValue("quorumReadsExecutedInCurThread", registry);
 
-  public RegionServerMetrics() {
+  // The histogram metrics are refreshed every histogramMetricWindow milliseconds.
+  private long histogramMetricWindow = 240;
+
+  public final Configuration conf;
+
+  public RegionServerMetrics(Configuration conf) {
     MetricsContext context = MetricsUtil.getContext("hbase");
     metricsRecord = MetricsUtil.createRecord(context, "regionserver");
     String name = Thread.currentThread().getName();
@@ -288,9 +311,27 @@ public class RegionServerMetrics impleme
       LOG.info("Couldn't load ContextFactory for Metrics config info");
     }
 
+    this.conf = conf;
+    // Initializing the HistogramBasedMetrics here since
+    // they will be needing conf
+    initializeHistogramBasedMetrics();
     LOG.info("Initialized");
   }
 
+  private void initializeHistogramBasedMetrics() {
+    histogramMetricWindow = 1000 *
+        conf.getLong(HConstants.HISTOGRAM_BASED_METRICS_WINDOW,
+        PercentileMetric.DEFAULT_SAMPLE_WINDOW);
+    fsReadLatencyP99.setNumBuckets(
+        conf.getInt(HConstants.PREAD_LATENCY_HISTOGRAM_NUM_BUCKETS,
+        PercentileMetric.HISTOGRAM_NUM_BUCKETS_DEFAULT));
+    fsReadLatencyP99.setPercentile(PercentileMetric.P99);
+    fsCompactionReadLatencyP99.setNumBuckets(conf.getInt(
+        HConstants.PREAD_COMPACTION_LATENCY_HISTOGRAM_NUM_BUCKETS,
+        PercentileMetric.HISTOGRAM_NUM_BUCKETS_DEFAULT));
+    fsCompactionReadLatencyP99.setPercentile(PercentileMetric.P99);
+  }
+
   public void shutdown() {
     if (statistics != null)
       statistics.shutdown();
@@ -311,6 +352,11 @@ public class RegionServerMetrics impleme
         this.lastExtUpdate = this.lastUpdate;
         this.resetAllMinMax();
       }
+      if (this.histogramMetricWindow > 0 &&
+        ((this.lastUpdate - this.lastHistUpdate) >= this.histogramMetricWindow)) {
+        this.lastHistUpdate = this.lastUpdate;
+        this.resetAllHistogramBasedMetrics();
+      }
 
       this.stores.pushMetric(this.metricsRecord);
       this.storefiles.pushMetric(this.metricsRecord);
@@ -357,6 +403,13 @@ public class RegionServerMetrics impleme
           HFile.getPreadCompactionOpsAndReset(),
           HFile.getPreadCompactionTimeMsAndReset());
 
+      try {
+        fsReadLatencyP99.updateMetric();
+        fsCompactionReadLatencyP99.updateMetric();
+      } catch (UnsupportedOperationException e) {
+        LOG.error("Exception in Histogram based metric : " + e.getMessage());
+      }
+
       /* NOTE: removed HFile write latency.  2 reasons:
        * 1) Mixing HLog latencies are far higher priority since they're
        *      on-demand and HFile is used in background (compact/flush)
@@ -372,6 +425,8 @@ public class RegionServerMetrics impleme
       }
 
       // push the result
+      this.fsReadLatencyP99.pushMetric(this.metricsRecord);
+      this.fsCompactionReadLatencyP99.pushMetric(this.metricsRecord);
       this.fsReadLatency.pushMetric(this.metricsRecord);
       this.fsCompactionReadLatency.pushMetric(this.metricsRecord);
       this.fsWriteLatency.pushMetric(this.metricsRecord);
@@ -403,6 +458,11 @@ public class RegionServerMetrics impleme
     this.metricsRecord.update();
   }
 
+  private void resetAllHistogramBasedMetrics() {
+    this.fsReadLatencyP99.refresh();
+    this.fsCompactionReadLatencyP99.refresh();
+  }
+
   /**
    * Increment the given latency metric using the number of operations and total read time
    * obtained from HFile.

Added: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Histogram.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Histogram.java?rev=1466607&view=auto
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Histogram.java (added)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Histogram.java Wed Apr 10 18:19:32 2013
@@ -0,0 +1,231 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class Histogram {
+  private List<Bucket> buckets;
+  private int numBuckets;
+  private Double minValue;
+  private Double maxValue;
+
+  // Bucket indexing is from 1 to N
+  public Histogram(int numBuckets, Double minValue, Double maxValue) {
+    if (numBuckets < 1 || minValue >= maxValue) {
+      throw new UnsupportedOperationException();
+    }
+    buckets = new ArrayList<Bucket>(numBuckets);
+    refresh(numBuckets, minValue, maxValue);
+  }
+
+  // The start value is included in the bucket.
+  private Double getBucketStartValue(int bucketIndex) {
+    if (bucketIndex < 1 || bucketIndex > this.numBuckets) {
+      throw new IndexOutOfBoundsException();
+    }
+    Double gap = this.maxValue - this.minValue;
+    Double slice = gap/this.numBuckets;
+    return this.minValue + (bucketIndex - 1.0)*slice;
+  }
+
+  // The end value is not included in the bucket.
+  private Double getBucketEndValue(int bucketIndex) {
+    if (bucketIndex < 1 || bucketIndex > this.numBuckets) {
+      throw new IndexOutOfBoundsException();
+    }
+    Double gap = this.maxValue - this.minValue;
+    Double slice = gap/this.numBuckets;
+    return this.minValue + (bucketIndex)*slice;
+  }
+
+  private int getBucketIndex(Double value) {
+    if (value < this.minValue) {
+      return 0;
+    } else if (value >= this.maxValue) {
+      return this.numBuckets + 1;
+    } else {
+      Double gap = value - this.minValue;
+      Double idx = this.numBuckets * gap / (this.maxValue - this.minValue);
+      int i = idx.intValue() + 1;
+      // Guard against floating point rounding at the bucket boundaries.
+      if (value < this.getBucketStartValue(i)) i--;
+      else if (value >= this.getBucketEndValue(i)) i++;
+      return i; // Due to 1 indexing
+    }
+  }
+
+  public void refresh(int numBuckets) {
+    Double minValue = this.minValue;
+    Double maxValue = this.maxValue;
+    for (Bucket bucket : this.buckets) {
+      if (bucket.count > 0) {
+        minValue = bucket.getMinValue();
+        break;
+      }
+    }
+    for (int i = this.buckets.size() - 1; i>=0; i--) {
+      Bucket bucket = this.buckets.get(i);
+      if (bucket.count > 0) {
+        maxValue = bucket.getMaxValue();
+        break;
+      }
+    }
+    this.refresh(numBuckets, minValue, maxValue);
+  }
+
+  private void refresh(int numBuckets, Double minValue, Double maxValue) {
+    this.numBuckets = numBuckets;
+    this.minValue = minValue;
+    this.maxValue = maxValue;
+    this.buckets.clear();
+    buckets.add(new Bucket(-Double.MAX_VALUE, this.getBucketStartValue(1)));
+    for (int i = 1; i<=this.numBuckets; i++) {
+      this.buckets.add(new Bucket(this.getBucketStartValue(i),
+          this.getBucketEndValue(i)));
+    }
+    buckets.add(new Bucket(this.getBucketEndValue(this.numBuckets),
+        Double.MAX_VALUE));
+  }
+
+  public Double getPercentileEstimate(Double prcntyl) {
+    // We scan from the end of the table since our use case is to find the
+    // p99, p95 latencies.
+    if (prcntyl > 100.0 || prcntyl < 0.0) {
+      throw new UnsupportedOperationException("Percentile input value not in range.");
+    } else {
+      prcntyl = 100.0 - prcntyl;
+    }
+    int totalCount = 0;
+    for (Bucket bucket : this.buckets) {
+      totalCount += bucket.count;
+    }
+    if (totalCount == 0) {
+      throw new UnsupportedOperationException("Too few data points.");
+    }
+    Double countToCoverDouble = (totalCount * prcntyl / 100.0);
+    int countToCover = countToCoverDouble.intValue();
+    for (int i=this.buckets.size() - 1; i >= 0; i--) {
+      Bucket bucket = this.buckets.get(i);
+      if (bucket.getCount() >= countToCover) {
+        return bucket.getGreaterValue(bucket.getCount() - countToCover);
+      } else {
+        countToCover -= bucket.getCount();
+      }
+    }
+    return this.maxValue;
+  }
+
+  public void addValue(Double value) {
+    Bucket bucket = buckets.get(this.getBucketIndex(value));
+    bucket.addValue(value);
+  }
+
+  public void addValue(Long value) {
+    Bucket bucket = buckets.get(this.getBucketIndex((double)value));
+    bucket.addValue((double)value);
+  }
+
+  public class Bucket {
+    private Double sum;
+    private int count;
+    private Double minValue;
+    private Double maxValue;
+    private Double startValue;
+    private Double endValue;
+    public Bucket(Double startValue, Double endValue) {
+      this.sum = 0.0;
+      this.count = 0;
+      this.minValue = endValue;
+      this.maxValue = startValue;
+      this.startValue = startValue;
+      this.endValue = endValue;
+    }
+
+    public void addValue(Double value) {
+      this.sum = this.sum + value;
+      count++;
+      this.minValue = Math.min(this.minValue, value);
+      this.maxValue = Math.max(this.maxValue, value);
+    }
+
+    /*
+     * Returns the number of items in the bucket that are smaller than
+     * the given value.
+     * For the purpose of this calculation, the values in the bucket are
+     * assumed to be uniformly distributed between minValue and maxValue.
+     */
+    public int getGreaterCount(Double value) {
+      if (!(this.minValue < value && this.maxValue >= value)) {
+        throw new UnsupportedOperationException();
+      }
+      if (this.count == 0) return 0;
+      else if (this.count == 1) {
+        if (this.minValue > value) return 0;
+        else return 1;
+      }
+      Double gap = value - this.minValue;
+      Double ret = this.count * gap / (this.maxValue - this.minValue);
+      return ret.intValue();
+    }
+
+    /*
+     * Returns the value that is greater than the given number of items
+     * in this bucket, again assuming a uniform distribution.
+     */
+    public Double getGreaterValue(int count) {
+      if (count > this.count) {
+        throw new UnsupportedOperationException();
+      }
+      if (count == 0) return this.endValue;
+      Double gap = this.maxValue - this.minValue;
+      Double ret = this.minValue + gap * count / this.count;
+      return ret;
+    }
+
+    public Double getSum() {
+      return this.sum;
+    }
+
+    public int getCount() {
+      return this.count;
+    }
+
+    public Double getMinValue() {
+      return this.minValue;
+    }
+
+    public Double getMaxValue() {
+      return this.maxValue;
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder s = new StringBuilder();
+      s.append("Bucket Details :");
+      s.append(" count : " + this.count);
+      s.append(" sum : " + this.sum);
+      s.append(" minValue : " + this.minValue);
+      s.append(" maxValue : " + this.maxValue);
+      s.append(" startValue : " + this.startValue);
+      s.append(" endValue : " + this.endValue);
+      return s.toString();
+    }
+  }
+}
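
To make the estimation path concrete: for getPercentileEstimate(99.0) over the
9,999 samples that testHistogram below feeds into
new Histogram(1000, 0.0, 10000.0), the scan from the top works out to

  countToCover = (int) (9999 * (100.0 - 99.0) / 100.0) = 99

samples to cover. Each interior bucket holds ten samples, so the scan skips
nine buckets (90 samples) and interpolates inside the tenth:
9890.0 + (9899.0 - 9890.0) * 1 / 10 = 9890.9, inside the 9890..9910 window
the test asserts.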

Added: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/HistogramTest.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/HistogramTest.java?rev=1466607&view=auto
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/HistogramTest.java (added)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/HistogramTest.java Wed Apr 10 18:19:32 2013
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import java.util.Random;
+
+import org.junit.Test;
+
+import junit.framework.TestCase;
+
+public class HistogramTest extends TestCase {
+
+  @Test
+  public void testAboveMaxValue() {
+    Histogram hist = new Histogram(1000, 100.0, 10000.0);
+    for (int i=0; i<100; i++) {
+      Double tmp = 10001.0 + i; // every sample lies above maxValue
+      hist.addValue(tmp);
+    }
+    Double prcntyl = hist.getPercentileEstimate(95.0);
+    assertTrue(prcntyl >= 10094 && prcntyl <= 10096);
+  }
+  @Test
+  public void testBelowMinValue() {
+    Histogram hist = new Histogram(1000, 100.0, 10000.0);
+    for (int i=0; i<100; i++) {
+      Double tmp = i * 1.0;
+      hist.addValue(tmp);
+    }
+    Double prcntyl = hist.getPercentileEstimate(95.0);
+    assertTrue(prcntyl >= 94 && prcntyl <= 96);
+  }
+
+  @Test
+  public void testHistogram() {
+    Histogram hist = new Histogram(1000, 0.0, 10000.0);
+    for (int i=1; i<10000; i++) {
+      Double tmp = i * 1.0;
+      hist.addValue(tmp);
+    }
+    Double prcntyl = hist.getPercentileEstimate(99.0);
+    assertTrue(prcntyl >= 9890 && prcntyl <= 9910);
+
+    prcntyl = hist.getPercentileEstimate(95.0);
+    assertTrue(prcntyl >= 9490 && prcntyl <= 9510);
+  }
+
+  @Test
+  public void testHistogramExtremeValues() {
+    Histogram hist = new Histogram(100, 0.0, 100.0);
+    for (int i=1; i<100; i++) {
+      Double tmp = i - 0.1;
+      hist.addValue(tmp);
+    }
+    hist.addValue(100.1);
+    hist.addValue(100.2);
+    Double prcntyl = hist.getPercentileEstimate(99.9999999999);
+    assertTrue(prcntyl <= 100.2);
+
+    hist = new Histogram(100, 10.0, 100.0);
+    for (int i=11; i<100; i++) {
+      Double tmp = i - 0.1;
+      hist.addValue(tmp);
+    }
+    hist.addValue(5.1);
+    hist.addValue(9.1);
+    prcntyl = hist.getPercentileEstimate(0.0);
+    assertTrue(prcntyl >= 5.1);
+  }
+}