You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by li...@apache.org on 2013/04/10 20:19:32 UTC
svn commit: r1466607 - in /hbase/branches/0.89-fb/src:
main/java/org/apache/hadoop/hbase/
main/java/org/apache/hadoop/hbase/io/hfile/
main/java/org/apache/hadoop/hbase/regionserver/
main/java/org/apache/hadoop/hbase/regionserver/metrics/ main/java/org/...
Author: liyin
Date: Wed Apr 10 18:19:32 2013
New Revision: 1466607
URL: http://svn.apache.org/r1466607
Log:
[HBASE-8307] Adding a Histogram utility which captures data and creates a histogram
Author: manukranthk
Summary: This diff is to create a Histogram class which can be used to find the Percentile estimates of metrics in HBase. We can use this utility to calculate P99/P95 metrics of various metrics like latency etc.
Test Plan: Unit Testing
Reviewers: liyintang, rshroff, adela
Reviewed By: liyintang
CC: hbase-eng@
Differential Revision: https://phabricator.fb.com/D715751
Task ID: 2117219
Added:
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/PercentileMetric.java
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Histogram.java
hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/HistogramTest.java
Modified:
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/RegionServerMetrics.java
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java?rev=1466607&r1=1466606&r2=1466607&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java Wed Apr 10 18:19:32 2013
@@ -661,6 +661,22 @@ public final class HConstants {
public static final String HBASE_REGION_ASSIGNMENT_LOADBALANCER_WAITTIME_MS
= "hbase.master.assignment.load.balancer.waittime.ms";
public static final int DEFAULT_HBASE_REGION_ASSIGNMENT_LOADBALANCER_WAITTIME_MS = 60000;
+
+ /*
+ * This defines the number of buckets used for computing the histogram of
+ * pread latency.
+ */
+ public static final String PREAD_LATENCY_HISTOGRAM_NUM_BUCKETS =
+ "hbase.histogrambasedmetric.numbuckets.preadlatency";
+
+ /*
+ * This defines the number of buckets used for computing the histogram of
+ * pread latency during compaction.
+ */
+ public static final String PREAD_COMPACTION_LATENCY_HISTOGRAM_NUM_BUCKETS =
+ "hbase.histogrambasedmetric.numbuckets.preadcompactionlatency";
+ public static final String HISTOGRAM_BASED_METRICS_WINDOW =
+ "hbase.histogrambasedmetric.window";
private HConstants() {
// Can't be instantiated with this constructor.
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java?rev=1466607&r1=1466606&r2=1466607&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java Wed Apr 10 18:19:32 2013
@@ -46,10 +46,12 @@ import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueContext;
import org.apache.hadoop.hbase.io.HbaseMapWritable;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
+import org.apache.hadoop.hbase.regionserver.metrics.PercentileMetric;
import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics.SchemaAware;
import org.apache.hadoop.hbase.util.BloomFilterWriter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.Histogram;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Writable;
@@ -170,7 +172,15 @@ public class HFile {
//For measuring latency of pread during compactions
static final AtomicInteger preadCompactionOps = new AtomicInteger();
static final AtomicLong preadCompactionTimeNano = new AtomicLong();
-
+ // These outliers measure in nano seconds.
+ public static final Histogram preadHistogram = new Histogram(
+ PercentileMetric.HISTOGRAM_NUM_BUCKETS_DEFAULT,
+ PercentileMetric.HISTOGRAM_MINVALUE_DEFAULT,
+ PercentileMetric.HISTOGRAM_MAXVALUE_DEFAULT);
+ public static final Histogram preadCompactionHistogram = new Histogram(
+ PercentileMetric.HISTOGRAM_NUM_BUCKETS_DEFAULT,
+ PercentileMetric.HISTOGRAM_MINVALUE_DEFAULT,
+ PercentileMetric.HISTOGRAM_MAXVALUE_DEFAULT);
/**
* Get the number of positional read operations during compaction
* and reset it to zero.
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java?rev=1466607&r1=1466606&r2=1466607&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java Wed Apr 10 18:19:32 2013
@@ -247,6 +247,7 @@ public class HFileReaderV1 extends Abstr
long delta = System.nanoTime() - startTimeNs;
HFile.preadTimeNano.addAndGet(delta);
HFile.preadOps.incrementAndGet();
+ HFile.preadHistogram.addValue(delta);
getSchemaMetrics().updateOnCacheMiss(BlockCategory.META, false, delta);
// Cache the block
@@ -327,9 +328,11 @@ public class HFileReaderV1 extends Abstr
long delta = System.nanoTime() - startTimeNs;
if (isCompaction) {
HFile.preadCompactionTimeNano.addAndGet(delta);
+ HFile.preadCompactionHistogram.addValue(delta);
HFile.preadCompactionOps.incrementAndGet();
} else {
HFile.preadTimeNano.addAndGet(delta);
+ HFile.preadHistogram.addValue(delta);
HFile.preadOps.incrementAndGet();
}
getSchemaMetrics().updateOnCacheMiss(BlockCategory.DATA, isCompaction,
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java?rev=1466607&r1=1466606&r2=1466607&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java Wed Apr 10 18:19:32 2013
@@ -224,6 +224,7 @@ public class HFileReaderV2 extends Abstr
long deltaNs = System.nanoTime() - startTimeNs;
HFile.preadTimeNano.addAndGet(deltaNs);
+ HFile.preadHistogram.addValue(deltaNs);
HFile.preadOps.incrementAndGet();
getSchemaMetrics().updateOnCacheMiss(BlockCategory.META, false,
TimeUnit.NANOSECONDS.toMillis(deltaNs));
@@ -316,9 +317,11 @@ public class HFileReaderV2 extends Abstr
if (isCompaction) {
HFile.preadCompactionTimeNano.addAndGet(deltaNs);
+ HFile.preadCompactionHistogram.addValue(deltaNs);
HFile.preadCompactionOps.incrementAndGet();
} else {
HFile.preadTimeNano.addAndGet(deltaNs);
+ HFile.preadHistogram.addValue(deltaNs);
HFile.preadOps.incrementAndGet();
}
if (kvContext != null) {
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=1466607&r1=1466606&r2=1466607&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Wed Apr 10 18:19:32 2013
@@ -1050,7 +1050,7 @@ public class HRegionServer implements HR
this.setNumHDFSQuorumReadThreads(parallelHDFSReadPoolSize);
// Init in here rather than in constructor after thread name has been set
- this.metrics = new RegionServerMetrics();
+ this.metrics = new RegionServerMetrics(this.conf);
this.dynamicMetrics = RegionServerDynamicMetrics.newInstance(this);
startServiceThreads();
isOnline = true;
Added: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/PercentileMetric.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/PercentileMetric.java?rev=1466607&view=auto
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/PercentileMetric.java (added)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/PercentileMetric.java Wed Apr 10 18:19:32 2013
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver.metrics;
+import org.apache.hadoop.hbase.util.Histogram;
+import org.apache.hadoop.metrics.util.MetricsLongValue;
+import org.apache.hadoop.metrics.util.MetricsRegistry;
+
+/*
+ * Used the org.apache.hadoop.hbase.util.Histogram to maintain time varying
+ * metrics. The histogram class can provide various approximate details about
+ * a stream of data supplied to the PercentileMetric without actually
+ * storing the data.
+ */
+public class PercentileMetric extends MetricsLongValue{
+ public static final int HISTOGRAM_NUM_BUCKETS_DEFAULT = 20;
+ public static final double HISTOGRAM_MINVALUE_DEFAULT = 0.0;
+ public static final double HISTOGRAM_MAXVALUE_DEFAULT = 1000000000.0;
+ public static final double DEFAULT_PERCENTILE = 99.0;
+ public static final long DEFAULT_SAMPLE_WINDOW = 60;
+ public static final double P99 = 99.0;
+
+ private int numBuckets;
+ private double percentile;
+ private Histogram underlyingHistogram;
+
+ /*
+ * This constructor provides a way to create a HistogramMetric which uses a
+ * Histogram to maintain the statistics of a metric stream.
+ */
+ public PercentileMetric(final String nam, final MetricsRegistry registry,
+ Histogram histogram) {
+ super(nam, registry);
+ underlyingHistogram = histogram;
+ }
+
+ /*
+ * The histogram which has the values updated.
+ */
+ public void setHistogram(final Histogram hist) {
+ this.underlyingHistogram = hist;
+ }
+
+ /*
+ * numBuckets : This denotes the number of buckets used to sample the data.
+ * the updateMetric and refresh calls will run in O(numBuckets).
+ */
+ public void setNumBuckets(final int numBuckets) {
+ this.numBuckets = numBuckets;
+ }
+
+ /*
+ * percentile : The percentile estimate of the metric that will be seeked
+ * using this metric. The value should be between 0 and 100,
+ * else it will throw and exception.
+ */
+ public void setPercentile(final double prcntyl) {
+ this.percentile = prcntyl;
+ }
+
+ public double getValue() {
+ return this.get();
+ }
+
+ public void updateMetric() {
+ this.set(underlyingHistogram.getPercentileEstimate(percentile).longValue());
+ }
+
+ public void refresh() {
+ underlyingHistogram.refresh(this.numBuckets);
+ }
+}
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/RegionServerMetrics.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/RegionServerMetrics.java?rev=1466607&r1=1466606&r2=1466607&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/RegionServerMetrics.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/RegionServerMetrics.java Wed Apr 10 18:19:32 2013
@@ -21,6 +21,8 @@ package org.apache.hadoop.hbase.regionse
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.metrics.HBaseInfo;
import org.apache.hadoop.hbase.metrics.MetricsRate;
@@ -57,6 +59,7 @@ public class RegionServerMetrics impleme
private final MetricsRecord metricsRecord;
private long lastUpdate = System.currentTimeMillis();
private long lastExtUpdate = System.currentTimeMillis();
+ private long lastHistUpdate = System.currentTimeMillis();
private long extendedPeriod = 0;
private static final int MB = 1024*1024;
private MetricsRegistry registry = new MetricsRegistry();
@@ -173,6 +176,13 @@ public class RegionServerMetrics impleme
new MetricsTimeVaryingRate("fsReadLatency", registry);
/**
+ * filesystem p99 read latency outlier for positional read operations
+ */
+ public final PercentileMetric fsReadLatencyP99 =
+ new PercentileMetric("fsReadLatencyP99", registry,
+ HFile.preadHistogram);
+
+ /**
* filesystem read latency for positional read operations during
* compactions
*/
@@ -180,6 +190,14 @@ public class RegionServerMetrics impleme
new MetricsTimeVaryingRate("fsCompactionReadLatency", registry);
/**
+ * filesystem p99 read latency outlier for positional read operations during
+ * compactions
+ */
+ public final PercentileMetric fsCompactionReadLatencyP99 =
+ new PercentileMetric("fsReadCompactionLatencyP99", registry,
+ HFile.preadCompactionHistogram);
+
+ /**
* filesystem write latency
*/
public final MetricsTimeVaryingRate fsWriteLatency =
@@ -264,7 +282,12 @@ public class RegionServerMetrics impleme
public final MetricsLongValue quorumReadsExecutedInCurThread =
new MetricsLongValue("quorumReadsExecutedInCurThread", registry);
- public RegionServerMetrics() {
+ // The histogram metrics are updated every histogramMetricWindow seconds
+ private long histogramMetricWindow = 240;
+
+ public final Configuration conf;
+
+ public RegionServerMetrics(Configuration conf) {
MetricsContext context = MetricsUtil.getContext("hbase");
metricsRecord = MetricsUtil.createRecord(context, "regionserver");
String name = Thread.currentThread().getName();
@@ -288,9 +311,27 @@ public class RegionServerMetrics impleme
LOG.info("Couldn't load ContextFactory for Metrics config info");
}
+ this.conf = conf;
+ // Initializing the HistogramBasedMetrics here since
+ // they will be needing conf
+ initializeHistogramBasedMetrics();
LOG.info("Initialized");
}
+ private void initializeHistogramBasedMetrics() {
+ histogramMetricWindow = 1000 *
+ conf.getLong(HConstants.HISTOGRAM_BASED_METRICS_WINDOW,
+ PercentileMetric.DEFAULT_SAMPLE_WINDOW);
+ fsReadLatencyP99.setNumBuckets(
+ conf.getInt(HConstants.PREAD_LATENCY_HISTOGRAM_NUM_BUCKETS,
+ PercentileMetric.HISTOGRAM_NUM_BUCKETS_DEFAULT));
+ fsReadLatencyP99.setPercentile(PercentileMetric.P99);
+ fsCompactionReadLatencyP99.setNumBuckets(conf.getInt(
+ HConstants.PREAD_COMPACTION_LATENCY_HISTOGRAM_NUM_BUCKETS,
+ PercentileMetric.HISTOGRAM_NUM_BUCKETS_DEFAULT));
+ fsCompactionReadLatencyP99.setPercentile(99.0);
+ }
+
public void shutdown() {
if (statistics != null)
statistics.shutdown();
@@ -311,6 +352,11 @@ public class RegionServerMetrics impleme
this.lastExtUpdate = this.lastUpdate;
this.resetAllMinMax();
}
+ if (this.histogramMetricWindow > 0 &&
+ ((this.lastUpdate - this.lastHistUpdate) >= this.histogramMetricWindow)) {
+ this.lastHistUpdate = this.lastUpdate;
+ this.resetAllHistogramBasedMetrics();
+ }
this.stores.pushMetric(this.metricsRecord);
this.storefiles.pushMetric(this.metricsRecord);
@@ -357,6 +403,13 @@ public class RegionServerMetrics impleme
HFile.getPreadCompactionOpsAndReset(),
HFile.getPreadCompactionTimeMsAndReset());
+ try {
+ fsReadLatencyP99.updateMetric();
+ fsCompactionReadLatencyP99.updateMetric();
+ } catch (UnsupportedOperationException e) {
+ LOG.error("Exception in Histogram based metric : " + e.getMessage());
+ }
+
/* NOTE: removed HFile write latency. 2 reasons:
* 1) Mixing HLog latencies are far higher priority since they're
* on-demand and HFile is used in background (compact/flush)
@@ -372,6 +425,8 @@ public class RegionServerMetrics impleme
}
// push the result
+ this.fsReadLatencyP99.pushMetric(this.metricsRecord);
+ this.fsCompactionReadLatencyP99.pushMetric(this.metricsRecord);
this.fsReadLatency.pushMetric(this.metricsRecord);
this.fsCompactionReadLatency.pushMetric(this.metricsRecord);
this.fsWriteLatency.pushMetric(this.metricsRecord);
@@ -403,6 +458,11 @@ public class RegionServerMetrics impleme
this.metricsRecord.update();
}
+ private void resetAllHistogramBasedMetrics() {
+ this.fsReadLatencyP99.refresh();
+ this.fsCompactionReadLatencyP99.refresh();
+ }
+
/**
* Increment the given latency metric using the number of operations and total read time
* obtained from HFile.
Added: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Histogram.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Histogram.java?rev=1466607&view=auto
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Histogram.java (added)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Histogram.java Wed Apr 10 18:19:32 2013
@@ -0,0 +1,231 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class Histogram {
+ private List<Bucket> buckets;
+ private int numBuckets;
+ private Double minValue;
+ private Double maxValue;
+
+ // Bucket indexing is from 1 to N
+ public Histogram(int numBuckets, Double minValue, Double maxValue) {
+ if (numBuckets < 1 || minValue >= maxValue) {
+ throw new UnsupportedOperationException();
+ }
+ buckets = new ArrayList<Bucket>(numBuckets);
+ refresh(numBuckets, minValue, maxValue);
+ }
+
+ // This is included in the bucket
+ private Double getBucketStartValue(int bucketIndex) {
+ if (bucketIndex < 1 || bucketIndex > this.numBuckets) {
+ throw new IndexOutOfBoundsException();
+ }
+ Double gap = this.maxValue - this.minValue;
+ Double slice = gap/this.numBuckets;
+ return this.minValue + (bucketIndex - 1.0)*slice;
+ }
+
+ //This is not included in the bucket
+ private Double getBucketEndValue(int bucketIndex) {
+ if (bucketIndex < 1 || bucketIndex > this.numBuckets) {
+ throw new IndexOutOfBoundsException();
+ }
+ Double gap = this.maxValue - this.minValue;
+ Double slice = gap/this.numBuckets;
+ return this.minValue + (bucketIndex)*slice;
+ }
+
+ private int getBucketIndex(Double value) {
+ if (value < this.minValue) {
+ return 0;
+ } else if (value >= this.maxValue) {
+ return this.numBuckets + 1;
+ } else {
+ Double gap = value - this.minValue;
+ Double idx = this.numBuckets * gap / (this.maxValue - this.minValue);
+ int i = idx.intValue() + 1;
+ // Check if the index falls in the margin somehow.
+ if (value < this.getBucketStartValue(i)) i--;
+ else if (value >= this.getBucketEndValue(i)) i++;
+ return i; // Due to 1 indexing
+ }
+ }
+
+ public void refresh(int numBuckets) {
+ Double minValue = this.minValue;
+ Double maxValue = this.maxValue;
+ for (Bucket bucket : this.buckets) {
+ if (bucket.count > 0) {
+ minValue = bucket.getMinValue();
+ break;
+ }
+ }
+ for (int i = this.buckets.size() - 1; i>=0; i--) {
+ Bucket bucket = this.buckets.get(i);
+ if (bucket.count > 0) {
+ maxValue = bucket.getMaxValue();
+ break;
+ }
+ }
+ this.refresh(numBuckets, minValue, maxValue);
+ }
+
+ private void refresh(int numBuckets, Double minValue, Double maxValue) {
+ this.numBuckets = numBuckets;
+ this.minValue = minValue;
+ this.maxValue = maxValue;
+ this.buckets.clear();
+ buckets.add(new Bucket(Double.MIN_VALUE, this.getBucketStartValue(1)));
+ for (int i = 1; i<=this.numBuckets; i++) {
+ this.buckets.add(new Bucket(this.getBucketStartValue(i),
+ this.getBucketEndValue(i)));
+ }
+ buckets.add(new Bucket(this.getBucketEndValue(this.numBuckets),
+ Double.MAX_VALUE));
+ }
+
+ public Double getPercentileEstimate(Double prcntyl) {
+ // We scan from the end of the table since our use case is to find the
+ // p99, p95 latencies.
+ if (prcntyl > 100.0 || prcntyl < 0.0) {
+ throw new UnsupportedOperationException("Percentile input value not in range.");
+ } else {
+ prcntyl = 100.0 - prcntyl;
+ }
+ int totalCount = 0;
+ for (Bucket bucket : this.buckets) {
+ totalCount += bucket.count;
+ }
+ if (totalCount == 0) {
+ throw new UnsupportedOperationException("Too few data points.");
+ }
+ Double countToCoverDouble = (totalCount * prcntyl / 100.0);
+ int countToCover = countToCoverDouble.intValue();
+ for (int i=this.buckets.size() - 1; i >= 0; i--) {
+ Bucket bucket = this.buckets.get(i);
+ if (bucket.getCount() >= countToCover) {
+ return bucket.getGreaterValue(bucket.getCount() - countToCover);
+ } else {
+ countToCover -= bucket.getCount();
+ }
+ }
+ return this.maxValue;
+ }
+
+ public void addValue(Double value) {
+ Bucket bucket = buckets.get(this.getBucketIndex(value));
+ bucket.addValue(value);
+ }
+
+ public void addValue(Long value) {
+ Bucket bucket = buckets.get(this.getBucketIndex((double)value));
+ bucket.addValue((double)value);
+ }
+
+ public class Bucket {
+ private Double sum;
+ private int count;
+ private Double minValue;
+ private Double maxValue;
+ private Double startValue;
+ private Double endValue;
+ public Bucket(Double startValue, Double endValue) {
+ this.sum = 0.0;
+ this.count = 0;
+ this.minValue = endValue;
+ this.maxValue = startValue;
+ this.startValue = startValue;
+ this.endValue = endValue;
+ }
+
+ public void addValue(Double value) {
+ this.sum = this.sum + value;
+ count++;
+ this.minValue = Math.min(this.minValue, value);
+ this.maxValue = Math.max(this.maxValue, value);
+ }
+
+ /*
+ * This function gives the count of the number of items in the bucket
+ * which are smaller than the given value;
+ * For the purpose of this calculation, the distribution over the bucket
+ * is assumed to be uniformly distributed between minValue and maxValue
+ */
+ public int getGreaterCount(Double value) {
+ if (!(this.minValue < value && this.maxValue >= value)) {
+ throw new UnsupportedOperationException();
+ }
+ if (this.count == 0) return 0;
+ else if (this.count == 1) {
+ if (this.minValue > value) return 0;
+ else return 1;
+ }
+ Double gap = value - this.minValue;
+ Double ret = this.count * gap / (this.maxValue - this.minValue);
+ return ret.intValue();
+ }
+
+ /*
+ * This function gives the value which is more than a certain count in
+ * this bucket.
+ * */
+ public Double getGreaterValue(int count) {
+ if (count > this.count) {
+ throw new UnsupportedOperationException();
+ }
+ if (count == 0) return this.endValue;
+ Double gap = this.maxValue - this.minValue;
+ Double ret = this.minValue + gap * count / this.count;
+ return ret;
+ }
+
+ public Double getSum() {
+ return this.sum;
+ }
+
+ public int getCount() {
+ return this.count;
+ }
+
+ public Double getMinValue() {
+ return this.minValue;
+ }
+
+ public Double getMaxValue() {
+ return this.maxValue;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder s = new StringBuilder();
+ s.append("Bucket Details :");
+ s.append(" count : " + this.count);
+ s.append(" sum : " + this.sum);
+ s.append(" minValue : " + this.minValue);
+ s.append(" maxValue : " + this.maxValue);
+ s.append(" startValue : " + this.startValue);
+ s.append(" endValue : " + this.endValue);
+ return s.toString();
+ }
+ }
+}
Added: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/HistogramTest.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/HistogramTest.java?rev=1466607&view=auto
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/HistogramTest.java (added)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/HistogramTest.java Wed Apr 10 18:19:32 2013
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import java.util.Random;
+
+import org.junit.Test;
+
+import junit.framework.TestCase;
+
+public class HistogramTest extends TestCase{
+
+ @Test
+ public void testAboveMaxValue() {
+ Histogram hist = new Histogram(1000, 100.0, 10000.0);
+ for (int i=0; i<100; i++) {
+ Double tmp = i * 1.0;
+ hist.addValue(tmp);
+ }
+ Double prcntyl = hist.getPercentileEstimate(95.0);
+ assertTrue(prcntyl >= 94 && prcntyl <= 96);
+ }
+ @Test
+ public void testBelowMinValue() {
+ Histogram hist = new Histogram(1000, 100.0, 10000.0);
+ for (int i=0; i<100; i++) {
+ Double tmp = i * 1.0;
+ hist.addValue(tmp);
+ }
+ Double prcntyl = hist.getPercentileEstimate(95.0);
+ assertTrue(prcntyl >= 94 && prcntyl <= 96);
+ }
+
+ @Test
+ public void testHistogram() {
+ Histogram hist = new Histogram(1000, 0.0, 10000.0);
+ for (int i=1; i<10000; i++) {
+ Double tmp = i * 1.0;
+ hist.addValue(tmp);
+ }
+ Double prcntyl = hist.getPercentileEstimate(99.0);
+ assertTrue(prcntyl >= 9890 && prcntyl <= 9910);
+
+ prcntyl = hist.getPercentileEstimate(95.0);
+ assertTrue(prcntyl >= 9490 && prcntyl <= 9510);
+ }
+
+ @Test
+ public void testHistogramExtremeValues() {
+ Histogram hist = new Histogram(100, 0.0, 100.0);
+ for (int i=1; i<100; i++) {
+ Double tmp = i - 0.1;
+ hist.addValue(tmp);
+ }
+ hist.addValue(100.1);
+ hist.addValue(100.2);
+ Double prcntyl = hist.getPercentileEstimate(99.9999999999);
+ assertTrue(prcntyl <= 100.2);
+
+ hist = new Histogram(100, 10.0, 100.0);
+ for (int i=11; i<100; i++) {
+ Double tmp = i - 0.1;
+ hist.addValue(tmp);
+ }
+ hist.addValue(5.1);
+ hist.addValue(9.1);
+ prcntyl = hist.getPercentileEstimate(0.0);
+ assertTrue(prcntyl >= 5.1);
+ }
+}