You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by br...@apache.org on 2011/02/24 22:15:22 UTC
svn commit: r1074291 - in /cassandra/branches/cassandra-0.7:
src/java/org/apache/cassandra/db/ src/java/org/apache/cassandra/tools/
src/java/org/apache/cassandra/utils/ test/unit/org/apache/cassandra/utils/
Author: brandonwilliams
Date: Thu Feb 24 21:15:22 2011
New Revision: 1074291
URL: http://svn.apache.org/viewvc?rev=1074291&view=rev
Log:
Cleanup and document EstimatedHistogram
Patch by jbellis, reviewed by brandonwilliams for CASSANDRA-2232
Modified:
cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/tools/NodeCmd.java
cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/utils/EstimatedHistogram.java
cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/utils/LatencyTracker.java
cassandra/branches/cassandra-0.7/test/unit/org/apache/cassandra/utils/EstimatedHistogramTest.java
Modified: cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/db/ColumnFamilyStore.java?rev=1074291&r1=1074290&r2=1074291&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/db/ColumnFamilyStore.java (original)
+++ cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/db/ColumnFamilyStore.java Thu Feb 24 21:15:22 2011
@@ -410,7 +410,7 @@ public class ColumnFamilyStore implement
long count = 0;
for (SSTableReader sstable : ssTables)
{
- sum += sstable.getEstimatedRowSize().median();
+ sum += sstable.getEstimatedRowSize().mean();
count++;
}
return count > 0 ? sum / count : 0;
@@ -422,7 +422,7 @@ public class ColumnFamilyStore implement
int count = 0;
for (SSTableReader sstable : ssTables)
{
- sum += sstable.getEstimatedColumnCount().median();
+ sum += sstable.getEstimatedColumnCount().mean();
count++;
}
return count > 0 ? (int) (sum / count) : 0;
@@ -1023,12 +1023,12 @@ public class ColumnFamilyStore implement
public long[] getRecentSSTablesPerReadHistogram()
{
- return recentSSTablesPerRead.get(true);
+ return recentSSTablesPerRead.getBuckets(true);
}
public long[] getSSTablesPerReadHistogram()
{
- return sstablesPerRead.get(false);
+ return sstablesPerRead.getBuckets(false);
}
public long getReadCount()
@@ -1984,7 +1984,7 @@ public class ColumnFamilyStore implement
for (SSTableReader sstable : ssTables)
{
- long[] rowSize = sstable.getEstimatedRowSize().get(false);
+ long[] rowSize = sstable.getEstimatedRowSize().getBuckets(false);
for (int i = 0; i < histogram.length; i++)
histogram[i] += rowSize[i];
@@ -1999,7 +1999,7 @@ public class ColumnFamilyStore implement
for (SSTableReader sstable : ssTables)
{
- long[] columnSize = sstable.getEstimatedColumnCount().get(false);
+ long[] columnSize = sstable.getEstimatedColumnCount().getBuckets(false);
for (int i = 0; i < histogram.length; i++)
histogram[i] += columnSize[i];
Modified: cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/tools/NodeCmd.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/tools/NodeCmd.java?rev=1074291&r1=1074290&r2=1074291&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/tools/NodeCmd.java (original)
+++ cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/tools/NodeCmd.java Thu Feb 24 21:15:22 2011
@@ -441,7 +441,7 @@ public class NodeCmd {
ColumnFamilyStoreMBean store = this.probe.getCfsProxy(keySpace, columnFamily);
// default is 90 offsets
- long[] offsets = new EstimatedHistogram(90).getBucketOffsets();
+ long[] offsets = new EstimatedHistogram().getBucketOffsets();
long[] rrlh = store.getRecentReadLatencyHistogramMicros();
long[] rwlh = store.getRecentWriteLatencyHistogramMicros();
Modified: cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/utils/EstimatedHistogram.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/utils/EstimatedHistogram.java?rev=1074291&r1=1074290&r2=1074291&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/utils/EstimatedHistogram.java (original)
+++ cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/utils/EstimatedHistogram.java Thu Feb 24 21:15:22 2011
@@ -28,23 +28,24 @@ import org.apache.cassandra.io.ICompactS
public class EstimatedHistogram
{
+ public static EstimatedHistogramSerializer serializer = new EstimatedHistogramSerializer();
/**
* The series of values to which the counts in `buckets` correspond:
- * 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 15, 18, 22, etc.
+ * 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 17, 20, etc.
* Thus, a `buckets` of [0, 0, 1, 10] would mean we had seen one value of 3 and 10 values of 4.
*
* The series starts at 1 and grows by 1.2 each time (rounding and removing duplicates). It goes from 1
* to around 36M by default (creating 90+1 buckets), which will give us timing resolution from microseconds to
* 36 seconds, with less precision as the numbers get larger.
+ *
+ * Each bucket represents values from (previous bucket offset, current offset].
*/
private long[] bucketOffsets;
- private int numBuckets;
+ // buckets is one element longer than bucketOffsets -- the last element is values greater than the last offset
final AtomicLongArray buckets;
- public static EstimatedHistogramSerializer serializer = new EstimatedHistogramSerializer();
-
public EstimatedHistogram()
{
this(90);
@@ -53,7 +54,7 @@ public class EstimatedHistogram
public EstimatedHistogram(int bucketCount)
{
makeOffsets(bucketCount);
- buckets = new AtomicLongArray(numBuckets);
+ buckets = new AtomicLongArray(bucketOffsets.length + 1);
}
public EstimatedHistogram(long[] offsets, long[] bucketData)
@@ -61,7 +62,6 @@ public class EstimatedHistogram
assert bucketData.length == offsets.length +1;
bucketOffsets = offsets;
buckets = new AtomicLongArray(bucketData);
- numBuckets = bucketData.length;
}
private void makeOffsets(int size)
@@ -69,7 +69,7 @@ public class EstimatedHistogram
bucketOffsets = new long[size];
long last = 1;
bucketOffsets[0] = last;
- for(int i = 1; i < size; i++)
+ for (int i = 1; i < size; i++)
{
long next = Math.round(last * 1.2);
if (next == last)
@@ -77,58 +77,79 @@ public class EstimatedHistogram
bucketOffsets[i] = next;
last = next;
}
- numBuckets = bucketOffsets.length + 1;
}
+ /**
+ * @return the histogram values corresponding to each bucket index
+ */
public long[] getBucketOffsets()
{
return bucketOffsets;
}
-
+
+ /**
+ * Increments the count of the bucket closest to n, rounding UP.
+ * @param n
+ */
public void add(long n)
{
int index = Arrays.binarySearch(bucketOffsets, n);
if (index < 0)
{
- //inexact match, find closest bucket
+ // inexact match, take the first bucket higher than n
index = -index - 1;
}
- else
- {
- //exact match, so we want the next highest one
- index += 1;
- }
+ // else exact match; we're good
buckets.incrementAndGet(index);
}
- public long[] get(boolean reset)
+ /**
+ * @return the count in the given bucket
+ */
+ long get(int bucket)
+ {
+ return buckets.get(bucket);
+ }
+
+ /**
+ * @param reset: zero out buckets afterwards if true
+ * @return a long[] containing the current histogram buckets
+ */
+ public long[] getBuckets(boolean reset)
{
- long[] rv = new long[numBuckets];
- for (int i = 0; i < numBuckets; i++)
+ long[] rv = new long[buckets.length()];
+ for (int i = 0; i < buckets.length(); i++)
rv[i] = buckets.get(i);
if (reset)
- for (int i = 0; i < numBuckets; i++)
+ for (int i = 0; i < buckets.length(); i++)
buckets.set(i, 0L);
return rv;
}
+ /**
+ * @return the smallest value that could have been added to this histogram
+ */
public long min()
{
- for (int i = 0; i < numBuckets; i++)
+ for (int i = 0; i < buckets.length(); i++)
{
if (buckets.get(i) > 0)
- return bucketOffsets[i == 0 ? 0 : i - 1];
+ return i == 0 ? 0 : 1 + bucketOffsets[i - 1];
}
return 0;
}
+ /**
+ * @return the largest value that could have been added to this histogram. If the histogram
+ * overflowed, returns Long.MAX_VALUE.
+ */
public long max()
{
- int lastBucket = numBuckets - 1;
+ int lastBucket = buckets.length() - 1;
if (buckets.get(lastBucket) > 0)
- throw new IllegalStateException("Unable to compute ceiling for max when all buckets are full");
+ return Long.MAX_VALUE;
for (int i = lastBucket - 1; i >= 0; i--)
{
@@ -138,20 +159,33 @@ public class EstimatedHistogram
return 0;
}
- public long median()
+ /**
+ * @return the mean histogram value (average of bucket offsets, weighted by count)
+ * @throws IllegalStateException if any values were greater than the largest bucket threshold
+ */
+ public long mean()
{
- long max = 0;
- long median = 0;
- for (int i = 0; i < numBuckets; i++)
+ int lastBucket = buckets.length() - 1;
+ if (buckets.get(lastBucket) > 0)
+ throw new IllegalStateException("Unable to compute ceiling for max when histogram overflowed");
+
+ long elements = 0;
+ long sum = 0;
+ for (int i = 0; i < lastBucket; i++)
{
- if (max < 1 || buckets.get(i) > max)
- {
- max = buckets.get(i);
- if (max > 0)
- median = bucketOffsets[i == 0 ? 0 : i - 1];
- }
+ elements += buckets.get(i);
+ sum += buckets.get(i) * bucketOffsets[i];
}
- return median;
+
+ return (long) Math.ceil((double) sum / elements);
+ }
+
+ /**
+ * @return true if this histogram has overflowed -- that is, a value larger than our largest bucket could bound was added
+ */
+ public boolean isOverflowed()
+ {
+ return buckets.get(buckets.length() - 1) > 0;
}
public static class EstimatedHistogramSerializer implements ICompactSerializer<EstimatedHistogram>
@@ -159,7 +193,7 @@ public class EstimatedHistogram
public void serialize(EstimatedHistogram eh, DataOutputStream dos) throws IOException
{
long[] offsets = eh.getBucketOffsets();
- long[] buckets = eh.get(false);
+ long[] buckets = eh.getBuckets(false);
dos.writeInt(buckets.length);
for (int i = 0; i < buckets.length; i++)
{
Modified: cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/utils/LatencyTracker.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/utils/LatencyTracker.java?rev=1074291&r1=1074290&r2=1074291&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/utils/LatencyTracker.java (original)
+++ cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/utils/LatencyTracker.java Thu Feb 24 21:15:22 2011
@@ -76,11 +76,11 @@ public class LatencyTracker
public long[] getTotalLatencyHistogramMicros()
{
- return totalHistogram.get(false);
+ return totalHistogram.getBuckets(false);
}
public long[] getRecentLatencyHistogramMicros()
{
- return recentHistogram.get(true);
+ return recentHistogram.getBuckets(true);
}
}
Modified: cassandra/branches/cassandra-0.7/test/unit/org/apache/cassandra/utils/EstimatedHistogramTest.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/test/unit/org/apache/cassandra/utils/EstimatedHistogramTest.java?rev=1074291&r1=1074290&r2=1074291&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.7/test/unit/org/apache/cassandra/utils/EstimatedHistogramTest.java (original)
+++ cassandra/branches/cassandra-0.7/test/unit/org/apache/cassandra/utils/EstimatedHistogramTest.java Thu Feb 24 21:15:22 2011
@@ -26,28 +26,49 @@ import static org.junit.Assert.*;
public class EstimatedHistogramTest
{
@Test
- public void testFindingCorrectBuckets()
+ public void testSimple()
{
+ // 0 and 1 map to the same, first bucket
EstimatedHistogram histogram = new EstimatedHistogram();
+ histogram.add(0);
+ assertEquals(1, histogram.get(0));
+ histogram.add(1);
+ assertEquals(2, histogram.get(0));
+ }
- histogram.add(0L);
- assertEquals(1, histogram.get(false)[0]);
+ @Test
+ public void testOverflow()
+ {
+ EstimatedHistogram histogram = new EstimatedHistogram(1);
+ histogram.add(100);
+ assert histogram.isOverflowed();
+ assertEquals(Long.MAX_VALUE, histogram.max());
+ }
- histogram.add(23282687);
- assertEquals(1, histogram.get(false)[histogram.buckets.length() - 2]);
+ @Test
+ public void testMinMax()
+ {
+ EstimatedHistogram histogram = new EstimatedHistogram();
+ histogram.add(16);
+ assertEquals(15, histogram.min());
+ assertEquals(17, histogram.max());
+ }
- histogram.add(1);
- assertEquals(1, histogram.get(false)[1]);
+ @Test
+ public void testFindingCorrectBuckets()
+ {
+ EstimatedHistogram histogram = new EstimatedHistogram();
+ histogram.add(23282687);
+ assert !histogram.isOverflowed();
+ assertEquals(1, histogram.getBuckets(false)[histogram.buckets.length() - 2]);
histogram.add(9);
- assertEquals(1, histogram.get(false)[8]);
+ assertEquals(1, histogram.getBuckets(false)[8]);
histogram.add(20);
histogram.add(21);
histogram.add(22);
- assertEquals(3, histogram.get(false)[13]);
- assertEquals(1, histogram.min());
- assertEquals(25109160, histogram.max());
- assertEquals(20, histogram.median());
+ assertEquals(2, histogram.getBuckets(false)[13]);
+ assertEquals(5021848, histogram.mean());
}
}