You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by ph...@apache.org on 2010/06/25 00:03:17 UTC
svn commit: r957730 - in /avro/trunk: CHANGES.txt
lang/java/src/java/org/apache/avro/ipc/stats/Histogram.java
lang/java/src/java/org/apache/avro/ipc/stats/IntegerHistogram.java
lang/java/src/test/java/org/apache/avro/ipc/stats/TestHistogram.java
Author: philz
Date: Thu Jun 24 22:03:17 2010
New Revision: 957730
URL: http://svn.apache.org/viewvc?rev=957730&view=rev
Log:
AVRO-584. Update Histogram for Stats Plugin (Contributed by Patrick Wendell)
Added:
avro/trunk/lang/java/src/java/org/apache/avro/ipc/stats/IntegerHistogram.java
Modified:
avro/trunk/CHANGES.txt
avro/trunk/lang/java/src/java/org/apache/avro/ipc/stats/Histogram.java
avro/trunk/lang/java/src/test/java/org/apache/avro/ipc/stats/TestHistogram.java
Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=957730&r1=957729&r2=957730&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Thu Jun 24 22:03:17 2010
@@ -23,6 +23,8 @@ Avro 1.4.0 (unreleased)
(Patrick Wendell via cutting)
IMPROVEMENTS
+ AVRO-584. Update Histogram for Stats Plugin
+ (Patrick Wendell via philz)
AVRO-501. missing function in C api to access array elements after
decoding an array. (Bruce Mitchener via massie)
Modified: avro/trunk/lang/java/src/java/org/apache/avro/ipc/stats/Histogram.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/ipc/stats/Histogram.java?rev=957730&r1=957729&r2=957730&view=diff
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/ipc/stats/Histogram.java (original)
+++ avro/trunk/lang/java/src/java/org/apache/avro/ipc/stats/Histogram.java Thu Jun 24 22:03:17 2010
@@ -17,14 +17,18 @@
*/
package org.apache.avro.ipc.stats;
+import java.util.ArrayList;
import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
import java.util.Map;
import java.util.SortedSet;
import java.util.TreeMap;
/**
* Represents a histogram of values. This class uses a {@link Segmenter}
- * to determine which bucket to place a given value into.
+ * to determine which bucket to place a given value into. Also stores the last
+ * MAX_HISTORY_SIZE entries which have been added to this histogram, in order.
*
* Note that Histogram, by itself, is not synchronized.
* @param <B> Bucket type. Often String, since buckets are typically
@@ -32,9 +36,15 @@ import java.util.TreeMap;
* @param <T> Type of value
*/
class Histogram<B, T> {
+ /**
+ * How many recent additions we should track.
+ */
+ public static final int MAX_HISTORY_SIZE = 20;
+
private Segmenter<B, T> segmenter;
private int[] counts;
protected int totalCount;
+ private LinkedList<T> recentAdditions;
/**
* Interface to determine which bucket to place a value in.
@@ -57,6 +67,17 @@ class Histogram<B, T> {
* is consistent with the segment numbers.
*/
Iterator<B> getBuckets();
+
+ /**
+ * Returns a List of bucket boundaries. Useful for printing
+ * segmenters.
+ */
+ List<String> getBoundaryLabels();
+
+ /**
+ * Returns the bucket labels as a list.
+ */
+ List<String> getBucketLabels();
}
public static class SegmenterException extends RuntimeException {
@@ -95,20 +116,41 @@ class Histogram<B, T> {
private String rangeAsString(T a, T b) {
return String.format("[%s,%s)", a, b == null ? "infinity" : b);
}
-
+
+ /**
+  * Collects the string form of every key in {@code index}, in the order
+  * the key set iterates them.
+  */
+ @Override
+ public ArrayList<String> getBoundaryLabels() {
+   ArrayList<String> labels = new ArrayList<String>(index.keySet().size());
+   Iterator<T> keys = index.keySet().iterator();
+   while (keys.hasNext()) {
+     labels.add(keys.next().toString());
+   }
+   return labels;
+ }
+
+ /**
+  * Drains the iterator returned by {@code getBuckets()} into a new list,
+  * keeping the iterator's order.
+  */
+ @Override
+ public ArrayList<String> getBucketLabels() {
+   ArrayList<String> labels = new ArrayList<String>(index.keySet().size());
+   for (Iterator<String> buckets = getBuckets(); buckets.hasNext(); ) {
+     labels.add(buckets.next());
+   }
+   return labels;
+ }
+
@Override
public Iterator<String> getBuckets() {
return new Iterator<String>() {
Iterator<T> it = index.keySet().iterator();
T cur = it.next(); // there's always at least one element
-
+ int pos = 0;
+
@Override
public boolean hasNext() {
- return it.hasNext();
+ return (pos < index.keySet().size());
}
@Override
public String next() {
+ pos = pos + 1;
T left = cur;
cur = it.hasNext() ? it.next() : null;
return rangeAsString(left, cur);
@@ -117,7 +159,6 @@ class Histogram<B, T> {
@Override
public void remove() {
throw new UnsupportedOperationException();
-
}
};
}
@@ -129,6 +170,7 @@ class Histogram<B, T> {
public Histogram(Segmenter<B, T> segmenter) {
this.segmenter = segmenter;
this.counts = new int[segmenter.size()];
+ this.recentAdditions = new LinkedList<T>();
}
/** Tallies a value in the histogram. */
@@ -136,6 +178,10 @@ class Histogram<B, T> {
int i = segmenter.segment(value);
counts[i]++;
totalCount++;
+ if (this.recentAdditions.size() > Histogram.MAX_HISTORY_SIZE) {
+ this.recentAdditions.pollLast();
+ }
+ this.recentAdditions.push(value);
}
/**
@@ -144,11 +190,27 @@ class Histogram<B, T> {
public int[] getHistogram() {
return counts;
}
+
+ /** Returns the segmenter this histogram uses to assign values to buckets. */
+ public Segmenter<B, T> getSegmenter() {
+   return segmenter;
+ }
+
+ /**
+  * Returns values recently added to this histogram. These are in reverse
+  * order (most recent first).
+  *
+  * The result is a read-only view over the internal history list rather
+  * than the list itself, so callers cannot alter the recorded history.
+  * The view is live: values added later show up in it.
+  *
+  * NOTE(review): the size check in add() runs before the new value is
+  * pushed, so the history appears able to hold MAX_HISTORY_SIZE + 1
+  * entries -- confirm whether that is intended.
+  *
+  * @return unmodifiable view of the recent additions, most recent first
+  */
+ public List<T> getRecentAdditions() {
+   return java.util.Collections.unmodifiableList(this.recentAdditions);
+ }
/** Returns the total count of entries. */
public int getCount() {
return totalCount;
}
+
public String toString() {
StringBuilder sb = new StringBuilder();
Added: avro/trunk/lang/java/src/java/org/apache/avro/ipc/stats/IntegerHistogram.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/ipc/stats/IntegerHistogram.java?rev=957730&view=auto
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/ipc/stats/IntegerHistogram.java (added)
+++ avro/trunk/lang/java/src/java/org/apache/avro/ipc/stats/IntegerHistogram.java Thu Jun 24 22:03:17 2010
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.ipc.stats;
+
+/**
+ * Specific implementation of histogram for integers,
+ * which also keeps track of basic summary statistics
+ * (mean and sample standard deviation).
+ *
+ * This class adds no synchronization of its own.
+ *
+ * @param <B> Bucket type, passed through to {@link Histogram}.
+ */
+class IntegerHistogram<B> extends Histogram<B, Integer> {
+  /*
+   * Accumulated as double: a float cannot represent every integer above
+   * 2^24, so float sums silently stop tracking small additions.
+   */
+  private double runningSum;
+  private double runningSumOfSquares;
+
+  /**
+   * @param segmenter decides which bucket each added value falls into
+   */
+  public IntegerHistogram(Segmenter<B, Integer> segmenter) {
+    super(segmenter);
+  }
+
+  /** Tallies a value in the histogram and folds it into the running sums. */
+  @Override
+  public void add(Integer value) {
+    super.add(value);
+    // Unbox once and widen before squaring: int * int overflows once
+    // |value| exceeds 46340.
+    double v = value;
+    runningSum += v;
+    runningSumOfSquares += v * v;
+  }
+
+  /**
+   * Returns the arithmetic mean of all added values.
+   *
+   * @return the mean, or -1 if no values have been added
+   */
+  public float getMean() {
+    if (totalCount == 0) {
+      return -1;
+    }
+    return (float) (runningSum / totalCount);
+  }
+
+  /**
+   * Returns the sample standard deviation (n - 1 denominator).
+   *
+   * @return the standard deviation, or -1 if fewer than two values
+   *         have been added
+   */
+  public float getUnbiasedStdDev() {
+    if (totalCount <= 1) {
+      return -1;
+    }
+    double mean = runningSum / totalCount;
+    double sumOfSquaredDeviations =
+        runningSumOfSquares - totalCount * mean * mean;
+    // Rounding can push the difference slightly below zero when all values
+    // are (nearly) equal; clamp so sqrt never yields NaN.
+    return (float) Math.sqrt(
+        Math.max(0.0, sumOfSquaredDeviations) / (totalCount - 1));
+  }
+}
Modified: avro/trunk/lang/java/src/test/java/org/apache/avro/ipc/stats/TestHistogram.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/src/test/java/org/apache/avro/ipc/stats/TestHistogram.java?rev=957730&r1=957729&r2=957730&view=diff
==============================================================================
--- avro/trunk/lang/java/src/test/java/org/apache/avro/ipc/stats/TestHistogram.java (original)
+++ avro/trunk/lang/java/src/test/java/org/apache/avro/ipc/stats/TestHistogram.java Thu Jun 24 22:03:17 2010
@@ -19,6 +19,7 @@ package org.apache.avro.ipc.stats;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
import java.util.ArrayList;
import java.util.Arrays;
@@ -46,7 +47,38 @@ public class TestHistogram {
assertArrayEquals(new int[] { 1, 1, 2, 4, 8, 4 }, h.getHistogram());
assertEquals("[0,1)=1;[1,2)=1;[2,4)=2;[4,8)=4;[8,16)=8;[16,infinity)=4", h.toString());
+
+ String[] correctBucketLabels = {
+ "[0,1)", "[1,2)", "[2,4)", "[4,8)", "[8,16)", "[16,infinity)"};
+
+ // test bucket iterator
+ int pos = 0;
+ Iterator<String> it = h.getSegmenter().getBuckets();
+ while (it.hasNext()) {
+ assertEquals(correctBucketLabels[pos], it.next());
+ pos = pos + 1;
+ }
+ assertEquals(correctBucketLabels.length, pos);
+
+ List<String> labels = h.getSegmenter().getBucketLabels();
+ assertEquals(correctBucketLabels.length, labels.size());
+ if (labels.size() == correctBucketLabels.length) {
+ for (int i = 0; i < labels.size(); i++) {
+ assertEquals(correctBucketLabels[i], labels.get(i));
+ }
+ }
+ String[] correctBoundryLabels = {
+ "0", "1", "2", "4", "8", "16"};
+ List<String> boundryLabels = h.getSegmenter().getBoundaryLabels();
+
+ assertEquals(correctBoundryLabels.length, boundryLabels.size());
+ if (boundryLabels.size() == correctBoundryLabels.length) {
+ for (int i = 0; i < boundryLabels.size(); i++) {
+ assertEquals(correctBoundryLabels[i], boundryLabels.get(i));
+ }
+ }
+
List<Entry<String>> entries = new ArrayList<Entry<String>>();
for (Entry<String> entry : h.entries()) {
entries.add(entry);
@@ -54,6 +86,13 @@ public class TestHistogram {
assertEquals("[0,1)", entries.get(0).bucket);
assertEquals(4, entries.get(5).count);
assertEquals(6, entries.size());
+
+ h.add(1010);
+ h.add(9191);
+ List<Integer> recent = h.getRecentAdditions();
+ assertTrue(recent.contains(1010));
+ assertTrue(recent.contains(9191));
+
}
@Test(expected=Histogram.SegmenterException.class)
@@ -71,12 +110,21 @@ public class TestHistogram {
public Iterator<String> getBuckets() {
return Arrays.asList("X").iterator();
}
+
+ /** Stub: the single boundary label of this one-bucket test segmenter. */
+ @Override
+ public List<String> getBoundaryLabels() {
+   return Arrays.asList("X");
+ }
+
+ /** Stub: the single bucket label of this one-bucket test segmenter. */
+ @Override
+ public List<String> getBucketLabels() {
+   return Arrays.asList("X");
+ }
@Override
public int segment(Float value) { return 0; }
@Override
public int size() { return 1; }
+
}
@Test