You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by ph...@apache.org on 2010/06/25 00:03:17 UTC

svn commit: r957730 - in /avro/trunk: CHANGES.txt lang/java/src/java/org/apache/avro/ipc/stats/Histogram.java lang/java/src/java/org/apache/avro/ipc/stats/IntegerHistogram.java lang/java/src/test/java/org/apache/avro/ipc/stats/TestHistogram.java

Author: philz
Date: Thu Jun 24 22:03:17 2010
New Revision: 957730

URL: http://svn.apache.org/viewvc?rev=957730&view=rev
Log:
AVRO-584. Update Histogram for Stats Plugin (Contributed by Patrick Wendell)

Added:
    avro/trunk/lang/java/src/java/org/apache/avro/ipc/stats/IntegerHistogram.java
Modified:
    avro/trunk/CHANGES.txt
    avro/trunk/lang/java/src/java/org/apache/avro/ipc/stats/Histogram.java
    avro/trunk/lang/java/src/test/java/org/apache/avro/ipc/stats/TestHistogram.java

Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=957730&r1=957729&r2=957730&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Thu Jun 24 22:03:17 2010
@@ -23,6 +23,8 @@ Avro 1.4.0 (unreleased)
     (Patrick Wendell via cutting)
 
   IMPROVEMENTS
+    AVRO-584. Update Histogram for Stats Plugin
+    (Patrick Wendell via philz)
 
     AVRO-501. missing function in C api to access array elements after 
     decoding an array. (Bruce Mitchener via massie)

Modified: avro/trunk/lang/java/src/java/org/apache/avro/ipc/stats/Histogram.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/ipc/stats/Histogram.java?rev=957730&r1=957729&r2=957730&view=diff
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/ipc/stats/Histogram.java (original)
+++ avro/trunk/lang/java/src/java/org/apache/avro/ipc/stats/Histogram.java Thu Jun 24 22:03:17 2010
@@ -17,14 +17,18 @@
  */
 package org.apache.avro.ipc.stats;
 
+import java.util.ArrayList;
 import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
 import java.util.Map;
 import java.util.SortedSet;
 import java.util.TreeMap;
 
 /**
  * Represents a histogram of values.  This class uses a {@link Segmenter}
- * to determine which bucket to place a given value into.
+ * to determine which bucket to place a given value into. Also stores the last
+ * MAX_HISTORY_SIZE entries which have been added to this histogram, in order.
  *
  * Note that Histogram, by itself, is not synchronized.
  * @param <B> Bucket type.  Often String, since buckets are typically
@@ -32,9 +36,15 @@ import java.util.TreeMap;
  * @param <T> Type of value
  */
 class Histogram<B, T> {
+  /**
+   * How many recent additions we should track.
+   */
+  public static final int MAX_HISTORY_SIZE = 20; 
+  
   private Segmenter<B, T> segmenter;
   private int[] counts;
   protected int totalCount;
+  private LinkedList<T> recentAdditions;
 
   /**
    * Interface to determine which bucket to place a value in.
@@ -57,6 +67,17 @@ class Histogram<B, T> {
      * is consistent with the segment numbers.
      */
     Iterator<B> getBuckets();
+    
+    /**
+     * Returns a List of bucket boundaries. Useful for printing
+     * segmenters.
+     * @return the boundary labels, as strings
+     */
+    List<String> getBoundaryLabels();
+    
+    /**
+     * Returns the bucket labels as a List.
+     * @return the bucket labels, as strings
+     */
+    List<String> getBucketLabels();
   }
 
   public static class SegmenterException extends RuntimeException {
@@ -95,20 +116,41 @@ class Histogram<B, T> {
     private String rangeAsString(T a, T b) {
       return String.format("[%s,%s)", a, b == null ? "infinity" : b);
     }
-
+    
+    /**
+     * Renders every key of the index with toString() and collects the
+     * results in the key set's iteration order.
+     */
+    @Override
+    public ArrayList<String> getBoundaryLabels() {
+      ArrayList<String> labels = new ArrayList<String>(index.keySet().size());
+      Iterator<T> boundaries = index.keySet().iterator();
+      while (boundaries.hasNext()) {
+        labels.add(boundaries.next().toString());
+      }
+      return labels;
+    }
+    
+    /**
+     * Drains the iterator returned by getBuckets() into a list, keeping
+     * the labels in iteration order.
+     */
+    @Override
+    public ArrayList<String> getBucketLabels() {
+      ArrayList<String> labels = new ArrayList<String>(index.keySet().size());
+      for (Iterator<String> buckets = getBuckets(); buckets.hasNext();) {
+        labels.add(buckets.next());
+      }
+      return labels;
+    }
+    
     @Override
     public Iterator<String> getBuckets() {
       return new Iterator<String>() {
         Iterator<T> it = index.keySet().iterator();
         T cur = it.next(); // there's always at least one element
-
+        int pos = 0;
+        
         @Override
         public boolean hasNext() {
-          return it.hasNext();
+          return (pos < index.keySet().size());
         }
 
         @Override
         public String next() {
+          pos = pos + 1;
           T left = cur;
           cur = it.hasNext() ? it.next() : null;
           return rangeAsString(left, cur);
@@ -117,7 +159,6 @@ class Histogram<B, T> {
         @Override
         public void remove() {
           throw new UnsupportedOperationException();
-
         }
       };
     }
@@ -129,6 +170,7 @@ class Histogram<B, T> {
   public Histogram(Segmenter<B, T> segmenter) {
     this.segmenter = segmenter;
     this.counts = new int[segmenter.size()];
+    this.recentAdditions = new LinkedList<T>();
   }
 
   /** Tallies a value in the histogram. */
@@ -136,6 +178,10 @@ class Histogram<B, T> {
     int i = segmenter.segment(value);
     counts[i]++;
     totalCount++;
+    if (this.recentAdditions.size() > Histogram.MAX_HISTORY_SIZE) {
+      this.recentAdditions.pollLast();
+    }
+    this.recentAdditions.push(value);
   }
 
   /**
@@ -144,11 +190,27 @@ class Histogram<B, T> {
   public int[] getHistogram() {
     return counts;
   }
+  
+  /**
+   * Exposes the segmenter that assigns values to this histogram's buckets.
+   * @return the segmenter held by this histogram
+   */
+  public Segmenter<B, T> getSegmenter() {
+    return segmenter;
+  }
+  
+  /**
+   * Returns values recently added to this histogram. These are in reverse
+   * order (most recent first).
+   *
+   * The result is a read-only view of the internal history rather than a
+   * copy: it reflects later additions, and any attempt to modify it throws
+   * UnsupportedOperationException, so callers cannot corrupt the history.
+   * @return unmodifiable list of recent additions, newest at index 0
+   */
+  public List<T> getRecentAdditions() {
+    // Fully qualified so that no new import is needed.
+    return java.util.Collections.unmodifiableList(this.recentAdditions);
+  }
 
   /** Returns the total count of entries. */
   public int getCount() {
     return totalCount;
   }
+  
 
   public String toString() {
     StringBuilder sb = new StringBuilder();

Added: avro/trunk/lang/java/src/java/org/apache/avro/ipc/stats/IntegerHistogram.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/ipc/stats/IntegerHistogram.java?rev=957730&view=auto
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/ipc/stats/IntegerHistogram.java (added)
+++ avro/trunk/lang/java/src/java/org/apache/avro/ipc/stats/IntegerHistogram.java Thu Jun 24 22:03:17 2010
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.ipc.stats;
+
+/**
+ * Specific implementation of histogram for integers,
+ * which also keeps track of basic summary statistics
+ * (mean and sample standard deviation) from running sums.
+ * @param <B> Bucket type, as for {@link Histogram}.
+ */
+class IntegerHistogram<B> extends Histogram<B, Integer> {
+  // Accumulated in double: a float cannot represent every integer above
+  // 2^24, so float sums silently drop small increments once they grow.
+  private double runningSum;
+  private double runningSumOfSquares;
+
+  /**
+   * Creates an empty histogram that uses the given segmenter.
+   * @param segmenter determines the bucket for each integer value
+   */
+  public IntegerHistogram(Segmenter<B, Integer> segmenter) {
+    super(segmenter);
+  }
+
+  /**
+   * Tallies a value in the histogram and folds it into the running sums.
+   * @param value the value to record
+   */
+  @Override
+  public void add(Integer value) {
+    super.add(value);
+    // Widen before multiplying: value*value would be evaluated in int
+    // arithmetic and wrap around for |value| > 46340.
+    double v = value;
+    runningSum += v;
+    runningSumOfSquares += v * v;
+  }
+
+  /**
+   * Returns the arithmetic mean of all values added so far.
+   * @return the mean, or -1 if no values have been added
+   */
+  public float getMean() {
+    if (totalCount == 0) {
+      return -1;
+    }
+    return (float) (runningSum / totalCount);
+  }
+
+  /**
+   * Returns the sample standard deviation (n - 1 in the denominator).
+   * @return the standard deviation, or -1 if fewer than two values
+   *         have been added
+   */
+  public float getUnbiasedStdDev() {
+    if (totalCount <= 1) {
+      return -1;
+    }
+    double mean = runningSum / totalCount;
+    // Rounding can push a near-zero sum of squared deviations slightly
+    // below zero, which would make sqrt return NaN; clamp it at zero.
+    double squaredDeviations =
+        Math.max(0.0, runningSumOfSquares - totalCount * mean * mean);
+    return (float) Math.sqrt(squaredDeviations / (totalCount - 1));
+  }
+}

Modified: avro/trunk/lang/java/src/test/java/org/apache/avro/ipc/stats/TestHistogram.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/src/test/java/org/apache/avro/ipc/stats/TestHistogram.java?rev=957730&r1=957729&r2=957730&view=diff
==============================================================================
--- avro/trunk/lang/java/src/test/java/org/apache/avro/ipc/stats/TestHistogram.java (original)
+++ avro/trunk/lang/java/src/test/java/org/apache/avro/ipc/stats/TestHistogram.java Thu Jun 24 22:03:17 2010
@@ -19,6 +19,7 @@ package org.apache.avro.ipc.stats;
 
 import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -46,7 +47,38 @@ public class TestHistogram {
     assertArrayEquals(new int[] { 1, 1, 2, 4, 8, 4 }, h.getHistogram());
 
     assertEquals("[0,1)=1;[1,2)=1;[2,4)=2;[4,8)=4;[8,16)=8;[16,infinity)=4", h.toString());
+    
+    String[] correctBucketLabels = {
+        "[0,1)", "[1,2)", "[2,4)", "[4,8)", "[8,16)", "[16,infinity)"};
+    
+    // test bucket iterator
+    int pos = 0;
+    Iterator<String> it = h.getSegmenter().getBuckets();
+    while (it.hasNext()) {
+      assertEquals(correctBucketLabels[pos], it.next());
+      pos = pos + 1;
+    }
+    assertEquals(correctBucketLabels.length, pos);
+    
+    List<String> labels = h.getSegmenter().getBucketLabels();
+    assertEquals(correctBucketLabels.length, labels.size());
+    if (labels.size() == correctBucketLabels.length) {
+      for (int i = 0; i < labels.size(); i++) {
+        assertEquals(correctBucketLabels[i], labels.get(i));
+      }
+    }
 
+    String[] correctBoundryLabels = {
+        "0", "1", "2", "4", "8", "16"};
+    List<String> boundryLabels = h.getSegmenter().getBoundaryLabels();
+    
+    assertEquals(correctBoundryLabels.length, boundryLabels.size());
+    if (boundryLabels.size() == correctBoundryLabels.length) {
+      for (int i = 0; i < boundryLabels.size(); i++) {
+        assertEquals(correctBoundryLabels[i], boundryLabels.get(i));
+      }
+    }
+    
     List<Entry<String>> entries = new ArrayList<Entry<String>>();
     for (Entry<String> entry : h.entries()) {
       entries.add(entry);
@@ -54,6 +86,13 @@ public class TestHistogram {
     assertEquals("[0,1)", entries.get(0).bucket);
     assertEquals(4, entries.get(5).count);
     assertEquals(6, entries.size());
+    
+    h.add(1010);
+    h.add(9191);
+    List<Integer> recent = h.getRecentAdditions();
+    assertTrue(recent.contains(1010));
+    assertTrue(recent.contains(9191));
+    
   }
 
   @Test(expected=Histogram.SegmenterException.class)
@@ -71,12 +110,21 @@ public class TestHistogram {
     public Iterator<String> getBuckets() {
       return Arrays.asList("X").iterator();
     }
+    
+    /** Stub: the only boundary label of this single-bucket segmenter. */
+    @Override
+    public List<String> getBoundaryLabels() {
+      return Arrays.asList("X");
+    }
+    
+    /** Stub: the only bucket label of this single-bucket segmenter. */
+    @Override
+    public List<String> getBucketLabels() {
+      return Arrays.asList("X");
+    }
 
     @Override
     public int segment(Float value) { return 0; }
 
     @Override
     public int size() { return 1; }
+
   }
 
   @Test