You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by to...@apache.org on 2003/05/23 19:33:18 UTC

cvs commit: jakarta-commons-sandbox/math/src/test/org/apache/commons/math UnivariateImplTest.java

tobrien     2003/05/23 10:33:18

  Modified:    math     project.xml
               math/src/java/org/apache/commons/math Univariate.java
                        UnivariateImpl.java
               math/src/test/org/apache/commons/math
                        UnivariateImplTest.java
  Log:
  From Phil Steitz patch submission for Issue #20175
  
  The attached patch includes the following improvements to Univariate and
  UnivariateImpl:
  
  * Improved efficiency of min, max and product maintenance when windowSize is
    limited by incorporating suggestion posted to commons-dev by Brent Worden
    (added author credit).  Thanks, Brent!
  
  * Added javadoc specifying NaN contracts for all statistics, definitions for
    geometric and arithmetic means.
  
  * Made some slight modifications to UnivariateImpl to make it consistent with
    NaN contracts
  
  * All interface documentation moved to Univariate. The interface specification
    includes the NaN semantics and a first attempt at clearly defining exactly
    what "rolling" means and how this affects what statistics are defined when.
  
  * Added test cases to verify that min, max, product are correctly maintained
    when "rolling" and to verify that NaN contracts are satisfied.
  
  Revision  Changes    Path
  1.9       +2 -2      jakarta-commons-sandbox/math/project.xml
  
  Index: project.xml
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/math/project.xml,v
  retrieving revision 1.8
  retrieving revision 1.9
  diff -u -r1.8 -r1.9
  --- project.xml	23 May 2003 16:28:16 -0000	1.8
  +++ project.xml	23 May 2003 17:33:17 -0000	1.9
  @@ -54,8 +54,8 @@
       </dependency> -->
     </dependencies>
   
  -<!--  <issueTrackingUrl>http://nagoya.apache.org/bugzilla/buglist.cgi?bug_status=NEW&amp;bug_status=ASSIGNED&amp;bug_status=REOPENED&amp;email1=&amp;emailtype1=substring&amp;emailassigned_to1=1&amp;email2=&amp;emailtype2=substring&amp;emailreporter2=1&amp;bugidtype=include&amp;bug_id=&amp;changedin=&amp;votes=&amp;chfieldfrom=&amp;chfieldto=Now&amp;chfieldvalue=&amp;product=Commons&amp;component=Sandbox&amp;short_desc=&amp;short_desc_type=allwordssubstr&amp;long_desc=&amp;long_desc_type=allwordssubstr&amp;bug_file_loc=&amp;bug_file_loc_type=allwordssubstr&amp;keywords=&amp;keywords_type=anywords&amp;field0-0-0=noop&amp;type0-0-0=noop&amp;value0-0-0=&amp;cmdtype=doit&amp;order=%27Importance%27</issueTrackingUrl>
  --->
  +  <issueTrackingUrl>http://nagoya.apache.org/bugzilla/buglist.cgi?bug_status=NEW&amp;bug_status=ASSIGNED&amp;bug_status=REOPENED&amp;email1=&amp;emailtype1=substring&amp;emailassigned_to1=1&amp;email2=&amp;emailtype2=substring&amp;emailreporter2=1&amp;bugidtype=include&amp;bug_id=&amp;changedin=&amp;votes=&amp;chfieldfrom=&amp;chfieldto=Now&amp;chfieldvalue=&amp;product=Commons&amp;component=Sandbox&amp;short_desc=&amp;short_desc_type=allwordssubstr&amp;long_desc=&amp;long_desc_type=allwordssubstr&amp;bug_file_loc=&amp;bug_file_loc_type=allwordssubstr&amp;keywords=&amp;keywords_type=anywords&amp;field0-0-0=noop&amp;type0-0-0=noop&amp;value0-0-0=&amp;cmdtype=doit&amp;order=%27Importance%27</issueTrackingUrl>
  +
     <repository>
       <connection>scm:cvs:pserver:anoncvs@cvs.apache.org:/home/cvspublic:jakarta-commons-sandbox/${pom.artifactId.substring(8)}</connection>
       <url>http://cvs.apache.org/viewcvs/jakarta-commons-sandbox/${pom.artifactId.substring(8)}/</url>
  
  
  
  1.6       +75 -33    jakarta-commons-sandbox/math/src/java/org/apache/commons/math/Univariate.java
  
  Index: Univariate.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/math/src/java/org/apache/commons/math/Univariate.java,v
  retrieving revision 1.5
  retrieving revision 1.6
  diff -u -r1.5 -r1.6
  --- Univariate.java	21 May 2003 17:59:19 -0000	1.5
  +++ Univariate.java	23 May 2003 17:33:18 -0000	1.6
  @@ -56,8 +56,19 @@
   /**
    *
    * Accumulates univariate statistics for values fed in 
  - * through the addValue() method.   This interface defines the LCD interface
  - * which all Univariate implementations must implement.
  + * through the addValue() method. This interface defines the LCD interface
  + * which all Univariate implementations must implement. <p>
  + * A "rolling" capability is supported by all implementations with the following
  + * contract: <p>
  + * <i> Setting the windowSize property limits the domain of all statistics to
  + * the last <code>windowSize</code> values added.</i><p>
  + * We use the term <i>available values</i> throughout the API documentation
  + * to refer to these values when the windowSize is set. For example, if the
  + * windowSize is set to 3 and the values {1,2,3,4,5} have been added <strong>
  + * in that order</strong> then the <i>available values</i> are {3,4,5} and all
  + * reported statistics will be based on these values<p>
  + * The default windowSize is "infinite" -- i.e., all values added are included
  + * in all computations.
    *
    * @author Phil Steitz
    * @author <a href="mailto:tobrien@apache.org">Tim O'Brien</a>
  @@ -71,65 +82,96 @@
        * Adds the value to the set of numbers
        * @param v the value to be added 
        */
  -    public abstract void addValue(double v);
  +    abstract void addValue(double v);
   
       /** 
  -     * Returns the mean of the values that have been added
  +     * Returns the <a href=http://www.xycoon.com/arithmetic_mean.htm>
  +     * arithmetic mean </a> of the available values <p>
  +     *
  +     * Will return Double.NaN if no values have been added when
  +     * this method is invoked.
  +     *
        * @return mean value
        */
  -    public abstract double getMean();
  +    abstract double getMean();
   
       /** 
  -     * Returns the geometric mean of the values that have been added
  +     * Returns the <a href=http://www.xycoon.com/geometric_mean.htm>
  +     * geometric mean </a> of the available values <p>
  +     *
  +     * Will return Double.NaN if no values have been added or the product
  +     * of the available values is less than or equal to 0.
  +     *
        * @return mean value
        */
  -    public abstract double getGeometricMean();
  +    abstract double getGeometricMean();
   
       /** 
  -     * Returns the product of all values that have been added
  +     * Returns the product of the available values <p>
  +     * Will return Double.NaN if no values have been added.
  +     *
        * @return product of all values
        */
  -    public abstract double getProduct();
  +    abstract double getProduct();
   
       /** 
  -     * Returns the variance of the values that have been added
  -     * @return variance value
  +     * Returns the variance of the available values. <p>
  +     * Double.NaN is returned for an empty set of values and 0.0 is 
  +     * returned for a single value set. 
  +     *
  +     * @return The variance of a set of values.  
        */
  -    public abstract double getVariance();
  +    abstract double getVariance();
   
       /** 
  -     * Returns the standard deviation of the values that have been added
  +     * Returns the standard deviation of the available values. <p>
  +     * Double.NaN is returned for an empty set of values and 0.0 is 
  +     * returned for a single value set. 
  +     *
        * @return standard deviation value
        */
  -    public abstract double getStandardDeviation();
  +    abstract double getStandardDeviation();
   
  -    /** Getter for property max.
  +    /** 
  +     * Returns the maximum of the available values <p>
  +     * Double.NaN is returned if no values have been added
  +     *
        * @return Value of property max.
        */
  -    public abstract double getMax();
  +    abstract double getMax();
   
  -    /** Getter for property min.
  +     /** 
  +     * Returns the minimum of the available values <p>
  +     * Double.NaN is returned if no values have been added
  +     *
        * @return Value of property min.
        */
  -    public abstract double getMin();
  +    abstract double getMin();
   
  -    /** Getter for property n.
  -     * @return Value of property n.
  +    /** 
  +     * Returns the number of available values
  +     * @return the number of available values
        */
  -    public abstract int getN();
  +    abstract int getN();
   
  -    /** Getter for property sum.
  -     * @return Value of property sum.
  +    /**
  +     * Returns the sum of the available values <p>
  +     * Returns 0 if no values have been added.
  +     *
  +     * @return the sum of the available values
        */
  -    public abstract double getSum();
  +    abstract double getSum();
   
  -    /** Getter for property sumsq.
  -     * @return Value of property sumsq.
  +    /**
  +     * Returns the sum of the squares of the available values.
  +     * Returns 0 if no values have been added.
  +     *
  +     * @return the sum of the squares of the available values.
        */
  -    public abstract double getSumsq();
  +    abstract double getSumsq();
   
  -    /** Resets all sums to 0, resets min and max */
  -    public abstract void clear();
  +    /** Resets all statistics */
  +    abstract void clear();
   
       /**
        * This constant signals that a Univariate implementation
  @@ -137,13 +179,13 @@
        * elements.  In other words, if getWindow returns this
        * constant, there is, in effect, no "window".
        */
  -    public static final int INIFINTE_WINDOW = -1;
  +    static final int INIFINTE_WINDOW = -1;
   
       /**
        * Univariate has the ability to return only measures for the
        * last N elements added to the set of values.  This function returns
        */
  -    public abstract int getWindowSize();
  +    abstract int getWindowSize();
   
       /**
        * Sets the window.  windowSize controls the number of value
  @@ -151,5 +193,5 @@
        * For example, a window value of 10 means that getMean()
        * will return the mean of the last 10 values added.
        */
  -    public abstract void setWindowSize(int windowSize);
  +    abstract void setWindowSize(int windowSize);
   }
  
  
  
  1.7       +57 -65    jakarta-commons-sandbox/math/src/java/org/apache/commons/math/UnivariateImpl.java
  
  Index: UnivariateImpl.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/math/src/java/org/apache/commons/math/UnivariateImpl.java,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- UnivariateImpl.java	21 May 2003 17:59:19 -0000	1.6
  +++ UnivariateImpl.java	23 May 2003 17:33:18 -0000	1.7
  @@ -59,13 +59,14 @@
    *
    * Accumulates univariate statistics for values fed in 
    * through the addValue() method.  Does not store raw data values.
  - * All data (including n) are represented internally as doubles.
  + * All data are represented internally as doubles.
    * Integers, floats and longs can be added, but will be converted
    * to doubles by addValue().  
    *
    * @author Phil Steitz
  - * @author Mark Diggory
    * @author <a href="mailto:tobrien@apache.org">Tim O'Brien</a>
  + * @author Mark Diggory
  + * @author Brent Worden
    * @version $Revision$ $Date$
    * 
   */
  @@ -94,7 +95,7 @@
       /** max of values that have been added */
       private double max = Double.MIN_VALUE;
   
  -    /** produce of values that have been added */
  +    /** product of values that have been added */
       private double product = Double.NaN;
   
       /** Creates new univariate */
  @@ -108,45 +109,36 @@
           doubleArray = new FixedDoubleArray( window );
       }
   
  -    /**
  -     * Adds the value, updating running sums.
  -     * @param v the value to be added 
  -     */
  +     
       public void addValue(double v) {
   
           insertValue(v);
       }
   
  -    /** 
  -     * Returns the mean of the values that have been added
  -     * @return mean value
  -     */
  +    
       public double getMean() {
  -        // FIXME: throw something meaningful if n = 0
  -        return (sum / (double) n );
  +        if (n == 0) {
  +            return Double.NaN;
  +        } else {
  +            return (sum / (double) n );
  +        }
        }
   
  -    /** 
  -     * Returns the geometric mean of the values that have been added
  -     * @return geometric mean value
  -     */
  +     
       public double getGeometricMean() {
  -        return Math.pow(product,( 1.0/n ) );
  +        if ((product <= 0.0) || (n == 0)) {
  +            return Double.NaN; 
  +        } else {
  +            return Math.pow(product,( 1.0/(double)n ) );
  +        }
       }
   
  -    /** 
  -     * Returns the product of all values add to this Univariate
  -     * @return product value
  -     */
  +    
       public double getProduct() {
           return product;
       }
   
  -    /** 
  -     * Returns the variance of the values that have been added. 
  -     * @return The variance of a set of values.  Double.NaN is returned for
  -     *         an empty set of values and 0.0 is returned for a single value set.
  -     */
  +     
       public double getVariance() {
           double variance = Double.NaN;
   
  @@ -160,21 +152,16 @@
           return variance;
       }
   
  -    /** 
  -     * Returns the standard deviation of the values that have been added
  -     * @return The standard deviation of a set of values.  Double.NaN is 
  -     *         returned for an empty set of values and 0.0 is returned for 
  -     *         a single value set.
  -     */
  +    
       public double getStandardDeviation() {
  -        return (new Double(Math.sqrt
  -            ((new Double(getVariance())).doubleValue()))).doubleValue();
  +        double variance = getVariance();
  +        if ((variance == 0.0) || (variance == Double.NaN)) {
  +            return variance;
  +        } else {
  +            return Math.sqrt(variance);
  +        }
       }
  -
  -    /**
  -     * Adds the value, updating running sums.
  -     * @param v the value to be added 
  -     */
  +   
       private void insertValue(double v) {
   
           // The default value of product is NaN, if you
  @@ -197,25 +184,28 @@
                   sum -= discarded;
                   sumsq -= discarded * discarded;
   
  -                // Include the influence of the new
  -                // TODO: The next two lines seems rather expensive, but
  -                // I don't see many alternatives.			 
  -                min = doubleArray.getMin();
  -                max = doubleArray.getMax();
  +                if(discarded == min) {
  +                    min = doubleArray.getMin();
  +                } else {
  +                    if(discarded == max){
  +                    max = doubleArray.getMax();
  +                    }
  +                } 
  +                
                   sum += v;
                   sumsq += v*v;
   
  -                // Note that the product CANNOT be discarded
  -                // properly because one cannot discount the effect
  -                // of a zero value.  For this reason, the product
  -                // of the altered array must be calculated from the
  -                // current array elements.  Product must be recalculated
  -                // everytime the array is "rolled"
  -                product = 1.0;
  -                double[] elements = doubleArray.getElements();
  -                for( int i = 0; i < elements.length; i++ ) {
  +                if(product != 0.0){
  +                    // can safely remove discarded value
  +                    product *= v/discarded;
  +                } else if(discarded == 0.0){
  +                    // need to recompute product
  +                    product = 1.0;
  +                    double[] elements = doubleArray.getElements();
  +                    for( int i = 0; i < elements.length; i++ ) {
                       product *= elements[i];
  -                }
  +                    }
  +                } // else product = 0 and will still be 0 after discard
   
               } else {
                   doubleArray.addElement( v );        	
  @@ -243,21 +233,22 @@
        * @return Value of property max.
        */
       public double getMax() {
  -        return max;
  -    }
  -
  -    /** Setter for property max.
  -     * @param max New value of property max.
  -     */
  -    public void setMax(double max) {
  -        this.max = max;
  +        if (n == 0) { 
  +            return Double.NaN;
  +        } else {
  +            return max;
  +        }
       }
   
       /** Getter for property min.
        * @return Value of property min.
        */
       public double getMin() {
  -        return min;
  +        if (n == 0) { 
  +            return Double.NaN;
  +        } else {
  +            return min;
  +        }
       }
   
       /** Getter for property n.
  @@ -305,6 +296,7 @@
           this.n = 0;
           this.min = Double.MAX_VALUE;
           this.max = Double.MIN_VALUE;
  +        this.product = Double.NaN;
       }
   
       /* (non-Javadoc)
  
  
  
  1.3       +70 -17    jakarta-commons-sandbox/math/src/test/org/apache/commons/math/UnivariateImplTest.java
  
  Index: UnivariateImplTest.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/math/src/test/org/apache/commons/math/UnivariateImplTest.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- UnivariateImplTest.java	21 May 2003 17:59:20 -0000	1.2
  +++ UnivariateImplTest.java	23 May 2003 17:33:18 -0000	1.3
  @@ -60,7 +60,8 @@
   /**
    * Test cases for the {@link Univariate} class.
    *
  - * @author <a href="mailto:phil@steitz.com">Phil Steitz</a>
  + * @author Phil Steitz
  + * @author Tim Obrien
    * @version $Revision$ $Date$
    */
   
  @@ -114,16 +115,21 @@
       
       public void testN0andN1Conditions() throws Exception {
       	UnivariateImpl u = new UnivariateImpl();
  -    	    	
  -		assertTrue("Mean of n = 0 set should be NaN", Double.isNaN( u.getMean() ) );
  -		assertTrue("Standard Deviation of n = 0 set should be NaN", Double.isNaN( u.getStandardDeviation() ) );
  -		assertTrue("Variance of n = 0 set should be NaN", Double.isNaN(u.getVariance() ) );
  -
  -		u.addValue(one);
  +        assertTrue("Mean of n = 0 set should be NaN", 
  +            Double.isNaN( u.getMean() ) );
  +	assertTrue("Standard Deviation of n = 0 set should be NaN", 
  +            Double.isNaN( u.getStandardDeviation() ) );
  +	assertTrue("Variance of n = 0 set should be NaN", 
  +            Double.isNaN(u.getVariance() ) );
  +	
  +        u.addValue(one);
   
  -		assertTrue( "Mean of n = 1 set should be value of single item n1", u.getMean() == one);
  -		assertTrue( "Mean of n = 1 set should be zero", u.getStandardDeviation() == 0);
  -		assertTrue( "Variance of n = 1 set should be zero", u.getVariance() == 0);	
  +	assertTrue( "Mean of n = 1 set should be value of single item n1", 
  +            u.getMean() == one);
  +	assertTrue( "Mean of n = 1 set should be zero", 
  +            u.getStandardDeviation() == 0);
  +	assertTrue( "Variance of n = 1 set should be zero",
  +            u.getVariance() == 0);	
       }
   
       public void testProductAndGeometricMean() throws Exception {
  @@ -134,8 +140,10 @@
           u.addValue( 3.0 );
           u.addValue( 4.0 );
   
  -        assertEquals( "Product not expected", 24.0, u.getProduct(), Double.MIN_VALUE );
  -        assertEquals( "Geometric mean not expected", 2.213364, u.getGeometricMean(), 0.00001 );
  +        assertEquals( "Product not expected", 24.0, u.getProduct(),
  +            Double.MIN_VALUE );
  +        assertEquals( "Geometric mean not expected", 2.213364, 
  +            u.getGeometricMean(), 0.00001 );
   
           // Now test rolling - UnivariateImpl should discount the contribution
           // of a discarded element
  @@ -144,11 +152,56 @@
           }
           // Values should be (2,3,4,5,6,7,8,9,10,11)
           
  -        assertEquals( "Product not expected", 39916800.0, u.getProduct(), 0.00001 );
  -        assertEquals( "Geometric mean not expected", 5.755931, u.getGeometricMean(), 0.00001 );
  -
  -
  +        assertEquals( "Product not expected", 39916800.0, 
  +            u.getProduct(), 0.00001 );
  +        assertEquals( "Geometric mean not expected", 5.755931, 
  +            u.getGeometricMean(), 0.00001 );
       }
  +    
  +    public void testRollingMinMax() {
  +        UnivariateImpl u = new UnivariateImpl(3);
  +        u.addValue( 1.0 );
  +        u.addValue( 5.0 );
  +        u.addValue( 3.0 );
  +        u.addValue( 4.0 ); // discarding min
  +        assertEquals( "min not expected", 3.0, 
  +            u.getMin(), Double.MIN_VALUE);
  +        u.addValue(1.0);  // discarding max
  +        assertEquals( "max not expected", 4.0, 
  +            u.getMax(), Double.MIN_VALUE);
  +    }
  +    
  +    public void testNaNContracts() {
  +        UnivariateImpl u = new UnivariateImpl();
  +        double nan = Double.NaN;
  +        assertTrue("mean not NaN",Double.isNaN(u.getMean())); 
  +        assertTrue("min not NaN",Double.isNaN(u.getMin())); 
  +        assertTrue("std dev not NaN",Double.isNaN(u.getStandardDeviation())); 
  +        assertTrue("var not NaN",Double.isNaN(u.getVariance())); 
  +        assertTrue("geom mean not NaN",Double.isNaN(u.getGeometricMean()));
  +        
  +        u.addValue(1.0);
  +        
  +        assertEquals( "mean not expected", 1.0, 
  +            u.getMean(), Double.MIN_VALUE);
  +        assertEquals( "variance not expected", 0.0, 
  +            u.getVariance(), Double.MIN_VALUE);
  +        assertEquals( "geometric mean not expected", 1.0, 
  +            u.getGeometricMean(), Double.MIN_VALUE);
  +        
  +        u.addValue(-1.0);
  +        
  +        assertTrue("geom mean not NaN",Double.isNaN(u.getGeometricMean()));
  +        
  +        u.addValue(0.0);
  +        
  +        assertTrue("geom mean not NaN",Double.isNaN(u.getGeometricMean()));
  +        
  +        //FiXME: test all other NaN contract specs
  +    }
  +        
  +        
  +        
   
   }
   
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org