You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by md...@apache.org on 2003/07/05 20:29:35 UTC

cvs commit: jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat StatUtils.java

mdiggory    2003/07/05 11:29:35

  Modified:    math/src/java/org/apache/commons/math/stat StatUtils.java
  Log:
  This is an Implementation of StatUtils that uses the new UnivariateStatistic Framework and passes all JUnit StatUtils tests.
  
  Revision  Changes    Path
  1.11      +104 -172  jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat/StatUtils.java
  
  Index: StatUtils.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat/StatUtils.java,v
  retrieving revision 1.10
  retrieving revision 1.11
  diff -u -r1.10 -r1.11
  --- StatUtils.java	27 Jun 2003 20:31:52 -0000	1.10
  +++ StatUtils.java	5 Jul 2003 18:29:35 -0000	1.11
  @@ -53,6 +53,21 @@
    */
   package org.apache.commons.math.stat;
   
  +import org.apache.commons.math.stat.univariate.UnivariateStatistic;
  +import org.apache.commons.math.stat.univariate.moment.GeometricMean;
  +import org.apache.commons.math.stat.univariate.moment.Kurtosis;
  +import org.apache.commons.math.stat.univariate.moment.Mean;
  +import org.apache.commons.math.stat.univariate.moment.Skewness;
  +import org.apache.commons.math.stat.univariate.moment.Variance;
  +import org.apache.commons.math.stat.univariate.rank.Max;
  +import org.apache.commons.math.stat.univariate.rank.Median;
  +import org.apache.commons.math.stat.univariate.rank.Min;
  +import org.apache.commons.math.stat.univariate.rank.Percentile;
  +import org.apache.commons.math.stat.univariate.summary.Product;
  +import org.apache.commons.math.stat.univariate.summary.Sum;
  +import org.apache.commons.math.stat.univariate.summary.SumOfLogs;
  +import org.apache.commons.math.stat.univariate.summary.SumOfSquares;
  +
   /**
    * StatUtils provides easy static implementations of common double[] based
    * statistical methods. These return a single result value or in some cases, as
  @@ -62,13 +77,52 @@
    */
   public class StatUtils {
   
  +    /** Sum Of Logs */
  +    private static UnivariateStatistic sumLog = new SumOfLogs();
  +    
  +    /** Product */
  +    private static UnivariateStatistic product = new Product();
  +    
  +    /** Geometric Mean */
  +    private static UnivariateStatistic geoMean = new GeometricMean();
  +    
  +    /** Mean */
  +    private static UnivariateStatistic mean = new Mean();
  +    
  +    /** Variance */
  +    private static UnivariateStatistic var = new Variance();
  +    
  +    /** Skewness */
  +    private static UnivariateStatistic skew = new Skewness();
  +    
  +    /** Kurtosis */
  +    private static UnivariateStatistic kurt = new Kurtosis();
  +    
  +    /** Min */
  +    private static UnivariateStatistic min = new Min();
  +    
  +    /** Max */
  +    private static UnivariateStatistic max = new Max();
  +    
  +    /** Median */
  +    private static UnivariateStatistic median = new Median();
  +    
  +    /** Sum */
  +    private static UnivariateStatistic sum = new Sum();
  +    
  +    /** Sum Of Squares */
  +    private static UnivariateStatistic sumSq = new SumOfSquares();
  +    
  +    /** Percentile */
  +    private static Percentile percentile = new Percentile();
  +
       /**
        * The sum of the values that have been added to Univariate.
        * @param values Is a double[] containing the values
        * @return the sum of the values or Double.NaN if the array is empty
        */
       public static double sum(double[] values) {
  -        return sum(values, 0, values.length);
  +        return sum.evaluate(values, 0, values.length);
       }
   
       /**
  @@ -79,12 +133,7 @@
        * @return the sum of the values or Double.NaN if the array is empty
        */
       public static double sum(double[] values, int begin, int length) {
  -        testInput(values, begin, length);
  -        double accum = 0.0;
  -        for (int i = begin; i < begin + length; i++) {
  -            accum += values[i];
  -        }
  -        return accum;
  +        return sum.evaluate(values, begin, length);
       }
   
       /**
  @@ -93,7 +142,7 @@
        * @return the sum of the squared values or Double.NaN if the array is empty
        */
       public static double sumSq(double[] values) {
  -        return sumSq(values, 0, values.length);
  +        return sumSq.evaluate(values);
       }
   
       /**
  @@ -104,12 +153,7 @@
        * @return the sum of the squared values or Double.NaN if the array is empty
        */
       public static double sumSq(double[] values, int begin, int length) {
  -        testInput(values, begin, length);
  -        double accum = 0.0;
  -        for (int i = begin; i < begin + length; i++) {
  -            accum += Math.pow(values[i], 2.0);
  -        }
  -        return accum;
  +        return sumSq.evaluate(values, begin, length);
       }
   
       /**
  @@ -118,7 +162,7 @@
        * @return the product values or Double.NaN if the array is empty
        */
       public static double product(double[] values) {
  -        return product(values, 0, values.length);
  +        return product.evaluate(values);
       }
   
       /**
  @@ -129,12 +173,7 @@
        * @return the product values or Double.NaN if the array is empty
        */
       public static double product(double[] values, int begin, int length) {
  -        testInput(values, begin, length);
  -        double product = 1.0;
  -        for (int i = begin; i < begin + length; i++) {
  -            product *= values[i];
  -        }
  -        return product;
  +        return product.evaluate(values, begin, length);
       }
   
       /**
  @@ -143,7 +182,7 @@
        * @return the sumLog value or Double.NaN if the array is empty
        */
       public static double sumLog(double[] values) {
  -        return sumLog(values, 0, values.length);
  +        return sumLog.evaluate(values);
       }
   
       /**
  @@ -154,12 +193,7 @@
        * @return the sumLog value or Double.NaN if the array is empty
        */
       public static double sumLog(double[] values, int begin, int length) {
  -        testInput(values, begin, length);
  -        double sumLog = 0.0;
  -        for (int i = begin; i < begin + length; i++) {
  -            sumLog += Math.log(values[i]);
  -        }
  -        return sumLog;
  +        return sumLog.evaluate(values, begin, length);
       }
   
       /**
  @@ -169,7 +203,7 @@
        * any of the values are &lt;= 0.
        */
       public static double geometricMean(double[] values) {
  -        return geometricMean(values, 0, values.length);
  +        return geoMean.evaluate(values);
       }
   
       /**
  @@ -180,9 +214,11 @@
        * @return the geometric mean or Double.NaN if the array is empty or
        * any of the values are &lt;= 0.
        */
  -    public static double geometricMean(double[] values, int begin, int length) {
  -        testInput(values, begin, length);
  -        return Math.exp(sumLog(values, begin, length) / (double) length );
  +    public static double geometricMean(
  +        double[] values,
  +        int begin,
  +        int length) {
  +        return geoMean.evaluate(values, begin, length);
       }
   
       /**
  @@ -192,7 +228,7 @@
        * @return the mean of the values or Double.NaN if the array is empty
        */
       public static double mean(double[] values) {
  -        return sum(values) / (double) values.length;
  +        return mean.evaluate(values);
       }
   
       /**
  @@ -204,8 +240,7 @@
         * @return the mean of the values or Double.NaN if the array is empty
         */
       public static double mean(double[] values, int begin, int length) {
  -        testInput(values, begin, length);
  -        return sum(values, begin, length) / ((double) length);
  +        return mean.evaluate(values, begin, length);
       }
   
       /**
  @@ -230,7 +265,7 @@
           double[] values,
           int begin,
           int length) {
  -        testInput(values, begin, length);
  +
           double stdDev = Double.NaN;
           if (values.length != 0) {
               stdDev = Math.sqrt(variance(values, begin, length));
  @@ -271,24 +306,7 @@
        * or 0.0 for a single value set.  
        */
       public static double variance(double[] values, int begin, int length) {
  -        testInput(values, begin, length);
  -
  -        double variance = Double.NaN;
  -        if (values.length == 1) {
  -            variance = 0;
  -        } else if (values.length > 1) {
  -            double mean = mean(values, begin, length);
  -            double accum = 0.0;
  -            double accum2 = 0.0;
  -            for (int i = begin; i < begin + length; i++) {
  -                accum += Math.pow((values[i] - mean), 2.0);
  -                accum2 += (values[i] - mean);
  -            }
  -            variance =
  -                (accum - (Math.pow(accum2, 2) / ((double)length)))
  -                    / (double) (length - 1);
  -        }
  -        return variance;
  +        return var.evaluate(values, begin, length);
       }
   
       /**
  @@ -300,51 +318,16 @@
       public static double skewness(double[] values) {
           return skewness(values, 0, values.length);
       }
  -        /**
  -     * Returns the skewness of a collection of values.  Skewness is a 
  -     * measure of the assymetry of a given distribution. 
  -     * @param values Is a double[] containing the values
  -     * @param begin processing at this point in the array
  -     * @param length processing at this point in the array
  -     * @return the skewness of the values or Double.NaN if the array is empty
  -     */
  +    /**
  +    * Returns the skewness of a collection of values.  Skewness is a 
  +    * measure of the asymmetry of a given distribution. 
  +    * @param values Is a double[] containing the values
  +    * @param begin index of the first element to process
  +    * @param length number of elements to process, starting at begin
  +    * @return the skewness of the values or Double.NaN if the array is empty
  +    */
       public static double skewness(double[] values, int begin, int length) {
  -
  -        testInput(values, begin, length);
  -
  -        // Initialize the skewness
  -        double skewness = Double.NaN;
  -
  -        // Get the mean and the standard deviation
  -        double mean = mean(values, begin, length);
  -
  -        // Calc the std, this is implemented here instead of using the 
  -        // standardDeviation method eliminate a duplicate pass to get the mean
  -        double accum = 0.0;
  -        double accum2 = 0.0;
  -        for (int i = begin; i < begin + length; i++) {
  -            accum += Math.pow((values[i] - mean), 2.0);
  -            accum2 += (values[i] - mean);
  -        }
  -        double stdDev =
  -            Math.sqrt(
  -                (accum - (Math.pow(accum2, 2) / ((double) length)))
  -                    / (double) (length - 1));
  -
  -        // Calculate the skew as the sum the cubes of the distance 
  -        // from the mean divided by the standard deviation.
  -        double accum3 = 0.0;
  -        for (int i = begin; i < begin + length; i++) {
  -            accum3 += Math.pow((values[i] - mean) / stdDev, 3.0);
  -        }
  -
  -        // Get N
  -        double n = length;
  -
  -        // Calculate skewness
  -        skewness = (n / ((n - 1) * (n - 2))) * accum3;
  -
  -        return skewness;
  +        return skew.evaluate(values, begin, length);
       }
   
       /**
  @@ -356,7 +339,7 @@
       public static double kurtosis(double[] values) {
           return kurtosis(values, 0, values.length);
       }
  -    
  +
       /**
        * Returns the kurtosis for this collection of values. Kurtosis is a 
        * measure of the "peakedness" of a distribution.
  @@ -366,47 +349,9 @@
        * @return the kurtosis of the values or Double.NaN if the array is empty
        */
       public static double kurtosis(double[] values, int begin, int length) {
  -        testInput(values, begin, length);
  -
  -        // Initialize the kurtosis
  -        double kurtosis = Double.NaN;
  -
  -        // Get the mean and the standard deviation
  -        double mean = mean(values, begin, length);
  -
  -        // Calc the std, this is implemented here instead of using the 
  -        // standardDeviation method eliminate a duplicate pass to get the mean
  -        double accum = 0.0;
  -        double accum2 = 0.0;
  -        for (int i = begin; i < begin + length; i++) {
  -            accum += Math.pow((values[i] - mean), 2.0);
  -            accum2 += (values[i] - mean);
  -        }
  -        
  -        double stdDev =
  -            Math.sqrt(
  -                (accum - (Math.pow(accum2, 2) / ((double) length)))
  -                    / (double) (length - 1));
  -
  -        // Sum the ^4 of the distance from the mean divided by the 
  -        // standard deviation
  -        double accum3 = 0.0;
  -        for (int i = begin; i < begin + length; i++) {
  -            accum3 += Math.pow((values[i] - mean) / stdDev, 4.0);
  -        }
  -
  -        // Get N
  -        double n = length;
  -
  -        double coefficientOne = (n * (n + 1)) / ((n - 1) * (n - 2) * (n - 3));
  -        double termTwo = ((3 * Math.pow(n - 1, 2.0)) / ((n - 2) * (n - 3)));
  -        
  -        // Calculate kurtosis
  -        kurtosis = (coefficientOne * accum3) - termTwo;
  -
  -        return kurtosis;
  +        return kurt.evaluate(values, begin, length);
       }
  -    
  +
       /**
        * Returns the maximum of the available values
        * @param values Is a double[] containing the values
  @@ -424,16 +369,7 @@
        * @return the maximum of the values or Double.NaN if the array is empty
        */
       public static double max(double[] values, int begin, int length) {
  -        testInput(values, begin, length);
  -        double max = Double.NaN;
  -        for (int i = begin; i < begin + length; i++) {
  -            if (i == 0) {
  -                max = values[i];
  -            } else {
  -                max = (max > values[i]) ? max : values[i];
  -            }
  -        }
  -        return max;
  +        return max.evaluate(values, begin, length);
       }
   
       /**
  @@ -453,36 +389,32 @@
        * @return the minimum of the values or Double.NaN if the array is empty
        */
       public static double min(double[] values, int begin, int length) {
  -        testInput(values, begin, length);
  +        return min.evaluate(values, begin, length);
  +    }
   
  -        double min = Double.NaN;
  -        for (int i = begin; i < begin + length; i++) {
  -            if (i == 0) {
  -                min = values[i];
  -            } else {
  -                min = (min < values[i]) ? min : values[i];
  -            }
  -        }
  -        return min;
  +    /**
  +     * Returns the p'th percentile for a double[]
  +     * @param values Is a double[] containing the values
  +     * @param p the percentile to compute, 0 &lt;= p &lt;= 100
  +     * @return the value at the p'th percentile
  +     */
  +    public static double percentile(double[] values, double p) {
  +        return percentile.evaluate(values, p);
       }
   
       /**
  -     * Private testInput method used by all methods to verify the content 
  -     * of the array and indicies are correct.
  +     * Returns the p'th percentile for a double[]
        * @param values Is a double[] containing the values
        * @param begin processing at this point in the array
        * @param length processing at this point in the array
  +     * @param p the percentile to compute, 0 &lt;= p &lt;= 100
  +     * @return the value at the p'th percentile
        */
  -    private static void testInput(double[] values, int begin, int length) {
  -
  -        if (length > values.length)
  -            throw new IllegalArgumentException("length > values.length");
  -
  -        if (begin + length > values.length)
  -            throw new IllegalArgumentException("begin + length > values.length");
  -
  -        if (values == null)
  -            throw new IllegalArgumentException("input value array is null");
  -
  +    public static double percentile(
  +        double[] values,
  +        int begin,
  +        int length,
  +        double p) {
  +        return percentile.evaluate(values, begin, length, p);
       }
   }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org