You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/11/25 04:31:49 UTC
svn commit: r883972 [10/10] - in /lucene/mahout/trunk:
core/src/main/java/org/apache/mahout/fpm/pfpgrowth/
matrix/src/main/java/org/apache/mahout/jet/math/
matrix/src/main/java/org/apache/mahout/jet/random/
matrix/src/main/java/org/apache/mahout/jet/ra...
Modified: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/QuantileFinderTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/QuantileFinderTest.java?rev=883972&r1=883971&r2=883972&view=diff
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/QuantileFinderTest.java (original)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/QuantileFinderTest.java Wed Nov 25 03:31:47 2009
@@ -22,47 +22,47 @@
* @param element the element to search for
*/
protected static IntArrayList binaryMultiSearch(DoubleArrayList list, double element) {
- int index = list.binarySearch(element);
- if (index<0) return null; //not found
+ int index = list.binarySearch(element);
+ if (index<0) return null; //not found
- int from = index-1;
- while (from>=0 && list.get(from)==element) from--;
- from++;
-
- int to = index+1;
- while (to<list.size() && list.get(to)==element) to++;
- to--;
-
- return new IntArrayList(new int[] {from,to});
+ int from = index-1;
+ while (from>=0 && list.get(from)==element) from--;
+ from++;
+
+ int to = index+1;
+ while (to<list.size() && list.get(to)==element) to++;
+ to--;
+
+ return new IntArrayList(new int[] {from,to});
}
/**
* Observed epsilon
*/
public static double epsilon(int size, double phi, double rank) {
- double s = size;
- //System.out.println("\n");
- //System.out.println("s="+size+", rank="+rank+", phi="+phi+", eps="+Math.abs((rank)/s - phi));
- //System.out.println("\n");
- return Math.abs(rank/s - phi);
+ double s = size;
+ //System.out.println("\n");
+ //System.out.println("s="+size+", rank="+rank+", phi="+phi+", eps="+Math.abs((rank)/s - phi));
+ //System.out.println("\n");
+ return Math.abs(rank/s - phi);
}
/**
* Observed epsilon
*/
public static double epsilon(DoubleArrayList sortedList, double phi, double element) {
- double rank = org.apache.mahout.jet.stat.Descriptive.rankInterpolated(sortedList,element);
- return epsilon(sortedList.size(), phi, rank);
+ double rank = org.apache.mahout.jet.stat.Descriptive.rankInterpolated(sortedList,element);
+ return epsilon(sortedList.size(), phi, rank);
}
/**
* Observed epsilon
*/
public static double epsilon(DoubleArrayList sortedList, DoubleQuantileFinder finder, double phi) {
- double element = finder.quantileElements(new DoubleArrayList(new double[] {phi})).get(0);
- return epsilon(sortedList, phi, element);
+ double element = finder.quantileElements(new DoubleArrayList(new double[] {phi})).get(0);
+ return epsilon(sortedList, phi, element);
}
public static void main(String[] args) {
- testBestBandKCalculation(args);
- //testQuantileCalculation(args);
- //testCollapse();
+ testBestBandKCalculation(args);
+ //testQuantileCalculation(args);
+ //testCollapse();
}
/**
* This method was created in VisualAge.
@@ -71,26 +71,26 @@
* @param phis double[]
*/
public static double observedEpsilonAtPhi(double phi, ExactDoubleQuantileFinder exactFinder, DoubleQuantileFinder approxFinder) {
- int N = (int) exactFinder.size();
-
- int exactRank = (int) Utils.epsilonCeiling(phi * N) - 1;
- //System.out.println("exactRank="+exactRank);
- exactFinder.quantileElements(new DoubleArrayList(new double[] {phi})).get(0); // just to ensure exactFinder is sorted
- double approxElement = approxFinder.quantileElements(new DoubleArrayList(new double[] {phi})).get(0);
- //System.out.println("approxElem="+approxElement);
- IntArrayList approxRanks = binaryMultiSearch(exactFinder.buffer, approxElement);
- int from = approxRanks.get(0);
- int to = approxRanks.get(1);
-
- int distance;
- if (from<=exactRank && exactRank<=to) distance = 0;
- else {
- if (from>exactRank) distance=Math.abs(from-exactRank);
- else distance=Math.abs(exactRank-to);
- }
+ int N = (int) exactFinder.size();
+
+ int exactRank = (int) Utils.epsilonCeiling(phi * N) - 1;
+ //System.out.println("exactRank="+exactRank);
+ exactFinder.quantileElements(new DoubleArrayList(new double[] {phi})).get(0); // just to ensure exactFinder is sorted
+ double approxElement = approxFinder.quantileElements(new DoubleArrayList(new double[] {phi})).get(0);
+ //System.out.println("approxElem="+approxElement);
+ IntArrayList approxRanks = binaryMultiSearch(exactFinder.buffer, approxElement);
+ int from = approxRanks.get(0);
+ int to = approxRanks.get(1);
+
+ int distance;
+ if (from<=exactRank && exactRank<=to) distance = 0;
+ else {
+ if (from>exactRank) distance=Math.abs(from-exactRank);
+ else distance=Math.abs(exactRank-to);
+ }
- double epsilon = (double)distance / (double)N;
- return epsilon;
+ double epsilon = (double)distance / (double)N;
+ return epsilon;
}
/**
* This method was created in VisualAge.
@@ -99,341 +99,341 @@
* @param phis double[]
*/
public static DoubleArrayList observedEpsilonsAtPhis(DoubleArrayList phis, ExactDoubleQuantileFinder exactFinder, DoubleQuantileFinder approxFinder, double desiredEpsilon) {
- DoubleArrayList epsilons = new DoubleArrayList(phis.size());
-
- for (int i=phis.size(); --i >=0;) {
- double epsilon = observedEpsilonAtPhi(phis.get(i), exactFinder, approxFinder);
- epsilons.add(epsilon);
- if (epsilon>desiredEpsilon) System.out.println("Real epsilon = "+epsilon+" is larger than desired by "+(epsilon-desiredEpsilon));
- }
- return epsilons;
+ DoubleArrayList epsilons = new DoubleArrayList(phis.size());
+
+ for (int i=phis.size(); --i >=0;) {
+ double epsilon = observedEpsilonAtPhi(phis.get(i), exactFinder, approxFinder);
+ epsilons.add(epsilon);
+ if (epsilon>desiredEpsilon) System.out.println("Real epsilon = "+epsilon+" is larger than desired by "+(epsilon-desiredEpsilon));
+ }
+ return epsilons;
}
/**
* Not yet commented.
*/
public static void test() {
- String[] args = new String[20];
-
- String size="10000";
- args[0]=size;
-
- //String b="5";
- String b="12";
- args[1]=b;
-
- String k="2290";
- args[2]=k;
+ String[] args = new String[20];
+
+ String size="10000";
+ args[0]=size;
+
+ //String b="5";
+ String b="12";
+ args[1]=b;
+
+ String k="2290";
+ args[2]=k;
- String enableLogging="log";
- args[3]=enableLogging;
+ String enableLogging="log";
+ args[3]=enableLogging;
- String chunks="10";
- args[4]=chunks;
+ String chunks="10";
+ args[4]=chunks;
- String computeExactQuantilesAlso="exact";
- args[5]=computeExactQuantilesAlso;
+ String computeExactQuantilesAlso="exact";
+ args[5]=computeExactQuantilesAlso;
- String doShuffle="shuffle";
- args[6]=doShuffle;
+ String doShuffle="shuffle";
+ args[6]=doShuffle;
- String epsilon = "0.001";
- args[7]=epsilon;
+ String epsilon = "0.001";
+ args[7]=epsilon;
- String delta = "0.0001";
- //String delta = "0.0001";
- args[8]=delta;
+ String delta = "0.0001";
+ //String delta = "0.0001";
+ args[8]=delta;
- String quantiles = "1";
- args[9]=quantiles;
+ String quantiles = "1";
+ args[9]=quantiles;
- String max_N = "-1";
- args[10]=max_N;
+ String max_N = "-1";
+ args[10]=max_N;
- testQuantileCalculation(args);
+ testQuantileCalculation(args);
}
/**
* This method was created in VisualAge.
*/
public static void testBestBandKCalculation(String[] args) {
- //boolean known_N;
- //if (args==null) known_N = false;
- //else known_N = new Boolean(args[0]).booleanValue();
-
- int[] quantiles = {100,10000};
- //int[] quantiles = {1,100,10000};
-
- long[] sizes = {Long.MAX_VALUE, 1000000, 10000000, 100000000};
-
- double[] deltas = {0.0, 0.1, 0.00001};
- //double[] deltas = {0.0, 0.001, 0.00001, 0.000001};
-
- //double[] epsilons = {0.0, 0.01, 0.001, 0.0001, 0.00001};
- double[] epsilons = {0.0, 0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001};
-
-
-
- //if (! known_N) sizes = new long[] {0};
- System.out.println("\n\n");
- //if (known_N)
- // System.out.println("Computing b's and k's for KNOWN N");
- //else
- // System.out.println("Computing b's and k's for UNKNOWN N");
- System.out.println("mem [Math.round(elements/1000.0)]");
- System.out.println("***********************************");
- Timer timer = new Timer().start();
-
- for (int q=0; q<quantiles.length; q++) {
- int p = quantiles[q];
- System.out.println("------------------------------");
- System.out.println("computing for p = "+p);
- for (int s=0; s<sizes.length; s++) {
- long N = sizes[s];
- System.out.println(" ------------------------------");
- System.out.println(" computing for N = "+N);
- for (int e=0; e<epsilons.length; e++) {
- double epsilon = epsilons[e];
- System.out.println(" ------------------------------");
- System.out.println(" computing for e = "+epsilon);
- for (int d=0; d<deltas.length; d++) {
- double delta = deltas[d];
- for (int knownCounter=0; knownCounter<2; knownCounter++) {
- boolean known_N;
- if (knownCounter ==0) known_N = true;
- else known_N = false;
-
- DoubleQuantileFinder finder = QuantileFinderFactory.newDoubleQuantileFinder(known_N, N, epsilon, delta, p, null);
- //System.out.println(finder.getClass().getName());
- /*
- double[] returnSamplingRate = new double[1];
- long[] result;
- if (known_N) {
- result = QuantileFinderFactory.known_N_compute_B_and_K(N, epsilon, delta, p, returnSamplingRate);
- }
- else {
- result = QuantileFinderFactory.unknown_N_compute_B_and_K(epsilon, delta, p);
- long b1 = result[0];
- long k1 = result[1];
-
- if (N>=0) {
- long[] resultKnown = QuantileFinderFactory.known_N_compute_B_and_K(N, epsilon, delta, p, returnSamplingRate);
- long b2 = resultKnown[0];
- long k2 = resultKnown[1];
-
- if (b2 * k2 < b1 * k1) { // the KnownFinder is smaller
- result = resultKnown;
- }
- }
- }
-
-
- long b = result[0];
- long k = result[1];
- */
- String knownStr = known_N ? " known" : "unknown";
- long mem = finder.totalMemory();
- if (mem==0) mem = N;
- //else if (mem==0 && !known_N && N<0) mem = Long.MAX_VALUE; // actually infinity
- //else if (mem==0 && !known_N && N>=0) mem = N;
- //System.out.print(" (e,d,N,p)=("+epsilon+","+delta+","+N+","+p+") --> ");
- System.out.print(" (known, d)=("+knownStr+", "+delta+") --> ");
- //System.out.print("(mem,b,k,memF");
- System.out.print("(MB,mem");
- //if (known_N) System.out.print(",sampling");
- //System.out.print(")=("+(Math.round(b*k/1000.0))+","+b+","+k+", "+Math.round(b*k*8/1024.0/1024.0));
- //System.out.print(")=("+b*k/1000.0+","+b+","+k+", "+b*k*8/1024.0/1024.0+", "+Math.round(b*k*8/1024.0/1024.0));
- System.out.print(")=("+mem*8.0/1024.0/1024.0+", "+mem/1000.0+", "+Math.round(mem*8.0/1024.0/1024.0));
- //if (known_N) System.out.print(","+returnSamplingRate[0]);
- System.out.println(")");
- }
- }
- }
- }
- }
+ //boolean known_N;
+ //if (args==null) known_N = false;
+ //else known_N = new Boolean(args[0]).booleanValue();
+
+ int[] quantiles = {100,10000};
+ //int[] quantiles = {1,100,10000};
+
+ long[] sizes = {Long.MAX_VALUE, 1000000, 10000000, 100000000};
+
+ double[] deltas = {0.0, 0.1, 0.00001};
+ //double[] deltas = {0.0, 0.001, 0.00001, 0.000001};
+
+ //double[] epsilons = {0.0, 0.01, 0.001, 0.0001, 0.00001};
+ double[] epsilons = {0.0, 0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001};
+
+
+
+ //if (! known_N) sizes = new long[] {0};
+ System.out.println("\n\n");
+ //if (known_N)
+ // System.out.println("Computing b's and k's for KNOWN N");
+ //else
+ // System.out.println("Computing b's and k's for UNKNOWN N");
+ System.out.println("mem [Math.round(elements/1000.0)]");
+ System.out.println("***********************************");
+ Timer timer = new Timer().start();
+
+ for (int q=0; q<quantiles.length; q++) {
+ int p = quantiles[q];
+ System.out.println("------------------------------");
+ System.out.println("computing for p = "+p);
+ for (int s=0; s<sizes.length; s++) {
+ long N = sizes[s];
+ System.out.println(" ------------------------------");
+ System.out.println(" computing for N = "+N);
+ for (int e=0; e<epsilons.length; e++) {
+ double epsilon = epsilons[e];
+ System.out.println(" ------------------------------");
+ System.out.println(" computing for e = "+epsilon);
+ for (int d=0; d<deltas.length; d++) {
+ double delta = deltas[d];
+ for (int knownCounter=0; knownCounter<2; knownCounter++) {
+ boolean known_N;
+ if (knownCounter ==0) known_N = true;
+ else known_N = false;
+
+ DoubleQuantileFinder finder = QuantileFinderFactory.newDoubleQuantileFinder(known_N, N, epsilon, delta, p, null);
+ //System.out.println(finder.getClass().getName());
+ /*
+ double[] returnSamplingRate = new double[1];
+ long[] result;
+ if (known_N) {
+ result = QuantileFinderFactory.known_N_compute_B_and_K(N, epsilon, delta, p, returnSamplingRate);
+ }
+ else {
+ result = QuantileFinderFactory.unknown_N_compute_B_and_K(epsilon, delta, p);
+ long b1 = result[0];
+ long k1 = result[1];
+
+ if (N>=0) {
+ long[] resultKnown = QuantileFinderFactory.known_N_compute_B_and_K(N, epsilon, delta, p, returnSamplingRate);
+ long b2 = resultKnown[0];
+ long k2 = resultKnown[1];
+
+ if (b2 * k2 < b1 * k1) { // the KnownFinder is smaller
+ result = resultKnown;
+ }
+ }
+ }
+
+
+ long b = result[0];
+ long k = result[1];
+ */
+ String knownStr = known_N ? " known" : "unknown";
+ long mem = finder.totalMemory();
+ if (mem==0) mem = N;
+ //else if (mem==0 && !known_N && N<0) mem = Long.MAX_VALUE; // actually infinity
+ //else if (mem==0 && !known_N && N>=0) mem = N;
+ //System.out.print(" (e,d,N,p)=("+epsilon+","+delta+","+N+","+p+") --> ");
+ System.out.print(" (known, d)=("+knownStr+", "+delta+") --> ");
+ //System.out.print("(mem,b,k,memF");
+ System.out.print("(MB,mem");
+ //if (known_N) System.out.print(",sampling");
+ //System.out.print(")=("+(Math.round(b*k/1000.0))+","+b+","+k+", "+Math.round(b*k*8/1024.0/1024.0));
+ //System.out.print(")=("+b*k/1000.0+","+b+","+k+", "+b*k*8/1024.0/1024.0+", "+Math.round(b*k*8/1024.0/1024.0));
+ System.out.print(")=("+mem*8.0/1024.0/1024.0+", "+mem/1000.0+", "+Math.round(mem*8.0/1024.0/1024.0));
+ //if (known_N) System.out.print(","+returnSamplingRate[0]);
+ System.out.println(")");
+ }
+ }
+ }
+ }
+ }
- timer.stop().display();
+ timer.stop().display();
}
/**
* This method was created in VisualAge.
*/
public static void testLocalVarDeclarationSpeed(int size) {
- System.out.println("free="+Runtime.getRuntime().freeMemory());
- System.out.println("total="+Runtime.getRuntime().totalMemory());
+ System.out.println("free="+Runtime.getRuntime().freeMemory());
+ System.out.println("total="+Runtime.getRuntime().totalMemory());
- /*Timer timer = new Timer().start();
- for (int i=0; i<size; i++) {
- for (int j=0; j<size; j++) {
- DoubleBuffer buffer=null;
- int val=10;
- double f=1.0f;
- }
- }
- System.out.println(timer.stop());
- */
-
- Timer timer = new Timer().start();
- DoubleBuffer buffer;
- int val;
- double f;
- int j;
-
- for (int i=0; i<size; i++) {
- for (j=0; j<size; j++) {
- buffer=null;
- val=10;
- f=1.0f;
- }
- }
- System.out.println(timer.stop());
+ /*Timer timer = new Timer().start();
+ for (int i=0; i<size; i++) {
+ for (int j=0; j<size; j++) {
+ DoubleBuffer buffer=null;
+ int val=10;
+ double f=1.0f;
+ }
+ }
+ System.out.println(timer.stop());
+ */
+
+ Timer timer = new Timer().start();
+ DoubleBuffer buffer;
+ int val;
+ double f;
+ int j;
+
+ for (int i=0; i<size; i++) {
+ for (j=0; j<size; j++) {
+ buffer=null;
+ val=10;
+ f=1.0f;
+ }
+ }
+ System.out.println(timer.stop());
- System.out.println("free="+Runtime.getRuntime().freeMemory());
- System.out.println("total="+Runtime.getRuntime().totalMemory());
+ System.out.println("free="+Runtime.getRuntime().freeMemory());
+ System.out.println("total="+Runtime.getRuntime().totalMemory());
}
/**
*/
public static void testQuantileCalculation(String[] args) {
- int size=Integer.parseInt(args[0]);
- int b=Integer.parseInt(args[1]);
- int k=Integer.parseInt(args[2]);
- //cern.it.util.Log.enableLogging(args[3].equals("log"));
- int chunks=Integer.parseInt(args[4]);
- boolean computeExactQuantilesAlso=args[5].equals("exact");
- boolean doShuffle=args[6].equals("shuffle");
- double epsilon = new Double(args[7]).doubleValue();
- double delta = new Double(args[8]).doubleValue();
- int quantiles = Integer.parseInt(args[9]);
- long max_N = Long.parseLong(args[10]);
-
-
-
- System.out.println("free="+Runtime.getRuntime().freeMemory());
- System.out.println("total="+Runtime.getRuntime().totalMemory());
-
- double[] phis = {0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999, 1.0};
- //int quantiles = phis.length;
-
- Timer timer = new Timer();
- Timer timer2 = new Timer();
- DoubleQuantileFinder approxFinder;
-
- approxFinder = QuantileFinderFactory.newDoubleQuantileFinder(false, max_N, epsilon, delta, quantiles, null);
- System.out.println(approxFinder);
- //new UnknownApproximateDoubleQuantileFinder(b,k);
- //approxFinder = new ApproximateDoubleQuantileFinder(b,k);
- /*
- double[] returnSamplingRate = new double[1];
- long[] result = ApproximateQuantileFinder.computeBestBandK(size*chunks, epsilon, delta, quantiles, returnSamplingRate);
- approxFinder = new ApproximateQuantileFinder((int) result[0], (int) result[1]);
- System.out.println("epsilon="+epsilon);
- System.out.println("delta="+delta);
- System.out.println("samplingRate="+returnSamplingRate[0]);
- */
-
-
- DoubleQuantileFinder exactFinder = QuantileFinderFactory.newDoubleQuantileFinder(false, -1, 0.0, delta, quantiles, null);
- System.out.println(exactFinder);
-
- DoubleArrayList list = new DoubleArrayList(size);
-
- for (int chunk=0; chunk<chunks; chunk++) {
- list.setSize(0);
- int d = chunk*size;
- timer2.start();
- for (int i=0; i<size; i++) {
- list.add((double)(i + d));
- }
- timer2.stop();
-
-
-
- //System.out.println("unshuffled="+list);
- if (doShuffle) {
- Timer timer3 = new Timer().start();
- list.shuffle();
- System.out.println("shuffling took ");
- timer3.stop().display();
- }
- //System.out.println("shuffled="+list);
- //list.sort();
- //System.out.println("sorted="+list);
-
- timer.start();
- approxFinder.addAllOf(list);
- timer.stop();
-
- if (computeExactQuantilesAlso) {
- exactFinder.addAllOf(list);
- }
-
- }
- System.out.println("list.add() took" + timer2);
- System.out.println("approxFinder.add() took" + timer);
-
- //System.out.println("free="+Runtime.getRuntime().freeMemory());
- //System.out.println("total="+Runtime.getRuntime().totalMemory());
-
- timer.reset().start();
-
- //approxFinder.close();
- DoubleArrayList approxQuantiles = approxFinder.quantileElements(new DoubleArrayList(phis));
-
- timer.stop().display();
-
- System.out.println("Phis="+new DoubleArrayList(phis));
- System.out.println("ApproxQuantiles="+approxQuantiles);
-
- //System.out.println("MaxLevel of full buffers="+maxLevelOfFullBuffers(approxFinder.bufferSet));
-
- //System.out.println("total buffers filled="+ approxFinder.totalBuffersFilled);
- //System.out.println("free="+Runtime.getRuntime().freeMemory());
- //System.out.println("total="+Runtime.getRuntime().totalMemory());
-
-
- if (computeExactQuantilesAlso) {
- System.out.println("Comparing with exact quantile computation...");
-
- timer.reset().start();
-
- //exactFinder.close();
- DoubleArrayList exactQuantiles = exactFinder.quantileElements(new DoubleArrayList(phis));
- timer.stop().display();
-
- System.out.println("ExactQuantiles="+exactQuantiles);
-
-
- //double[] errors1 = errors1(exactQuantiles.elements(), approxQuantiles.elements());
- //System.out.println("Error1="+new DoubleArrayList(errors1));
-
- /*
- final DoubleArrayList buffer = new DoubleArrayList((int)exactFinder.size());
- exactFinder.forEach(
- new org.apache.mahout.matrix.function.DoubleFunction() {
- public void apply(double element) {
- buffer.add(element);
- }
- }
- );
- */
-
-
- DoubleArrayList observedEpsilons = observedEpsilonsAtPhis(new DoubleArrayList(phis), (ExactDoubleQuantileFinder) exactFinder, approxFinder, epsilon);
- System.out.println("observedEpsilons="+observedEpsilons);
-
- double element = 1000.0f;
-
-
- System.out.println("exact phi("+element+")="+exactFinder.phi(element));
- System.out.println("apprx phi("+element+")="+approxFinder.phi(element));
-
- System.out.println("exact elem(phi("+element+"))="+exactFinder.quantileElements(new DoubleArrayList(new double[] {exactFinder.phi(element)})));
- System.out.println("apprx elem(phi("+element+"))="+approxFinder.quantileElements(new DoubleArrayList(new double[] {approxFinder.phi(element)})));
- }
+ int size=Integer.parseInt(args[0]);
+ int b=Integer.parseInt(args[1]);
+ int k=Integer.parseInt(args[2]);
+ //cern.it.util.Log.enableLogging(args[3].equals("log"));
+ int chunks=Integer.parseInt(args[4]);
+ boolean computeExactQuantilesAlso=args[5].equals("exact");
+ boolean doShuffle=args[6].equals("shuffle");
+ double epsilon = new Double(args[7]).doubleValue();
+ double delta = new Double(args[8]).doubleValue();
+ int quantiles = Integer.parseInt(args[9]);
+ long max_N = Long.parseLong(args[10]);
+
+
+
+ System.out.println("free="+Runtime.getRuntime().freeMemory());
+ System.out.println("total="+Runtime.getRuntime().totalMemory());
+
+ double[] phis = {0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999, 1.0};
+ //int quantiles = phis.length;
+
+ Timer timer = new Timer();
+ Timer timer2 = new Timer();
+ DoubleQuantileFinder approxFinder;
+
+ approxFinder = QuantileFinderFactory.newDoubleQuantileFinder(false, max_N, epsilon, delta, quantiles, null);
+ System.out.println(approxFinder);
+ //new UnknownApproximateDoubleQuantileFinder(b,k);
+ //approxFinder = new ApproximateDoubleQuantileFinder(b,k);
+ /*
+ double[] returnSamplingRate = new double[1];
+ long[] result = ApproximateQuantileFinder.computeBestBandK(size*chunks, epsilon, delta, quantiles, returnSamplingRate);
+ approxFinder = new ApproximateQuantileFinder((int) result[0], (int) result[1]);
+ System.out.println("epsilon="+epsilon);
+ System.out.println("delta="+delta);
+ System.out.println("samplingRate="+returnSamplingRate[0]);
+ */
+
+
+ DoubleQuantileFinder exactFinder = QuantileFinderFactory.newDoubleQuantileFinder(false, -1, 0.0, delta, quantiles, null);
+ System.out.println(exactFinder);
+
+ DoubleArrayList list = new DoubleArrayList(size);
+
+ for (int chunk=0; chunk<chunks; chunk++) {
+ list.setSize(0);
+ int d = chunk*size;
+ timer2.start();
+ for (int i=0; i<size; i++) {
+ list.add((double)(i + d));
+ }
+ timer2.stop();
+
+
+
+ //System.out.println("unshuffled="+list);
+ if (doShuffle) {
+ Timer timer3 = new Timer().start();
+ list.shuffle();
+ System.out.println("shuffling took ");
+ timer3.stop().display();
+ }
+ //System.out.println("shuffled="+list);
+ //list.sort();
+ //System.out.println("sorted="+list);
+
+ timer.start();
+ approxFinder.addAllOf(list);
+ timer.stop();
+
+ if (computeExactQuantilesAlso) {
+ exactFinder.addAllOf(list);
+ }
+
+ }
+ System.out.println("list.add() took" + timer2);
+ System.out.println("approxFinder.add() took" + timer);
+
+ //System.out.println("free="+Runtime.getRuntime().freeMemory());
+ //System.out.println("total="+Runtime.getRuntime().totalMemory());
+
+ timer.reset().start();
+
+ //approxFinder.close();
+ DoubleArrayList approxQuantiles = approxFinder.quantileElements(new DoubleArrayList(phis));
+
+ timer.stop().display();
+
+ System.out.println("Phis="+new DoubleArrayList(phis));
+ System.out.println("ApproxQuantiles="+approxQuantiles);
+
+ //System.out.println("MaxLevel of full buffers="+maxLevelOfFullBuffers(approxFinder.bufferSet));
+
+ //System.out.println("total buffers filled="+ approxFinder.totalBuffersFilled);
+ //System.out.println("free="+Runtime.getRuntime().freeMemory());
+ //System.out.println("total="+Runtime.getRuntime().totalMemory());
+
+
+ if (computeExactQuantilesAlso) {
+ System.out.println("Comparing with exact quantile computation...");
+
+ timer.reset().start();
+
+ //exactFinder.close();
+ DoubleArrayList exactQuantiles = exactFinder.quantileElements(new DoubleArrayList(phis));
+ timer.stop().display();
+
+ System.out.println("ExactQuantiles="+exactQuantiles);
+
+
+ //double[] errors1 = errors1(exactQuantiles.elements(), approxQuantiles.elements());
+ //System.out.println("Error1="+new DoubleArrayList(errors1));
+
+ /*
+ final DoubleArrayList buffer = new DoubleArrayList((int)exactFinder.size());
+ exactFinder.forEach(
+ new org.apache.mahout.matrix.function.DoubleFunction() {
+ public void apply(double element) {
+ buffer.add(element);
+ }
+ }
+ );
+ */
+
+
+ DoubleArrayList observedEpsilons = observedEpsilonsAtPhis(new DoubleArrayList(phis), (ExactDoubleQuantileFinder) exactFinder, approxFinder, epsilon);
+ System.out.println("observedEpsilons="+observedEpsilons);
+
+ double element = 1000.0f;
+
+
+ System.out.println("exact phi("+element+")="+exactFinder.phi(element));
+ System.out.println("apprx phi("+element+")="+approxFinder.phi(element));
+
+ System.out.println("exact elem(phi("+element+"))="+exactFinder.quantileElements(new DoubleArrayList(new double[] {exactFinder.phi(element)})));
+ System.out.println("apprx elem(phi("+element+"))="+approxFinder.quantileElements(new DoubleArrayList(new double[] {approxFinder.phi(element)})));
+ }
}
/**
* Not yet commented.
*/
public static void testRank() {
- DoubleArrayList list = new DoubleArrayList(new double[] {1.0f, 5.0f, 5.0f, 5.0f, 7.0f, 10.f});
- //System.out.println(rankOfWithin(5.0f, list));
+ DoubleArrayList list = new DoubleArrayList(new double[] {1.0f, 5.0f, 5.0f, 5.0f, 7.0f, 10.f});
+ //System.out.println(rankOfWithin(5.0f, list));
}
}
Modified: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/UnknownDoubleQuantileEstimator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/UnknownDoubleQuantileEstimator.java?rev=883972&r1=883971&r2=883972&view=diff
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/UnknownDoubleQuantileEstimator.java (original)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/UnknownDoubleQuantileEstimator.java Wed Nov 25 03:31:47 2009
@@ -32,16 +32,14 @@
* Accepted for Proc. of the 1999 ACM SIGMOD Int. Conf. on Management of Data,
* Paper (soon) available <A HREF="http://www-cad.eecs.berkeley.edu/~manku"> here</A>.
*
- * @author wolfgang.hoschek@cern.ch
- * @version 1.0, 09/24/99
* @see QuantileFinderFactory
* @see KnownApproximateDoubleQuantileFinder
*/
class UnknownDoubleQuantileEstimator extends DoubleQuantileEstimator {
- protected int currentTreeHeight;
- final protected int treeHeightStartingSampling;
- protected WeightedRandomSampler sampler;
- protected double precomputeEpsilon;
+ protected int currentTreeHeight;
+ final protected int treeHeightStartingSampling;
+ protected WeightedRandomSampler sampler;
+ protected double precomputeEpsilon;
/**
* Constructs an approximate quantile finder with b buffers, each having k elements.
* @param b the number of buffers
@@ -51,36 +49,36 @@
* @param generator a uniform random number generator.
*/
public UnknownDoubleQuantileEstimator(int b, int k, int h, double precomputeEpsilon, RandomEngine generator) {
- this.sampler = new WeightedRandomSampler(1,generator);
- setUp(b,k);
- this.treeHeightStartingSampling = h;
- this.precomputeEpsilon = precomputeEpsilon;
- this.clear();
+ this.sampler = new WeightedRandomSampler(1,generator);
+ setUp(b,k);
+ this.treeHeightStartingSampling = h;
+ this.precomputeEpsilon = precomputeEpsilon;
+ this.clear();
}
/**
* Not yet commented.
*/
protected DoubleBuffer[] buffersToCollapse() {
- DoubleBuffer[] fullBuffers = bufferSet._getFullOrPartialBuffers();
+ DoubleBuffer[] fullBuffers = bufferSet._getFullOrPartialBuffers();
- sortAscendingByLevel(fullBuffers);
-
- // if there is only one buffer at the lowest level, then increase its level so that there are at least two at the lowest level.
- int minLevel = fullBuffers[1].level();
- if (fullBuffers[0].level() < minLevel) {
- fullBuffers[0].level(minLevel);
- }
+ sortAscendingByLevel(fullBuffers);
+
+ // if there is only one buffer at the lowest level, then increase its level so that there are at least two at the lowest level.
+ int minLevel = fullBuffers[1].level();
+ if (fullBuffers[0].level() < minLevel) {
+ fullBuffers[0].level(minLevel);
+ }
- return bufferSet._getFullOrPartialBuffersWithLevel(minLevel);
+ return bufferSet._getFullOrPartialBuffersWithLevel(minLevel);
}
/**
* Removes all elements from the receiver. The receiver will
* be empty after this call returns, and its memory requirements will be close to zero.
*/
public synchronized void clear() {
- super.clear();
- this.currentTreeHeight = 1;
- this.sampler.setWeight(1);
+ super.clear();
+ this.currentTreeHeight = 1;
+ this.sampler.setWeight(1);
}
/**
* Returns a deep copy of the receiver.
@@ -88,30 +86,30 @@
* @return a deep copy of the receiver.
*/
public Object clone() {
- UnknownDoubleQuantileEstimator copy = (UnknownDoubleQuantileEstimator) super.clone();
- if (this.sampler != null) copy.sampler = (WeightedRandomSampler) copy.sampler.clone();
- return copy;
+ UnknownDoubleQuantileEstimator copy = (UnknownDoubleQuantileEstimator) super.clone();
+ if (this.sampler != null) copy.sampler = (WeightedRandomSampler) copy.sampler.clone();
+ return copy;
}
/**
* Not yet commented.
*/
protected void newBuffer() {
- currentBufferToFill = bufferSet._getFirstEmptyBuffer();
- if (currentBufferToFill==null) throw new RuntimeException("Oops, no empty buffer.");
+ currentBufferToFill = bufferSet._getFirstEmptyBuffer();
+ if (currentBufferToFill==null) throw new RuntimeException("Oops, no empty buffer.");
- currentBufferToFill.level(currentTreeHeight - 1);
- currentBufferToFill.weight(sampler.getWeight());
+ currentBufferToFill.level(currentTreeHeight - 1);
+ currentBufferToFill.weight(sampler.getWeight());
}
/**
* Not yet commented.
*/
protected void postCollapse(DoubleBuffer[] toCollapse) {
- if (toCollapse.length == bufferSet.b()) { //delta for unknown finder
- currentTreeHeight++;
- if (currentTreeHeight>=treeHeightStartingSampling) {
- sampler.setWeight(sampler.getWeight() * 2);
- }
- }
+ if (toCollapse.length == bufferSet.b()) { //delta for unknown finder
+ currentTreeHeight++;
+ if (currentTreeHeight>=treeHeightStartingSampling) {
+ sampler.setWeight(sampler.getWeight() * 2);
+ }
+ }
}
/**
* Computes the specified quantile elements over the values previously added.
@@ -119,56 +117,56 @@
* @return the approximate quantile elements.
*/
public DoubleArrayList quantileElements(DoubleArrayList phis) {
- if (precomputeEpsilon<=0.0) return super.quantileElements(phis);
-
- int quantilesToPrecompute = (int) Utils.epsilonCeiling(1.0 / precomputeEpsilon);
- /*
- if (phis.size() > quantilesToPrecompute) {
- // illegal use case!
- // we compute results, but loose explicit approximation guarantees.
- return super.quantileElements(phis);
- }
- */
+ if (precomputeEpsilon<=0.0) return super.quantileElements(phis);
+
+ int quantilesToPrecompute = (int) Utils.epsilonCeiling(1.0 / precomputeEpsilon);
+ /*
+ if (phis.size() > quantilesToPrecompute) {
+ // illegal use case!
+ // we compute results, but loose explicit approximation guarantees.
+ return super.quantileElements(phis);
+ }
+ */
- //select that quantile from the precomputed set that corresponds to a position closest to phi.
- phis = phis.copy();
- double e = precomputeEpsilon;
- for (int index=phis.size(); --index >= 0;) {
- double phi = phis.get(index);
- int i = (int) Math.round( ((2.0*phi/e) - 1.0 ) / 2.0); // finds closest
- i = Math.min(quantilesToPrecompute-1, Math.max(0,i));
- double augmentedPhi = (e/2.0)*(1+2*i);
- phis.set(index,augmentedPhi);
- }
-
- return super.quantileElements(phis);
+ //select that quantile from the precomputed set that corresponds to a position closest to phi.
+ phis = phis.copy();
+ double e = precomputeEpsilon;
+ for (int index=phis.size(); --index >= 0;) {
+ double phi = phis.get(index);
+ int i = (int) Math.round( ((2.0*phi/e) - 1.0 ) / 2.0); // finds closest
+ i = Math.min(quantilesToPrecompute-1, Math.max(0,i));
+ double augmentedPhi = (e/2.0)*(1+2*i);
+ phis.set(index,augmentedPhi);
+ }
+
+ return super.quantileElements(phis);
}
/**
* Not yet commented.
*/
protected boolean sampleNextElement() {
- return sampler.sampleNextElement();
+ return sampler.sampleNextElement();
}
/**
* To do. This could faster be done without sorting (min and second min).
*/
protected static void sortAscendingByLevel(DoubleBuffer[] fullBuffers) {
- new ObjectArrayList(fullBuffers).quickSortFromTo(0,fullBuffers.length-1,
- new java.util.Comparator() {
- public int compare(Object o1, Object o2) {
- int l1 = ((DoubleBuffer) o1).level();
- int l2 = ((DoubleBuffer) o2).level();
- return l1<l2? -1: l1==l2? 0: +1;
- }
- }
- );
+ new ObjectArrayList(fullBuffers).quickSortFromTo(0,fullBuffers.length-1,
+ new java.util.Comparator() {
+ public int compare(Object o1, Object o2) {
+ int l1 = ((DoubleBuffer) o1).level();
+ int l2 = ((DoubleBuffer) o2).level();
+ return l1<l2? -1: l1==l2? 0: +1;
+ }
+ }
+ );
}
/**
* Returns a String representation of the receiver.
*/
public String toString() {
- StringBuffer buf = new StringBuffer(super.toString());
- buf.setLength(buf.length()-1);
- return buf + ", h="+currentTreeHeight+", hStartSampling="+treeHeightStartingSampling+", precomputeEpsilon="+precomputeEpsilon+")";
+ StringBuffer buf = new StringBuffer(super.toString());
+ buf.setLength(buf.length()-1);
+ return buf + ", h="+currentTreeHeight+", hStartSampling="+treeHeightStartingSampling+", precomputeEpsilon="+precomputeEpsilon+")";
}
}
Modified: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/Utils.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/Utils.java?rev=883972&r1=883971&r2=883972&view=diff
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/Utils.java (original)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/Utils.java Wed Nov 25 03:31:47 2009
@@ -16,13 +16,13 @@
* Makes this class non instantiable, but still let's others inherit from it.
*/
protected Utils() {
- throw new RuntimeException("Non instantiable");
+ throw new RuntimeException("Non instantiable");
}
/**
* Similar to Math.ceil(value), but adjusts small numerical rounding errors +- epsilon.
*/
public static long epsilonCeiling(double value) {
- double epsilon = 0.0000001;
- return (long) Math.ceil(value - epsilon);
+ double epsilon = 0.0000001;
+ return (long) Math.ceil(value - epsilon);
}
}
Modified: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/package.html
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/package.html?rev=883972&r1=883971&r2=883972&view=diff
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/package.html (original)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/package.html Wed Nov 25 03:31:47 2009
@@ -18,8 +18,8 @@
<dt>5. A space efficient value preserving bin of a 2-dim or d-dim histogram.
<dt>(All subject to an accuracy specified by the user.)
- Have a look at the documentation of class {@link cern.jet.stat.quantile.QuantileFinderFactory} and the interface
- {@link cern.jet.stat.quantile.DoubleQuantileFinder} to learn more.
+ Have a look at the documentation of class {@link org.apache.mahout.jet.stat.quantile.QuantileFinderFactory} and the interface
+ {@link org.apache.mahout.jet.stat.quantile.DoubleQuantileFinder} to learn more.
Most users will never need to know more than how to use these.
Actual implementations of the <tt>QuantileFinder</tt> interface are hidden.
They are indirectly constructed via the the factory.