You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/11/25 04:31:49 UTC

svn commit: r883972 [10/10] - in /lucene/mahout/trunk: core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ matrix/src/main/java/org/apache/mahout/jet/math/ matrix/src/main/java/org/apache/mahout/jet/random/ matrix/src/main/java/org/apache/mahout/jet/ra...

Modified: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/QuantileFinderTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/QuantileFinderTest.java?rev=883972&r1=883971&r2=883972&view=diff
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/QuantileFinderTest.java (original)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/QuantileFinderTest.java Wed Nov 25 03:31:47 2009
@@ -22,47 +22,47 @@
  * @param element the element to search for
  */
 protected static IntArrayList binaryMultiSearch(DoubleArrayList list, double element) {
-	int index = list.binarySearch(element);
-	if (index<0) return null; //not found
+  int index = list.binarySearch(element);
+  if (index<0) return null; //not found
 
-	int from = index-1;
-	while (from>=0 && list.get(from)==element) from--;
-	from++;
-	
-	int to = index+1;
-	while (to<list.size() && list.get(to)==element) to++;
-	to--;
-	
-	return new IntArrayList(new int[] {from,to});
+  int from = index-1;
+  while (from>=0 && list.get(from)==element) from--;
+  from++;
+  
+  int to = index+1;
+  while (to<list.size() && list.get(to)==element) to++;
+  to--;
+  
+  return new IntArrayList(new int[] {from,to});
 }
 /**
  * Observed epsilon
  */
 public static double epsilon(int size, double phi, double rank) {
-	double s = size;
-	//System.out.println("\n");
-	//System.out.println("s="+size+", rank="+rank+", phi="+phi+", eps="+Math.abs((rank)/s - phi));
-	//System.out.println("\n");
-	return Math.abs(rank/s - phi);
+  double s = size;
+  //System.out.println("\n");
+  //System.out.println("s="+size+", rank="+rank+", phi="+phi+", eps="+Math.abs((rank)/s - phi));
+  //System.out.println("\n");
+  return Math.abs(rank/s - phi);
 }
 /**
  * Observed epsilon
  */
 public static double epsilon(DoubleArrayList sortedList, double phi, double element) {
-	double rank = org.apache.mahout.jet.stat.Descriptive.rankInterpolated(sortedList,element);
-	return epsilon(sortedList.size(), phi, rank);
+  double rank = org.apache.mahout.jet.stat.Descriptive.rankInterpolated(sortedList,element);
+  return epsilon(sortedList.size(), phi, rank);
 }
 /**
  * Observed epsilon
  */
 public static double epsilon(DoubleArrayList sortedList, DoubleQuantileFinder finder, double phi) {
-	double element = finder.quantileElements(new DoubleArrayList(new double[] {phi})).get(0);
-	return epsilon(sortedList, phi, element); 
+  double element = finder.quantileElements(new DoubleArrayList(new double[] {phi})).get(0);
+  return epsilon(sortedList, phi, element); 
 }
 public static void main(String[] args) {
-	testBestBandKCalculation(args);
-	//testQuantileCalculation(args);
-	//testCollapse();
+  testBestBandKCalculation(args);
+  //testQuantileCalculation(args);
+  //testCollapse();
 }
 /**
  * This method was created in VisualAge.
@@ -71,26 +71,26 @@
  * @param phis double[]
  */
 public static double observedEpsilonAtPhi(double phi, ExactDoubleQuantileFinder exactFinder, DoubleQuantileFinder approxFinder) {
-	int N = (int) exactFinder.size();
-	
-	int exactRank = (int) Utils.epsilonCeiling(phi * N) - 1;
-	//System.out.println("exactRank="+exactRank);
-	exactFinder.quantileElements(new DoubleArrayList(new double[] {phi})).get(0); // just to ensure exactFinder is sorted
-	double approxElement = approxFinder.quantileElements(new DoubleArrayList(new double[] {phi})).get(0); 
-	//System.out.println("approxElem="+approxElement);
-	IntArrayList approxRanks = binaryMultiSearch(exactFinder.buffer, approxElement);
-	int from = approxRanks.get(0);
-	int to = approxRanks.get(1);
-
-	int distance;
-	if (from<=exactRank && exactRank<=to) distance = 0;
-	else {
-		if (from>exactRank) distance=Math.abs(from-exactRank);
-		else distance=Math.abs(exactRank-to);
-	}
+  int N = (int) exactFinder.size();
+  
+  int exactRank = (int) Utils.epsilonCeiling(phi * N) - 1;
+  //System.out.println("exactRank="+exactRank);
+  exactFinder.quantileElements(new DoubleArrayList(new double[] {phi})).get(0); // just to ensure exactFinder is sorted
+  double approxElement = approxFinder.quantileElements(new DoubleArrayList(new double[] {phi})).get(0); 
+  //System.out.println("approxElem="+approxElement);
+  IntArrayList approxRanks = binaryMultiSearch(exactFinder.buffer, approxElement);
+  int from = approxRanks.get(0);
+  int to = approxRanks.get(1);
+
+  int distance;
+  if (from<=exactRank && exactRank<=to) distance = 0;
+  else {
+    if (from>exactRank) distance=Math.abs(from-exactRank);
+    else distance=Math.abs(exactRank-to);
+  }
 
-	double epsilon = (double)distance / (double)N;
-	return epsilon;
+  double epsilon = (double)distance / (double)N;
+  return epsilon;
 }
 /**
  * This method was created in VisualAge.
@@ -99,341 +99,341 @@
  * @param phis double[]
  */
 public static DoubleArrayList observedEpsilonsAtPhis(DoubleArrayList phis, ExactDoubleQuantileFinder exactFinder, DoubleQuantileFinder approxFinder, double desiredEpsilon) {
-	DoubleArrayList epsilons = new DoubleArrayList(phis.size());
-	
-	for (int i=phis.size(); --i >=0;) {
-		double epsilon = observedEpsilonAtPhi(phis.get(i), exactFinder, approxFinder);
-		epsilons.add(epsilon);
-		if (epsilon>desiredEpsilon) System.out.println("Real epsilon = "+epsilon+" is larger than desired by "+(epsilon-desiredEpsilon));
-	}
-	return epsilons;
+  DoubleArrayList epsilons = new DoubleArrayList(phis.size());
+  
+  for (int i=phis.size(); --i >=0;) {
+    double epsilon = observedEpsilonAtPhi(phis.get(i), exactFinder, approxFinder);
+    epsilons.add(epsilon);
+    if (epsilon>desiredEpsilon) System.out.println("Real epsilon = "+epsilon+" is larger than desired by "+(epsilon-desiredEpsilon));
+  }
+  return epsilons;
 }
 /**
  * Not yet commented.
  */
 public static void test() {
-	String[] args = new String[20];
-	
-	String size="10000";
-	args[0]=size;
-	
-	//String b="5";
-	String b="12";
-	args[1]=b;
-	
-	String k="2290";
-	args[2]=k;
+  String[] args = new String[20];
+  
+  String size="10000";
+  args[0]=size;
+  
+  //String b="5";
+  String b="12";
+  args[1]=b;
+  
+  String k="2290";
+  args[2]=k;
 
-	String enableLogging="log";
-	args[3]=enableLogging;
+  String enableLogging="log";
+  args[3]=enableLogging;
 
-	String chunks="10";
-	args[4]=chunks;
+  String chunks="10";
+  args[4]=chunks;
 
-	String computeExactQuantilesAlso="exact";
-	args[5]=computeExactQuantilesAlso;
+  String computeExactQuantilesAlso="exact";
+  args[5]=computeExactQuantilesAlso;
 
-	String doShuffle="shuffle";
-	args[6]=doShuffle;
+  String doShuffle="shuffle";
+  args[6]=doShuffle;
 
-	String epsilon = "0.001";
-	args[7]=epsilon;
+  String epsilon = "0.001";
+  args[7]=epsilon;
 
-	String delta = "0.0001";
-	//String delta = "0.0001";
-	args[8]=delta;
+  String delta = "0.0001";
+  //String delta = "0.0001";
+  args[8]=delta;
 
-	String quantiles = "1";
-	args[9]=quantiles;
+  String quantiles = "1";
+  args[9]=quantiles;
 
-	String max_N = "-1";
-	args[10]=max_N;
+  String max_N = "-1";
+  args[10]=max_N;
 
 
-	testQuantileCalculation(args);
+  testQuantileCalculation(args);
 }
 /**
  * This method was created in VisualAge.
  */
 public static void testBestBandKCalculation(String[] args) {
-	//boolean known_N;
-	//if (args==null) known_N = false;
-	//else known_N = new Boolean(args[0]).booleanValue();
-
-	int[] quantiles = {100,10000};
-	//int[] quantiles = {1,100,10000};
-	
-	long[] sizes = {Long.MAX_VALUE, 1000000, 10000000, 100000000};
-	
-	double[] deltas = {0.0, 0.1, 0.00001};
-	//double[] deltas = {0.0, 0.001, 0.00001, 0.000001};
-	
-	//double[] epsilons = {0.0, 0.01, 0.001, 0.0001, 0.00001};
-	double[] epsilons = {0.0, 0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001};
-
-
-	
-	//if (! known_N) sizes = new long[] {0};
-	System.out.println("\n\n");
-	//if (known_N) 
-	//	System.out.println("Computing b's and k's for KNOWN N");
-	//else 
-	//	System.out.println("Computing b's and k's for UNKNOWN N");
-	System.out.println("mem [Math.round(elements/1000.0)]");
-	System.out.println("***********************************");
-	Timer timer = new Timer().start();
-
-	for (int q=0; q<quantiles.length; q++) {
-		int p = quantiles[q];
-		System.out.println("------------------------------");
-		System.out.println("computing for p = "+p);
-		for (int s=0; s<sizes.length; s++) {
-			long N = sizes[s];
-			System.out.println("   ------------------------------");
-			System.out.println("   computing for N = "+N);
-			for (int e=0; e<epsilons.length; e++) {
-				double epsilon = epsilons[e];
-				System.out.println("      ------------------------------");
-				System.out.println("      computing for e = "+epsilon);
-				for (int d=0; d<deltas.length; d++) {
-					double delta = deltas[d];
-					for (int knownCounter=0; knownCounter<2; knownCounter++) {
-						boolean known_N;
-						if (knownCounter ==0) known_N = true;
-						else known_N = false;
-
-						DoubleQuantileFinder finder = QuantileFinderFactory.newDoubleQuantileFinder(known_N, N, epsilon, delta, p, null);
-						//System.out.println(finder.getClass().getName());
-						/*
-						double[] returnSamplingRate = new double[1];
-						long[] result;
-						if (known_N) {
-							result = QuantileFinderFactory.known_N_compute_B_and_K(N, epsilon, delta, p, returnSamplingRate);
-						}
-						else {
-							result = QuantileFinderFactory.unknown_N_compute_B_and_K(epsilon, delta, p);
-							long b1 = result[0];
-							long k1 = result[1];
-
-							if (N>=0) {
-								long[] resultKnown = QuantileFinderFactory.known_N_compute_B_and_K(N, epsilon, delta, p, returnSamplingRate);
-								long b2 = resultKnown[0];
-								long k2 = resultKnown[1];
-				
-								if (b2 * k2 < b1 * k1) { // the KnownFinder is smaller
-									result = resultKnown;
-								}
-							}
-						}
-						
-
-						long b = result[0];
-						long k = result[1];
-						*/
-						String knownStr = known_N ? "  known" : "unknown";
-						long mem = finder.totalMemory();
-						if (mem==0) mem = N; 
-						//else if (mem==0 && !known_N && N<0) mem = Long.MAX_VALUE; // actually infinity
-						//else if (mem==0 && !known_N && N>=0) mem = N;
-						//System.out.print("         (e,d,N,p)=("+epsilon+","+delta+","+N+","+p+") --> ");
-						System.out.print("         (known, d)=("+knownStr+", "+delta+") --> ");
-						//System.out.print("(mem,b,k,memF");
-						System.out.print("(MB,mem");
-						//if (known_N) System.out.print(",sampling");
-						//System.out.print(")=("+(Math.round(b*k/1000.0))+","+b+","+k+", "+Math.round(b*k*8/1024.0/1024.0));
-						//System.out.print(")=("+b*k/1000.0+","+b+","+k+", "+b*k*8/1024.0/1024.0+", "+Math.round(b*k*8/1024.0/1024.0));
-						System.out.print(")=("+mem*8.0/1024.0/1024.0+",  "+mem/1000.0+",  "+Math.round(mem*8.0/1024.0/1024.0));
-						//if (known_N) System.out.print(","+returnSamplingRate[0]);
-						System.out.println(")");
-					}
-				}
-			}
-		}
-	}
+  //boolean known_N;
+  //if (args==null) known_N = false;
+  //else known_N = new Boolean(args[0]).booleanValue();
+
+  int[] quantiles = {100,10000};
+  //int[] quantiles = {1,100,10000};
+  
+  long[] sizes = {Long.MAX_VALUE, 1000000, 10000000, 100000000};
+  
+  double[] deltas = {0.0, 0.1, 0.00001};
+  //double[] deltas = {0.0, 0.001, 0.00001, 0.000001};
+  
+  //double[] epsilons = {0.0, 0.01, 0.001, 0.0001, 0.00001};
+  double[] epsilons = {0.0, 0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001};
+
+
+  
+  //if (! known_N) sizes = new long[] {0};
+  System.out.println("\n\n");
+  //if (known_N) 
+  //  System.out.println("Computing b's and k's for KNOWN N");
+  //else 
+  //  System.out.println("Computing b's and k's for UNKNOWN N");
+  System.out.println("mem [Math.round(elements/1000.0)]");
+  System.out.println("***********************************");
+  Timer timer = new Timer().start();
+
+  for (int q=0; q<quantiles.length; q++) {
+    int p = quantiles[q];
+    System.out.println("------------------------------");
+    System.out.println("computing for p = "+p);
+    for (int s=0; s<sizes.length; s++) {
+      long N = sizes[s];
+      System.out.println("   ------------------------------");
+      System.out.println("   computing for N = "+N);
+      for (int e=0; e<epsilons.length; e++) {
+        double epsilon = epsilons[e];
+        System.out.println("      ------------------------------");
+        System.out.println("      computing for e = "+epsilon);
+        for (int d=0; d<deltas.length; d++) {
+          double delta = deltas[d];
+          for (int knownCounter=0; knownCounter<2; knownCounter++) {
+            boolean known_N;
+            if (knownCounter ==0) known_N = true;
+            else known_N = false;
+
+            DoubleQuantileFinder finder = QuantileFinderFactory.newDoubleQuantileFinder(known_N, N, epsilon, delta, p, null);
+            //System.out.println(finder.getClass().getName());
+            /*
+            double[] returnSamplingRate = new double[1];
+            long[] result;
+            if (known_N) {
+              result = QuantileFinderFactory.known_N_compute_B_and_K(N, epsilon, delta, p, returnSamplingRate);
+            }
+            else {
+              result = QuantileFinderFactory.unknown_N_compute_B_and_K(epsilon, delta, p);
+              long b1 = result[0];
+              long k1 = result[1];
+
+              if (N>=0) {
+                long[] resultKnown = QuantileFinderFactory.known_N_compute_B_and_K(N, epsilon, delta, p, returnSamplingRate);
+                long b2 = resultKnown[0];
+                long k2 = resultKnown[1];
+        
+                if (b2 * k2 < b1 * k1) { // the KnownFinder is smaller
+                  result = resultKnown;
+                }
+              }
+            }
+            
+
+            long b = result[0];
+            long k = result[1];
+            */
+            String knownStr = known_N ? "  known" : "unknown";
+            long mem = finder.totalMemory();
+            if (mem==0) mem = N; 
+            //else if (mem==0 && !known_N && N<0) mem = Long.MAX_VALUE; // actually infinity
+            //else if (mem==0 && !known_N && N>=0) mem = N;
+            //System.out.print("         (e,d,N,p)=("+epsilon+","+delta+","+N+","+p+") --> ");
+            System.out.print("         (known, d)=("+knownStr+", "+delta+") --> ");
+            //System.out.print("(mem,b,k,memF");
+            System.out.print("(MB,mem");
+            //if (known_N) System.out.print(",sampling");
+            //System.out.print(")=("+(Math.round(b*k/1000.0))+","+b+","+k+", "+Math.round(b*k*8/1024.0/1024.0));
+            //System.out.print(")=("+b*k/1000.0+","+b+","+k+", "+b*k*8/1024.0/1024.0+", "+Math.round(b*k*8/1024.0/1024.0));
+            System.out.print(")=("+mem*8.0/1024.0/1024.0+",  "+mem/1000.0+",  "+Math.round(mem*8.0/1024.0/1024.0));
+            //if (known_N) System.out.print(","+returnSamplingRate[0]);
+            System.out.println(")");
+          }
+        }
+      }
+    }
+  }
 
-	timer.stop().display();
+  timer.stop().display();
 }
 /**
  * This method was created in VisualAge.
  */
 public static void testLocalVarDeclarationSpeed(int size) {
-	System.out.println("free="+Runtime.getRuntime().freeMemory());
-	System.out.println("total="+Runtime.getRuntime().totalMemory());
+  System.out.println("free="+Runtime.getRuntime().freeMemory());
+  System.out.println("total="+Runtime.getRuntime().totalMemory());
 
-	/*Timer timer = new Timer().start();
-	for (int i=0; i<size; i++) {
-		for (int j=0; j<size; j++) {
-			DoubleBuffer buffer=null;
-			int val=10;
-			double f=1.0f;
-		}
-	}
-	System.out.println(timer.stop());
-	*/
-	
-	Timer timer = new Timer().start();
-	DoubleBuffer buffer;
-	int val;
-	double f;
-	int j;
-
-	for (int i=0; i<size; i++) {
-		for (j=0; j<size; j++) {
-			buffer=null;
-			val=10;
-			f=1.0f;
-		}
-	}
-	System.out.println(timer.stop());
+  /*Timer timer = new Timer().start();
+  for (int i=0; i<size; i++) {
+    for (int j=0; j<size; j++) {
+      DoubleBuffer buffer=null;
+      int val=10;
+      double f=1.0f;
+    }
+  }
+  System.out.println(timer.stop());
+  */
+  
+  Timer timer = new Timer().start();
+  DoubleBuffer buffer;
+  int val;
+  double f;
+  int j;
+
+  for (int i=0; i<size; i++) {
+    for (j=0; j<size; j++) {
+      buffer=null;
+      val=10;
+      f=1.0f;
+    }
+  }
+  System.out.println(timer.stop());
 
-	System.out.println("free="+Runtime.getRuntime().freeMemory());
-	System.out.println("total="+Runtime.getRuntime().totalMemory());
+  System.out.println("free="+Runtime.getRuntime().freeMemory());
+  System.out.println("total="+Runtime.getRuntime().totalMemory());
 }
 /**
  */
 public static void testQuantileCalculation(String[] args) {
-	int size=Integer.parseInt(args[0]);
-	int b=Integer.parseInt(args[1]);
-	int k=Integer.parseInt(args[2]);	
-	//cern.it.util.Log.enableLogging(args[3].equals("log"));
-	int chunks=Integer.parseInt(args[4]);
-	boolean computeExactQuantilesAlso=args[5].equals("exact");
-	boolean doShuffle=args[6].equals("shuffle");
-	double epsilon = new Double(args[7]).doubleValue();
-	double delta = new Double(args[8]).doubleValue();
-	int quantiles = Integer.parseInt(args[9]);	
-	long max_N = Long.parseLong(args[10]);	
-
-
-
-	System.out.println("free="+Runtime.getRuntime().freeMemory());
-	System.out.println("total="+Runtime.getRuntime().totalMemory());
-
-	double[] phis = {0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999, 1.0};
-	//int quantiles = phis.length;
-
-	Timer timer = new Timer();
-	Timer timer2 = new Timer();
-	DoubleQuantileFinder approxFinder;
-
-	approxFinder = QuantileFinderFactory.newDoubleQuantileFinder(false, max_N, epsilon, delta, quantiles, null);
-	System.out.println(approxFinder);
-	//new UnknownApproximateDoubleQuantileFinder(b,k);
-	//approxFinder = new ApproximateDoubleQuantileFinder(b,k);
-	/*
-	double[] returnSamplingRate = new double[1];
-	long[] result = ApproximateQuantileFinder.computeBestBandK(size*chunks, epsilon, delta, quantiles, returnSamplingRate);
-	approxFinder = new ApproximateQuantileFinder((int) result[0], (int) result[1]);
-	System.out.println("epsilon="+epsilon);
-	System.out.println("delta="+delta);
-	System.out.println("samplingRate="+returnSamplingRate[0]);
-	*/
-
-		
-	DoubleQuantileFinder exactFinder = QuantileFinderFactory.newDoubleQuantileFinder(false, -1, 0.0, delta, quantiles, null);
-	System.out.println(exactFinder);
-
-	DoubleArrayList list = new DoubleArrayList(size);
-
-	for (int chunk=0; chunk<chunks; chunk++) {
-		list.setSize(0);
-		int d = chunk*size;
-		timer2.start();
-		for (int i=0; i<size; i++) {
-			list.add((double)(i + d));
-		}
-		timer2.stop();
-		
-
-		
-		//System.out.println("unshuffled="+list);
-		if (doShuffle) {
-			Timer timer3 = new Timer().start();
-			list.shuffle();
-			System.out.println("shuffling took ");
-			timer3.stop().display();
-		}
-		//System.out.println("shuffled="+list);
-		//list.sort();
-		//System.out.println("sorted="+list);
-
-		timer.start();
-		approxFinder.addAllOf(list);
-		timer.stop();
-
-		if (computeExactQuantilesAlso) {
-			exactFinder.addAllOf(list);
-		}
-
-	}
-	System.out.println("list.add() took" + timer2);
-	System.out.println("approxFinder.add() took" + timer);
-
-	//System.out.println("free="+Runtime.getRuntime().freeMemory());
-	//System.out.println("total="+Runtime.getRuntime().totalMemory());
-
-	timer.reset().start();
-
-	//approxFinder.close();
-	DoubleArrayList approxQuantiles = approxFinder.quantileElements(new DoubleArrayList(phis)); 
-
-	timer.stop().display();
-	
-	System.out.println("Phis="+new DoubleArrayList(phis));
-	System.out.println("ApproxQuantiles="+approxQuantiles);
-
-	//System.out.println("MaxLevel of full buffers="+maxLevelOfFullBuffers(approxFinder.bufferSet));
-
-	//System.out.println("total buffers filled="+ approxFinder.totalBuffersFilled);
-	//System.out.println("free="+Runtime.getRuntime().freeMemory());
-	//System.out.println("total="+Runtime.getRuntime().totalMemory());
-
-
-	if (computeExactQuantilesAlso) {
-		System.out.println("Comparing with exact quantile computation...");
-
-		timer.reset().start();
-
-		//exactFinder.close();
-		DoubleArrayList exactQuantiles = exactFinder.quantileElements(new DoubleArrayList(phis));
-		timer.stop().display();
-
-		System.out.println("ExactQuantiles="+exactQuantiles);
-
-
-		//double[] errors1 = errors1(exactQuantiles.elements(), approxQuantiles.elements());
-		//System.out.println("Error1="+new DoubleArrayList(errors1));
-
-		/*
-		final DoubleArrayList buffer = new DoubleArrayList((int)exactFinder.size());
-		exactFinder.forEach(
-			new org.apache.mahout.matrix.function.DoubleFunction() {
-				public void apply(double element) {
-					buffer.add(element);
-				}
-			}
-		);
-		*/
-				
-		
-		DoubleArrayList observedEpsilons = observedEpsilonsAtPhis(new DoubleArrayList(phis), (ExactDoubleQuantileFinder) exactFinder, approxFinder, epsilon);
-		System.out.println("observedEpsilons="+observedEpsilons);
-
-		double element = 1000.0f;
-		
-
-		System.out.println("exact phi("+element+")="+exactFinder.phi(element));
-		System.out.println("apprx phi("+element+")="+approxFinder.phi(element));
-
-		System.out.println("exact elem(phi("+element+"))="+exactFinder.quantileElements(new DoubleArrayList(new double[] {exactFinder.phi(element)})));
-		System.out.println("apprx elem(phi("+element+"))="+approxFinder.quantileElements(new DoubleArrayList(new double[] {approxFinder.phi(element)})));		
-	}
+  int size=Integer.parseInt(args[0]);
+  int b=Integer.parseInt(args[1]);
+  int k=Integer.parseInt(args[2]);  
+  //cern.it.util.Log.enableLogging(args[3].equals("log"));
+  int chunks=Integer.parseInt(args[4]);
+  boolean computeExactQuantilesAlso=args[5].equals("exact");
+  boolean doShuffle=args[6].equals("shuffle");
+  double epsilon = new Double(args[7]).doubleValue();
+  double delta = new Double(args[8]).doubleValue();
+  int quantiles = Integer.parseInt(args[9]);  
+  long max_N = Long.parseLong(args[10]);  
+
+
+
+  System.out.println("free="+Runtime.getRuntime().freeMemory());
+  System.out.println("total="+Runtime.getRuntime().totalMemory());
+
+  double[] phis = {0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999, 1.0};
+  //int quantiles = phis.length;
+
+  Timer timer = new Timer();
+  Timer timer2 = new Timer();
+  DoubleQuantileFinder approxFinder;
+
+  approxFinder = QuantileFinderFactory.newDoubleQuantileFinder(false, max_N, epsilon, delta, quantiles, null);
+  System.out.println(approxFinder);
+  //new UnknownApproximateDoubleQuantileFinder(b,k);
+  //approxFinder = new ApproximateDoubleQuantileFinder(b,k);
+  /*
+  double[] returnSamplingRate = new double[1];
+  long[] result = ApproximateQuantileFinder.computeBestBandK(size*chunks, epsilon, delta, quantiles, returnSamplingRate);
+  approxFinder = new ApproximateQuantileFinder((int) result[0], (int) result[1]);
+  System.out.println("epsilon="+epsilon);
+  System.out.println("delta="+delta);
+  System.out.println("samplingRate="+returnSamplingRate[0]);
+  */
+
+    
+  DoubleQuantileFinder exactFinder = QuantileFinderFactory.newDoubleQuantileFinder(false, -1, 0.0, delta, quantiles, null);
+  System.out.println(exactFinder);
+
+  DoubleArrayList list = new DoubleArrayList(size);
+
+  for (int chunk=0; chunk<chunks; chunk++) {
+    list.setSize(0);
+    int d = chunk*size;
+    timer2.start();
+    for (int i=0; i<size; i++) {
+      list.add((double)(i + d));
+    }
+    timer2.stop();
+    
+
+    
+    //System.out.println("unshuffled="+list);
+    if (doShuffle) {
+      Timer timer3 = new Timer().start();
+      list.shuffle();
+      System.out.println("shuffling took ");
+      timer3.stop().display();
+    }
+    //System.out.println("shuffled="+list);
+    //list.sort();
+    //System.out.println("sorted="+list);
+
+    timer.start();
+    approxFinder.addAllOf(list);
+    timer.stop();
+
+    if (computeExactQuantilesAlso) {
+      exactFinder.addAllOf(list);
+    }
+
+  }
+  System.out.println("list.add() took" + timer2);
+  System.out.println("approxFinder.add() took" + timer);
+
+  //System.out.println("free="+Runtime.getRuntime().freeMemory());
+  //System.out.println("total="+Runtime.getRuntime().totalMemory());
+
+  timer.reset().start();
+
+  //approxFinder.close();
+  DoubleArrayList approxQuantiles = approxFinder.quantileElements(new DoubleArrayList(phis)); 
+
+  timer.stop().display();
+  
+  System.out.println("Phis="+new DoubleArrayList(phis));
+  System.out.println("ApproxQuantiles="+approxQuantiles);
+
+  //System.out.println("MaxLevel of full buffers="+maxLevelOfFullBuffers(approxFinder.bufferSet));
+
+  //System.out.println("total buffers filled="+ approxFinder.totalBuffersFilled);
+  //System.out.println("free="+Runtime.getRuntime().freeMemory());
+  //System.out.println("total="+Runtime.getRuntime().totalMemory());
+
+
+  if (computeExactQuantilesAlso) {
+    System.out.println("Comparing with exact quantile computation...");
+
+    timer.reset().start();
+
+    //exactFinder.close();
+    DoubleArrayList exactQuantiles = exactFinder.quantileElements(new DoubleArrayList(phis));
+    timer.stop().display();
+
+    System.out.println("ExactQuantiles="+exactQuantiles);
+
+
+    //double[] errors1 = errors1(exactQuantiles.elements(), approxQuantiles.elements());
+    //System.out.println("Error1="+new DoubleArrayList(errors1));
+
+    /*
+    final DoubleArrayList buffer = new DoubleArrayList((int)exactFinder.size());
+    exactFinder.forEach(
+      new org.apache.mahout.matrix.function.DoubleFunction() {
+        public void apply(double element) {
+          buffer.add(element);
+        }
+      }
+    );
+    */
+        
+    
+    DoubleArrayList observedEpsilons = observedEpsilonsAtPhis(new DoubleArrayList(phis), (ExactDoubleQuantileFinder) exactFinder, approxFinder, epsilon);
+    System.out.println("observedEpsilons="+observedEpsilons);
+
+    double element = 1000.0f;
+    
+
+    System.out.println("exact phi("+element+")="+exactFinder.phi(element));
+    System.out.println("apprx phi("+element+")="+approxFinder.phi(element));
+
+    System.out.println("exact elem(phi("+element+"))="+exactFinder.quantileElements(new DoubleArrayList(new double[] {exactFinder.phi(element)})));
+    System.out.println("apprx elem(phi("+element+"))="+approxFinder.quantileElements(new DoubleArrayList(new double[] {approxFinder.phi(element)})));    
+  }
 }
 /**
  * Not yet commented.
  */
 public static void testRank() {
-	DoubleArrayList list = new DoubleArrayList(new double[] {1.0f, 5.0f, 5.0f, 5.0f, 7.0f, 10.f});
-	//System.out.println(rankOfWithin(5.0f, list));
+  DoubleArrayList list = new DoubleArrayList(new double[] {1.0f, 5.0f, 5.0f, 5.0f, 7.0f, 10.f});
+  //System.out.println(rankOfWithin(5.0f, list));
 }
 }

Modified: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/UnknownDoubleQuantileEstimator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/UnknownDoubleQuantileEstimator.java?rev=883972&r1=883971&r2=883972&view=diff
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/UnknownDoubleQuantileEstimator.java (original)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/UnknownDoubleQuantileEstimator.java Wed Nov 25 03:31:47 2009
@@ -32,16 +32,14 @@
  * Accepted for Proc. of the 1999 ACM SIGMOD Int. Conf. on Management of Data,
  * Paper (soon) available <A HREF="http://www-cad.eecs.berkeley.edu/~manku"> here</A>.
  *
- * @author wolfgang.hoschek@cern.ch
- * @version 1.0, 09/24/99
  * @see QuantileFinderFactory
  * @see KnownApproximateDoubleQuantileFinder
  */
 class UnknownDoubleQuantileEstimator extends DoubleQuantileEstimator {
-	protected int currentTreeHeight;
-	final protected int treeHeightStartingSampling;
-	protected WeightedRandomSampler sampler;
-	protected double precomputeEpsilon;
+  protected int currentTreeHeight;
+  final protected int treeHeightStartingSampling;
+  protected WeightedRandomSampler sampler;
+  protected double precomputeEpsilon;
 /**
  * Constructs an approximate quantile finder with b buffers, each having k elements.
  * @param b the number of buffers
@@ -51,36 +49,36 @@
  * @param generator a uniform random number generator.
  */
 public UnknownDoubleQuantileEstimator(int b, int k, int h, double precomputeEpsilon, RandomEngine generator) {
-	this.sampler = new WeightedRandomSampler(1,generator);
-	setUp(b,k);
-	this.treeHeightStartingSampling = h;
-	this.precomputeEpsilon = precomputeEpsilon;
-	this.clear();
+  this.sampler = new WeightedRandomSampler(1,generator);
+  setUp(b,k);
+  this.treeHeightStartingSampling = h;
+  this.precomputeEpsilon = precomputeEpsilon;
+  this.clear();
 }
 /**
  * Not yet commented.
  */
 protected DoubleBuffer[] buffersToCollapse() {
-	DoubleBuffer[] fullBuffers = bufferSet._getFullOrPartialBuffers();
+  DoubleBuffer[] fullBuffers = bufferSet._getFullOrPartialBuffers();
 
-	sortAscendingByLevel(fullBuffers);
-		
-	// if there is only one buffer at the lowest level, then increase its level so that there are at least two at the lowest level.
-	int minLevel = fullBuffers[1].level();
-	if (fullBuffers[0].level() < minLevel) {
-		fullBuffers[0].level(minLevel);
-	}
+  sortAscendingByLevel(fullBuffers);
+    
+  // if there is only one buffer at the lowest level, then increase its level so that there are at least two at the lowest level.
+  int minLevel = fullBuffers[1].level();
+  if (fullBuffers[0].level() < minLevel) {
+    fullBuffers[0].level(minLevel);
+  }
 
-	return bufferSet._getFullOrPartialBuffersWithLevel(minLevel);
+  return bufferSet._getFullOrPartialBuffersWithLevel(minLevel);
 }
 /**
  * Removes all elements from the receiver.  The receiver will
  * be empty after this call returns, and its memory requirements will be close to zero.
  */
 public synchronized void clear() {
-	super.clear();
-	this.currentTreeHeight = 1;
-	this.sampler.setWeight(1);
+  super.clear();
+  this.currentTreeHeight = 1;
+  this.sampler.setWeight(1);
 }
 /**
  * Returns a deep copy of the receiver.
@@ -88,30 +86,30 @@
  * @return a deep copy of the receiver.
  */
 public Object clone() {
-	UnknownDoubleQuantileEstimator copy = (UnknownDoubleQuantileEstimator) super.clone();
-	if (this.sampler != null) copy.sampler = (WeightedRandomSampler) copy.sampler.clone();
-	return copy;
+  UnknownDoubleQuantileEstimator copy = (UnknownDoubleQuantileEstimator) super.clone();
+  if (this.sampler != null) copy.sampler = (WeightedRandomSampler) copy.sampler.clone();
+  return copy;
 }
 /**
  * Not yet commented.
  */
 protected void newBuffer() {
-	currentBufferToFill = bufferSet._getFirstEmptyBuffer();
-	if (currentBufferToFill==null) throw new RuntimeException("Oops, no empty buffer.");
+  currentBufferToFill = bufferSet._getFirstEmptyBuffer();
+  if (currentBufferToFill==null) throw new RuntimeException("Oops, no empty buffer.");
 
-	currentBufferToFill.level(currentTreeHeight - 1);
-	currentBufferToFill.weight(sampler.getWeight());
+  currentBufferToFill.level(currentTreeHeight - 1);
+  currentBufferToFill.weight(sampler.getWeight());
 }
 /**
  * Not yet commented.
  */
 protected void postCollapse(DoubleBuffer[] toCollapse) {
-	if (toCollapse.length == bufferSet.b()) { //delta for unknown finder
-		currentTreeHeight++;
-		if (currentTreeHeight>=treeHeightStartingSampling) {
-			sampler.setWeight(sampler.getWeight() * 2);
-		}
-	}
+  if (toCollapse.length == bufferSet.b()) { //delta for unknown finder
+    currentTreeHeight++;
+    if (currentTreeHeight>=treeHeightStartingSampling) {
+      sampler.setWeight(sampler.getWeight() * 2);
+    }
+  }
 }
 /**
  * Computes the specified quantile elements over the values previously added.
@@ -119,56 +117,56 @@
  * @return the approximate quantile elements.
  */
 public DoubleArrayList quantileElements(DoubleArrayList phis) {
-	if (precomputeEpsilon<=0.0) return super.quantileElements(phis);
-	
-	int quantilesToPrecompute = (int) Utils.epsilonCeiling(1.0 / precomputeEpsilon);
-	/*
-	if (phis.size() > quantilesToPrecompute) {
-		// illegal use case!
-		// we compute results, but loose explicit approximation guarantees.
-		return super.quantileElements(phis);
-	}
-	*/
+  if (precomputeEpsilon<=0.0) return super.quantileElements(phis);
+  
+  int quantilesToPrecompute = (int) Utils.epsilonCeiling(1.0 / precomputeEpsilon);
+  /*
+  if (phis.size() > quantilesToPrecompute) {
+    // illegal use case!
+    // we compute results, but loose explicit approximation guarantees.
+    return super.quantileElements(phis);
+  }
+  */
  
-	//select that quantile from the precomputed set that corresponds to a position closest to phi.
-	phis = phis.copy();
-	double e = precomputeEpsilon;
-	for (int index=phis.size(); --index >= 0;) {
-		double phi = phis.get(index);
-		int i = (int) Math.round( ((2.0*phi/e) - 1.0 ) / 2.0); // finds closest
-		i = Math.min(quantilesToPrecompute-1, Math.max(0,i));
-		double augmentedPhi = (e/2.0)*(1+2*i);
-		phis.set(index,augmentedPhi);				
-	}
-	
-	return super.quantileElements(phis);
+  //select that quantile from the precomputed set that corresponds to a position closest to phi.
+  phis = phis.copy();
+  double e = precomputeEpsilon;
+  for (int index=phis.size(); --index >= 0;) {
+    double phi = phis.get(index);
+    int i = (int) Math.round( ((2.0*phi/e) - 1.0 ) / 2.0); // finds closest
+    i = Math.min(quantilesToPrecompute-1, Math.max(0,i));
+    double augmentedPhi = (e/2.0)*(1+2*i);
+    phis.set(index,augmentedPhi);        
+  }
+  
+  return super.quantileElements(phis);
 }
 /**
  * Not yet commented.
  */
 protected boolean sampleNextElement() {
-	return sampler.sampleNextElement();
+  return sampler.sampleNextElement();
 }
 /**
  * To do. This could faster be done without sorting (min and second min).
  */
 protected static void sortAscendingByLevel(DoubleBuffer[] fullBuffers) {
-	new ObjectArrayList(fullBuffers).quickSortFromTo(0,fullBuffers.length-1,
-		new java.util.Comparator() {
-			public int compare(Object o1, Object o2) {
-				int l1 = ((DoubleBuffer) o1).level();
-				int l2 = ((DoubleBuffer) o2).level();
-				return l1<l2? -1: l1==l2? 0: +1;
-			}
-		}
-	);
+  new ObjectArrayList(fullBuffers).quickSortFromTo(0,fullBuffers.length-1,
+    new java.util.Comparator() {
+      public int compare(Object o1, Object o2) {
+        int l1 = ((DoubleBuffer) o1).level();
+        int l2 = ((DoubleBuffer) o2).level();
+        return l1<l2? -1: l1==l2? 0: +1;
+      }
+    }
+  );
 }
 /**
  * Returns a String representation of the receiver.
  */
 public String toString() {
-	StringBuffer buf = new StringBuffer(super.toString());
-	buf.setLength(buf.length()-1);
-	return buf + ", h="+currentTreeHeight+", hStartSampling="+treeHeightStartingSampling+", precomputeEpsilon="+precomputeEpsilon+")";
+  StringBuffer buf = new StringBuffer(super.toString());
+  buf.setLength(buf.length()-1);
+  return buf + ", h="+currentTreeHeight+", hStartSampling="+treeHeightStartingSampling+", precomputeEpsilon="+precomputeEpsilon+")";
 }
 }

Modified: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/Utils.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/Utils.java?rev=883972&r1=883971&r2=883972&view=diff
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/Utils.java (original)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/Utils.java Wed Nov 25 03:31:47 2009
@@ -16,13 +16,13 @@
  * Makes this class non instantiable, but still let's others inherit from it.
  */
 protected Utils() {
-	throw new RuntimeException("Non instantiable");
+  throw new RuntimeException("Non instantiable");
 }
 /**
  * Similar to Math.ceil(value), but adjusts small numerical rounding errors +- epsilon.
  */
 public static long epsilonCeiling(double value) {
-	double epsilon = 0.0000001;
-	return (long) Math.ceil(value - epsilon);
+  double epsilon = 0.0000001;
+  return (long) Math.ceil(value - epsilon);
 }
 }

Modified: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/package.html
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/package.html?rev=883972&r1=883971&r2=883972&view=diff
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/package.html (original)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/package.html Wed Nov 25 03:31:47 2009
@@ -18,8 +18,8 @@
   <dt>5. A space efficient value preserving bin of a 2-dim or d-dim histogram.
   <dt>(All subject to an accuracy specified by the user.)
 
-    Have a look at the documentation of class {@link cern.jet.stat.quantile.QuantileFinderFactory} and the interface
-    {@link cern.jet.stat.quantile.DoubleQuantileFinder} to learn more.
+    Have a look at the documentation of class {@link org.apache.mahout.jet.stat.quantile.QuantileFinderFactory} and the interface
+    {@link org.apache.mahout.jet.stat.quantile.DoubleQuantileFinder} to learn more.
     Most users will never need to know more than how to use these.
     Actual implementations of the <tt>QuantileFinder</tt> interface are hidden.
     They are indirectly constructed via the the factory.