You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/11/25 04:31:49 UTC

svn commit: r883972 [9/10] - in /lucene/mahout/trunk: core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ matrix/src/main/java/org/apache/mahout/jet/math/ matrix/src/main/java/org/apache/mahout/jet/random/ matrix/src/main/java/org/apache/mahout/jet/ran...

Modified: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/KnownDoubleQuantileEstimator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/KnownDoubleQuantileEstimator.java?rev=883972&r1=883971&r2=883972&view=diff
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/KnownDoubleQuantileEstimator.java (original)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/KnownDoubleQuantileEstimator.java Wed Nov 25 03:31:47 2009
@@ -32,19 +32,17 @@
  * Proc. of the 1998 ACM SIGMOD Int. Conf. on Management of Data,
  * Paper available <A HREF="http://www-cad.eecs.berkeley.edu/~manku"> here</A>.
  *
- * @author wolfgang.hoschek@cern.ch
- * @version 1.0, 09/24/99
  * @see QuantileFinderFactory
  * @see UnknownApproximateDoubleQuantileFinder
  */
 class KnownDoubleQuantileEstimator extends DoubleQuantileEstimator {
-	protected double beta; //correction factor for phis
+  protected double beta; //correction factor for phis
 
-	protected boolean weHadMoreThanOneEmptyBuffer;
+  protected boolean weHadMoreThanOneEmptyBuffer;
 
-	protected RandomSamplingAssistant samplingAssistant;
-	protected double samplingRate; // see method sampleNextElement()
-	protected long N; // see method sampleNextElement()
+  protected RandomSamplingAssistant samplingAssistant;
+  protected double samplingRate; // see method sampleNextElement()
+  protected long N; // see method sampleNextElement()
 /**
  * Constructs an approximate quantile finder with b buffers, each having k elements.
  * @param b the number of buffers
@@ -54,68 +52,68 @@
  * @param generator a uniform random number generator.
  */
 public KnownDoubleQuantileEstimator(int b, int k, long N, double samplingRate, RandomEngine generator) {
-	this.samplingRate = samplingRate;
-	this.N = N;
+  this.samplingRate = samplingRate;
+  this.N = N;
 
-	if (this.samplingRate <= 1.0) {
-		this.samplingAssistant = null;
-	}
-	else {
-		this.samplingAssistant = new RandomSamplingAssistant(Arithmetic.floor(N/samplingRate), N, generator);
-	}
-	
-	setUp(b,k);
-	this.clear();
+  if (this.samplingRate <= 1.0) {
+    this.samplingAssistant = null;
+  }
+  else {
+    this.samplingAssistant = new RandomSamplingAssistant(Arithmetic.floor(N/samplingRate), N, generator);
+  }
+  
+  setUp(b,k);
+  this.clear();
 }
 /**
  * @param infinities the number of infinities to fill.
  * @param buffer the buffer into which the infinities shall be filled.
  */
 protected void addInfinities(int missingInfinities,  DoubleBuffer buffer) {
-	RandomSamplingAssistant oldAssistant = this.samplingAssistant;
-	this.samplingAssistant = null; // switch off sampler
-	//double[] infinities = new double[missingInfinities];
-	
-	boolean even=true;
-	for (int i=0; i<missingInfinities; i++) {
-		if (even) buffer.values.add(Double.MAX_VALUE);
-		else	  buffer.values.add(-Double.MAX_VALUE);
-		
-		//if (even) {infinities[i]=Double.MAX_VALUE;}
-		//else	  {infinities[i]=-Double.MAX_VALUE;}
-		
-		//if (even) {this.add(Double.MAX_VALUE);}
-		//else	  {this.add(-Double.MAX_VALUE);}
-		even = !even;
-	}
-	
-	//buffer.values.addAllOfFromTo(new DoubleArrayList(infinities),0,missingInfinities-1);
-	
-	//this.totalElementsFilled -= infinities;
-	
-	this.samplingAssistant = oldAssistant; // switch on sampler again
+  RandomSamplingAssistant oldAssistant = this.samplingAssistant;
+  this.samplingAssistant = null; // switch off sampler
+  //double[] infinities = new double[missingInfinities];
+  
+  boolean even=true;
+  for (int i=0; i<missingInfinities; i++) {
+    if (even) buffer.values.add(Double.MAX_VALUE);
+    else    buffer.values.add(-Double.MAX_VALUE);
+    
+    //if (even) {infinities[i]=Double.MAX_VALUE;}
+    //else    {infinities[i]=-Double.MAX_VALUE;}
+    
+    //if (even) {this.add(Double.MAX_VALUE);}
+    //else    {this.add(-Double.MAX_VALUE);}
+    even = !even;
+  }
+  
+  //buffer.values.addAllOfFromTo(new DoubleArrayList(infinities),0,missingInfinities-1);
+  
+  //this.totalElementsFilled -= infinities;
+  
+  this.samplingAssistant = oldAssistant; // switch on sampler again
 }
 /**
  * Not yet commented.
  */
 protected DoubleBuffer[] buffersToCollapse() {
-	int minLevel = bufferSet._getMinLevelOfFullOrPartialBuffers();
-	return bufferSet._getFullOrPartialBuffersWithLevel(minLevel);
+  int minLevel = bufferSet._getMinLevelOfFullOrPartialBuffers();
+  return bufferSet._getFullOrPartialBuffersWithLevel(minLevel);
 }
 /**
  * Removes all elements from the receiver.  The receiver will
  * be empty after this call returns, and its memory requirements will be close to zero.
  */
 public void clear() {
-	super.clear();
-	this.beta=1.0;
-	this.weHadMoreThanOneEmptyBuffer = false;
-	//this.setSamplingRate(samplingRate,N);
-
-	RandomSamplingAssistant assist = this.samplingAssistant;
-	if (assist != null) {
-		this.samplingAssistant = new RandomSamplingAssistant(Arithmetic.floor(N/samplingRate), N, assist.getRandomGenerator());
-	}
+  super.clear();
+  this.beta=1.0;
+  this.weHadMoreThanOneEmptyBuffer = false;
+  //this.setSamplingRate(samplingRate,N);
+
+  RandomSamplingAssistant assist = this.samplingAssistant;
+  if (assist != null) {
+    this.samplingAssistant = new RandomSamplingAssistant(Arithmetic.floor(N/samplingRate), N, assist.getRandomGenerator());
+  }
 }
 /**
  * Returns a deep copy of the receiver.
@@ -123,50 +121,50 @@
  * @return a deep copy of the receiver.
  */
 public Object clone() {
-	KnownDoubleQuantileEstimator copy = (KnownDoubleQuantileEstimator) super.clone();
-	if (this.samplingAssistant != null) copy.samplingAssistant = (RandomSamplingAssistant) copy.samplingAssistant.clone();
-	return copy;
+  KnownDoubleQuantileEstimator copy = (KnownDoubleQuantileEstimator) super.clone();
+  if (this.samplingAssistant != null) copy.samplingAssistant = (RandomSamplingAssistant) copy.samplingAssistant.clone();
+  return copy;
 }
 /**
  * Not yet commented.
  */
 protected void newBuffer() {
-	int numberOfEmptyBuffers = this.bufferSet._getNumberOfEmptyBuffers();
-	//DoubleBuffer[] emptyBuffers = this.bufferSet._getEmptyBuffers();
-	if (numberOfEmptyBuffers==0) throw new RuntimeException("Oops, no empty buffer.");
-
-	this.currentBufferToFill = this.bufferSet._getFirstEmptyBuffer();
-	if (numberOfEmptyBuffers==1 && ! this.weHadMoreThanOneEmptyBuffer) {
-		this.currentBufferToFill.level(this.bufferSet._getMinLevelOfFullOrPartialBuffers());
-	}
-	else {
-		this.weHadMoreThanOneEmptyBuffer = true;
-		this.currentBufferToFill.level(0);
-		/*
-		for (int i=0; i<emptyBuffers.length; i++) {
-			emptyBuffers[i].level = 0;			
-		}
-		*/
-	}
-	//currentBufferToFill.state = DoubleBuffer.PARTIAL;
-	this.currentBufferToFill.weight(1);
+  int numberOfEmptyBuffers = this.bufferSet._getNumberOfEmptyBuffers();
+  //DoubleBuffer[] emptyBuffers = this.bufferSet._getEmptyBuffers();
+  if (numberOfEmptyBuffers==0) throw new RuntimeException("Oops, no empty buffer.");
+
+  this.currentBufferToFill = this.bufferSet._getFirstEmptyBuffer();
+  if (numberOfEmptyBuffers==1 && ! this.weHadMoreThanOneEmptyBuffer) {
+    this.currentBufferToFill.level(this.bufferSet._getMinLevelOfFullOrPartialBuffers());
+  }
+  else {
+    this.weHadMoreThanOneEmptyBuffer = true;
+    this.currentBufferToFill.level(0);
+    /*
+    for (int i=0; i<emptyBuffers.length; i++) {
+      emptyBuffers[i].level = 0;      
+    }
+    */
+  }
+  //currentBufferToFill.state = DoubleBuffer.PARTIAL;
+  this.currentBufferToFill.weight(1);
 }
 /**
  * Not yet commented.
  */
 protected void postCollapse(DoubleBuffer[] toCollapse) {
-	this.weHadMoreThanOneEmptyBuffer = false;
+  this.weHadMoreThanOneEmptyBuffer = false;
 }
 /**
  */
 protected DoubleArrayList preProcessPhis(DoubleArrayList phis) {
-	if (beta>1.0) {
-		phis = phis.copy();
-		for (int i=phis.size(); --i >=0;) {
-			phis.set(i, (2*phis.get(i) + beta - 1) / (2*beta));
-		}		
-	}
-	return phis;
+  if (beta>1.0) {
+    phis = phis.copy();
+    for (int i=phis.size(); --i >=0;) {
+      phis.set(i, (2*phis.get(i) + beta - 1) / (2*beta));
+    }    
+  }
+  return phis;
 }
 /**
  * Computes the specified quantile elements over the values previously added.
@@ -174,41 +172,41 @@
  * @return the approximate quantile elements.
  */
 public DoubleArrayList quantileElements(DoubleArrayList phis) {
-	/*
-	* The KNOWN quantile finder reads off quantiles from FULL buffers only.
-	* Since there might be a partially full buffer, this method first satisfies this constraint by temporarily filling a few +infinity, -infinity elements to make up a full block.
-	* This is in full conformance with the explicit approximation guarantees.
- 	*
- 	* For those of you working on online apps:
-  	* The approximation guarantees are given for computing quantiles AFTER N elements have been filled, not for intermediate displays.
-	* If you have one thread filling and another thread displaying concurrently, you will note that in the very beginning the infinities will dominate the display.
- 	* This could confuse users, because, of course, they don't expect any infinities, even if they "disappear" after a short while.
- 	* To prevent panic exclude phi's close to zero or one in the early phases of processing.
-  	*/
-	DoubleBuffer partial = this.bufferSet._getPartialBuffer();
-	int missingValues = 0;
-	if (partial != null) { // any auxiliary infinities needed?
-		missingValues = bufferSet.k() - partial.size();
-		if (missingValues <= 0) throw new RuntimeException("Oops! illegal missing values.");
-
-		//System.out.println("adding "+missingValues+" infinity elements...");
-		this.addInfinities(missingValues,partial);
-
-		//determine beta (N + Infinity values = beta * N)
-		this.beta = (this.totalElementsFilled + missingValues) / (double)this.totalElementsFilled;
-		}
-	else {
-		this.beta=1.0; 
-	}
-	 
-	DoubleArrayList quantileElements = super.quantileElements(phis);
-
-	// restore state we were in before.
-	// remove the temporarily added infinities.
-	if (partial != null) removeInfinitiesFrom(missingValues, partial);
+  /*
+  * The KNOWN quantile finder reads off quantiles from FULL buffers only.
+  * Since there might be a partially full buffer, this method first satisfies this constraint by temporarily filling a few +infinity, -infinity elements to make up a full block.
+  * This is in full conformance with the explicit approximation guarantees.
+   *
+   * For those of you working on online apps:
+    * The approximation guarantees are given for computing quantiles AFTER N elements have been filled, not for intermediate displays.
+  * If you have one thread filling and another thread displaying concurrently, you will note that in the very beginning the infinities will dominate the display.
+   * This could confuse users, because, of course, they don't expect any infinities, even if they "disappear" after a short while.
+   * To prevent panic exclude phi's close to zero or one in the early phases of processing.
+    */
+  DoubleBuffer partial = this.bufferSet._getPartialBuffer();
+  int missingValues = 0;
+  if (partial != null) { // any auxiliary infinities needed?
+    missingValues = bufferSet.k() - partial.size();
+    if (missingValues <= 0) throw new RuntimeException("Oops! illegal missing values.");
+
+    //System.out.println("adding "+missingValues+" infinity elements...");
+    this.addInfinities(missingValues,partial);
+
+    //determine beta (N + Infinity values = beta * N)
+    this.beta = (this.totalElementsFilled + missingValues) / (double)this.totalElementsFilled;
+    }
+  else {
+    this.beta=1.0; 
+  }
+   
+  DoubleArrayList quantileElements = super.quantileElements(phis);
+
+  // restore state we were in before.
+  // remove the temporarily added infinities.
+  if (partial != null) removeInfinitiesFrom(missingValues, partial);
 
-	// now you can continue filling the remaining values, if any.
-	return quantileElements;
+  // now you can continue filling the remaining values, if any.
+  return quantileElements;
 }
 /**
  * Reading off quantiles requires to fill some +infinity, -infinity values to make a partial buffer become full.
@@ -220,42 +218,42 @@
  * @param buffer the buffer into which the infinities were filled.
  */
 protected void removeInfinitiesFrom(int infinities, DoubleBuffer buffer) {
-	int plusInf = 0;
-	int minusInf = 0;
-	// count them (this is not very clever but it's safe)
-	boolean even=true;
-	for (int i=0; i<infinities; i++) {
-		if (even) plusInf++;
-		else	  minusInf++;
-		even = !even;
-	}
-
-	buffer.values.removeFromTo(buffer.size()-plusInf, buffer.size()-1);
-	buffer.values.removeFromTo(0, minusInf-1);
-	//this.totalElementsFilled -= infinities;
+  int plusInf = 0;
+  int minusInf = 0;
+  // count them (this is not very clever but it's safe)
+  boolean even=true;
+  for (int i=0; i<infinities; i++) {
+    if (even) plusInf++;
+    else    minusInf++;
+    even = !even;
+  }
+
+  buffer.values.removeFromTo(buffer.size()-plusInf, buffer.size()-1);
+  buffer.values.removeFromTo(0, minusInf-1);
+  //this.totalElementsFilled -= infinities;
 }
 /**
  * Not yet commented.
  */
 protected boolean sampleNextElement() {
-	if (samplingAssistant == null) return true;
+  if (samplingAssistant == null) return true;
 
-	/*
-	 * This is a KNOWN N quantile finder!
-	 * One should not try to fill more than N elements,
-	 * because otherwise we can't give explicit approximation guarantees anymore.
-	 * Use an UNKNOWN quantile finder instead if your app may fill more than N elements.
-	 *
-	 * However, to make this class meaningful even under wired use cases, we actually do allow to fill more than N elements (without explicit approx. guarantees, of course).
-	 * Normally, elements beyond N will not get sampled because the sampler is exhausted. 
-	 * Therefore the histogram will no more change no matter how much you fill.
-	 * This might not be what the user expects.
-	 * Therefore we use a new (unexhausted) sampler with the same parametrization.
-	 *
-	 * If you want this class to ignore any elements beyong N, then comment the following line.
-	 */
-	//if ((totalElementsFilled-1) % N == 0) setSamplingRate(samplingRate, N); // delete if appropriate
+  /*
+   * This is a KNOWN N quantile finder!
+   * One should not try to fill more than N elements,
+   * because otherwise we can't give explicit approximation guarantees anymore.
+   * Use an UNKNOWN quantile finder instead if your app may fill more than N elements.
+   *
+   * However, to make this class meaningful even under wired use cases, we actually do allow to fill more than N elements (without explicit approx. guarantees, of course).
+   * Normally, elements beyond N will not get sampled because the sampler is exhausted. 
+   * Therefore the histogram will no more change no matter how much you fill.
+   * This might not be what the user expects.
+   * Therefore we use a new (unexhausted) sampler with the same parametrization.
+   *
+   * If you want this class to ignore any elements beyong N, then comment the following line.
+   */
+  //if ((totalElementsFilled-1) % N == 0) setSamplingRate(samplingRate, N); // delete if appropriate
 
-	return samplingAssistant.sampleNextElement();
+  return samplingAssistant.sampleNextElement();
 }
 }

Modified: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/Quantile1Test.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/Quantile1Test.java?rev=883972&r1=883971&r2=883972&view=diff
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/Quantile1Test.java (original)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/Quantile1Test.java Wed Nov 25 03:31:47 2009
@@ -17,82 +17,82 @@
 @Deprecated
 public class Quantile1Test {
     public static void main(String[] argv) {
-	/*
-	 * Get the number of examples from the first argument
-	 */
-	int numExamples = 0;
-	try {
-	    numExamples = Integer.parseInt(argv[0]);
-	}
-	catch (Exception e) {
-	    System.err.println("Unable to parse input line count argument");
-	    System.err.println(e.getMessage());
-	    System.exit(1);
-	}
-	System.out.println("Got numExamples=" + numExamples);
-
-	/*
-	 * Get N from the second argument
-	 */
-	long N = 0;
-	try {
-	    if (argv[1].equals("L")) {
-		N = Long.MAX_VALUE;
-	    } else if (argv[1].equals("I")) {
-		N = (long)Integer.MAX_VALUE;
-	    } else {
-		N = Long.parseLong(argv[1]);
-	    }
-	}
-	catch (Exception e) {
-	    System.err.println("Error parsing flag for N");
-	    System.err.println(e.getMessage());
-	    System.exit(1);
-	}
-	System.out.println("Got N=" + N);
-
-	/*
-	 * Set up the QuantileBin1D object
-	 *
-	DRand rand = new DRand(new Date());
-	QuantileBin1D qAccum = new QuantileBin1D(false,
-						 N,
-						 1.e-4,
-						 1.e-3,
-						 200,
-						 rand,
-						 false,
-						 false,
-						 2);
-
-	DynamicBin1D dbin = new DynamicBin1D();
-	
-	*
-	 * Use a new random number generator to generate numExamples
-	 * random gaussians, and add them to the QuantileBin1D
-	 *
-	Uniform dataRand = new Uniform(new DRand(7757));
-	for (int i = 1; i <= numExamples; i++) {
-		double gauss = dataRand.nextDouble();
-	    qAccum.add(gauss);
-	    dbin.add(gauss);
-	}
-	
-	*
-	 * print out the percentiles
-	 *
-	DecimalFormat fmt = new DecimalFormat("0.00");
-	System.out.println();
-	//int step = 1;
-	int step = 10;
-	for (int i = 1; i < 100; ) {
-	    double percent = ((double)i) * 0.01;
-	    double quantile = qAccum.quantile(percent);
-	    System.out.println(fmt.format(percent) + "  " + quantile + ",  " + dbin.quantile(percent)+ ",  " + (dbin.quantile(percent)-quantile));
-	    i = i + step;
-	    *
-	}
-	*/
+  /*
+   * Get the number of examples from the first argument
+   */
+  int numExamples = 0;
+  try {
+      numExamples = Integer.parseInt(argv[0]);
+  }
+  catch (Exception e) {
+      System.err.println("Unable to parse input line count argument");
+      System.err.println(e.getMessage());
+      System.exit(1);
+  }
+  System.out.println("Got numExamples=" + numExamples);
+
+  /*
+   * Get N from the second argument
+   */
+  long N = 0;
+  try {
+      if (argv[1].equals("L")) {
+    N = Long.MAX_VALUE;
+      } else if (argv[1].equals("I")) {
+    N = (long)Integer.MAX_VALUE;
+      } else {
+    N = Long.parseLong(argv[1]);
+      }
+  }
+  catch (Exception e) {
+      System.err.println("Error parsing flag for N");
+      System.err.println(e.getMessage());
+      System.exit(1);
+  }
+  System.out.println("Got N=" + N);
+
+  /*
+   * Set up the QuantileBin1D object
+   *
+  DRand rand = new DRand(new Date());
+  QuantileBin1D qAccum = new QuantileBin1D(false,
+             N,
+             1.e-4,
+             1.e-3,
+             200,
+             rand,
+             false,
+             false,
+             2);
+
+  DynamicBin1D dbin = new DynamicBin1D();
+  
+  *
+   * Use a new random number generator to generate numExamples
+   * random gaussians, and add them to the QuantileBin1D
+   *
+  Uniform dataRand = new Uniform(new DRand(7757));
+  for (int i = 1; i <= numExamples; i++) {
+    double gauss = dataRand.nextDouble();
+      qAccum.add(gauss);
+      dbin.add(gauss);
+  }
+  
+  *
+   * print out the percentiles
+   *
+  DecimalFormat fmt = new DecimalFormat("0.00");
+  System.out.println();
+  //int step = 1;
+  int step = 10;
+  for (int i = 1; i < 100; ) {
+      double percent = ((double)i) * 0.01;
+      double quantile = qAccum.quantile(percent);
+      System.out.println(fmt.format(percent) + "  " + quantile + ",  " + dbin.quantile(percent)+ ",  " + (dbin.quantile(percent)-quantile));
+      i = i + step;
+      *
+  }
+  */
 }
 }
 

Modified: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/QuantileCalc.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/QuantileCalc.java?rev=883972&r1=883971&r2=883972&view=diff
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/QuantileCalc.java (original)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/QuantileCalc.java Wed Nov 25 03:31:47 2009
@@ -18,19 +18,19 @@
  * Tries to avoid numeric overflows. 
  * @return the binomial coefficient.
  */
-public static double binomial(long n, long k) {	
-	if (k==0 || k==n) {return 1.0;}
+public static double binomial(long n, long k) {  
+  if (k==0 || k==n) {return 1.0;}
 
-	// since binomial(n,k)==binomial(n,n-k), we can enforce the faster variant,
-	// which is also the variant minimizing number overflows.
-	if (k>n/2.0) k=n-k;
-	
-	double binomial=1.0;
-	long N=n-k+1;
-	for (long i=k; i>0; ) {
-		binomial *= ((double) N++) / (double) (i--);
-	}
-	return binomial;
+  // since binomial(n,k)==binomial(n,n-k), we can enforce the faster variant,
+  // which is also the variant minimizing number overflows.
+  if (k>n/2.0) k=n-k;
+  
+  double binomial=1.0;
+  long N=n-k+1;
+  for (long i=k; i>0; ) {
+    binomial *= ((double) N++) / (double) (i--);
+  }
+  return binomial;
 }
 /**
  * Returns the smallest <code>long &gt;= value</code>.
@@ -38,7 +38,7 @@
  * This method is safer than using (long) Math.ceil(value), because of possible rounding error.
  */
 public static long ceiling(double value) {
-	return Math.round(Math.ceil(value));
+  return Math.round(Math.ceil(value));
 }
 /**
  * Computes the number of buffers and number of values per buffer such that
@@ -55,11 +55,11 @@
  * @return <tt>long[2]</tt> - <tt>long[0]</tt>=the number of buffers, <tt>long[1]</tt>=the number of elements per buffer, <tt>returnSamplingRate[0]</tt>=the required sampling rate.
  */
 public static long[] known_N_compute_B_and_K(long N, double epsilon, double delta, int quantiles, double[] returnSamplingRate) {
-	if (delta > 0.0) {
-		return known_N_compute_B_and_K_slow(N, epsilon, delta, quantiles, returnSamplingRate);
-	}
-	returnSamplingRate[0] = 1.0;
-	return known_N_compute_B_and_K_quick(N, epsilon);
+  if (delta > 0.0) {
+    return known_N_compute_B_and_K_slow(N, epsilon, delta, quantiles, returnSamplingRate);
+  }
+  returnSamplingRate[0] = 1.0;
+  return known_N_compute_B_and_K_quick(N, epsilon);
 }
 /**
  * Computes the number of buffers and number of values per buffer such that
@@ -70,100 +70,100 @@
  * @param epsilon the approximation error which is guaranteed not to be exceeded (e.g. <tt>0.001</tt>) (<tt>0 &lt;= epsilon &lt;= 1</tt>). To get exact result, set <tt>epsilon=0.0</tt>;
  */
 protected static long[] known_N_compute_B_and_K_quick(long N, double epsilon) {
-	if (epsilon<=0.0) {
-		// no way around exact quantile search
-		long[] result = new long[2];
-		result[0]=1;
-		result[1]=N;
-		return result;
-	}
-
-	final int maxBuffers = 50;
-	final int maxHeight = 50;
-	final double N_double = (double) N;
-	final double c = N_double * epsilon * 2.0;
-	int[] heightMaximums = new int[maxBuffers-1];
-
-	// for each b, determine maximum height, i.e. the height for which x<=0 and x is a maximum
-	// with x = binomial(b+h-2, h-1) - binomial(b+h-3, h-3) + binomial(b+h-3, h-2) - N * epsilon * 2.0
-	for (int b=2; b<=maxBuffers; b++) {
-		int h = 3;
-		
-		while ( h<=maxHeight && // skip heights until x<=0
-				(h-2) * ((double)Math.round(binomial(b+h-2, h-1))) -
-				((double) Math.round(binomial(b+h-3, h-3))) +
-				((double) Math.round(binomial(b+h-3, h-2))) - c
-				> 0.0
-			  ) {h++;}
-		//from now on x is monotonically growing...
-		while ( h<=maxHeight && // skip heights until x>0
-				(h-2) * ((double)Math.round(binomial(b+h-2, h-1))) -
-				((double) Math.round(binomial(b+h-3, h-3))) +
-				((double) Math.round(binomial(b+h-3, h-2))) - c
-				<= 0.0
-			  ) {h++;}
-		h--; //go back to last height
-
-		// was x>0 or did we loop without finding anything?
-		int hMax;
-		if (h>=maxHeight && 
-				(h-2) * ((double)Math.round(binomial(b+h-2, h-1))) -
-				((double) Math.round(binomial(b+h-3, h-3))) +
-				((double) Math.round(binomial(b+h-3, h-2))) - c
-				> 0.0) {
-			hMax=Integer.MIN_VALUE;
-		}
-		else {
-			hMax=h;
-		}
-				
-		heightMaximums[b-2]=hMax; //safe some space
-	} //end for
-
-
-	// for each b, determine the smallest k satisfying the constraints, i.e.
-	// for each b, determine kMin, with kMin = N/binomial(b+hMax-2,hMax-1)
-	long[] kMinimums = new long[maxBuffers-1];
-	for (int b=2; b<=maxBuffers; b++) {
-		int h=heightMaximums[b-2];
-		long kMin=Long.MAX_VALUE;
-		if (h>Integer.MIN_VALUE) {
-			double value = ((double)Math.round(binomial(b+h-2, h-1)));
-			long tmpK=ceiling(N_double/value);
-			if (tmpK<=Long.MAX_VALUE) {
-				kMin = tmpK;
-			}
-		}
-		kMinimums[b-2]=kMin;
-	}
-
-	// from all b's, determine b that minimizes b*kMin
-	long multMin = Long.MAX_VALUE;
-	int minB = -1;
-	for (int b=2; b<=maxBuffers; b++) {
-		if (kMinimums[b-2]<Long.MAX_VALUE) {
-			long mult = ((long)b) * ((long)kMinimums[b-2]);
-			if (mult<multMin) {
-				multMin=mult;
-				minB = b;
-			}
-		}
-	}
-
-	long b, k;
-	if (minB != -1) { // epsilon large enough?
-		b = minB;
-		k = kMinimums[minB-2];
-	}
-	else {     // epsilon is very small or zero.
-		b = 1; // the only possible solution without violating the 
-		k = N; // approximation guarantees is exact quantile search.
-	}
-
-	long[] result = new long[2];
-	result[0]=b;
-	result[1]=k;
-	return result;
+  if (epsilon<=0.0) {
+    // no way around exact quantile search
+    long[] result = new long[2];
+    result[0]=1;
+    result[1]=N;
+    return result;
+  }
+
+  final int maxBuffers = 50;
+  final int maxHeight = 50;
+  final double N_double = (double) N;
+  final double c = N_double * epsilon * 2.0;
+  int[] heightMaximums = new int[maxBuffers-1];
+
+  // for each b, determine maximum height, i.e. the height for which x<=0 and x is a maximum
+  // with x = binomial(b+h-2, h-1) - binomial(b+h-3, h-3) + binomial(b+h-3, h-2) - N * epsilon * 2.0
+  for (int b=2; b<=maxBuffers; b++) {
+    int h = 3;
+    
+    while ( h<=maxHeight && // skip heights until x<=0
+        (h-2) * ((double)Math.round(binomial(b+h-2, h-1))) -
+        ((double) Math.round(binomial(b+h-3, h-3))) +
+        ((double) Math.round(binomial(b+h-3, h-2))) - c
+        > 0.0
+        ) {h++;}
+    //from now on x is monotonically growing...
+    while ( h<=maxHeight && // skip heights until x>0
+        (h-2) * ((double)Math.round(binomial(b+h-2, h-1))) -
+        ((double) Math.round(binomial(b+h-3, h-3))) +
+        ((double) Math.round(binomial(b+h-3, h-2))) - c
+        <= 0.0
+        ) {h++;}
+    h--; //go back to last height
+
+    // was x>0 or did we loop without finding anything?
+    int hMax;
+    if (h>=maxHeight && 
+        (h-2) * ((double)Math.round(binomial(b+h-2, h-1))) -
+        ((double) Math.round(binomial(b+h-3, h-3))) +
+        ((double) Math.round(binomial(b+h-3, h-2))) - c
+        > 0.0) {
+      hMax=Integer.MIN_VALUE;
+    }
+    else {
+      hMax=h;
+    }
+        
+    heightMaximums[b-2]=hMax; //safe some space
+  } //end for
+
+
+  // for each b, determine the smallest k satisfying the constraints, i.e.
+  // for each b, determine kMin, with kMin = N/binomial(b+hMax-2,hMax-1)
+  long[] kMinimums = new long[maxBuffers-1];
+  for (int b=2; b<=maxBuffers; b++) {
+    int h=heightMaximums[b-2];
+    long kMin=Long.MAX_VALUE;
+    if (h>Integer.MIN_VALUE) {
+      double value = ((double)Math.round(binomial(b+h-2, h-1)));
+      long tmpK=ceiling(N_double/value);
+      if (tmpK<=Long.MAX_VALUE) {
+        kMin = tmpK;
+      }
+    }
+    kMinimums[b-2]=kMin;
+  }
+
+  // from all b's, determine b that minimizes b*kMin
+  long multMin = Long.MAX_VALUE;
+  int minB = -1;
+  for (int b=2; b<=maxBuffers; b++) {
+    if (kMinimums[b-2]<Long.MAX_VALUE) {
+      long mult = ((long)b) * ((long)kMinimums[b-2]);
+      if (mult<multMin) {
+        multMin=mult;
+        minB = b;
+      }
+    }
+  }
+
+  long b, k;
+  if (minB != -1) { // epsilon large enough?
+    b = minB;
+    k = kMinimums[minB-2];
+  }
+  else {     // epsilon is very small or zero.
+    b = 1; // the only possible solution without violating the 
+    k = N; // approximation guarantees is exact quantile search.
+  }
+
+  long[] result = new long[2];
+  result[0]=b;
+  result[1]=k;
+  return result;
 }
 /**
  * Computes the number of buffers and number of values per buffer such that
@@ -178,149 +178,149 @@
  * @return <tt>long[2]</tt> - <tt>long[0]</tt>=the number of buffers, <tt>long[1]</tt>=the number of elements per buffer, <tt>returnSamplingRate[0]</tt>=the required sampling rate.
  */
 protected static long[] known_N_compute_B_and_K_slow(long N, double epsilon, double delta, int quantiles, double[] returnSamplingRate) {
-	// delta can be set to zero, i.e., all quantiles should be approximate with probability 1	
-	if (epsilon<=0.0) {
-		// no way around exact quantile search
-		long[] result = new long[2];
-		result[0]=1;
-		result[1]=N;
-		returnSamplingRate[0]=1.0;
-		return result;
-	}
-
-
-	final int maxBuffers = 50;
-	final int maxHeight = 50;
-	final double N_double = N;
-
-	// One possibility is to use one buffer of size N
-	//
-	long ret_b = 1;
-	long ret_k = N;
-	double sampling_rate = 1.0;
-	long memory = N;
-
-
-	// Otherwise, there are at least two buffers (b >= 2)
-	// and the height of the tree is at least three (h >= 3)
-	//
-	// We restrict the search for b and h to MAX_BINOM, a large enough value for
-	// practical values of    epsilon >= 0.001   and    delta >= 0.00001
-	//
-	final double logarithm = Math.log(2.0*quantiles/delta);
-	final double c = 2.0 * epsilon * N_double;
-	for (long b=2 ; b<maxBuffers ; b++)
-		for (long h=3 ; h<maxHeight ; h++) {
-			double binomial = binomial(b+h-2, h-1);
-			long tmp = ceiling(N_double / binomial);
-			if ((b * tmp < memory) && 
-					((h-2) * binomial - binomial(b+h-3, h-3) + binomial(b+h-3, h-2)
-					<= c) ) {
-				ret_k = tmp ;
-				ret_b = b ;
-				memory = ret_k * b;
-				sampling_rate = 1.0 ;
-			}
-			if (delta > 0.0) {
-				double t = (h-2) * binomial(b+h-2, h-1) - binomial(b+h-3, h-3) + binomial(b+h-3, h-2) ;
-				double u = logarithm / epsilon ;
-				double v = binomial (b+h-2, h-1) ;
-				double w = logarithm / (2.0*epsilon*epsilon) ;
-
-				// From our SIGMOD 98 paper, we have two equantions to satisfy:
-				// t  <= u * alpha/(1-alpha)^2
-				// kv >= w/(1-alpha)^2
-				//
-				// Denoting 1/(1-alpha)    by x,
-				// we see that the first inequality is equivalent to
-				// t/u <= x^2 - x
-				// which is satisfied by x >= 0.5 + 0.5 * sqrt (1 + 4t/u)
-				// Plugging in this value into second equation yields
-				// k >= wx^2/v
-
-				double x = 0.5 + 0.5 * Math.sqrt(1.0 + 4.0*t/u) ;
-				long k = ceiling(w*x*x/v) ;
-				if (b * k < memory) {
-					ret_k = k ;
-					ret_b = b ;
-					memory = b * k ;
-					sampling_rate = N_double*2.0*epsilon*epsilon / logarithm ;
-				}
-			}
-		}
-		
-	long[] result = new long[2];
-	result[0]=ret_b;
-	result[1]=ret_k;
-	returnSamplingRate[0]=sampling_rate;
-	return result;
+  // delta can be set to zero, i.e., all quantiles should be approximate with probability 1  
+  if (epsilon<=0.0) {
+    // no way around exact quantile search
+    long[] result = new long[2];
+    result[0]=1;
+    result[1]=N;
+    returnSamplingRate[0]=1.0;
+    return result;
+  }
+
+
+  final int maxBuffers = 50;
+  final int maxHeight = 50;
+  final double N_double = N;
+
+  // One possibility is to use one buffer of size N
+  //
+  long ret_b = 1;
+  long ret_k = N;
+  double sampling_rate = 1.0;
+  long memory = N;
+
+
+  // Otherwise, there are at least two buffers (b >= 2)
+  // and the height of the tree is at least three (h >= 3)
+  //
+  // We restrict the search for b and h to MAX_BINOM, a large enough value for
+  // practical values of    epsilon >= 0.001   and    delta >= 0.00001
+  //
+  final double logarithm = Math.log(2.0*quantiles/delta);
+  final double c = 2.0 * epsilon * N_double;
+  for (long b=2 ; b<maxBuffers ; b++)
+    for (long h=3 ; h<maxHeight ; h++) {
+      double binomial = binomial(b+h-2, h-1);
+      long tmp = ceiling(N_double / binomial);
+      if ((b * tmp < memory) && 
+          ((h-2) * binomial - binomial(b+h-3, h-3) + binomial(b+h-3, h-2)
+          <= c) ) {
+        ret_k = tmp ;
+        ret_b = b ;
+        memory = ret_k * b;
+        sampling_rate = 1.0 ;
+      }
+      if (delta > 0.0) {
+        double t = (h-2) * binomial(b+h-2, h-1) - binomial(b+h-3, h-3) + binomial(b+h-3, h-2) ;
+        double u = logarithm / epsilon ;
+        double v = binomial (b+h-2, h-1) ;
+        double w = logarithm / (2.0*epsilon*epsilon) ;
+
+        // From our SIGMOD 98 paper, we have two equantions to satisfy:
+        // t  <= u * alpha/(1-alpha)^2
+        // kv >= w/(1-alpha)^2
+        //
+        // Denoting 1/(1-alpha)    by x,
+        // we see that the first inequality is equivalent to
+        // t/u <= x^2 - x
+        // which is satisfied by x >= 0.5 + 0.5 * sqrt (1 + 4t/u)
+        // Plugging in this value into second equation yields
+        // k >= wx^2/v
+
+        double x = 0.5 + 0.5 * Math.sqrt(1.0 + 4.0*t/u) ;
+        long k = ceiling(w*x*x/v) ;
+        if (b * k < memory) {
+          ret_k = k ;
+          ret_b = b ;
+          memory = b * k ;
+          sampling_rate = N_double*2.0*epsilon*epsilon / logarithm ;
+        }
+      }
+    }
+    
+  long[] result = new long[2];
+  result[0]=ret_b;
+  result[1]=ret_k;
+  returnSamplingRate[0]=sampling_rate;
+  return result;
 }
 public static void main(String[] args) {
-	test_B_and_K_Calculation(args);
+  test_B_and_K_Calculation(args);
 }
 /**
  * Computes b and k for different parameters.
  */
 public static void test_B_and_K_Calculation(String[] args) {
-	boolean known_N;
-	if (args==null) known_N = false;
-	else known_N = new Boolean(args[0]).booleanValue();
-
-	int[] quantiles = {1,1000};
-	
-	long[] sizes = {100000, 1000000, 10000000, 1000000000};
-	
-	double[] deltas = {0.0, 0.001, 0.0001, 0.00001};
-	
-	double[] epsilons = {0.0, 0.1, 0.05, 0.01, 0.005, 0.001, 0.0000001};
-
-
-	
-	if (! known_N) sizes = new long[] {0};
-	System.out.println("\n\n");
-	if (known_N) 
-		System.out.println("Computing b's and k's for KNOWN N");
-	else 
-		System.out.println("Computing b's and k's for UNKNOWN N");
-	System.out.println("mem [elements/1024]");
-	System.out.println("***********************************");
-
-	for (int q=0; q<quantiles.length; q++) {
-		int p = quantiles[q];
-		System.out.println("------------------------------");
-		System.out.println("computing for p = "+p);
-		for (int s=0; s<sizes.length; s++) {
-			long N = sizes[s];
-			System.out.println("   ------------------------------");
-			System.out.println("   computing for N = "+N);
-			for (int d=0; d<deltas.length; d++) {
-				double delta = deltas[d];
-				System.out.println("      ------------------------------");
-				System.out.println("      computing for delta = "+delta);
-				for (int e=0; e<epsilons.length; e++) {
-					double epsilon = epsilons[e];
-					
-					double[] returnSamplingRate = new double[1];
-					long[] result;
-					if (known_N) {
-						result = known_N_compute_B_and_K(N, epsilon, delta, p, returnSamplingRate);
-					}
-					else {
-						result = unknown_N_compute_B_and_K(epsilon, delta, p);
-					}
-
-					long b = result[0];
-					long k = result[1];
-					System.out.print("         (e,d,N,p)=("+epsilon+","+delta+","+N+","+p+") --> ");
-					System.out.print("(b,k,mem");
-					if (known_N) System.out.print(",sampling");
-					System.out.print(")=("+b+","+k+","+(b*k/1024));
-					if (known_N) System.out.print(","+returnSamplingRate[0]);
-					System.out.println(")");
-				}
-			}
-		}
-	}
+  boolean known_N;
+  if (args==null) known_N = false;
+  else known_N = new Boolean(args[0]).booleanValue();
+
+  int[] quantiles = {1,1000};
+  
+  long[] sizes = {100000, 1000000, 10000000, 1000000000};
+  
+  double[] deltas = {0.0, 0.001, 0.0001, 0.00001};
+  
+  double[] epsilons = {0.0, 0.1, 0.05, 0.01, 0.005, 0.001, 0.0000001};
+
+
+  
+  if (! known_N) sizes = new long[] {0};
+  System.out.println("\n\n");
+  if (known_N) 
+    System.out.println("Computing b's and k's for KNOWN N");
+  else 
+    System.out.println("Computing b's and k's for UNKNOWN N");
+  System.out.println("mem [elements/1024]");
+  System.out.println("***********************************");
+
+  for (int q=0; q<quantiles.length; q++) {
+    int p = quantiles[q];
+    System.out.println("------------------------------");
+    System.out.println("computing for p = "+p);
+    for (int s=0; s<sizes.length; s++) {
+      long N = sizes[s];
+      System.out.println("   ------------------------------");
+      System.out.println("   computing for N = "+N);
+      for (int d=0; d<deltas.length; d++) {
+        double delta = deltas[d];
+        System.out.println("      ------------------------------");
+        System.out.println("      computing for delta = "+delta);
+        for (int e=0; e<epsilons.length; e++) {
+          double epsilon = epsilons[e];
+          
+          double[] returnSamplingRate = new double[1];
+          long[] result;
+          if (known_N) {
+            result = known_N_compute_B_and_K(N, epsilon, delta, p, returnSamplingRate);
+          }
+          else {
+            result = unknown_N_compute_B_and_K(epsilon, delta, p);
+          }
+
+          long b = result[0];
+          long k = result[1];
+          System.out.print("         (e,d,N,p)=("+epsilon+","+delta+","+N+","+p+") --> ");
+          System.out.print("(b,k,mem");
+          if (known_N) System.out.print(",sampling");
+          System.out.print(")=("+b+","+k+","+(b*k/1024));
+          if (known_N) System.out.print(","+returnSamplingRate[0]);
+          System.out.println(")");
+        }
+      }
+    }
+  }
 
 }
 /**
@@ -333,117 +333,117 @@
  * @return <tt>long[3]</tt> - <tt>long[0]</tt>=the number of buffers, <tt>long[1]</tt>=the number of elements per buffer, <tt>long[2]</tt>=the tree height where sampling shall start.
  */
 public static long[] unknown_N_compute_B_and_K(double epsilon, double delta, int quantiles) {
-	// delta can be set to zero, i.e., all quantiles should be approximate with probability 1	
-	if (epsilon<=0.0 || delta<=0.0) {
-		// no way around exact quantile search
-		long[] result = new long[3];
-		result[0]=1;
-		result[1]=Long.MAX_VALUE;
-		result[2]=Long.MAX_VALUE;
-		return result;
-	}
-
-	int max_b = 50;
-	int max_h = 50;
-	int max_H = 50;
-	int max_Iterations = 2;
-
-	long best_b = Long.MAX_VALUE;
-	long best_k = Long.MAX_VALUE;
-	long best_h = Long.MAX_VALUE;
-	long best_memory = Long.MAX_VALUE;
-
-	double pow = Math.pow(2.0, max_H);
-	double logDelta =  Math.log(2.0/(delta/quantiles)) / (2.0*epsilon*epsilon);
-	//double logDelta =  Math.log(2.0/(quantiles*delta)) / (2.0*epsilon*epsilon);
-
-	while (best_b == Long.MAX_VALUE && max_Iterations-- > 0) { //until we find a solution
-		// identify that combination of b and h that minimizes b*k.
-		// exhaustive search.
-		for (int b=2; b<=max_b; b++) {
-			for (int h=2; h<=max_h; h++) {
-				double Ld = binomial(b+h-2, h-1);
-				double Ls = binomial(b+h-3,h-1);
-				
-				// now we have k>=c*(1-alpha)^-2.
-				// let's compute c.
-				//double c = Math.log(2.0/(delta/quantiles)) / (2.0*epsilon*epsilon*Math.min(Ld, 8.0*Ls/3.0));
-				double c = logDelta / Math.min(Ld, 8.0*Ls/3.0);
-
-				// now we have k>=d/alpha.
-				// let's compute d.				
-				double beta = Ld/Ls;
-				double cc = (beta-2.0)*(max_H-2.0) / (beta + pow - 2.0);
-				double d = (h+3+cc) / (2.0*epsilon);
-				
-				/*
-				double d = (Ld*(h+max_H-1.0)  +  Ls*((h+1)*pow - 2.0*(h+max_H)))   /   (Ld + Ls*(pow-2.0));
-				d = (d + 2.0) / (2.0*epsilon);
-				*/
-
-				// now we have c*(1-alpha)^-2 == d/alpha.
-				// we solve this equation for alpha yielding two solutions
-				// alpha_1,2 = (c + 2*d  +-  Sqrt(c*c + 4*c*d))/(2*d)				
-				double f = c*c + 4.0*c*d;
-				if (f<0.0) continue; // non real solution to equation
-				double root = Math.sqrt(f);
-				double alpha_one = (c + 2.0*d + root)/(2.0*d);
-				double alpha_two = (c + 2.0*d - root)/(2.0*d);
-
-				// any alpha must satisfy 0<alpha<1 to yield valid solutions
-				boolean alpha_one_OK = false;
-				boolean alpha_two_OK = false;
-				if (0.0 < alpha_one && alpha_one < 1.0) alpha_one_OK = true;
-				if (0.0 < alpha_two && alpha_two < 1.0) alpha_two_OK = true;
-				if (alpha_one_OK || alpha_two_OK) {
-					double alpha = alpha_one;
-					if (alpha_one_OK && alpha_two_OK) {
-						// take the alpha that minimizes d/alpha
-						alpha = Math.max(alpha_one, alpha_two);
-					}
-					else if (alpha_two_OK) {
-						alpha = alpha_two;
-					}
-
-					// now we have k=Ceiling(Max(d/alpha, (h+1)/(2*epsilon)))
-					long k = ceiling(Math.max(d/alpha, (h+1)/(2.0*epsilon)));
-					if (k>0) { // valid solution?
-						long memory = b*k;
-						if (memory < best_memory) { 
-							// found a solution requiring less memory
-							best_k = k;
-							best_b = b;
-							best_h = h;
-							best_memory = memory;
-						}
-					}
-				}
-			} //end for h
-		} //end for b
-		
-		if (best_b == Long.MAX_VALUE) {
-			System.out.println("Warning: Computing b and k looks like a lot of work!");
-			// no solution found so far. very unlikely. Anyway, try again.
-			max_b *= 2;
-			max_h *= 2;
-			max_H *= 2;
-		}
-	} //end while
-
-	long[] result = new long[3];
-	if (best_b == Long.MAX_VALUE) {
-		// no solution found.
-		// no way around exact quantile search.
-		result[0]=1;
-		result[1]=Long.MAX_VALUE;
-		result[2]=Long.MAX_VALUE;
-	}
-	else {
-		result[0]=best_b;
-		result[1]=best_k;
-		result[2]=best_h;
-	}
+  // delta can be set to zero, i.e., all quantiles should be approximate with probability 1  
+  if (epsilon<=0.0 || delta<=0.0) {
+    // no way around exact quantile search
+    long[] result = new long[3];
+    result[0]=1;
+    result[1]=Long.MAX_VALUE;
+    result[2]=Long.MAX_VALUE;
+    return result;
+  }
+
+  int max_b = 50;
+  int max_h = 50;
+  int max_H = 50;
+  int max_Iterations = 2;
+
+  long best_b = Long.MAX_VALUE;
+  long best_k = Long.MAX_VALUE;
+  long best_h = Long.MAX_VALUE;
+  long best_memory = Long.MAX_VALUE;
+
+  double pow = Math.pow(2.0, max_H);
+  double logDelta =  Math.log(2.0/(delta/quantiles)) / (2.0*epsilon*epsilon);
+  //double logDelta =  Math.log(2.0/(quantiles*delta)) / (2.0*epsilon*epsilon);
+
+  while (best_b == Long.MAX_VALUE && max_Iterations-- > 0) { //until we find a solution
+    // identify that combination of b and h that minimizes b*k.
+    // exhaustive search.
+    for (int b=2; b<=max_b; b++) {
+      for (int h=2; h<=max_h; h++) {
+        double Ld = binomial(b+h-2, h-1);
+        double Ls = binomial(b+h-3,h-1);
+        
+        // now we have k>=c*(1-alpha)^-2.
+        // let's compute c.
+        //double c = Math.log(2.0/(delta/quantiles)) / (2.0*epsilon*epsilon*Math.min(Ld, 8.0*Ls/3.0));
+        double c = logDelta / Math.min(Ld, 8.0*Ls/3.0);
+
+        // now we have k>=d/alpha.
+        // let's compute d.        
+        double beta = Ld/Ls;
+        double cc = (beta-2.0)*(max_H-2.0) / (beta + pow - 2.0);
+        double d = (h+3+cc) / (2.0*epsilon);
+        
+        /*
+        double d = (Ld*(h+max_H-1.0)  +  Ls*((h+1)*pow - 2.0*(h+max_H)))   /   (Ld + Ls*(pow-2.0));
+        d = (d + 2.0) / (2.0*epsilon);
+        */
+
+        // now we have c*(1-alpha)^-2 == d/alpha.
+        // we solve this equation for alpha yielding two solutions
+        // alpha_1,2 = (c + 2*d  +-  Sqrt(c*c + 4*c*d))/(2*d)        
+        double f = c*c + 4.0*c*d;
+        if (f<0.0) continue; // non real solution to equation
+        double root = Math.sqrt(f);
+        double alpha_one = (c + 2.0*d + root)/(2.0*d);
+        double alpha_two = (c + 2.0*d - root)/(2.0*d);
+
+        // any alpha must satisfy 0<alpha<1 to yield valid solutions
+        boolean alpha_one_OK = false;
+        boolean alpha_two_OK = false;
+        if (0.0 < alpha_one && alpha_one < 1.0) alpha_one_OK = true;
+        if (0.0 < alpha_two && alpha_two < 1.0) alpha_two_OK = true;
+        if (alpha_one_OK || alpha_two_OK) {
+          double alpha = alpha_one;
+          if (alpha_one_OK && alpha_two_OK) {
+            // take the alpha that minimizes d/alpha
+            alpha = Math.max(alpha_one, alpha_two);
+          }
+          else if (alpha_two_OK) {
+            alpha = alpha_two;
+          }
+
+          // now we have k=Ceiling(Max(d/alpha, (h+1)/(2*epsilon)))
+          long k = ceiling(Math.max(d/alpha, (h+1)/(2.0*epsilon)));
+          if (k>0) { // valid solution?
+            long memory = b*k;
+            if (memory < best_memory) { 
+              // found a solution requiring less memory
+              best_k = k;
+              best_b = b;
+              best_h = h;
+              best_memory = memory;
+            }
+          }
+        }
+      } //end for h
+    } //end for b
+    
+    if (best_b == Long.MAX_VALUE) {
+      System.out.println("Warning: Computing b and k looks like a lot of work!");
+      // no solution found so far. very unlikely. Anyway, try again.
+      max_b *= 2;
+      max_h *= 2;
+      max_H *= 2;
+    }
+  } //end while
+
+  long[] result = new long[3];
+  if (best_b == Long.MAX_VALUE) {
+    // no solution found.
+    // no way around exact quantile search.
+    result[0]=1;
+    result[1]=Long.MAX_VALUE;
+    result[2]=Long.MAX_VALUE;
+  }
+  else {
+    result[0]=best_b;
+    result[1]=best_k;
+    result[2]=best_h;
+  }
 
-	return result;
+  return result;
 }
 }

Modified: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/QuantileFinderFactory.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/QuantileFinderFactory.java?rev=883972&r1=883971&r2=883972&view=diff
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/QuantileFinderFactory.java (original)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/jet/stat/quantile/QuantileFinderFactory.java Wed Nov 25 03:31:47 2009
@@ -83,8 +83,6 @@
  *</pre><p>
  *
  *
- * @author wolfgang.hoschek@cern.ch
- * @version 1.0, 09/24/99
  * @see KnownDoubleQuantileEstimator
  * @see UnknownDoubleQuantileEstimator
  */ 
@@ -113,26 +111,26 @@
  * @return <tt>long[2]</tt> - <tt>long[0]</tt>=the number of buffers, <tt>long[1]</tt>=the number of elements per buffer, <tt>returnSamplingRate[0]</tt>=the required sampling rate.
  */
 public static long[] known_N_compute_B_and_K(long N, double epsilon, double delta, int quantiles, double[] returnSamplingRate) {
-	returnSamplingRate[0] = 1.0;
-	if (epsilon<=0.0) {
-		// no way around exact quantile search
-		long[] result = new long[2];
-		result[0]=1;
-		result[1]=N;
-		return result;
-	}
-	if (epsilon>=1.0 || delta>=1.0) {
-		// can make any error we wish
-		long[] result = new long[2];
-		result[0]=2;
-		result[1]=1;
-		return result;
-	}
-
-	if (delta > 0.0) {
-		return known_N_compute_B_and_K_slow(N, epsilon, delta, quantiles, returnSamplingRate);
-	}
-	return known_N_compute_B_and_K_quick(N, epsilon);
+  returnSamplingRate[0] = 1.0;
+  if (epsilon<=0.0) {
+    // no way around exact quantile search
+    long[] result = new long[2];
+    result[0]=1;
+    result[1]=N;
+    return result;
+  }
+  if (epsilon>=1.0 || delta>=1.0) {
+    // can make any error we wish
+    long[] result = new long[2];
+    result[0]=2;
+    result[1]=1;
+    return result;
+  }
+
+  if (delta > 0.0) {
+    return known_N_compute_B_and_K_slow(N, epsilon, delta, quantiles, returnSamplingRate);
+  }
+  return known_N_compute_B_and_K_quick(N, epsilon);
 }
 /**
  * Computes the number of buffers and number of values per buffer such that
@@ -143,92 +141,92 @@
  * @param epsilon the approximation error which is guaranteed not to be exceeded (e.g. <tt>0.001</tt>) (<tt>0 &lt;= epsilon &lt;= 1</tt>). To get exact result, set <tt>epsilon=0.0</tt>;
  */
 protected static long[] known_N_compute_B_and_K_quick(long N, double epsilon) {
-	final int maxBuffers = 50;
-	final int maxHeight = 50;
-	final double N_double = (double) N;
-	final double c = N_double * epsilon * 2.0;
-	int[] heightMaximums = new int[maxBuffers-1];
-
-	// for each b, determine maximum height, i.e. the height for which x<=0 and x is a maximum
-	// with x = binomial(b+h-2, h-1) - binomial(b+h-3, h-3) + binomial(b+h-3, h-2) - N * epsilon * 2.0
-	for (int b=2; b<=maxBuffers; b++) {
-		int h = 3;
-		
-		while ( h<=maxHeight && // skip heights until x<=0
-				(h-2) * (Arithmetic.binomial(b+h-2, h-1)) -
-				(Arithmetic.binomial(b+h-3, h-3)) +
-				(Arithmetic.binomial(b+h-3, h-2)) - c
-				> 0.0
-			  ) {h++;}
-		//from now on x is monotonically growing...
-		while ( h<=maxHeight && // skip heights until x>0
-				(h-2) * (Arithmetic.binomial(b+h-2, h-1)) -
-				(Arithmetic.binomial(b+h-3, h-3)) +
-				(Arithmetic.binomial(b+h-3, h-2)) - c
-				<= 0.0
-			  ) {h++;}
-		h--; //go back to last height
-
-		// was x>0 or did we loop without finding anything?
-		int hMax;
-		if (h>=maxHeight && 
-				(h-2) * (Arithmetic.binomial(b+h-2, h-1)) -
-				(Arithmetic.binomial(b+h-3, h-3)) +
-				(Arithmetic.binomial(b+h-3, h-2)) - c
-				> 0.0) {
-			hMax=Integer.MIN_VALUE;
-		}
-		else {
-			hMax=h;
-		}
-				
-		heightMaximums[b-2]=hMax; //safe some space
-	} //end for
-
-
-	// for each b, determine the smallest k satisfying the constraints, i.e.
-	// for each b, determine kMin, with kMin = N/binomial(b+hMax-2,hMax-1)
-	long[] kMinimums = new long[maxBuffers-1];
-	for (int b=2; b<=maxBuffers; b++) {
-		int h=heightMaximums[b-2];
-		long kMin=Long.MAX_VALUE;
-		if (h>Integer.MIN_VALUE) {
-			double value = (Arithmetic.binomial(b+h-2, h-1));
-			long tmpK=(long)(Math.ceil(N_double/value));
-			if (tmpK<=Long.MAX_VALUE) {
-				kMin = tmpK;
-			}
-		}
-		kMinimums[b-2]=kMin;
-	}
-
-	// from all b's, determine b that minimizes b*kMin
-	long multMin = Long.MAX_VALUE;
-	int minB = -1;
-	for (int b=2; b<=maxBuffers; b++) {
-		if (kMinimums[b-2]<Long.MAX_VALUE) {
-			long mult = ((long)b) * ((long)kMinimums[b-2]);
-			if (mult<multMin) {
-				multMin=mult;
-				minB = b;
-			}
-		}
-	}
-
-	long b, k;
-	if (minB != -1) { // epsilon large enough?
-		b = minB;
-		k = kMinimums[minB-2];
-	}
-	else {     // epsilon is very small or zero.
-		b = 1; // the only possible solution without violating the 
-		k = N; // approximation guarantees is exact quantile search.
-	}
-
-	long[] result = new long[2];
-	result[0]=b;
-	result[1]=k;
-	return result;
+  final int maxBuffers = 50;
+  final int maxHeight = 50;
+  final double N_double = (double) N;
+  final double c = N_double * epsilon * 2.0;
+  int[] heightMaximums = new int[maxBuffers-1];
+
+  // for each b, determine maximum height, i.e. the height for which x<=0 and x is a maximum
+  // with x = binomial(b+h-2, h-1) - binomial(b+h-3, h-3) + binomial(b+h-3, h-2) - N * epsilon * 2.0
+  for (int b=2; b<=maxBuffers; b++) {
+    int h = 3;
+    
+    while ( h<=maxHeight && // skip heights until x<=0
+        (h-2) * (Arithmetic.binomial(b+h-2, h-1)) -
+        (Arithmetic.binomial(b+h-3, h-3)) +
+        (Arithmetic.binomial(b+h-3, h-2)) - c
+        > 0.0
+        ) {h++;}
+    //from now on x is monotonically growing...
+    while ( h<=maxHeight && // skip heights until x>0
+        (h-2) * (Arithmetic.binomial(b+h-2, h-1)) -
+        (Arithmetic.binomial(b+h-3, h-3)) +
+        (Arithmetic.binomial(b+h-3, h-2)) - c
+        <= 0.0
+        ) {h++;}
+    h--; //go back to last height
+
+    // was x>0 or did we loop without finding anything?
+    int hMax;
+    if (h>=maxHeight && 
+        (h-2) * (Arithmetic.binomial(b+h-2, h-1)) -
+        (Arithmetic.binomial(b+h-3, h-3)) +
+        (Arithmetic.binomial(b+h-3, h-2)) - c
+        > 0.0) {
+      hMax=Integer.MIN_VALUE;
+    }
+    else {
+      hMax=h;
+    }
+        
+    heightMaximums[b-2]=hMax; //safe some space
+  } //end for
+
+
+  // for each b, determine the smallest k satisfying the constraints, i.e.
+  // for each b, determine kMin, with kMin = N/binomial(b+hMax-2,hMax-1)
+  long[] kMinimums = new long[maxBuffers-1];
+  for (int b=2; b<=maxBuffers; b++) {
+    int h=heightMaximums[b-2];
+    long kMin=Long.MAX_VALUE;
+    if (h>Integer.MIN_VALUE) {
+      double value = (Arithmetic.binomial(b+h-2, h-1));
+      long tmpK=(long)(Math.ceil(N_double/value));
+      if (tmpK<=Long.MAX_VALUE) {
+        kMin = tmpK;
+      }
+    }
+    kMinimums[b-2]=kMin;
+  }
+
+  // from all b's, determine b that minimizes b*kMin
+  long multMin = Long.MAX_VALUE;
+  int minB = -1;
+  for (int b=2; b<=maxBuffers; b++) {
+    if (kMinimums[b-2]<Long.MAX_VALUE) {
+      long mult = ((long)b) * ((long)kMinimums[b-2]);
+      if (mult<multMin) {
+        multMin=mult;
+        minB = b;
+      }
+    }
+  }
+
+  long b, k;
+  if (minB != -1) { // epsilon large enough?
+    b = minB;
+    k = kMinimums[minB-2];
+  }
+  else {     // epsilon is very small or zero.
+    b = 1; // the only possible solution without violating the 
+    k = N; // approximation guarantees is exact quantile search.
+  }
+
+  long[] result = new long[2];
+  result[0]=b;
+  result[1]=k;
+  return result;
 }
 /**
  * Computes the number of buffers and number of values per buffer such that
@@ -243,71 +241,71 @@
  * @return <tt>long[2]</tt> - <tt>long[0]</tt>=the number of buffers, <tt>long[1]</tt>=the number of elements per buffer, <tt>returnSamplingRate[0]</tt>=the required sampling rate.
  */
 protected static long[] known_N_compute_B_and_K_slow(long N, double epsilon, double delta, int quantiles, double[] returnSamplingRate) {
-	final int maxBuffers = 50;
-	final int maxHeight = 50;
-	final double N_double = N;
-
-	// One possibility is to use one buffer of size N
-	//
-	long ret_b = 1;
-	long ret_k = N;
-	double sampling_rate = 1.0;
-	long memory = N;
-
-
-	// Otherwise, there are at least two buffers (b >= 2)
-	// and the height of the tree is at least three (h >= 3)
-	//
-	// We restrict the search for b and h to MAX_BINOM, a large enough value for
-	// practical values of    epsilon >= 0.001   and    delta >= 0.00001
-	//
-	final double logarithm = Math.log(2.0*quantiles/delta);
-	final double c = 2.0 * epsilon * N_double;
-	for (long b=2 ; b<maxBuffers ; b++)
-		for (long h=3 ; h<maxHeight ; h++) {
-			double binomial = Arithmetic.binomial(b+h-2, h-1);
-			long tmp = (long) Math.ceil(N_double / binomial);
-			if ((b * tmp < memory) && 
-					((h-2) * binomial - Arithmetic.binomial(b+h-3, h-3) + Arithmetic.binomial(b+h-3, h-2)
-					<= c) ) {
-				ret_k = tmp ;
-				ret_b = b ;
-				memory = ret_k * b;
-				sampling_rate = 1.0 ;
-			}
-			if (delta > 0.0) {
-				double t = (h-2) * Arithmetic.binomial(b+h-2, h-1) - Arithmetic.binomial(b+h-3, h-3) + Arithmetic.binomial(b+h-3, h-2) ;
-				double u = logarithm / epsilon ;
-				double v = Arithmetic.binomial (b+h-2, h-1) ;
-				double w = logarithm / (2.0*epsilon*epsilon) ;
-
-				// From our SIGMOD 98 paper, we have two equantions to satisfy:
-				// t  <= u * alpha/(1-alpha)^2
-				// kv >= w/(1-alpha)^2
-				//
-				// Denoting 1/(1-alpha)    by x,
-				// we see that the first inequality is equivalent to
-				// t/u <= x^2 - x
-				// which is satisfied by x >= 0.5 + 0.5 * sqrt (1 + 4t/u)
-				// Plugging in this value into second equation yields
-				// k >= wx^2/v
-
-				double x = 0.5 + 0.5 * Math.sqrt(1.0 + 4.0*t/u) ;
-				long k = (long) Math.ceil(w*x*x/v) ;
-				if (b * k < memory) {
-					ret_k = k ;
-					ret_b = b ;
-					memory = b * k ;
-					sampling_rate = N_double*2.0*epsilon*epsilon / logarithm ;
-				}
-			}
-		}
-		
-	long[] result = new long[2];
-	result[0]=ret_b;
-	result[1]=ret_k;
-	returnSamplingRate[0]=sampling_rate;
-	return result;
+  final int maxBuffers = 50;
+  final int maxHeight = 50;
+  final double N_double = N;
+
+  // One possibility is to use one buffer of size N
+  //
+  long ret_b = 1;
+  long ret_k = N;
+  double sampling_rate = 1.0;
+  long memory = N;
+
+
+  // Otherwise, there are at least two buffers (b >= 2)
+  // and the height of the tree is at least three (h >= 3)
+  //
+  // We restrict the search for b and h to MAX_BINOM, a large enough value for
+  // practical values of    epsilon >= 0.001   and    delta >= 0.00001
+  //
+  final double logarithm = Math.log(2.0*quantiles/delta);
+  final double c = 2.0 * epsilon * N_double;
+  for (long b=2 ; b<maxBuffers ; b++)
+    for (long h=3 ; h<maxHeight ; h++) {
+      double binomial = Arithmetic.binomial(b+h-2, h-1);
+      long tmp = (long) Math.ceil(N_double / binomial);
+      if ((b * tmp < memory) && 
+          ((h-2) * binomial - Arithmetic.binomial(b+h-3, h-3) + Arithmetic.binomial(b+h-3, h-2)
+          <= c) ) {
+        ret_k = tmp ;
+        ret_b = b ;
+        memory = ret_k * b;
+        sampling_rate = 1.0 ;
+      }
+      if (delta > 0.0) {
+        double t = (h-2) * Arithmetic.binomial(b+h-2, h-1) - Arithmetic.binomial(b+h-3, h-3) + Arithmetic.binomial(b+h-3, h-2) ;
+        double u = logarithm / epsilon ;
+        double v = Arithmetic.binomial (b+h-2, h-1) ;
+        double w = logarithm / (2.0*epsilon*epsilon) ;
+
+        // From our SIGMOD 98 paper, we have two equantions to satisfy:
+        // t  <= u * alpha/(1-alpha)^2
+        // kv >= w/(1-alpha)^2
+        //
+        // Denoting 1/(1-alpha)    by x,
+        // we see that the first inequality is equivalent to
+        // t/u <= x^2 - x
+        // which is satisfied by x >= 0.5 + 0.5 * sqrt (1 + 4t/u)
+        // Plugging in this value into second equation yields
+        // k >= wx^2/v
+
+        double x = 0.5 + 0.5 * Math.sqrt(1.0 + 4.0*t/u) ;
+        long k = (long) Math.ceil(w*x*x/v) ;
+        if (b * k < memory) {
+          ret_k = k ;
+          ret_b = b ;
+          memory = b * k ;
+          sampling_rate = N_double*2.0*epsilon*epsilon / logarithm ;
+        }
+      }
+    }
+    
+  long[] result = new long[2];
+  result[0]=ret_b;
+  result[1]=ret_k;
+  returnSamplingRate[0]=sampling_rate;
+  return result;
 }
 /**
  * Returns a quantile finder that minimizes the amount of memory needed under the user provided constraints.
@@ -318,8 +316,8 @@
  *
  * @param known_N specifies whether the number of elements over which quantiles are to be computed is known or not.
  * @param N if <tt>known_N==true</tt>, the number of elements over which quantiles are to be computed.
- *			if <tt>known_N==false</tt>, the upper limit on the number of elements over which quantiles are to be computed. 
- * 			If such an upper limit is a-priori unknown, then set <tt>N = Long.MAX_VALUE</tt>.
+ *      if <tt>known_N==false</tt>, the upper limit on the number of elements over which quantiles are to be computed. 
+ *       If such an upper limit is a-priori unknown, then set <tt>N = Long.MAX_VALUE</tt>.
  * @param epsilon the approximation error which is guaranteed not to be exceeded (e.g. <tt>0.001</tt>) (<tt>0 &lt;= epsilon &lt;= 1</tt>). To get exact result, set <tt>epsilon=0.0</tt>;
  * @param delta the probability that the approximation error is more than than epsilon (e.g. 0.0001) (0 &lt;= delta &lt;= 1). To avoid probabilistic answers, set <tt>delta=0.0</tt>.
  * @param quantiles the number of quantiles to be computed (e.g. <tt>100</tt>) (<tt>quantiles &gt;= 1</tt>). If unknown in advance, set this number large, e.g. <tt>quantiles &gt;= 10000</tt>.
@@ -327,62 +325,62 @@
  * @return the quantile finder minimizing memory requirements under the given constraints.
  */
 public static DoubleQuantileFinder newDoubleQuantileFinder(boolean known_N, long N, double epsilon, double delta, int quantiles, RandomEngine generator) {
-	//boolean known_N = true;
-	//if (N==Long.MAX_VALUE) known_N = false;
-	// check parameters.
-	// if they are illegal, keep quite and return an exact finder.
-	if (epsilon <= 0.0 || N < 1000) return new ExactDoubleQuantileFinder();
-	if (epsilon > 1) epsilon = 1;
-	if (delta < 0) delta = 0;
-	if (delta > 1) delta = 1;
-	if (quantiles < 1) quantiles = 1;
-	if (quantiles > N) N = quantiles;
-	
-	KnownDoubleQuantileEstimator finder;
-	if (known_N) {
-		double[] samplingRate = new double[1];
-		long[] resultKnown = known_N_compute_B_and_K(N, epsilon, delta, quantiles, samplingRate);
-		long b = resultKnown[0];
-		long k = resultKnown[1];
-		if (b == 1) return new ExactDoubleQuantileFinder();
-		return new KnownDoubleQuantileEstimator((int)b,(int)k, N, samplingRate[0], generator);
-	}
-	else {
-		long[] resultUnknown = unknown_N_compute_B_and_K(epsilon, delta, quantiles);
-		long b1 = resultUnknown[0];
-		long k1 = resultUnknown[1];
-		long h1 = resultUnknown[2];
-		double preComputeEpsilon = -1.0;
-		if (resultUnknown[3] == 1) preComputeEpsilon = epsilon;
-
-		//if (N==Long.MAX_VALUE) { // no maximum N provided by user.
-		
-		// if (true) fixes bug reported by LarryPeranich@fairisaac.com
-		if (true) { // no maximum N provided by user.
-			if (b1 == 1) return new ExactDoubleQuantileFinder();
-			return new UnknownDoubleQuantileEstimator((int)b1,(int)k1, (int)h1, preComputeEpsilon, generator);
-		}
-
-		// determine whether UnknownFinder or KnownFinder with maximum N requires less memory.
-		double[] samplingRate = new double[1];
-		
-		// IMPORTANT: for known finder, switch sampling off (delta == 0) !!!
-		// with knownN-sampling we can only guarantee the errors if the input sequence has EXACTLY N elements.
-		// with knownN-no sampling we can also guarantee the errors for sequences SMALLER than N elements.
-		long[] resultKnown = known_N_compute_B_and_K(N, epsilon, 0, quantiles, samplingRate);
-		
-		long b2 = resultKnown[0];
-		long k2 = resultKnown[1];
-		
-		if (b2 * k2 < b1 * k1) { // the KnownFinder is smaller
-			if (b2 == 1) return new ExactDoubleQuantileFinder();
-			return new KnownDoubleQuantileEstimator((int)b2,(int)k2, N, samplingRate[0], generator);
-		}
-
-		// the UnknownFinder is smaller
-		if (b1 == 1) return new ExactDoubleQuantileFinder();
-		return new UnknownDoubleQuantileEstimator((int)b1,(int)k1, (int)h1, preComputeEpsilon, generator);
-	}
+  //boolean known_N = true;
+  //if (N==Long.MAX_VALUE) known_N = false;
+  // check parameters.
+  // if they are illegal, keep quite and return an exact finder.
+  if (epsilon <= 0.0 || N < 1000) return new ExactDoubleQuantileFinder();
+  if (epsilon > 1) epsilon = 1;
+  if (delta < 0) delta = 0;
+  if (delta > 1) delta = 1;
+  if (quantiles < 1) quantiles = 1;
+  if (quantiles > N) N = quantiles;
+  
+  KnownDoubleQuantileEstimator finder;
+  if (known_N) {
+    double[] samplingRate = new double[1];
+    long[] resultKnown = known_N_compute_B_and_K(N, epsilon, delta, quantiles, samplingRate);
+    long b = resultKnown[0];
+    long k = resultKnown[1];
+    if (b == 1) return new ExactDoubleQuantileFinder();
+    return new KnownDoubleQuantileEstimator((int)b,(int)k, N, samplingRate[0], generator);
+  }
+  else {
+    long[] resultUnknown = unknown_N_compute_B_and_K(epsilon, delta, quantiles);
+    long b1 = resultUnknown[0];
+    long k1 = resultUnknown[1];
+    long h1 = resultUnknown[2];
+    double preComputeEpsilon = -1.0;
+    if (resultUnknown[3] == 1) preComputeEpsilon = epsilon;
+
+    //if (N==Long.MAX_VALUE) { // no maximum N provided by user.
+    
+    // if (true) fixes bug reported by LarryPeranich@fairisaac.com
+    if (true) { // no maximum N provided by user.
+      if (b1 == 1) return new ExactDoubleQuantileFinder();
+      return new UnknownDoubleQuantileEstimator((int)b1,(int)k1, (int)h1, preComputeEpsilon, generator);
+    }
+
+    // determine whether UnknownFinder or KnownFinder with maximum N requires less memory.
+    double[] samplingRate = new double[1];
+    
+    // IMPORTANT: for known finder, switch sampling off (delta == 0) !!!
+    // with knownN-sampling we can only guarantee the errors if the input sequence has EXACTLY N elements.
+    // with knownN-no sampling we can also guarantee the errors for sequences SMALLER than N elements.
+    long[] resultKnown = known_N_compute_B_and_K(N, epsilon, 0, quantiles, samplingRate);
+    
+    long b2 = resultKnown[0];
+    long k2 = resultKnown[1];
+    
+    if (b2 * k2 < b1 * k1) { // the KnownFinder is smaller
+      if (b2 == 1) return new ExactDoubleQuantileFinder();
+      return new KnownDoubleQuantileEstimator((int)b2,(int)k2, N, samplingRate[0], generator);
+    }
+
+    // the UnknownFinder is smaller
+    if (b1 == 1) return new ExactDoubleQuantileFinder();
+    return new UnknownDoubleQuantileEstimator((int)b1,(int)k1, (int)h1, preComputeEpsilon, generator);
+  }
 }
 /**
  * Convenience method that computes phi's for equi-depth histograms.
@@ -390,9 +388,9 @@
  * @return the equi-depth phi's
  */
 public static org.apache.mahout.matrix.list.DoubleArrayList newEquiDepthPhis(int quantiles) {
-	org.apache.mahout.matrix.list.DoubleArrayList phis = new org.apache.mahout.matrix.list.DoubleArrayList(quantiles-1);
-	for (int i=1; i<=quantiles-1; i++) phis.add(i / (double)quantiles);
-	return phis;
+  org.apache.mahout.matrix.list.DoubleArrayList phis = new org.apache.mahout.matrix.list.DoubleArrayList(quantiles-1);
+  for (int i=1; i<=quantiles-1; i++) phis.add(i / (double)quantiles);
+  return phis;
 }
 /**
  * Computes the number of buffers and number of values per buffer such that
@@ -404,30 +402,30 @@
  * @return <tt>long[4]</tt> - <tt>long[0]</tt>=the number of buffers, <tt>long[1]</tt>=the number of elements per buffer, <tt>long[2]</tt>=the tree height where sampling shall start, <tt>long[3]==1</tt> if precomputing is better, otherwise 0;
  */
 public static long[] unknown_N_compute_B_and_K(double epsilon, double delta, int quantiles) {
-	return unknown_N_compute_B_and_K_raw(epsilon,delta,quantiles);
-	// move stuff from _raw(..) here and delete _raw(...)
-	
-	/*
-	long[] result_1 = unknown_N_compute_B_and_K_raw(epsilon,delta,quantiles);
-	long b1 = result_1[0];
-	long k1 = result_1[1];
-
-	
-	int quantilesToPrecompute = (int) Doubles.ceiling(1.0 / epsilon);
-	
-	if (quantiles>quantilesToPrecompute) {
-		// try if precomputing quantiles requires less memory.
-		long[] result_2 = unknown_N_compute_B_and_K_raw(epsilon/2.0,delta,quantilesToPrecompute);
-		
-		long b2 = result_2[0];
-		long k2 = result_2[1];
-		if (b2*k2 < b1*k1) {
-			result_2[3] = 1; //precomputation is better
-			result_1 = result_2;
-		}
-	}
-	return result_1;
-	*/
+  return unknown_N_compute_B_and_K_raw(epsilon,delta,quantiles);
+  // move stuff from _raw(..) here and delete _raw(...)
+  
+  /*
+  long[] result_1 = unknown_N_compute_B_and_K_raw(epsilon,delta,quantiles);
+  long b1 = result_1[0];
+  long k1 = result_1[1];
+
+  
+  int quantilesToPrecompute = (int) Doubles.ceiling(1.0 / epsilon);
+  
+  if (quantiles>quantilesToPrecompute) {
+    // try if precomputing quantiles requires less memory.
+    long[] result_2 = unknown_N_compute_B_and_K_raw(epsilon/2.0,delta,quantilesToPrecompute);
+    
+    long b2 = result_2[0];
+    long k2 = result_2[1];
+    if (b2*k2 < b1*k1) {
+      result_2[3] = 1; //precomputation is better
+      result_1 = result_2;
+    }
+  }
+  return result_1;
+  */
 }
 /**
  * Computes the number of buffers and number of values per buffer such that
@@ -440,136 +438,136 @@
  * @return <tt>long[4]</tt> - <tt>long[0]</tt>=the number of buffers, <tt>long[1]</tt>=the number of elements per buffer, <tt>long[2]</tt>=the tree height where sampling shall start, <tt>long[3]==1</tt> if precomputing is better, otherwise 0;
  */
 protected static long[] unknown_N_compute_B_and_K_raw(double epsilon, double delta, int quantiles) {
-	// delta can be set to zero, i.e., all quantiles should be approximate with probability 1	
-	if (epsilon<=0.0) {
-		long[] result = new long[4];
-		result[0]=1;
-		result[1]=Long.MAX_VALUE;
-		result[2]=Long.MAX_VALUE;
-		result[3]=0;
-		return result;
-	}
-	if (epsilon>=1.0 || delta>=1.0) {
-		// can make any error we wish
-		long[] result = new long[4];
-		result[0]=2;
-		result[1]=1;
-		result[2]=3;
-		result[3]=0;
-		return result;
-	}
-	if (delta<=0.0) {
-		// no way around exact quantile search
-		long[] result = new long[4];
-		result[0]=1;
-		result[1]=Long.MAX_VALUE;
-		result[2]=Long.MAX_VALUE;
-		result[3]=0;
-		return result;
-	}
-
-	int max_b = 50;
-	int max_h = 50;
-	int max_H = 50;
-	int max_Iterations = 2;
-
-	long best_b = Long.MAX_VALUE;
-	long best_k = Long.MAX_VALUE;
-	long best_h = Long.MAX_VALUE;
-	long best_memory = Long.MAX_VALUE;
-
-	double pow = Math.pow(2.0, max_H);
-	double logDelta =  Math.log(2.0/(delta/quantiles)) / (2.0*epsilon*epsilon);
-	//double logDelta =  Math.log(2.0/(quantiles*delta)) / (2.0*epsilon*epsilon);
-
-	while (best_b == Long.MAX_VALUE && max_Iterations-- > 0) { //until we find a solution
-		// identify that combination of b and h that minimizes b*k.
-		// exhaustive search.
-		for (int b=2; b<=max_b; b++) {
-			for (int h=2; h<=max_h; h++) {
-				double Ld = Arithmetic.binomial(b+h-2, h-1);
-				double Ls = Arithmetic.binomial(b+h-3,h-1);
-				
-				// now we have k>=c*(1-alpha)^-2.
-				// let's compute c.
-				//double c = Math.log(2.0/(delta/quantiles)) / (2.0*epsilon*epsilon*Math.min(Ld, 8.0*Ls/3.0));
-				double c = logDelta / Math.min(Ld, 8.0*Ls/3.0);
-
-				// now we have k>=d/alpha.
-				// let's compute d.				
-				double beta = Ld/Ls;
-				double cc = (beta-2.0)*(max_H-2.0) / (beta + pow - 2.0);
-				double d = (h+3+cc) / (2.0*epsilon);
-				
-				/*
-				double d = (Ld*(h+max_H-1.0)  +  Ls*((h+1)*pow - 2.0*(h+max_H)))   /   (Ld + Ls*(pow-2.0));
-				d = (d + 2.0) / (2.0*epsilon);
-				*/
-
-				// now we have c*(1-alpha)^-2 == d/alpha.
-				// we solve this equation for alpha yielding two solutions
-				// alpha_1,2 = (c + 2*d  +-  Sqrt(c*c + 4*c*d))/(2*d)				
-				double f = c*c + 4.0*c*d;
-				if (f<0.0) continue; // non real solution to equation
-				double root = Math.sqrt(f);
-				double alpha_one = (c + 2.0*d + root)/(2.0*d);
-				double alpha_two = (c + 2.0*d - root)/(2.0*d);
-
-				// any alpha must satisfy 0<alpha<1 to yield valid solutions
-				boolean alpha_one_OK = false;
-				boolean alpha_two_OK = false;
-				if (0.0 < alpha_one && alpha_one < 1.0) alpha_one_OK = true;
-				if (0.0 < alpha_two && alpha_two < 1.0) alpha_two_OK = true;
-				if (alpha_one_OK || alpha_two_OK) {
-					double alpha = alpha_one;
-					if (alpha_one_OK && alpha_two_OK) {
-						// take the alpha that minimizes d/alpha
-						alpha = Math.max(alpha_one, alpha_two);
-					}
-					else if (alpha_two_OK) {
-						alpha = alpha_two;
-					}
-
-					// now we have k=Ceiling(Max(d/alpha, (h+1)/(2*epsilon)))
-					long k = (long) Math.ceil(Math.max(d/alpha, (h+1)/(2.0*epsilon)));
-					if (k>0) { // valid solution?
-						long memory = b*k;
-						if (memory < best_memory) { 
-							// found a solution requiring less memory
-							best_k = k;
-							best_b = b;
-							best_h = h;
-							best_memory = memory;
-						}
-					}
-				}
-			} //end for h
-		} //end for b
-		
-		if (best_b == Long.MAX_VALUE) {
-			System.out.println("Warning: Computing b and k looks like a lot of work!");
-			// no solution found so far. very unlikely. Anyway, try again.
-			max_b *= 2;
-			max_h *= 2;
-			max_H *= 2;
-		}
-	} //end while
-
-	long[] result = new long[4];
-	result[3]=0;
-	if (best_b == Long.MAX_VALUE) {
-		// no solution found.
-		// no way around exact quantile search.
-		result[0]=1;
-		result[1]=Long.MAX_VALUE;
-		result[2]=Long.MAX_VALUE;
-	}
-	else {
-		result[0]=best_b;
-		result[1]=best_k;
-		result[2]=best_h;
-	}
+  // delta can be set to zero, i.e., all quantiles should be approximate with probability 1  
+  if (epsilon<=0.0) {
+    long[] result = new long[4];
+    result[0]=1;
+    result[1]=Long.MAX_VALUE;
+    result[2]=Long.MAX_VALUE;
+    result[3]=0;
+    return result;
+  }
+  if (epsilon>=1.0 || delta>=1.0) {
+    // can make any error we wish
+    long[] result = new long[4];
+    result[0]=2;
+    result[1]=1;
+    result[2]=3;
+    result[3]=0;
+    return result;
+  }
+  if (delta<=0.0) {
+    // no way around exact quantile search
+    long[] result = new long[4];
+    result[0]=1;
+    result[1]=Long.MAX_VALUE;
+    result[2]=Long.MAX_VALUE;
+    result[3]=0;
+    return result;
+  }
+
+  int max_b = 50;
+  int max_h = 50;
+  int max_H = 50;
+  int max_Iterations = 2;
+
+  long best_b = Long.MAX_VALUE;
+  long best_k = Long.MAX_VALUE;
+  long best_h = Long.MAX_VALUE;
+  long best_memory = Long.MAX_VALUE;
+
+  double pow = Math.pow(2.0, max_H);
+  double logDelta =  Math.log(2.0/(delta/quantiles)) / (2.0*epsilon*epsilon);
+  //double logDelta =  Math.log(2.0/(quantiles*delta)) / (2.0*epsilon*epsilon);
+
+  while (best_b == Long.MAX_VALUE && max_Iterations-- > 0) { //until we find a solution
+    // identify that combination of b and h that minimizes b*k.
+    // exhaustive search.
+    for (int b=2; b<=max_b; b++) {
+      for (int h=2; h<=max_h; h++) {
+        double Ld = Arithmetic.binomial(b+h-2, h-1);
+        double Ls = Arithmetic.binomial(b+h-3,h-1);
+        
+        // now we have k>=c*(1-alpha)^-2.
+        // let's compute c.
+        //double c = Math.log(2.0/(delta/quantiles)) / (2.0*epsilon*epsilon*Math.min(Ld, 8.0*Ls/3.0));
+        double c = logDelta / Math.min(Ld, 8.0*Ls/3.0);
+
+        // now we have k>=d/alpha.
+        // let's compute d.        
+        double beta = Ld/Ls;
+        double cc = (beta-2.0)*(max_H-2.0) / (beta + pow - 2.0);
+        double d = (h+3+cc) / (2.0*epsilon);
+        
+        /*
+        double d = (Ld*(h+max_H-1.0)  +  Ls*((h+1)*pow - 2.0*(h+max_H)))   /   (Ld + Ls*(pow-2.0));
+        d = (d + 2.0) / (2.0*epsilon);
+        */
+
+        // now we have c*(1-alpha)^-2 == d/alpha.
+        // we solve this equation for alpha yielding two solutions
+        // alpha_1,2 = (c + 2*d  +-  Sqrt(c*c + 4*c*d))/(2*d)        
+        double f = c*c + 4.0*c*d;
+        if (f<0.0) continue; // non real solution to equation
+        double root = Math.sqrt(f);
+        double alpha_one = (c + 2.0*d + root)/(2.0*d);
+        double alpha_two = (c + 2.0*d - root)/(2.0*d);
+
+        // any alpha must satisfy 0<alpha<1 to yield valid solutions
+        boolean alpha_one_OK = false;
+        boolean alpha_two_OK = false;
+        if (0.0 < alpha_one && alpha_one < 1.0) alpha_one_OK = true;
+        if (0.0 < alpha_two && alpha_two < 1.0) alpha_two_OK = true;
+        if (alpha_one_OK || alpha_two_OK) {
+          double alpha = alpha_one;
+          if (alpha_one_OK && alpha_two_OK) {
+            // take the alpha that minimizes d/alpha
+            alpha = Math.max(alpha_one, alpha_two);
+          }
+          else if (alpha_two_OK) {
+            alpha = alpha_two;
+          }
+
+          // now we have k=Ceiling(Max(d/alpha, (h+1)/(2*epsilon)))
+          long k = (long) Math.ceil(Math.max(d/alpha, (h+1)/(2.0*epsilon)));
+          if (k>0) { // valid solution?
+            long memory = b*k;
+            if (memory < best_memory) { 
+              // found a solution requiring less memory
+              best_k = k;
+              best_b = b;
+              best_h = h;
+              best_memory = memory;
+            }
+          }
+        }
+      } //end for h
+    } //end for b
+    
+    if (best_b == Long.MAX_VALUE) {
+      System.out.println("Warning: Computing b and k looks like a lot of work!");
+      // no solution found so far. very unlikely. Anyway, try again.
+      max_b *= 2;
+      max_h *= 2;
+      max_H *= 2;
+    }
+  } //end while
+
+  long[] result = new long[4];
+  result[3]=0;
+  if (best_b == Long.MAX_VALUE) {
+    // no solution found.
+    // no way around exact quantile search.
+    result[0]=1;
+    result[1]=Long.MAX_VALUE;
+    result[2]=Long.MAX_VALUE;
+  }
+  else {
+    result[0]=best_b;
+    result[1]=best_k;
+    result[2]=best_h;
+  }
 
-	return result;
+  return result;
 }
 }