You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/11/25 04:36:02 UTC

svn commit: r883973 [14/14] - in /lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix: ./ bitvector/ buffer/ function/ list/ list/adapter/ map/

Modified: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntObjectHashMap.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntObjectHashMap.java?rev=883973&r1=883972&r2=883973&view=diff
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntObjectHashMap.java (original)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntObjectHashMap.java Wed Nov 25 03:35:59 2009
@@ -21,47 +21,47 @@
 
 @author wolfgang.hoschek@cern.ch
 @version 1.0, 09/24/99
-@see	    java.util.HashMap
+@see      java.util.HashMap
 */
 /** 
  * @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported.
  */
 @Deprecated
 public class OpenIntObjectHashMap extends AbstractIntObjectMap {
-	 /**
-	 * The hash table keys.
-	 * @serial
-	 */
-	protected int table[];
-
-	 /**
-	 * The hash table values.
-	 * @serial
-	 */
-	protected Object values[];
-
-	/**
-	 * The state of each hash table entry (FREE, FULL, REMOVED).
-	 * @serial
-	 */
-	protected byte state[];
-	
-	/**
-	 * The number of table entries in state==FREE.
-	 * @serial
-	 */
-	protected int freeEntries;
-
-	
-	protected static final byte FREE = 0;
-	protected static final byte FULL = 1;
-	protected static final byte REMOVED = 2;
+   /**
+   * The hash table keys.
+   * @serial
+   */
+  protected int table[];
+
+   /**
+   * The hash table values.
+   * @serial
+   */
+  protected Object values[];
+
+  /**
+   * The state of each hash table entry (FREE, FULL, REMOVED).
+   * @serial
+   */
+  protected byte state[];
+  
+  /**
+   * The number of table entries in state==FREE.
+   * @serial
+   */
+  protected int freeEntries;
+
+  
+  protected static final byte FREE = 0;
+  protected static final byte FULL = 1;
+  protected static final byte REMOVED = 2;
 
 /**
  * Constructs an empty map with default capacity and default load factors.
  */
 public OpenIntObjectHashMap() {
-	this(defaultCapacity);
+  this(defaultCapacity);
 }
 /**
  * Constructs an empty map with the specified initial capacity and default load factors.
@@ -71,7 +71,7 @@
  *             than zero.
  */
 public OpenIntObjectHashMap(int initialCapacity) {
-	this(initialCapacity, defaultMinLoadFactor, defaultMaxLoadFactor);
+  this(initialCapacity, defaultMinLoadFactor, defaultMaxLoadFactor);
 }
 /**
  * Constructs an empty map with
@@ -80,22 +80,22 @@
  * @param      initialCapacity   the initial capacity.
  * @param      minLoadFactor        the minimum load factor.
  * @param      maxLoadFactor        the maximum load factor.
- * @throws	IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
+ * @throws  IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
  */
 public OpenIntObjectHashMap(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
-	setUp(initialCapacity,minLoadFactor,maxLoadFactor);
+  setUp(initialCapacity,minLoadFactor,maxLoadFactor);
 }
 /**
  * Removes all (key,value) associations from the receiver.
  * Implicitly calls <tt>trimToSize()</tt>.
  */
 public void clear() {
-	new ByteArrayList(this.state).fillFromToWith(0, this.state.length-1, FREE);
-   	new ObjectArrayList(values).fillFromToWith(0, state.length-1, null); // delta
+  new ByteArrayList(this.state).fillFromToWith(0, this.state.length-1, FREE);
+     new ObjectArrayList(values).fillFromToWith(0, state.length-1, null); // delta
 
-	this.distinct = 0;
-	this.freeEntries = table.length; // delta
-	trimToSize();
+  this.distinct = 0;
+  this.freeEntries = table.length; // delta
+  trimToSize();
 }
 /**
  * Returns a deep copy of the receiver.
@@ -103,11 +103,11 @@
  * @return  a deep copy of the receiver.
  */
 public Object clone() {
-	OpenIntObjectHashMap copy = (OpenIntObjectHashMap) super.clone();
-	copy.table = (int[]) copy.table.clone();
-	copy.values = (Object[]) copy.values.clone();
-	copy.state = (byte[]) copy.state.clone();
-	return copy;
+  OpenIntObjectHashMap copy = (OpenIntObjectHashMap) super.clone();
+  copy.table = (int[]) copy.table.clone();
+  copy.values = (Object[]) copy.values.clone();
+  copy.state = (byte[]) copy.state.clone();
+  return copy;
 }
 /**
  * Returns <tt>true</tt> if the receiver contains the specified key.
@@ -115,7 +115,7 @@
  * @return <tt>true</tt> if the receiver contains the specified key.
  */
 public boolean containsKey(int key) {
-	return indexOfKey(key) >= 0;
+  return indexOfKey(key) >= 0;
 }
 /**
  * Returns <tt>true</tt> if the receiver contains the specified value.
@@ -123,7 +123,7 @@
  * @return <tt>true</tt> if the receiver contains the specified value.
  */
 public boolean containsValue(Object value) {
-	return indexOfValue(value) >= 0;
+  return indexOfValue(value) >= 0;
 }
 /**
  * Ensures that the receiver can hold at least the specified number of associations without needing to allocate new internal memory.
@@ -136,10 +136,10 @@
  * @param   minCapacity   the desired minimum capacity.
  */
 public void ensureCapacity(int minCapacity) {
-	if (table.length < minCapacity) {
-		int newCapacity = nextPrime(minCapacity);
-		rehash(newCapacity);
-	}
+  if (table.length < minCapacity) {
+    int newCapacity = nextPrime(minCapacity);
+    rehash(newCapacity);
+  }
 }
 /**
  * Applies a procedure to each key of the receiver, if any.
@@ -152,10 +152,10 @@
  * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise. 
  */
 public boolean forEachKey(IntProcedure procedure) {
-	for (int i = table.length ; i-- > 0 ;) {
-		if (state[i]==FULL) if (! procedure.apply(table[i])) return false;
-	}
-	return true;
+  for (int i = table.length ; i-- > 0 ;) {
+    if (state[i]==FULL) if (! procedure.apply(table[i])) return false;
+  }
+  return true;
 }
 /**
  * Applies a procedure to each (key,value) pair of the receiver, if any.
@@ -165,10 +165,10 @@
  * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise. 
  */
 public boolean forEachPair(final IntObjectProcedure procedure) {
-	for (int i = table.length ; i-- > 0 ;) {
-		if (state[i]==FULL) if (! procedure.apply(table[i],values[i])) return false;
-	}
-	return true;
+  for (int i = table.length ; i-- > 0 ;) {
+    if (state[i]==FULL) if (! procedure.apply(table[i],values[i])) return false;
+  }
+  return true;
 }
 /**
  * Returns the value associated with the specified key.
@@ -178,9 +178,9 @@
  * @return the value associated with the specified key; <tt>null</tt> if no such key is present.
  */
 public Object get(int key) {
-	int i = indexOfKey(key);
-	if (i<0) return null; //not contained
-	return values[i];
+  int i = indexOfKey(key);
+  if (i<0) return null; //not contained
+  return values[i];
 }
 /**
  * @param key the key to be added to the receiver.
@@ -190,86 +190,86 @@
  * If the returned index >= 0, then it is NOT already contained and should be inserted at slot index.
  */
 protected int indexOfInsertion(int key) {
-	final int tab[] = table;
-	final byte stat[] = state;
-	final int length = tab.length;
-
-	final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
-	int i = hash % length;
-	int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
-	//int decrement = (hash / length) % length;
-	if (decrement == 0) decrement = 1;
-
-	// stop if we find a removed or free slot, or if we find the key itself
-	// do NOT skip over removed slots (yes, open addressing is like that...)
-	while (stat[i] == FULL && tab[i] != key) {
-		i -= decrement;
-		//hashCollisions++;
-		if (i<0) i+=length;
-	}
-	
-	if (stat[i] == REMOVED) {
-		// stop if we find a free slot, or if we find the key itself.
-		// do skip over removed slots (yes, open addressing is like that...)
-		// assertion: there is at least one FREE slot.
-		int j = i;
-		while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
-			i -= decrement;
-			//hashCollisions++;
-			if (i<0) i+=length;
-		}
-		if (stat[i] == FREE) i = j;
-	}
-	
-	
-	if (stat[i] == FULL) {
-		// key already contained at slot i.
-		// return a negative number identifying the slot.
-		return -i-1;
-	}
-	// not already contained, should be inserted at slot i.
-	// return a number >= 0 identifying the slot.
-	return i; 
+  final int tab[] = table;
+  final byte stat[] = state;
+  final int length = tab.length;
+
+  final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+  int i = hash % length;
+  int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
+  //int decrement = (hash / length) % length;
+  if (decrement == 0) decrement = 1;
+
+  // stop if we find a removed or free slot, or if we find the key itself
+  // do NOT skip over removed slots (yes, open addressing is like that...)
+  while (stat[i] == FULL && tab[i] != key) {
+    i -= decrement;
+    //hashCollisions++;
+    if (i<0) i+=length;
+  }
+  
+  if (stat[i] == REMOVED) {
+    // stop if we find a free slot, or if we find the key itself.
+    // do skip over removed slots (yes, open addressing is like that...)
+    // assertion: there is at least one FREE slot.
+    int j = i;
+    while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
+      i -= decrement;
+      //hashCollisions++;
+      if (i<0) i+=length;
+    }
+    if (stat[i] == FREE) i = j;
+  }
+  
+  
+  if (stat[i] == FULL) {
+    // key already contained at slot i.
+    // return a negative number identifying the slot.
+    return -i-1;
+  }
+  // not already contained, should be inserted at slot i.
+  // return a number >= 0 identifying the slot.
+  return i; 
 }
 /**
  * @param key the key to be searched in the receiver.
  * @return the index where the key is contained in the receiver, returns -1 if the key was not found.
  */
 protected int indexOfKey(int key) {
-	final int tab[] = table;
-	final byte stat[] = state;
-	final int length = tab.length;
-
-	final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
-	int i = hash % length;
-	int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
-	//int decrement = (hash / length) % length;
-	if (decrement == 0) decrement = 1;
-
-	// stop if we find a free slot, or if we find the key itself.
-	// do skip over removed slots (yes, open addressing is like that...)
-	while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
-		i -= decrement;
-		//hashCollisions++;
-		if (i<0) i+=length;
-	}
-	
-	if (stat[i] == FREE) return -1; // not found
-	return i; //found, return index where key is contained
+  final int tab[] = table;
+  final byte stat[] = state;
+  final int length = tab.length;
+
+  final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+  int i = hash % length;
+  int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
+  //int decrement = (hash / length) % length;
+  if (decrement == 0) decrement = 1;
+
+  // stop if we find a free slot, or if we find the key itself.
+  // do skip over removed slots (yes, open addressing is like that...)
+  while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
+    i -= decrement;
+    //hashCollisions++;
+    if (i<0) i+=length;
+  }
+  
+  if (stat[i] == FREE) return -1; // not found
+  return i; //found, return index where key is contained
 }
 /**
  * @param value the value to be searched in the receiver.
  * @return the index where the value is contained in the receiver, returns -1 if the value was not found.
  */
 protected int indexOfValue(Object value) {
-	final Object val[] = values;
-	final byte stat[] = state;
+  final Object val[] = values;
+  final byte stat[] = state;
 
-	for (int i=stat.length; --i >= 0;) {
-		if (stat[i]==FULL && val[i]==value) return i;
-	}
+  for (int i=stat.length; --i >= 0;) {
+    if (stat[i]==FULL && val[i]==value) return i;
+  }
 
-	return -1; // not found
+  return -1; // not found
 }
 /**
  * Returns the first key the given value is associated with.
@@ -278,13 +278,13 @@
  *
  * @param value the value to search for.
  * @return the first key for which holds <tt>get(key) == value</tt>; 
- *		   returns <tt>Integer.MIN_VALUE</tt> if no such key exists.
+ *       returns <tt>Integer.MIN_VALUE</tt> if no such key exists.
  */
 public int keyOf(Object value) {
-	//returns the first key found; there may be more matching keys, however.
-	int i = indexOfValue(value);
-	if (i<0) return Integer.MIN_VALUE;
-	return table[i];
+  //returns the first key found; there may be more matching keys, however.
+  int i = indexOfValue(value);
+  if (i<0) return Integer.MIN_VALUE;
+  return table[i];
 }
 /**
  * Fills all keys contained in the receiver into the specified list.
@@ -297,16 +297,16 @@
  * @param list the list to be filled, can have any size.
  */
 public void keys(IntArrayList list) {
-	list.setSize(distinct);
-	int[] elements = list.elements();
-	
-	int[] tab = table;
-	byte[] stat = state;
-	
-	int j=0;
-	for (int i = tab.length ; i-- > 0 ;) {
-		if (stat[i]==FULL) elements[j++]=tab[i];
-	}
+  list.setSize(distinct);
+  int[] elements = list.elements();
+  
+  int[] tab = table;
+  byte[] stat = state;
+  
+  int j=0;
+  for (int i = tab.length ; i-- > 0 ;) {
+    if (stat[i]==FULL) elements[j++]=tab[i];
+  }
 }
 /**
 Fills all pairs satisfying a given condition into the specified lists.
@@ -318,7 +318,7 @@
 <br>
 <pre>
 IntObjectProcedure condition = new IntObjectProcedure() { // match even keys only
-	public boolean apply(int key, Object value) { return key%2==0; }
+  public boolean apply(int key, Object value) { return key%2==0; }
 }
 keys = (8,7,6), values = (1,2,2) --> keyList = (6,8), valueList = (2,1)</tt>
 </pre>
@@ -328,15 +328,15 @@
 @param valueList the list to be filled with values, can have any size.
 */
 public void pairsMatching(final IntObjectProcedure condition, final IntArrayList keyList, final ObjectArrayList valueList) {
-	keyList.clear();
-	valueList.clear();
-	
-	for (int i = table.length ; i-- > 0 ;) {
-		if (state[i]==FULL && condition.apply(table[i],values[i])) {
-			keyList.add(table[i]);
-			valueList.add(values[i]);
-		}
-	}
+  keyList.clear();
+  valueList.clear();
+  
+  for (int i = table.length ; i-- > 0 ;) {
+    if (state[i]==FULL && condition.apply(table[i],values[i])) {
+      keyList.add(table[i]);
+      valueList.add(values[i]);
+    }
+  }
 }
 /**
  * Associates the given key with the given value.
@@ -348,31 +348,31 @@
  *         <tt>false</tt> if the receiver did already contain such a key - the new value has now replaced the formerly associated value.
  */
 public boolean put(int key, Object value) {
-	int i = indexOfInsertion(key);	
-	if (i<0) { //already contained
-		i = -i -1;
-		this.values[i]=value;
-		return false;
-	}
-
-	if (this.distinct > this.highWaterMark) {
-		int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
-		rehash(newCapacity);
-		return put(key, value);
-	}
-
-	this.table[i]=key;
-	this.values[i]=value;
-	if (this.state[i]==FREE) this.freeEntries--;
-	this.state[i]=FULL;
-	this.distinct++;
-
-	if (this.freeEntries < 1) { //delta
-		int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
-		rehash(newCapacity);
-	}
+  int i = indexOfInsertion(key);  
+  if (i<0) { //already contained
+    i = -i -1;
+    this.values[i]=value;
+    return false;
+  }
+
+  if (this.distinct > this.highWaterMark) {
+    int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+    rehash(newCapacity);
+    return put(key, value);
+  }
+
+  this.table[i]=key;
+  this.values[i]=value;
+  if (this.state[i]==FREE) this.freeEntries--;
+  this.state[i]=FULL;
+  this.distinct++;
+
+  if (this.freeEntries < 1) { //delta
+    int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+    rehash(newCapacity);
+  }
 
-	return true;
+  return true;
 }
 /**
  * Rehashes the contents of the receiver into a new table
@@ -381,34 +381,34 @@
  * number of keys in the receiver exceeds the high water mark or falls below the low water mark.
  */
 protected void rehash(int newCapacity) {
-	int oldCapacity = table.length;
-	//if (oldCapacity == newCapacity) return;
-	
-	int oldTable[] = table;
-	Object oldValues[] = values;
-	byte oldState[] = state;
-
-	int newTable[] = new int[newCapacity];
-	Object newValues[] = new Object[newCapacity];
-	byte newState[] = new byte[newCapacity];
-
-	this.lowWaterMark  = chooseLowWaterMark(newCapacity,this.minLoadFactor);
-	this.highWaterMark = chooseHighWaterMark(newCapacity,this.maxLoadFactor);
-
-	this.table = newTable;
-	this.values = newValues;
-	this.state = newState;
-	this.freeEntries = newCapacity-this.distinct; // delta
-	
-	for (int i = oldCapacity ; i-- > 0 ;) {
-		if (oldState[i]==FULL) {
-			int element = oldTable[i];
-			int index = indexOfInsertion(element);
-			newTable[index]=element;
-			newValues[index]=oldValues[i];
-			newState[index]=FULL;
-		}
-	}
+  int oldCapacity = table.length;
+  //if (oldCapacity == newCapacity) return;
+  
+  int oldTable[] = table;
+  Object oldValues[] = values;
+  byte oldState[] = state;
+
+  int newTable[] = new int[newCapacity];
+  Object newValues[] = new Object[newCapacity];
+  byte newState[] = new byte[newCapacity];
+
+  this.lowWaterMark  = chooseLowWaterMark(newCapacity,this.minLoadFactor);
+  this.highWaterMark = chooseHighWaterMark(newCapacity,this.maxLoadFactor);
+
+  this.table = newTable;
+  this.values = newValues;
+  this.state = newState;
+  this.freeEntries = newCapacity-this.distinct; // delta
+  
+  for (int i = oldCapacity ; i-- > 0 ;) {
+    if (oldState[i]==FULL) {
+      int element = oldTable[i];
+      int index = indexOfInsertion(element);
+      newTable[index]=element;
+      newValues[index]=oldValues[i];
+      newState[index]=FULL;
+    }
+  }
 }
 /**
  * Removes the given key with its associated element from the receiver, if present.
@@ -417,19 +417,19 @@
  * @return <tt>true</tt> if the receiver contained the specified key, <tt>false</tt> otherwise.
  */
 public boolean removeKey(int key) {
-	int i = indexOfKey(key);
-	if (i<0) return false; // key not contained
+  int i = indexOfKey(key);
+  if (i<0) return false; // key not contained
 
-	this.state[i]=REMOVED;
-	this.values[i]=null; // delta
-	this.distinct--;
-
-	if (this.distinct < this.lowWaterMark) {
-		int newCapacity = chooseShrinkCapacity(this.distinct,this.minLoadFactor, this.maxLoadFactor);
-		rehash(newCapacity);
-	}
-	
-	return true;	
+  this.state[i]=REMOVED;
+  this.values[i]=null; // delta
+  this.distinct--;
+
+  if (this.distinct < this.lowWaterMark) {
+    int newCapacity = chooseShrinkCapacity(this.distinct,this.minLoadFactor, this.maxLoadFactor);
+    rehash(newCapacity);
+  }
+  
+  return true;  
 }
 /**
  * Initializes the receiver.
@@ -437,32 +437,32 @@
  * @param      initialCapacity   the initial capacity of the receiver.
  * @param      minLoadFactor        the minLoadFactor of the receiver.
  * @param      maxLoadFactor        the maxLoadFactor of the receiver.
- * @throws	IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
+ * @throws  IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
  */
 protected void setUp(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
-	int capacity = initialCapacity;
-	super.setUp(capacity, minLoadFactor, maxLoadFactor);
-	capacity = nextPrime(capacity);
-	if (capacity==0) capacity=1; // open addressing needs at least one FREE slot at any time.
-	
-	this.table = new int[capacity];
-	this.values = new Object[capacity];
-	this.state = new byte[capacity];
-
-	// memory will be exhausted long before this pathological case happens, anyway.
-	this.minLoadFactor = minLoadFactor;
-	if (capacity == PrimeFinder.largestPrime) this.maxLoadFactor = 1.0;
-	else this.maxLoadFactor = maxLoadFactor;
-
-	this.distinct = 0;
-	this.freeEntries = capacity; // delta
-	
-	// lowWaterMark will be established upon first expansion.
-	// establishing it now (upon instance construction) would immediately make the table shrink upon first put(...).
-	// After all the idea of an "initialCapacity" implies violating lowWaterMarks when an object is young.
-	// See ensureCapacity(...)
-	this.lowWaterMark = 0; 
-	this.highWaterMark = chooseHighWaterMark(capacity, this.maxLoadFactor);
+  int capacity = initialCapacity;
+  super.setUp(capacity, minLoadFactor, maxLoadFactor);
+  capacity = nextPrime(capacity);
+  if (capacity==0) capacity=1; // open addressing needs at least one FREE slot at any time.
+  
+  this.table = new int[capacity];
+  this.values = new Object[capacity];
+  this.state = new byte[capacity];
+
+  // memory will be exhausted long before this pathological case happens, anyway.
+  this.minLoadFactor = minLoadFactor;
+  if (capacity == PrimeFinder.largestPrime) this.maxLoadFactor = 1.0;
+  else this.maxLoadFactor = maxLoadFactor;
+
+  this.distinct = 0;
+  this.freeEntries = capacity; // delta
+  
+  // lowWaterMark will be established upon first expansion.
+  // establishing it now (upon instance construction) would immediately make the table shrink upon first put(...).
+  // After all the idea of an "initialCapacity" implies violating lowWaterMarks when an object is young.
+  // See ensureCapacity(...)
+  this.lowWaterMark = 0; 
+  this.highWaterMark = chooseHighWaterMark(capacity, this.maxLoadFactor);
 }
 /**
  * Trims the capacity of the receiver to be the receiver's current 
@@ -470,12 +470,12 @@
  * storage of the receiver.
  */
 public void trimToSize() {
-	// * 1.2 because open addressing's performance exponentially degrades beyond that point
-	// so that even rehashing the table can take very long
-	int newCapacity = nextPrime((int)(1 + 1.2*size()));
-	if (table.length > newCapacity) {
-		rehash(newCapacity);
-	}
+  // * 1.2 because open addressing's performance exponentially degrades beyond that point
+  // so that even rehashing the table can take very long
+  int newCapacity = nextPrime((int)(1 + 1.2*size()));
+  if (table.length > newCapacity) {
+    rehash(newCapacity);
+  }
 }
 /**
  * Fills all values contained in the receiver into the specified list.
@@ -488,15 +488,15 @@
  * @param list the list to be filled, can have any size.
  */
 public void values(ObjectArrayList list) {
-	list.setSize(distinct);
-	Object[] elements = list.elements();
-	
-	Object[] val = values;
-	byte[] stat = state;
-	
-	int j=0;
-	for (int i = stat.length ; i-- > 0 ;) {
-		if (stat[i]==FULL) elements[j++]=val[i];
-	}
+  list.setSize(distinct);
+  Object[] elements = list.elements();
+  
+  Object[] val = values;
+  byte[] stat = state;
+  
+  int j=0;
+  for (int i = stat.length ; i-- > 0 ;) {
+    if (stat[i]==FULL) elements[j++]=val[i];
+  }
 }
 }

Modified: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenLongObjectHashMap.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenLongObjectHashMap.java?rev=883973&r1=883972&r2=883973&view=diff
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenLongObjectHashMap.java (original)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenLongObjectHashMap.java Wed Nov 25 03:35:59 2009
@@ -21,47 +21,47 @@
 
 @author wolfgang.hoschek@cern.ch
 @version 1.0, 09/24/99
-@see	    java.util.HashMap
+@see      java.util.HashMap
 */
 /** 
  * @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported.
  */
 @Deprecated
 public class OpenLongObjectHashMap extends AbstractLongObjectMap {
-	 /**
-	 * The hash table keys.
-	 * @serial
-	 */
-	protected long table[];
-
-	 /**
-	 * The hash table values.
-	 * @serial
-	 */
-	protected Object values[];
-
-	/**
-	 * The state of each hash table entry (FREE, FULL, REMOVED).
-	 * @serial
-	 */
-	protected byte state[];
-
-	/**
-	 * The number of table entries in state==FREE.
-	 * @serial
-	 */
-	protected int freeEntries;
-	
-	
-	protected static final byte FREE = 0;
-	protected static final byte FULL = 1;
-	protected static final byte REMOVED = 2;
+   /**
+   * The hash table keys.
+   * @serial
+   */
+  protected long table[];
+
+   /**
+   * The hash table values.
+   * @serial
+   */
+  protected Object values[];
+
+  /**
+   * The state of each hash table entry (FREE, FULL, REMOVED).
+   * @serial
+   */
+  protected byte state[];
+
+  /**
+   * The number of table entries in state==FREE.
+   * @serial
+   */
+  protected int freeEntries;
+  
+  
+  protected static final byte FREE = 0;
+  protected static final byte FULL = 1;
+  protected static final byte REMOVED = 2;
 
 /**
  * Constructs an empty map with default capacity and default load factors.
  */
 public OpenLongObjectHashMap() {
-	this(defaultCapacity);
+  this(defaultCapacity);
 }
 /**
  * Constructs an empty map with the specified initial capacity and default load factors.
@@ -71,7 +71,7 @@
  *             than zero.
  */
 public OpenLongObjectHashMap(int initialCapacity) {
-	this(initialCapacity, defaultMinLoadFactor, defaultMaxLoadFactor);
+  this(initialCapacity, defaultMinLoadFactor, defaultMaxLoadFactor);
 }
 /**
  * Constructs an empty map with
@@ -80,22 +80,22 @@
  * @param      initialCapacity   the initial capacity.
  * @param      minLoadFactor        the minimum load factor.
  * @param      maxLoadFactor        the maximum load factor.
- * @throws	IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
+ * @throws  IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
  */
 public OpenLongObjectHashMap(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
-	setUp(initialCapacity,minLoadFactor,maxLoadFactor);
+  setUp(initialCapacity,minLoadFactor,maxLoadFactor);
 }
 /**
  * Removes all (key,value) associations from the receiver.
  * Implicitly calls <tt>trimToSize()</tt>.
  */
 public void clear() {
-	new ByteArrayList(this.state).fillFromToWith(0, this.state.length-1, FREE);
-   	new ObjectArrayList(values).fillFromToWith(0, state.length-1, null); // delta
+  new ByteArrayList(this.state).fillFromToWith(0, this.state.length-1, FREE);
+     new ObjectArrayList(values).fillFromToWith(0, state.length-1, null); // delta
 
-	this.distinct = 0;
-	this.freeEntries = table.length; // delta
-	trimToSize();
+  this.distinct = 0;
+  this.freeEntries = table.length; // delta
+  trimToSize();
 }
 /**
  * Returns a deep copy of the receiver.
@@ -103,11 +103,11 @@
  * @return  a deep copy of the receiver.
  */
 public Object clone() {
-	OpenLongObjectHashMap copy = (OpenLongObjectHashMap) super.clone();
-	copy.table = (long[]) copy.table.clone();
-	copy.values = (Object[]) copy.values.clone();
-	copy.state = (byte[]) copy.state.clone();
-	return copy;
+  OpenLongObjectHashMap copy = (OpenLongObjectHashMap) super.clone();
+  copy.table = (long[]) copy.table.clone();
+  copy.values = (Object[]) copy.values.clone();
+  copy.state = (byte[]) copy.state.clone();
+  return copy;
 }
 /**
  * Returns <tt>true</tt> if the receiver contains the specified key.
@@ -115,7 +115,7 @@
  * @return <tt>true</tt> if the receiver contains the specified key.
  */
 public boolean containsKey(long key) {
-	return indexOfKey(key) >= 0;
+  return indexOfKey(key) >= 0;
 }
 /**
  * Returns <tt>true</tt> if the receiver contains the specified value.
@@ -123,7 +123,7 @@
  * @return <tt>true</tt> if the receiver contains the specified value.
  */
 public boolean containsValue(Object value) {
-	return indexOfValue(value) >= 0;
+  return indexOfValue(value) >= 0;
 }
 /**
  * Ensures that the receiver can hold at least the specified number of associations without needing to allocate new internal memory.
@@ -136,10 +136,10 @@
  * @param   minCapacity   the desired minimum capacity.
  */
 public void ensureCapacity(int minCapacity) {
-	if (table.length < minCapacity) {
-		int newCapacity = nextPrime(minCapacity);
-		rehash(newCapacity);
-	}
+  if (table.length < minCapacity) {
+    int newCapacity = nextPrime(minCapacity);
+    rehash(newCapacity);
+  }
 }
 /**
  * Applies a procedure to each key of the receiver, if any.
@@ -152,10 +152,10 @@
  * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise. 
  */
 public boolean forEachKey(LongProcedure procedure) {
-	for (int i = table.length ; i-- > 0 ;) {
-		if (state[i]==FULL) if (! procedure.apply(table[i])) return false;
-	}
-	return true;
+  for (int i = table.length ; i-- > 0 ;) {
+    if (state[i]==FULL) if (! procedure.apply(table[i])) return false;
+  }
+  return true;
 }
 /**
  * Applies a procedure to each (key,value) pair of the receiver, if any.
@@ -165,10 +165,10 @@
  * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise. 
  */
 public boolean forEachPair(final LongObjectProcedure procedure) {
-	for (int i = table.length ; i-- > 0 ;) {
-		if (state[i]==FULL) if (! procedure.apply(table[i],values[i])) return false;
-	}
-	return true;
+  for (int i = table.length ; i-- > 0 ;) {
+    if (state[i]==FULL) if (! procedure.apply(table[i],values[i])) return false;
+  }
+  return true;
 }
 /**
  * Returns the value associated with the specified key.
@@ -178,9 +178,9 @@
  * @return the value associated with the specified key; <tt>null</tt> if no such key is present.
  */
 public Object get(long key) {
-	int i = indexOfKey(key);
-	if (i<0) return null; //not contained
-	return values[i];
+  int i = indexOfKey(key);
+  if (i<0) return null; //not contained
+  return values[i];
 }
 /**
  * @param key the key to be added to the receiver.
@@ -190,86 +190,86 @@
  * If the returned index >= 0, then it is NOT already contained and should be inserted at slot index.
  */
 protected int indexOfInsertion(long key) {
-	final long tab[] = table;
-	final byte stat[] = state;
-	final int length = tab.length;
-
-	final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
-	int i = hash % length;
-	int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
-	//int decrement = (hash / length) % length;
-	if (decrement == 0) decrement = 1;
-
-	// stop if we find a removed or free slot, or if we find the key itself
-	// do NOT skip over removed slots (yes, open addressing is like that...)
-	while (stat[i] == FULL && tab[i] != key) {
-		i -= decrement;
-		//hashCollisions++;
-		if (i<0) i+=length;
-	}
-	
-	if (stat[i] == REMOVED) {
-		// stop if we find a free slot, or if we find the key itself.
-		// do skip over removed slots (yes, open addressing is like that...)
-		// assertion: there is at least one FREE slot.
-		int j = i;
-		while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
-			i -= decrement;
-			//hashCollisions++;
-			if (i<0) i+=length;
-		}
-		if (stat[i] == FREE) i = j;
-	}
-	
-	
-	if (stat[i] == FULL) {
-		// key already contained at slot i.
-		// return a negative number identifying the slot.
-		return -i-1;
-	}
-	// not already contained, should be inserted at slot i.
-	// return a number >= 0 identifying the slot.
-	return i; 
+  final long tab[] = table;
+  final byte stat[] = state;
+  final int length = tab.length;
+
+  final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+  int i = hash % length;
+  int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
+  //int decrement = (hash / length) % length;
+  if (decrement == 0) decrement = 1;
+
+  // stop if we find a removed or free slot, or if we find the key itself
+  // do NOT skip over removed slots (yes, open addressing is like that...)
+  while (stat[i] == FULL && tab[i] != key) {
+    i -= decrement;
+    //hashCollisions++;
+    if (i<0) i+=length;
+  }
+  
+  if (stat[i] == REMOVED) {
+    // stop if we find a free slot, or if we find the key itself.
+    // do skip over removed slots (yes, open addressing is like that...)
+    // assertion: there is at least one FREE slot.
+    int j = i;
+    while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
+      i -= decrement;
+      //hashCollisions++;
+      if (i<0) i+=length;
+    }
+    if (stat[i] == FREE) i = j;
+  }
+  
+  
+  if (stat[i] == FULL) {
+    // key already contained at slot i.
+    // return a negative number identifying the slot.
+    return -i-1;
+  }
+  // not already contained, should be inserted at slot i.
+  // return a number >= 0 identifying the slot.
+  return i; 
 }
 /**
  * @param key the key to be searched in the receiver.
  * @return the index where the key is contained in the receiver, returns -1 if the key was not found.
  */
 protected int indexOfKey(long key) {
-	final long tab[] = table;
-	final byte stat[] = state;
-	final int length = tab.length;
-
-	final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
-	int i = hash % length;
-	int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
-	//int decrement = (hash / length) % length;
-	if (decrement == 0) decrement = 1;
-
-	// stop if we find a free slot, or if we find the key itself.
-	// do skip over removed slots (yes, open addressing is like that...)
-	while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
-		i -= decrement;
-		//hashCollisions++;
-		if (i<0) i+=length;
-	}
-	
-	if (stat[i] == FREE) return -1; // not found
-	return i; //found, return index where key is contained
+  final long tab[] = table;
+  final byte stat[] = state;
+  final int length = tab.length;
+
+  final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+  int i = hash % length;
+  int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
+  //int decrement = (hash / length) % length;
+  if (decrement == 0) decrement = 1;
+
+  // stop if we find a free slot, or if we find the key itself.
+  // do skip over removed slots (yes, open addressing is like that...)
+  while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
+    i -= decrement;
+    //hashCollisions++;
+    if (i<0) i+=length;
+  }
+  
+  if (stat[i] == FREE) return -1; // not found
+  return i; //found, return index where key is contained
 }
 /**
  * @param value the value to be searched in the receiver.
  * @return the index where the value is contained in the receiver, returns -1 if the value was not found.
  */
 protected int indexOfValue(Object value) {
-	final Object val[] = values;
-	final byte stat[] = state;
+  final Object val[] = values;
+  final byte stat[] = state;
 
-	for (int i=stat.length; --i >= 0;) {
-		if (stat[i]==FULL && val[i]==value) return i;
-	}
+  for (int i=stat.length; --i >= 0;) {
+    if (stat[i]==FULL && val[i]==value) return i;
+  }
 
-	return -1; // not found
+  return -1; // not found
 }
 /**
  * Returns the first key the given value is associated with.
@@ -278,13 +278,13 @@
  *
  * @param value the value to search for.
  * @return the first key for which holds <tt>get(key) == value</tt>; 
- *		   returns <tt>Long.MIN_VALUE</tt> if no such key exists.
+ *       returns <tt>Long.MIN_VALUE</tt> if no such key exists.
  */
 public long keyOf(Object value) {
-	//returns the first key found; there may be more matching keys, however.
-	int i = indexOfValue(value);
-	if (i<0) return Long.MIN_VALUE;
-	return table[i];
+  //returns the first key found; there may be more matching keys, however.
+  int i = indexOfValue(value);
+  if (i<0) return Long.MIN_VALUE;
+  return table[i];
 }
 /**
  * Fills all keys contained in the receiver into the specified list.
@@ -297,16 +297,16 @@
  * @param list the list to be filled, can have any size.
  */
 public void keys(LongArrayList list) {
-	list.setSize(distinct);
-	long[] elements = list.elements();
-	
-	long[] tab = table;
-	byte[] stat = state;
-	
-	int j=0;
-	for (int i = tab.length ; i-- > 0 ;) {
-		if (stat[i]==FULL) elements[j++]=tab[i];
-	}
+  list.setSize(distinct);
+  long[] elements = list.elements();
+  
+  long[] tab = table;
+  byte[] stat = state;
+  
+  int j=0;
+  for (int i = tab.length ; i-- > 0 ;) {
+    if (stat[i]==FULL) elements[j++]=tab[i];
+  }
 }
 /**
 Fills all pairs satisfying a given condition into the specified lists.
@@ -318,7 +318,7 @@
 <br>
 <pre>
 LongObjectProcedure condition = new LongObjectProcedure() { // match even keys only
-	public boolean apply(long key, Object value) { return key%2==0; }
+  public boolean apply(long key, Object value) { return key%2==0; }
 }
 keys = (8,7,6), values = (1,2,2) --> keyList = (6,8), valueList = (2,1)</tt>
 </pre>
@@ -328,15 +328,15 @@
 @param valueList the list to be filled with values, can have any size.
 */
 public void pairsMatching(final LongObjectProcedure condition, final LongArrayList keyList, final ObjectArrayList valueList) {
-	keyList.clear();
-	valueList.clear();
-	
-	for (int i = table.length ; i-- > 0 ;) {
-		if (state[i]==FULL && condition.apply(table[i],values[i])) {
-			keyList.add(table[i]);
-			valueList.add(values[i]);
-		}
-	}
+  keyList.clear();
+  valueList.clear();
+  
+  for (int i = table.length ; i-- > 0 ;) {
+    if (state[i]==FULL && condition.apply(table[i],values[i])) {
+      keyList.add(table[i]);
+      valueList.add(values[i]);
+    }
+  }
 }
 /**
  * Associates the given key with the given value.
@@ -348,31 +348,31 @@
  *         <tt>false</tt> if the receiver did already contain such a key - the new value has now replaced the formerly associated value.
  */
 public boolean put(long key, Object value) {
-	int i = indexOfInsertion(key);	
-	if (i<0) { //already contained
-		i = -i -1;
-		this.values[i]=value;
-		return false;
-	}
-
-	if (this.distinct > this.highWaterMark) {
-		int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
-		rehash(newCapacity);
-		return put(key, value);
-	}
-
-	this.table[i]=key;
-	this.values[i]=value;
-	if (this.state[i]==FREE) this.freeEntries--;
-	this.state[i]=FULL;
-	this.distinct++;
-
-	if (this.freeEntries < 1) { //delta
-		int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
-		rehash(newCapacity);
-	}
+  int i = indexOfInsertion(key);  
+  if (i<0) { //already contained
+    i = -i -1;
+    this.values[i]=value;
+    return false;
+  }
+
+  if (this.distinct > this.highWaterMark) {
+    int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+    rehash(newCapacity);
+    return put(key, value);
+  }
+
+  this.table[i]=key;
+  this.values[i]=value;
+  if (this.state[i]==FREE) this.freeEntries--;
+  this.state[i]=FULL;
+  this.distinct++;
+
+  if (this.freeEntries < 1) { //delta
+    int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+    rehash(newCapacity);
+  }
 
-	return true;
+  return true;
 }
 /**
  * Rehashes the contents of the receiver into a new table
@@ -381,34 +381,34 @@
  * number of keys in the receiver exceeds the high water mark or falls below the low water mark.
  */
 protected void rehash(int newCapacity) {
-	int oldCapacity = table.length;
-	//if (oldCapacity == newCapacity) return;
-	
-	long oldTable[] = table;
-	Object oldValues[] = values;
-	byte oldState[] = state;
-
-	long newTable[] = new long[newCapacity];
-	Object newValues[] = new Object[newCapacity];
-	byte newState[] = new byte[newCapacity];
-
-	this.lowWaterMark  = chooseLowWaterMark(newCapacity,this.minLoadFactor);
-	this.highWaterMark = chooseHighWaterMark(newCapacity,this.maxLoadFactor);
-
-	this.table = newTable;
-	this.values = newValues;
-	this.state = newState;
-	this.freeEntries = newCapacity-this.distinct; // delta
-	
-	for (int i = oldCapacity ; i-- > 0 ;) {
-		if (oldState[i]==FULL) {
-			long element = oldTable[i];
-			int index = indexOfInsertion(element);
-			newTable[index]=element;
-			newValues[index]=oldValues[i];
-			newState[index]=FULL;
-		}
-	}
+  int oldCapacity = table.length;
+  //if (oldCapacity == newCapacity) return;
+  
+  long oldTable[] = table;
+  Object oldValues[] = values;
+  byte oldState[] = state;
+
+  long newTable[] = new long[newCapacity];
+  Object newValues[] = new Object[newCapacity];
+  byte newState[] = new byte[newCapacity];
+
+  this.lowWaterMark  = chooseLowWaterMark(newCapacity,this.minLoadFactor);
+  this.highWaterMark = chooseHighWaterMark(newCapacity,this.maxLoadFactor);
+
+  this.table = newTable;
+  this.values = newValues;
+  this.state = newState;
+  this.freeEntries = newCapacity-this.distinct; // delta
+  
+  for (int i = oldCapacity ; i-- > 0 ;) {
+    if (oldState[i]==FULL) {
+      long element = oldTable[i];
+      int index = indexOfInsertion(element);
+      newTable[index]=element;
+      newValues[index]=oldValues[i];
+      newState[index]=FULL;
+    }
+  }
 }
 /**
  * Removes the given key with its associated element from the receiver, if present.
@@ -417,19 +417,19 @@
  * @return <tt>true</tt> if the receiver contained the specified key, <tt>false</tt> otherwise.
  */
 public boolean removeKey(long key) {
-	int i = indexOfKey(key);
-	if (i<0) return false; // key not contained
+  int i = indexOfKey(key);
+  if (i<0) return false; // key not contained
 
-	this.state[i]=REMOVED;
-	this.values[i]=null; // delta
-	this.distinct--;
-
-	if (this.distinct < this.lowWaterMark) {
-		int newCapacity = chooseShrinkCapacity(this.distinct,this.minLoadFactor, this.maxLoadFactor);
-		rehash(newCapacity);
-	}
-	
-	return true;	
+  this.state[i]=REMOVED;
+  this.values[i]=null; // delta
+  this.distinct--;
+
+  if (this.distinct < this.lowWaterMark) {
+    int newCapacity = chooseShrinkCapacity(this.distinct,this.minLoadFactor, this.maxLoadFactor);
+    rehash(newCapacity);
+  }
+  
+  return true;  
 }
 /**
  * Initializes the receiver.
@@ -437,32 +437,32 @@
  * @param      initialCapacity   the initial capacity of the receiver.
  * @param      minLoadFactor        the minLoadFactor of the receiver.
  * @param      maxLoadFactor        the maxLoadFactor of the receiver.
- * @throws	IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
+ * @throws  IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
  */
 protected void setUp(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
-	int capacity = initialCapacity;
-	super.setUp(capacity, minLoadFactor, maxLoadFactor);
-	capacity = nextPrime(capacity);
-	if (capacity==0) capacity=1; // open addressing needs at least one FREE slot at any time.
-	
-	this.table = new long[capacity];
-	this.values = new Object[capacity];
-	this.state = new byte[capacity];
-
-	// memory will be exhausted long before this pathological case happens, anyway.
-	this.minLoadFactor = minLoadFactor;
-	if (capacity == PrimeFinder.largestPrime) this.maxLoadFactor = 1.0;
-	else this.maxLoadFactor = maxLoadFactor;
-
-	this.distinct = 0;
-	this.freeEntries = capacity; // delta
-	
-	// lowWaterMark will be established upon first expansion.
-	// establishing it now (upon instance construction) would immediately make the table shrink upon first put(...).
-	// After all the idea of an "initialCapacity" implies violating lowWaterMarks when an object is young.
-	// See ensureCapacity(...)
-	this.lowWaterMark = 0; 
-	this.highWaterMark = chooseHighWaterMark(capacity, this.maxLoadFactor);
+  int capacity = initialCapacity;
+  super.setUp(capacity, minLoadFactor, maxLoadFactor);
+  capacity = nextPrime(capacity);
+  if (capacity==0) capacity=1; // open addressing needs at least one FREE slot at any time.
+  
+  this.table = new long[capacity];
+  this.values = new Object[capacity];
+  this.state = new byte[capacity];
+
+  // memory will be exhausted long before this pathological case happens, anyway.
+  this.minLoadFactor = minLoadFactor;
+  if (capacity == PrimeFinder.largestPrime) this.maxLoadFactor = 1.0;
+  else this.maxLoadFactor = maxLoadFactor;
+
+  this.distinct = 0;
+  this.freeEntries = capacity; // delta
+  
+  // lowWaterMark will be established upon first expansion.
+  // establishing it now (upon instance construction) would immediately make the table shrink upon first put(...).
+  // After all the idea of an "initialCapacity" implies violating lowWaterMarks when an object is young.
+  // See ensureCapacity(...)
+  this.lowWaterMark = 0; 
+  this.highWaterMark = chooseHighWaterMark(capacity, this.maxLoadFactor);
 }
 /**
  * Trims the capacity of the receiver to be the receiver's current 
@@ -470,12 +470,12 @@
  * storage of the receiver.
  */
 public void trimToSize() {
-	// * 1.2 because open addressing's performance exponentially degrades beyond that point
-	// so that even rehashing the table can take very long
-	int newCapacity = nextPrime((int)(1 + 1.2*size()));
-	if (table.length > newCapacity) {
-		rehash(newCapacity);
-	}
+  // * 1.2 because open addressing's performance exponentially degrades beyond that point
+  // so that even rehashing the table can take very long
+  int newCapacity = nextPrime((int)(1 + 1.2*size()));
+  if (table.length > newCapacity) {
+    rehash(newCapacity);
+  }
 }
 /**
  * Fills all values contained in the receiver into the specified list.
@@ -488,15 +488,15 @@
  * @param list the list to be filled, can have any size.
  */
 public void values(ObjectArrayList list) {
-	list.setSize(distinct);
-	Object[] elements = list.elements();
-	
-	Object[] val = values;
-	byte[] stat = state;
-	
-	int j=0;
-	for (int i = stat.length ; i-- > 0 ;) {
-		if (stat[i]==FULL) elements[j++]=val[i];
-	}
+  list.setSize(distinct);
+  Object[] elements = list.elements();
+  
+  Object[] val = values;
+  byte[] stat = state;
+  
+  int j=0;
+  for (int i = stat.length ; i-- > 0 ;) {
+    if (stat[i]==FULL) elements[j++]=val[i];
+  }
 }
 }

Modified: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/PrimeFinder.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/PrimeFinder.java?rev=883973&r1=883972&r2=883973&view=diff
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/PrimeFinder.java (original)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/PrimeFinder.java Wed Nov 25 03:35:59 2009
@@ -13,126 +13,124 @@
  * <p>Choosing a prime is <tt>O(log 300)</tt> (binary search in a list of 300 int's).
  * Memory requirements: 1 KB static memory.
  *
- * @author wolfgang.hoschek@cern.ch
- * @version 1.0, 09/24/99
  */
 /** 
  * @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported.
  */
 @Deprecated
 public class PrimeFinder extends Object {
-	/**
-	 * The largest prime this class can generate; currently equal to <tt>Integer.MAX_VALUE</tt>.
-	 */
-	public static final int largestPrime = Integer.MAX_VALUE; //yes, it is prime.
+  /**
+   * The largest prime this class can generate; currently equal to <tt>Integer.MAX_VALUE</tt>.
+   */
+  public static final int largestPrime = Integer.MAX_VALUE; //yes, it is prime.
 
-	/**
-	 * The prime number list consists of 11 chunks.
-	 * Each chunk contains prime numbers.
-	 * A chunk starts with a prime P1. The next element is a prime P2. P2 is the smallest prime for which holds: P2 >= 2*P1.
-	 * The next element is P3, for which the same holds with respect to P2, and so on.
-	 *
-	 * Chunks are chosen such that for any desired capacity >= 1000 
-	 * the list includes a prime number <= desired capacity * 1.11 (11%).
-	 * For any desired capacity >= 200 
-	 * the list includes a prime number <= desired capacity * 1.16 (16%).
-	 * For any desired capacity >= 16
-	 * the list includes a prime number <= desired capacity * 1.21 (21%).
-	 * 
-	 * Therefore, primes can be retrieved which are quite close to any desired capacity,
-	 * which in turn avoids wasting memory.
-	 * For example, the list includes 1039,1117,1201,1277,1361,1439,1523,1597,1759,1907,2081.
-	 * So if you need a prime >= 1040, you will find a prime <= 1040*1.11=1154.
-	 *	
-	 * Chunks are chosen such that they are optimized for a hashtable growthfactor of 2.0;
-	 * If your hashtable has such a growthfactor then,  
-	 * after initially "rounding to a prime" upon hashtable construction, 
-	 * it will later expand to prime capacities such that there exist no better primes.
-	 *
-	 * In total these are about 32*10=320 numbers -> 1 KB of static memory needed.
-	 * If you are stingy, then delete every second or fourth chunk.
-	 */
-	
-	private static final int[] primeCapacities = {
-		//chunk #0
-		largestPrime,
-		
-		//chunk #1
-		5,11,23,47,97,197,397,797,1597,3203,6421,12853,25717,51437,102877,205759,
-		  411527,823117,1646237,3292489,6584983,13169977,26339969,52679969,105359939,
-		  210719881,421439783,842879579,1685759167,
-		  
-		//chunk #2
-		433,877,1759,3527,7057,14143,28289,56591,113189,226379,452759,905551,1811107,
-		  3622219,7244441,14488931,28977863,57955739,115911563,231823147,463646329,927292699,
-		  1854585413,
-		  
-		//chunk #3
-		953,1907,3821,7643,15287,30577,61169,122347,244703,489407,978821,1957651,3915341,
-		  7830701,15661423,31322867,62645741,125291483,250582987,501165979,1002331963,
-		  2004663929,
-		  
-		//chunk #4
-		1039,2081,4177,8363,16729,33461,66923,133853,267713,535481,1070981,2141977,4283963,
-		  8567929,17135863,34271747,68543509,137087021,274174111,548348231,1096696463,
-		  
-		//chunk #5
-		31,67,137,277,557,1117,2237,4481,8963,17929,35863,71741,143483,286973,573953,
-		  1147921,2295859,4591721,9183457,18366923,36733847,73467739,146935499,293871013,
-		  587742049,1175484103,
-		  
-		//chunk #6
-		599,1201,2411,4831,9677,19373,38747,77509,155027,310081,620171,1240361,2480729,
-		  4961459,9922933,19845871,39691759,79383533,158767069,317534141,635068283,1270136683,
-		  
-		//chunk #7
-		311,631,1277,2557,5119,10243,20507,41017,82037,164089,328213,656429,1312867,
-		  2625761,5251529,10503061,21006137,42012281,84024581,168049163,336098327,672196673,
-		  1344393353,
-		  
-		//chunk #8
-		3,7,17,37,79,163,331,673,1361,2729,5471,10949,21911,43853,87719,175447,350899,
-		  701819,1403641,2807303,5614657,11229331,22458671,44917381,89834777,179669557,
-		  359339171,718678369,1437356741,
-		  
-		//chunk #9
-		43,89,179,359,719,1439,2879,5779,11579,23159,46327,92657,185323,370661,741337,
-		  1482707,2965421,5930887,11861791,23723597,47447201,94894427,189788857,379577741,
-		  759155483,1518310967,
-		  
-		//chunk #10
-		379,761,1523,3049,6101,12203,24407,48817,97649,195311,390647,781301,1562611,
-		  3125257,6250537,12501169,25002389,50004791,100009607,200019221,400038451,800076929,
-		  1600153859
-		/*
-		// some more chunks for the low range [3..1000]
-		//chunk #11
-		13,29,59,127,257,521,1049,2099,4201,8419,16843,33703,67409,134837,269683,
-		539389,1078787,2157587,4315183,8630387,17260781,34521589,69043189,138086407,
-		276172823,552345671,1104691373,
-		
-		//chunk #12
-		19,41,83,167,337,677,
-		//1361,2729,5471,10949,21911,43853,87719,175447,350899,
-		//701819,1403641,2807303,5614657,11229331,22458671,44917381,89834777,179669557,
-		//359339171,718678369,1437356741,
-		
-		//chunk #13
-		53,107,223,449,907,1823,3659,7321,14653,29311,58631,117269,
-		234539,469099,938207,1876417,3752839,7505681,15011389,30022781,
-		60045577,120091177,240182359,480364727,960729461,1921458943
-		*/
-		};
-		
+  /**
+   * The prime number list consists of 11 chunks.
+   * Each chunk contains prime numbers.
+   * A chunk starts with a prime P1. The next element is a prime P2. P2 is the smallest prime for which holds: P2 >= 2*P1.
+   * The next element is P3, for which the same holds with respect to P2, and so on.
+   *
+   * Chunks are chosen such that for any desired capacity >= 1000 
+   * the list includes a prime number <= desired capacity * 1.11 (11%).
+   * For any desired capacity >= 200 
+   * the list includes a prime number <= desired capacity * 1.16 (16%).
+   * For any desired capacity >= 16
+   * the list includes a prime number <= desired capacity * 1.21 (21%).
+   * 
+   * Therefore, primes can be retrieved which are quite close to any desired capacity,
+   * which in turn avoids wasting memory.
+   * For example, the list includes 1039,1117,1201,1277,1361,1439,1523,1597,1759,1907,2081.
+   * So if you need a prime >= 1040, you will find a prime <= 1040*1.11=1154.
+   *  
+   * Chunks are chosen such that they are optimized for a hashtable growthfactor of 2.0;
+   * If your hashtable has such a growthfactor then,  
+   * after initially "rounding to a prime" upon hashtable construction, 
+   * it will later expand to prime capacities such that there exist no better primes.
+   *
+   * In total these are about 32*10=320 numbers -> 1 KB of static memory needed.
+   * If you are stingy, then delete every second or fourth chunk.
+   */
+  
+  private static final int[] primeCapacities = {
+    //chunk #0
+    largestPrime,
+    
+    //chunk #1
+    5,11,23,47,97,197,397,797,1597,3203,6421,12853,25717,51437,102877,205759,
+      411527,823117,1646237,3292489,6584983,13169977,26339969,52679969,105359939,
+      210719881,421439783,842879579,1685759167,
+      
+    //chunk #2
+    433,877,1759,3527,7057,14143,28289,56591,113189,226379,452759,905551,1811107,
+      3622219,7244441,14488931,28977863,57955739,115911563,231823147,463646329,927292699,
+      1854585413,
+      
+    //chunk #3
+    953,1907,3821,7643,15287,30577,61169,122347,244703,489407,978821,1957651,3915341,
+      7830701,15661423,31322867,62645741,125291483,250582987,501165979,1002331963,
+      2004663929,
+      
+    //chunk #4
+    1039,2081,4177,8363,16729,33461,66923,133853,267713,535481,1070981,2141977,4283963,
+      8567929,17135863,34271747,68543509,137087021,274174111,548348231,1096696463,
+      
+    //chunk #5
+    31,67,137,277,557,1117,2237,4481,8963,17929,35863,71741,143483,286973,573953,
+      1147921,2295859,4591721,9183457,18366923,36733847,73467739,146935499,293871013,
+      587742049,1175484103,
+      
+    //chunk #6
+    599,1201,2411,4831,9677,19373,38747,77509,155027,310081,620171,1240361,2480729,
+      4961459,9922933,19845871,39691759,79383533,158767069,317534141,635068283,1270136683,
+      
+    //chunk #7
+    311,631,1277,2557,5119,10243,20507,41017,82037,164089,328213,656429,1312867,
+      2625761,5251529,10503061,21006137,42012281,84024581,168049163,336098327,672196673,
+      1344393353,
+      
+    //chunk #8
+    3,7,17,37,79,163,331,673,1361,2729,5471,10949,21911,43853,87719,175447,350899,
+      701819,1403641,2807303,5614657,11229331,22458671,44917381,89834777,179669557,
+      359339171,718678369,1437356741,
+      
+    //chunk #9
+    43,89,179,359,719,1439,2879,5779,11579,23159,46327,92657,185323,370661,741337,
+      1482707,2965421,5930887,11861791,23723597,47447201,94894427,189788857,379577741,
+      759155483,1518310967,
+      
+    //chunk #10
+    379,761,1523,3049,6101,12203,24407,48817,97649,195311,390647,781301,1562611,
+      3125257,6250537,12501169,25002389,50004791,100009607,200019221,400038451,800076929,
+      1600153859
+    /*
+    // some more chunks for the low range [3..1000]
+    //chunk #11
+    13,29,59,127,257,521,1049,2099,4201,8419,16843,33703,67409,134837,269683,
+    539389,1078787,2157587,4315183,8630387,17260781,34521589,69043189,138086407,
+    276172823,552345671,1104691373,
+    
+    //chunk #12
+    19,41,83,167,337,677,
+    //1361,2729,5471,10949,21911,43853,87719,175447,350899,
+    //701819,1403641,2807303,5614657,11229331,22458671,44917381,89834777,179669557,
+    //359339171,718678369,1437356741,
+    
+    //chunk #13
+    53,107,223,449,907,1823,3659,7321,14653,29311,58631,117269,
+    234539,469099,938207,1876417,3752839,7505681,15011389,30022781,
+    60045577,120091177,240182359,480364727,960729461,1921458943
+    */
+    };
+    
 
-	static { //initializer
-		// The above prime numbers are formatted for human readability.
-		// To find numbers fast, we sort them once and for all.
-		
-		java.util.Arrays.sort(primeCapacities);
-		//new org.apache.mahout.matrix.list.IntArrayList(primeCapacities).mergeSort(); // for debug only, TODO
-	}
-	
+  static { //initializer
+    // The above prime numbers are formatted for human readability.
+    // To find numbers fast, we sort them once and for all.
+    
+    java.util.Arrays.sort(primeCapacities);
+    //new org.apache.mahout.matrix.list.IntArrayList(primeCapacities).mergeSort(); // for debug only, TODO
+  }
+  
 /**
  * Makes this class non instantiable, but still let's others inherit from it.
  */
@@ -145,10 +143,10 @@
  * from=1000, to=Integer.MAX_VALUE
  */
 protected static void main(String args[]) {
-	int from = Integer.parseInt(args[0]);	
-	int to = Integer.parseInt(args[1]);
-	
-	statistics(from,to);
+  int from = Integer.parseInt(args[0]);  
+  int to = Integer.parseInt(args[1]);
+  
+  statistics(from,to);
 }
 /**
  * Returns a prime number which is <code>&gt;= desiredCapacity</code> and very close to <code>desiredCapacity</code> (within 11% if <code>desiredCapacity &gt;= 1000</code>).
@@ -156,43 +154,43 @@
  * @return the capacity which should be used for a hashtable.
  */
 public static int nextPrime(int desiredCapacity) {
-	int i = java.util.Arrays.binarySearch(primeCapacities, desiredCapacity);
-	//int i = new org.apache.mahout.matrix.list.IntArrayList(primeCapacities).binarySearch(desiredCapacity); // for debug only TODO
-	if (i<0) {
-		// desired capacity not found, choose next prime greater than desired capacity
-		i = -i -1; // remember the semantics of binarySearch...
-	}
-	return primeCapacities[i];
+  int i = java.util.Arrays.binarySearch(primeCapacities, desiredCapacity);
+  //int i = new org.apache.mahout.matrix.list.IntArrayList(primeCapacities).binarySearch(desiredCapacity); // for debug only TODO
+  if (i<0) {
+    // desired capacity not found, choose next prime greater than desired capacity
+    i = -i -1; // remember the semantics of binarySearch...
+  }
+  return primeCapacities[i];
 }
 /**
  * Tests correctness.
  */
 protected static void statistics(int from, int to) {
-	// check that primes contain no accidental errors
-	for (int i=0; i<primeCapacities.length-1; i++) {
-		if (primeCapacities[i] >= primeCapacities[i+1]) throw new RuntimeException("primes are unsorted or contain duplicates; detected at "+i+"@"+primeCapacities[i]);
-	}
-	
-	double accDeviation = 0.0;
-	double maxDeviation = - 1.0;
+  // check that primes contain no accidental errors
+  for (int i=0; i<primeCapacities.length-1; i++) {
+    if (primeCapacities[i] >= primeCapacities[i+1]) throw new RuntimeException("primes are unsorted or contain duplicates; detected at "+i+"@"+primeCapacities[i]);
+  }
+  
+  double accDeviation = 0.0;
+  double maxDeviation = - 1.0;
 
-	for (int i=from; i<=to; i++) {
-		int primeCapacity = nextPrime(i);
-		//System.out.println(primeCapacity);
-		double deviation = (primeCapacity - i) / (double)i;
-		
-		if (deviation > maxDeviation) {
-			maxDeviation = deviation;
-			System.out.println("new maxdev @"+i+"@dev="+maxDeviation);
-		}
+  for (int i=from; i<=to; i++) {
+    int primeCapacity = nextPrime(i);
+    //System.out.println(primeCapacity);
+    double deviation = (primeCapacity - i) / (double)i;
+    
+    if (deviation > maxDeviation) {
+      maxDeviation = deviation;
+      System.out.println("new maxdev @"+i+"@dev="+maxDeviation);
+    }
 
-		accDeviation += deviation;
-	}
-	long width = 1 + (long)to - (long)from;
-	
-	double meanDeviation = accDeviation/width;
-	System.out.println("Statistics for ["+ from + ","+to+"] are as follows");
-	System.out.println("meanDeviation = "+(float)meanDeviation*100+" %");
-	System.out.println("maxDeviation = "+(float)maxDeviation*100+" %");
+    accDeviation += deviation;
+  }
+  long width = 1 + (long)to - (long)from;
+  
+  double meanDeviation = accDeviation/width;
+  System.out.println("Statistics for ["+ from + ","+to+"] are as follows");
+  System.out.println("meanDeviation = "+(float)meanDeviation*100+" %");
+  System.out.println("maxDeviation = "+(float)maxDeviation*100+" %");
 }
 }

Modified: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/QuickOpenIntIntHashMap.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/QuickOpenIntIntHashMap.java?rev=883973&r1=883972&r2=883973&view=diff
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/QuickOpenIntIntHashMap.java (original)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/QuickOpenIntIntHashMap.java Wed Nov 25 03:35:59 2009
@@ -19,15 +19,15 @@
  
 @author wolfgang.hoschek@cern.ch
 @version 1.0, 09/24/99
-@see	    java.util.HashMap
+@see      java.util.HashMap
 */
 class QuickOpenIntIntHashMap extends OpenIntIntHashMap {
-	public int totalProbesSaved = 0; // benchmark only
+  public int totalProbesSaved = 0; // benchmark only
 /**
  * Constructs an empty map with default capacity and default load factors.
  */
 public QuickOpenIntIntHashMap() {
-	this(defaultCapacity);
+  this(defaultCapacity);
 }
 /**
  * Constructs an empty map with the specified initial capacity and default load factors.
@@ -37,7 +37,7 @@
  *             than zero.
  */
 public QuickOpenIntIntHashMap(int initialCapacity) {
-	this(initialCapacity, defaultMinLoadFactor, defaultMaxLoadFactor);
+  this(initialCapacity, defaultMinLoadFactor, defaultMaxLoadFactor);
 }
 /**
  * Constructs an empty map with
@@ -46,10 +46,10 @@
  * @param      initialCapacity   the initial capacity.
  * @param      minLoadFactor        the minimum load factor.
  * @param      maxLoadFactor        the maximum load factor.
- * @throws	IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
+ * @throws  IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
  */
 public QuickOpenIntIntHashMap(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
-	setUp(initialCapacity,minLoadFactor,maxLoadFactor);
+  setUp(initialCapacity,minLoadFactor,maxLoadFactor);
 }
 /**
  * Associates the given key with the given value.
@@ -61,109 +61,109 @@
  *         <tt>false</tt> if the receiver did already contain such a key - the new value has now replaced the formerly associated value.
  */
 public boolean put(int key, int value) {
-	/* 
-	   This is open addressing with double hashing, using "Brent's variation".
-	   Brent's variation slows insertions a bit down (not much) but reduces probes (collisions) for successful searches, in particular for large load factors.
-	   (It does not improve unsuccessful searches.)
-	   See D. Knuth, Searching and Sorting, 3rd ed., p.533-545
-	
-	   h1(key) = hash % M
-	   h2(key) = decrement = Max(1, hash/M % M)
-	   M is prime = capacity = table.length
-	   probing positions are table[(h1-j*h2) % M] for j=0,1,...
-	   (M and h2 could also be chosen differently, but h2 is required to be relative prime to M.)
-	*/
-	
-	int key0;
-	final int tab[] = table;
-	final byte stat[] = state;
-	final int length = tab.length;
-
-	int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
-	int i = hash % length;
-	int decrement = (hash / length) % length;
-	if (decrement == 0) decrement = 1;
-	//System.out.println("insert search for (key,value)=("+key+","+value+") at i="+i+", dec="+decrement);
-
-	// stop if we find a removed or free slot, or if we find the key itself
-	// do NOT skip over removed slots (yes, open addressing is like that...)
-	//int comp = comparisons;
-	int t = 0;  // the number of probes
-	int p0 = i; // the first position to probe
-	while (stat[i] == FULL && tab[i] != key) {
-		t++;
-		i -= decrement;
-		//hashCollisions++;
-		if (i<0) i+=length;		
-	}
-	//if (comparisons-comp>0) System.out.println("probed "+(comparisons-comp)+" slots.");
-	if (stat[i] == FULL) {
-		// key already contained at slot i.
-		this.values[i]=value;
-		return false;
-	}
-	// not already contained, should be inserted at slot i.
-	
-	if (this.distinct > this.highWaterMark) {
-		int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
-		
-		//System.out.print("grow rehashing ");
-		//System.out.println("at distinct="+distinct+", capacity="+table.length+" to newCapacity="+newCapacity+" ...");
-		
-		rehash(newCapacity);
-		return put(key, value);
-	}
-
-	/*
-	Brent's variation does a local reorganization to reduce probes. It essentially means:
-	We test whether it is possible to move the association we probed first (table[p0]) out of the way.
-	If this is possible, it will reduce probes for the key to be inserted, since it takes its place; it gets hit earlier.
-	However, future probes for the key that we move out of the way will increase.
-	Thus we only move it out of the way, if we have a net gain, that is, if we save more probes than we loose.
-	For the first probe we safe more than we loose if the number of probes we needed was >=2 (t>=2).
-	If the first probe cannot be moved out of the way, we try the next probe (p1).
-	Now we safe more than we loose if t>=3.
-	We repeat this until we find that we cannot gain or that we can indeed move p(x) out of the way.
-
-	Note: Under the great majority of insertions t<=1, so the loop is entered very infrequently.
-	*/
-	while (t>1) {
-		//System.out.println("t="+t);
-		key0 = tab[p0];
-		hash = HashFunctions.hash(key0) & 0x7FFFFFFF;
-		decrement = (hash / length) % length;
-		if (decrement == 0) decrement = 1;
-		int pc = p0-decrement; // pc = (p0-j*decrement) % M, j=1,2,..
-		if (pc<0) pc += length;
-		
-		if (stat[pc] != FREE) { // not a free slot, continue searching for free slot to move to, or break.
-			p0 = pc;
-			t--;
-		}
-		else { // free or removed slot found, now move...
-			//System.out.println("copying p0="+p0+" to pc="+pc+", (key,val)=("+tab[p0]+","+values[p0]+"), saving "+(t-1)+" probes.");
-			this.totalProbesSaved += (t-1);
-			tab[pc] = key0;
-			stat[pc] = FULL;
-			values[pc] = values[p0];
-			i = p0; // prepare to insert: table[p0]=key
-			t = 0; // break loop 
-		}
-	}
-
-	//System.out.println("inserting at i="+i);
-	this.table[i]=key;
-	this.values[i]=value;
-	if (this.state[i]==FREE) this.freeEntries--;
-	this.state[i]=FULL;
-	this.distinct++;
-
-	if (this.freeEntries < 1) { //delta
-		int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
-		rehash(newCapacity);
-	}
+  /* 
+     This is open addressing with double hashing, using "Brent's variation".
+     Brent's variation slows insertions a bit down (not much) but reduces probes (collisions) for successful searches, in particular for large load factors.
+     (It does not improve unsuccessful searches.)
+     See D. Knuth, Searching and Sorting, 3rd ed., p.533-545
+  
+     h1(key) = hash % M
+     h2(key) = decrement = Max(1, hash/M % M)
+     M is prime = capacity = table.length
+     probing positions are table[(h1-j*h2) % M] for j=0,1,...
+     (M and h2 could also be chosen differently, but h2 is required to be relative prime to M.)
+  */
+  
+  int key0;
+  final int tab[] = table;
+  final byte stat[] = state;
+  final int length = tab.length;
+
+  int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+  int i = hash % length;
+  int decrement = (hash / length) % length;
+  if (decrement == 0) decrement = 1;
+  //System.out.println("insert search for (key,value)=("+key+","+value+") at i="+i+", dec="+decrement);
+
+  // stop if we find a removed or free slot, or if we find the key itself
+  // do NOT skip over removed slots (yes, open addressing is like that...)
+  //int comp = comparisons;
+  int t = 0;  // the number of probes
+  int p0 = i; // the first position to probe
+  while (stat[i] == FULL && tab[i] != key) {
+    t++;
+    i -= decrement;
+    //hashCollisions++;
+    if (i<0) i+=length;    
+  }
+  //if (comparisons-comp>0) System.out.println("probed "+(comparisons-comp)+" slots.");
+  if (stat[i] == FULL) {
+    // key already contained at slot i.
+    this.values[i]=value;
+    return false;
+  }
+  // not already contained, should be inserted at slot i.
+  
+  if (this.distinct > this.highWaterMark) {
+    int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+    
+    //System.out.print("grow rehashing ");
+    //System.out.println("at distinct="+distinct+", capacity="+table.length+" to newCapacity="+newCapacity+" ...");
+    
+    rehash(newCapacity);
+    return put(key, value);
+  }
+
+  /*
+  Brent's variation does a local reorganization to reduce probes. It essentially means:
+  We test whether it is possible to move the association we probed first (table[p0]) out of the way.
+  If this is possible, it will reduce probes for the key to be inserted, since it takes its place; it gets hit earlier.
+  However, future probes for the key that we move out of the way will increase.
+  Thus we only move it out of the way, if we have a net gain, that is, if we save more probes than we loose.
+  For the first probe we safe more than we loose if the number of probes we needed was >=2 (t>=2).
+  If the first probe cannot be moved out of the way, we try the next probe (p1).
+  Now we safe more than we loose if t>=3.
+  We repeat this until we find that we cannot gain or that we can indeed move p(x) out of the way.
+
+  Note: Under the great majority of insertions t<=1, so the loop is entered very infrequently.
+  */
+  while (t>1) {
+    //System.out.println("t="+t);
+    key0 = tab[p0];
+    hash = HashFunctions.hash(key0) & 0x7FFFFFFF;
+    decrement = (hash / length) % length;
+    if (decrement == 0) decrement = 1;
+    int pc = p0-decrement; // pc = (p0-j*decrement) % M, j=1,2,..
+    if (pc<0) pc += length;
+    
+    if (stat[pc] != FREE) { // not a free slot, continue searching for free slot to move to, or break.
+      p0 = pc;
+      t--;
+    }
+    else { // free or removed slot found, now move...
+      //System.out.println("copying p0="+p0+" to pc="+pc+", (key,val)=("+tab[p0]+","+values[p0]+"), saving "+(t-1)+" probes.");
+      this.totalProbesSaved += (t-1);
+      tab[pc] = key0;
+      stat[pc] = FULL;
+      values[pc] = values[p0];
+      i = p0; // prepare to insert: table[p0]=key
+      t = 0; // break loop 
+    }
+  }
+
+  //System.out.println("inserting at i="+i);
+  this.table[i]=key;
+  this.values[i]=value;
+  if (this.state[i]==FREE) this.freeEntries--;
+  this.state[i]=FULL;
+  this.distinct++;
+
+  if (this.freeEntries < 1) { //delta
+    int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+    rehash(newCapacity);
+  }
 
-	return true;
+  return true;
 }
 /**
  * Rehashes the contents of the receiver into a new table
@@ -172,39 +172,39 @@
  * number of keys in the receiver exceeds the high water mark or falls below the low water mark.
  */
 protected void rehash(int newCapacity) {
-	int oldCapacity = table.length;
-	//if (oldCapacity == newCapacity) return;
-	
-	int oldTable[] = table;
-	int oldValues[] = values;
-	byte oldState[] = state;
-
-	int newTable[] = new int[newCapacity];
-	int newValues[] = new int[newCapacity];
-	byte newState[] = new byte[newCapacity];
-
-	this.lowWaterMark  = chooseLowWaterMark(newCapacity,this.minLoadFactor);
-	this.highWaterMark = chooseHighWaterMark(newCapacity,this.maxLoadFactor);
-
-	this.table = newTable;
-	this.values = newValues;
-	this.state = newState;
-	this.freeEntries = newCapacity-this.distinct; // delta
-
-	int tmp = this.distinct;
-	this.distinct = Integer.MIN_VALUE; // switch of watermarks
-	for (int i = oldCapacity ; i-- > 0 ;) {
-		if (oldState[i]==FULL) {
-			put(oldTable[i], oldValues[i]);
-			/*
-			int element = oldTable[i];
-			int index = indexOfInsertion(element);
-			newTable[index]=element;
-			newValues[index]=oldValues[i];
-			newState[index]=FULL;
-			*/
-		}
-	}
-	this.distinct = tmp;
+  int oldCapacity = table.length;
+  //if (oldCapacity == newCapacity) return;
+  
+  int oldTable[] = table;
+  int oldValues[] = values;
+  byte oldState[] = state;
+
+  int newTable[] = new int[newCapacity];
+  int newValues[] = new int[newCapacity];
+  byte newState[] = new byte[newCapacity];
+
+  this.lowWaterMark  = chooseLowWaterMark(newCapacity,this.minLoadFactor);
+  this.highWaterMark = chooseHighWaterMark(newCapacity,this.maxLoadFactor);
+
+  this.table = newTable;
+  this.values = newValues;
+  this.state = newState;
+  this.freeEntries = newCapacity-this.distinct; // delta
+
+  int tmp = this.distinct;
+  this.distinct = Integer.MIN_VALUE; // switch of watermarks
+  for (int i = oldCapacity ; i-- > 0 ;) {
+    if (oldState[i]==FULL) {
+      put(oldTable[i], oldValues[i]);
+      /*
+      int element = oldTable[i];
+      int index = indexOfInsertion(element);
+      newTable[index]=element;
+      newValues[index]=oldValues[i];
+      newState[index]=FULL;
+      */
+    }
+  }
+  this.distinct = tmp;
 }
 }

Modified: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/package.html
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/package.html?rev=883973&r1=883972&r2=883973&view=diff
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/package.html (original)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/package.html Wed Nov 25 03:35:59 2009
@@ -88,17 +88,17 @@
   <li><b>Open</b> uses extendible open addressing with double hashing.
 </ul>
 <p>Class naming follows the schema <tt>&lt;Implementation&gt;&lt;KeyType&gt;&lt;ValueType&gt;HashMap</tt>.
-  For example, a {@link cern.colt.map.OpenIntDoubleHashMap} holds <tt>(int-->double)</tt>
-  associations and is implemented with open addressing. A {@link cern.colt.map.OpenIntObjectHashMap}
+  For example, a {@link org.apache.mahout.matrix.map.OpenIntDoubleHashMap} holds <tt>(int-->double)</tt>
+  associations and is implemented with open addressing. A {@link org.apache.mahout.matrix.map.OpenIntObjectHashMap}
   holds <tt>(int-->Object)</tt> associations and is implemented with open addressing.
 </p>
 
 <p>The classes for maps of a given (key,value) type are derived from a common
   abstract base class tagged <tt>Abstract&lt;KeyType&gt;&lt;ValueType&gt;</tt><tt>Map</tt>.
   For example, all maps operating on <tt>(int-->double)</tt> associations are
-  derived from {@link cern.colt.map.AbstractIntDoubleMap}, which in turn is derived
+  derived from {@link org.apache.mahout.matrix.map.AbstractIntDoubleMap}, which in turn is derived
   from an abstract base class tying together all maps regardless of assocation
-  type, {@link cern.colt.map.AbstractMap}. The abstract base classes provide skeleton
+  type, {@link org.apache.mahout.matrix.map.AbstractMap}. The abstract base classes provide skeleton
   implementations for all but few methods. Experimental layouts (such as chaining,
   open addressing, extensible hashing, red-black-trees, etc.) can easily be implemented
   and inherit a rich set of functionality. Have a look at the javadoc <a href="package-tree.html">tree