You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by gs...@apache.org on 2009/11/23 16:14:38 UTC

svn commit: r883365 [25/47] - in /lucene/mahout/trunk: ./ examples/ matrix/ matrix/src/ matrix/src/main/ matrix/src/main/java/ matrix/src/main/java/org/ matrix/src/main/java/org/apache/ matrix/src/main/java/org/apache/mahout/ matrix/src/main/java/org/a...

Added: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntIntHashMap.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntIntHashMap.java?rev=883365&view=auto
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntIntHashMap.java (added)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntIntHashMap.java Mon Nov 23 15:14:26 2009
@@ -0,0 +1,512 @@
+/*
+Copyright � 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.colt.map;
+
+import org.apache.mahout.colt.function.IntIntProcedure;
+import org.apache.mahout.colt.function.IntProcedure;
+import org.apache.mahout.colt.list.ByteArrayList;
+import org.apache.mahout.colt.list.IntArrayList;
+/**
+Hash map holding (key,value) associations of type <tt>(int-->int)</tt>; Automatically grows and shrinks as needed; Implemented using open addressing with double hashing.
+First see the <a href="package-summary.html">package summary</a> and javadoc <a href="package-tree.html">tree view</a> to get the broad picture.
+
+Overrides many methods for performance reasons only.
+ 
+@author wolfgang.hoschek@cern.ch
+@version 1.0, 09/24/99
+@see	    java.util.HashMap
+*/
+/** 
+ * @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported.
+ */
+@Deprecated
+public class OpenIntIntHashMap extends AbstractIntIntMap {
+	 /**
+	 * The hash table keys.
+	 * @serial
+	 */
+	protected int table[];
+
+	 /**
+	 * The hash table values.
+	 * @serial
+	 */
+	protected int values[];
+
+	/**
+	 * The state of each hash table entry (FREE, FULL, REMOVED).
+	 * @serial
+	 */
+	protected byte state[];
+
+	/**
+	 * The number of table entries in state==FREE.
+	 * @serial
+	 */
+	protected int freeEntries;
+
+	
+	protected static final byte FREE = 0;
+	protected static final byte FULL = 1;
+	protected static final byte REMOVED = 2;
+
+/**
+ * Constructs an empty map with default capacity and default load factors.
+ */
+public OpenIntIntHashMap() {
+	this(defaultCapacity);
+}
+/**
+ * Constructs an empty map with the specified initial capacity and default load factors.
+ *
+ * @param      initialCapacity   the initial capacity of the map.
+ * @throws     IllegalArgumentException if the initial capacity is less
+ *             than zero.
+ */
+public OpenIntIntHashMap(int initialCapacity) {
+	this(initialCapacity, defaultMinLoadFactor, defaultMaxLoadFactor);
+}
+/**
+ * Constructs an empty map with
+ * the specified initial capacity and the specified minimum and maximum load factor.
+ *
+ * @param      initialCapacity   the initial capacity.
+ * @param      minLoadFactor        the minimum load factor.
+ * @param      maxLoadFactor        the maximum load factor.
+ * @throws	IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
+ */
+public OpenIntIntHashMap(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
+	setUp(initialCapacity,minLoadFactor,maxLoadFactor);
+}
+/**
+ * Removes all (key,value) associations from the receiver.
+ * Implicitly calls <tt>trimToSize()</tt>.
+ */
+public void clear() {
+	new ByteArrayList(this.state).fillFromToWith(0, this.state.length-1, FREE);
+   	//new IntArrayList(values).fillFromToWith(0, state.length-1, 0); // delta
+
+	this.distinct = 0;
+	this.freeEntries = table.length; // delta
+	trimToSize();
+}
+/**
+ * Returns a deep copy of the receiver.
+ *
+ * @return  a deep copy of the receiver.
+ */
+public Object clone() {
+	OpenIntIntHashMap copy = (OpenIntIntHashMap) super.clone();
+	copy.table = (int[]) copy.table.clone();
+	copy.values = (int[]) copy.values.clone();
+	copy.state = (byte[]) copy.state.clone();
+	return copy;
+}
+/**
+ * Returns <tt>true</tt> if the receiver contains the specified key.
+ *
+ * @return <tt>true</tt> if the receiver contains the specified key.
+ */
+public boolean containsKey(int key) {
+	return indexOfKey(key) >= 0;
+}
+/**
+ * Returns <tt>true</tt> if the receiver contains the specified value.
+ *
+ * @return <tt>true</tt> if the receiver contains the specified value.
+ */
+public boolean containsValue(int value) {
+	return indexOfValue(value) >= 0;
+}
+/**
+ * Ensures that the receiver can hold at least the specified number of associations without needing to allocate new internal memory.
+ * If necessary, allocates new internal memory and increases the capacity of the receiver.
+ * <p>
+ * This method never need be called; it is for performance tuning only.
+ * Calling this method before <tt>put()</tt>ing a large number of associations boosts performance,
+ * because the receiver will grow only once instead of potentially many times and hash collisions get less probable.
+ *
+ * @param   minCapacity   the desired minimum capacity.
+ */
+public void ensureCapacity(int minCapacity) {
+	if (table.length < minCapacity) {
+		int newCapacity = nextPrime(minCapacity);
+		rehash(newCapacity);
+	}
+}
+/**
+ * Applies a procedure to each key of the receiver, if any.
+ * Note: Iterates over the keys in no particular order.
+ * Subclasses can define a particular order, for example, "sorted by key".
+ * All methods which <i>can</i> be expressed in terms of this method (most methods can) <i>must guarantee</i> to use the <i>same</i> order defined by this method, even if it is no particular order.
+ * This is necessary so that, for example, methods <tt>keys</tt> and <tt>values</tt> will yield association pairs, not two uncorrelated lists.
+ *
+ * @param procedure    the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise continues. 
+ * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise. 
+ */
+public boolean forEachKey(IntProcedure procedure) {
+	for (int i = table.length ; i-- > 0 ;) {
+		if (state[i]==FULL) if (! procedure.apply(table[i])) return false;
+	}
+	return true;
+}
+/**
+ * Applies a procedure to each (key,value) pair of the receiver, if any.
+ * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(IntProcedure)}.
+*
+ * @param procedure    the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise continues. 
+ * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise. 
+ */
+public boolean forEachPair(final IntIntProcedure procedure) {
+	for (int i = table.length ; i-- > 0 ;) {
+		if (state[i]==FULL) if (! procedure.apply(table[i],values[i])) return false;
+	}
+	return true;
+}
+/**
+ * Returns the value associated with the specified key.
+ * It is often a good idea to first check with {@link #containsKey(int)} whether the given key has a value associated or not, i.e. whether there exists an association for the given key or not.
+ *
+ * @param key the key to be searched for.
+ * @return the value associated with the specified key; <tt>0</tt> if no such key is present.
+ */
+public int get(int key) {
+	int i = indexOfKey(key);
+	if (i<0) return 0; //not contained
+	return values[i];
+}
+/**
+ * @param key the key to be added to the receiver.
+ * @return the index where the key would need to be inserted, if it is not already contained.
+ * Returns -index-1 if the key is already contained at slot index.
+ * Therefore, if the returned index < 0, then it is already contained at slot -index-1.
+ * If the returned index >= 0, then it is NOT already contained and should be inserted at slot index.
+ */
+protected int indexOfInsertion(int key) {
+	//System.out.println("key="+key);
+	final int tab[] = table;
+	final byte stat[] = state;
+	final int length = tab.length;
+
+	final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+	int i = hash % length;
+	int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
+	//int decrement = (hash / length) % length;
+	if (decrement == 0) decrement = 1;
+
+	// stop if we find a removed or free slot, or if we find the key itself
+	// do NOT skip over removed slots (yes, open addressing is like that...)
+	while (stat[i] == FULL && tab[i] != key) {
+		i -= decrement;
+		//hashCollisions++;
+		if (i<0) i+=length;
+	}
+	
+	if (stat[i] == REMOVED) {
+		// stop if we find a free slot, or if we find the key itself.
+		// do skip over removed slots (yes, open addressing is like that...)
+		// assertion: there is at least one FREE slot.
+		int j = i;
+		while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
+			i -= decrement;
+			//hashCollisions++;
+			if (i<0) i+=length;
+		}
+		if (stat[i] == FREE) i = j;
+	}
+	
+	
+	if (stat[i] == FULL) {
+		// key already contained at slot i.
+		// return a negative number identifying the slot.
+		return -i-1;
+	}
+	// not already contained, should be inserted at slot i.
+	// return a number >= 0 identifying the slot.
+	return i; 
+}
+/**
+ * @param key the key to be searched in the receiver.
+ * @return the index where the key is contained in the receiver, returns -1 if the key was not found.
+ */
+protected int indexOfKey(int key) {
+	final int tab[] = table;
+	final byte stat[] = state;
+	final int length = tab.length;
+
+	final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+	int i = hash % length;
+	int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
+	//int decrement = (hash / length) % length;
+	if (decrement == 0) decrement = 1;
+
+	// stop if we find a free slot, or if we find the key itself.
+	// do skip over removed slots (yes, open addressing is like that...)
+	while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
+		i -= decrement;
+		//hashCollisions++;
+		if (i<0) i+=length;
+	}
+	
+	if (stat[i] == FREE) return -1; // not found
+	return i; //found, return index where key is contained
+}
+/**
+ * @param value the value to be searched in the receiver.
+ * @return the index where the value is contained in the receiver, returns -1 if the value was not found.
+ */
+protected int indexOfValue(int value) {
+	final int val[] = values;
+	final byte stat[] = state;
+
+	for (int i=stat.length; --i >= 0;) {
+		if (stat[i]==FULL && val[i]==value) return i;
+	}
+
+	return -1; // not found
+}
+/**
+ * Returns the first key the given value is associated with.
+ * It is often a good idea to first check with {@link #containsValue(int)} whether there exists an association from a key to this value.
+ * Search order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(IntProcedure)}.
+ *
+ * @param value the value to search for.
+ * @return the first key for which holds <tt>get(key) == value</tt>; 
+ *		   returns <tt>Integer.MIN_VALUE</tt> if no such key exists.
+ */
+public int keyOf(int value) {
+	//returns the first key found; there may be more matching keys, however.
+	int i = indexOfValue(value);
+	if (i<0) return Integer.MIN_VALUE;
+	return table[i];
+}
+/**
+ * Fills all keys contained in the receiver into the specified list.
+ * Fills the list, starting at index 0.
+ * After this call returns the specified list has a new size that equals <tt>this.size()</tt>.
+ * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(IntProcedure)}.
+ * <p>
+ * This method can be used to iterate over the keys of the receiver.
+ *
+ * @param list the list to be filled, can have any size.
+ */
+public void keys(IntArrayList list) {
+	list.setSize(distinct);
+	int[] elements = list.elements();
+	
+	int[] tab = table;
+	byte[] stat = state;
+	
+	int j=0;
+	for (int i = tab.length ; i-- > 0 ;) {
+		if (stat[i]==FULL) elements[j++]=tab[i];
+	}
+}
+/**
+Fills all pairs satisfying a given condition into the specified lists.
+Fills into the lists, starting at index 0.
+After this call returns the specified lists both have a new size, the number of pairs satisfying the condition.
+Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(IntProcedure)}.
+<p>
+<b>Example:</b>
+<br>
+<pre>
+IntIntProcedure condition = new IntIntProcedure() { // match even keys only
+	public boolean apply(int key, int value) { return key%2==0; }
+}
+keys = (8,7,6), values = (1,2,2) --> keyList = (6,8), valueList = (2,1)</tt>
+</pre>
+
+@param condition    the condition to be matched. Takes the current key as first and the current value as second argument.
+@param keyList the list to be filled with keys, can have any size.
+@param valueList the list to be filled with values, can have any size.
+*/
+public void pairsMatching(final IntIntProcedure condition, final IntArrayList keyList, final IntArrayList valueList) {
+	keyList.clear();
+	valueList.clear();
+	
+	for (int i = table.length ; i-- > 0 ;) {
+		if (state[i]==FULL && condition.apply(table[i],values[i])) {
+			keyList.add(table[i]);
+			valueList.add(values[i]);
+		}
+	}
+}
+/**
+ * Associates the given key with the given value.
+ * Replaces any old <tt>(key,someOtherValue)</tt> association, if existing.
+ *
+ * @param key the key the value shall be associated with.
+ * @param value the value to be associated.
+ * @return <tt>true</tt> if the receiver did not already contain such a key;
+ *         <tt>false</tt> if the receiver did already contain such a key - the new value has now replaced the formerly associated value.
+ */
+public boolean put(int key, int value) {
+	int i = indexOfInsertion(key);	
+	if (i<0) { //already contained
+		i = -i -1;
+		this.values[i]=value;
+		return false;
+	}
+
+	if (this.distinct > this.highWaterMark) {
+		int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+		
+		//System.out.print("grow rehashing ");
+		//System.out.println("at distinct="+distinct+", capacity="+table.length+" to newCapacity="+newCapacity+" ...");
+		
+		rehash(newCapacity);
+		return put(key, value);
+	}
+
+	this.table[i]=key;
+	this.values[i]=value;
+	if (this.state[i]==FREE) this.freeEntries--;
+	this.state[i]=FULL;
+	this.distinct++;
+
+	if (this.freeEntries < 1) { //delta
+		int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+		rehash(newCapacity);
+	}
+
+	return true;
+}
+/**
+ * Rehashes the contents of the receiver into a new table
+ * with a smaller or larger capacity.
+ * This method is called automatically when the
+ * number of keys in the receiver exceeds the high water mark or falls below the low water mark.
+ */
+protected void rehash(int newCapacity) {
+	int oldCapacity = table.length;
+	//if (oldCapacity == newCapacity) return;
+	
+	int oldTable[] = table;
+	int oldValues[] = values;
+	byte oldState[] = state;
+
+	int newTable[] = new int[newCapacity];
+	int newValues[] = new int[newCapacity];
+	byte newState[] = new byte[newCapacity];
+
+	this.lowWaterMark  = chooseLowWaterMark(newCapacity,this.minLoadFactor);
+	this.highWaterMark = chooseHighWaterMark(newCapacity,this.maxLoadFactor);
+
+	this.table = newTable;
+	this.values = newValues;
+	this.state = newState;
+	this.freeEntries = newCapacity-this.distinct; // delta
+	
+	for (int i = oldCapacity ; i-- > 0 ;) {
+		if (oldState[i]==FULL) {
+			int element = oldTable[i];
+			int index = indexOfInsertion(element);
+			newTable[index]=element;
+			newValues[index]=oldValues[i];
+			newState[index]=FULL;
+		}
+	}
+}
+/**
+ * Removes the given key with its associated element from the receiver, if present.
+ *
+ * @param key the key to be removed from the receiver.
+ * @return <tt>true</tt> if the receiver contained the specified key, <tt>false</tt> otherwise.
+ */
+public boolean removeKey(int key) {
+	int i = indexOfKey(key);
+	if (i<0) return false; // key not contained
+
+	this.state[i]=REMOVED;
+	//this.values[i]=0; // delta
+	this.distinct--;
+
+	if (this.distinct < this.lowWaterMark) {
+		int newCapacity = chooseShrinkCapacity(this.distinct,this.minLoadFactor, this.maxLoadFactor);
+		/*
+		if (table.length != newCapacity) {
+			System.out.print("shrink rehashing ");
+			System.out.println("at distinct="+distinct+", capacity="+table.length+" to newCapacity="+newCapacity+" ...");
+		}
+		*/
+		rehash(newCapacity);
+	}
+	
+	return true;	
+}
+/**
+ * Initializes the receiver.
+ *
+ * @param      initialCapacity   the initial capacity of the receiver.
+ * @param      minLoadFactor        the minLoadFactor of the receiver.
+ * @param      maxLoadFactor        the maxLoadFactor of the receiver.
+ * @throws	IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
+ */
+protected void setUp(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
+	int capacity = initialCapacity;
+	super.setUp(capacity, minLoadFactor, maxLoadFactor);
+	capacity = nextPrime(capacity);
+	if (capacity==0) capacity=1; // open addressing needs at least one FREE slot at any time.
+	
+	this.table = new int[capacity];
+	this.values = new int[capacity];
+	this.state = new byte[capacity];
+
+	// memory will be exhausted long before this pathological case happens, anyway.
+	this.minLoadFactor = minLoadFactor;
+	if (capacity == PrimeFinder.largestPrime) this.maxLoadFactor = 1.0;
+	else this.maxLoadFactor = maxLoadFactor;
+
+	this.distinct = 0;
+	this.freeEntries = capacity; // delta
+	
+	// lowWaterMark will be established upon first expansion.
+	// establishing it now (upon instance construction) would immediately make the table shrink upon first put(...).
+	// After all the idea of an "initialCapacity" implies violating lowWaterMarks when an object is young.
+	// See ensureCapacity(...)
+	this.lowWaterMark = 0; 
+	this.highWaterMark = chooseHighWaterMark(capacity, this.maxLoadFactor);
+}
+/**
+ * Trims the capacity of the receiver to be the receiver's current 
+ * size. Releases any superfluous internal memory. An application can use this operation to minimize the 
+ * storage of the receiver.
+ */
+public void trimToSize() {
+	// * 1.2 because open addressing's performance exponentially degrades beyond that point
+	// so that even rehashing the table can take very long
+	int newCapacity = nextPrime((int)(1 + 1.2*size()));
+	if (table.length > newCapacity) {
+		rehash(newCapacity);
+	}
+}
+/**
+ * Fills all values contained in the receiver into the specified list.
+ * Fills the list, starting at index 0.
+ * After this call returns the specified list has a new size that equals <tt>this.size()</tt>.
+ * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(IntProcedure)}.
+ * <p>
+ * This method can be used to iterate over the values of the receiver.
+ *
+ * @param list the list to be filled, can have any size.
+ */
+public void values(IntArrayList list) {
+	list.setSize(distinct);
+	int[] elements = list.elements();
+	
+	int[] val = values;
+	byte[] stat = state;
+	
+	int j=0;
+	for (int i = stat.length ; i-- > 0 ;) {
+		if (stat[i]==FULL) elements[j++]=val[i];
+	}
+}
+}

Propchange: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntIntHashMap.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntObjectHashMap.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntObjectHashMap.java?rev=883365&view=auto
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntObjectHashMap.java (added)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntObjectHashMap.java Mon Nov 23 15:14:26 2009
@@ -0,0 +1,502 @@
+/*
+Copyright � 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.colt.map;
+
+import org.apache.mahout.colt.function.IntObjectProcedure;
+import org.apache.mahout.colt.function.IntProcedure;
+import org.apache.mahout.colt.list.ByteArrayList;
+import org.apache.mahout.colt.list.IntArrayList;
+import org.apache.mahout.colt.list.ObjectArrayList;
+/**
+Hash map holding (key,value) associations of type <tt>(int-->Object)</tt>; Automatically grows and shrinks as needed; Implemented using open addressing with double hashing.
+First see the <a href="package-summary.html">package summary</a> and javadoc <a href="package-tree.html">tree view</a> to get the broad picture.
+
+Overrides many methods for performance reasons only.
+
+@author wolfgang.hoschek@cern.ch
+@version 1.0, 09/24/99
+@see	    java.util.HashMap
+*/
+/** 
+ * @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported.
+ */
+@Deprecated
+public class OpenIntObjectHashMap extends AbstractIntObjectMap {
+	 /**
+	 * The hash table keys.
+	 * @serial
+	 */
+	protected int table[];
+
+	 /**
+	 * The hash table values.
+	 * @serial
+	 */
+	protected Object values[];
+
+	/**
+	 * The state of each hash table entry (FREE, FULL, REMOVED).
+	 * @serial
+	 */
+	protected byte state[];
+	
+	/**
+	 * The number of table entries in state==FREE.
+	 * @serial
+	 */
+	protected int freeEntries;
+
+	
+	protected static final byte FREE = 0;
+	protected static final byte FULL = 1;
+	protected static final byte REMOVED = 2;
+
+/**
+ * Constructs an empty map with default capacity and default load factors.
+ */
+public OpenIntObjectHashMap() {
+	this(defaultCapacity);
+}
+/**
+ * Constructs an empty map with the specified initial capacity and default load factors.
+ *
+ * @param      initialCapacity   the initial capacity of the map.
+ * @throws     IllegalArgumentException if the initial capacity is less
+ *             than zero.
+ */
+public OpenIntObjectHashMap(int initialCapacity) {
+	this(initialCapacity, defaultMinLoadFactor, defaultMaxLoadFactor);
+}
+/**
+ * Constructs an empty map with
+ * the specified initial capacity and the specified minimum and maximum load factor.
+ *
+ * @param      initialCapacity   the initial capacity.
+ * @param      minLoadFactor        the minimum load factor.
+ * @param      maxLoadFactor        the maximum load factor.
+ * @throws	IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
+ */
+public OpenIntObjectHashMap(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
+	setUp(initialCapacity,minLoadFactor,maxLoadFactor);
+}
+/**
+ * Removes all (key,value) associations from the receiver.
+ * Implicitly calls <tt>trimToSize()</tt>.
+ */
+public void clear() {
+	new ByteArrayList(this.state).fillFromToWith(0, this.state.length-1, FREE);
+   	new ObjectArrayList(values).fillFromToWith(0, state.length-1, null); // delta
+
+	this.distinct = 0;
+	this.freeEntries = table.length; // delta
+	trimToSize();
+}
+/**
+ * Returns a deep copy of the receiver.
+ *
+ * @return  a deep copy of the receiver.
+ */
+public Object clone() {
+	OpenIntObjectHashMap copy = (OpenIntObjectHashMap) super.clone();
+	copy.table = (int[]) copy.table.clone();
+	copy.values = (Object[]) copy.values.clone();
+	copy.state = (byte[]) copy.state.clone();
+	return copy;
+}
+/**
+ * Returns <tt>true</tt> if the receiver contains the specified key.
+ *
+ * @return <tt>true</tt> if the receiver contains the specified key.
+ */
+public boolean containsKey(int key) {
+	return indexOfKey(key) >= 0;
+}
+/**
+ * Returns <tt>true</tt> if the receiver contains the specified value.
+ *
+ * @return <tt>true</tt> if the receiver contains the specified value.
+ */
+public boolean containsValue(Object value) {
+	return indexOfValue(value) >= 0;
+}
+/**
+ * Ensures that the receiver can hold at least the specified number of associations without needing to allocate new internal memory.
+ * If necessary, allocates new internal memory and increases the capacity of the receiver.
+ * <p>
+ * This method never need be called; it is for performance tuning only.
+ * Calling this method before <tt>put()</tt>ing a large number of associations boosts performance,
+ * because the receiver will grow only once instead of potentially many times and hash collisions get less probable.
+ *
+ * @param   minCapacity   the desired minimum capacity.
+ */
+public void ensureCapacity(int minCapacity) {
+	if (table.length < minCapacity) {
+		int newCapacity = nextPrime(minCapacity);
+		rehash(newCapacity);
+	}
+}
+/**
+ * Applies a procedure to each key of the receiver, if any.
+ * Note: Iterates over the keys in no particular order.
+ * Subclasses can define a particular order, for example, "sorted by key".
+ * All methods which <i>can</i> be expressed in terms of this method (most methods can) <i>must guarantee</i> to use the <i>same</i> order defined by this method, even if it is no particular order.
+ * This is necessary so that, for example, methods <tt>keys</tt> and <tt>values</tt> will yield association pairs, not two uncorrelated lists.
+ *
+ * @param procedure    the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise continues. 
+ * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise. 
+ */
+public boolean forEachKey(IntProcedure procedure) {
+	for (int i = table.length ; i-- > 0 ;) {
+		if (state[i]==FULL) if (! procedure.apply(table[i])) return false;
+	}
+	return true;
+}
+/**
+ * Applies a procedure to each (key,value) pair of the receiver, if any.
+ * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(IntProcedure)}.
+ *
+ * @param procedure    the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise continues. 
+ * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise. 
+ */
+public boolean forEachPair(final IntObjectProcedure procedure) {
+	for (int i = table.length ; i-- > 0 ;) {
+		if (state[i]==FULL) if (! procedure.apply(table[i],values[i])) return false;
+	}
+	return true;
+}
+/**
+ * Returns the value associated with the specified key.
+ * It is often a good idea to first check with {@link #containsKey(int)} whether the given key has a value associated or not, i.e. whether there exists an association for the given key or not.
+ *
+ * @param key the key to be searched for.
+ * @return the value associated with the specified key; <tt>null</tt> if no such key is present.
+ */
+public Object get(int key) {
+	int i = indexOfKey(key);
+	if (i<0) return null; //not contained
+	return values[i];
+}
+/**
+ * @param key the key to be added to the receiver.
+ * @return the index where the key would need to be inserted, if it is not already contained.
+ * Returns -index-1 if the key is already contained at slot index.
+ * Therefore, if the returned index < 0, then it is already contained at slot -index-1.
+ * If the returned index >= 0, then it is NOT already contained and should be inserted at slot index.
+ */
+protected int indexOfInsertion(int key) {
+	final int tab[] = table;
+	final byte stat[] = state;
+	final int length = tab.length;
+
+	final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+	int i = hash % length;
+	int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
+	//int decrement = (hash / length) % length;
+	if (decrement == 0) decrement = 1;
+
+	// stop if we find a removed or free slot, or if we find the key itself
+	// do NOT skip over removed slots (yes, open addressing is like that...)
+	while (stat[i] == FULL && tab[i] != key) {
+		i -= decrement;
+		//hashCollisions++;
+		if (i<0) i+=length;
+	}
+	
+	if (stat[i] == REMOVED) {
+		// stop if we find a free slot, or if we find the key itself.
+		// do skip over removed slots (yes, open addressing is like that...)
+		// assertion: there is at least one FREE slot.
+		int j = i;
+		while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
+			i -= decrement;
+			//hashCollisions++;
+			if (i<0) i+=length;
+		}
+		if (stat[i] == FREE) i = j;
+	}
+	
+	
+	if (stat[i] == FULL) {
+		// key already contained at slot i.
+		// return a negative number identifying the slot.
+		return -i-1;
+	}
+	// not already contained, should be inserted at slot i.
+	// return a number >= 0 identifying the slot.
+	return i; 
+}
+/**
+ * @param key the key to be searched in the receiver.
+ * @return the index where the key is contained in the receiver, returns -1 if the key was not found.
+ */
+protected int indexOfKey(int key) {
+	final int tab[] = table;
+	final byte stat[] = state;
+	final int length = tab.length;
+
+	final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+	int i = hash % length;
+	int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
+	//int decrement = (hash / length) % length;
+	if (decrement == 0) decrement = 1;
+
+	// stop if we find a free slot, or if we find the key itself.
+	// do skip over removed slots (yes, open addressing is like that...)
+	while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
+		i -= decrement;
+		//hashCollisions++;
+		if (i<0) i+=length;
+	}
+	
+	if (stat[i] == FREE) return -1; // not found
+	return i; //found, return index where key is contained
+}
+/**
+ * @param value the value to be searched in the receiver.
+ * @return the index where the value is contained in the receiver, returns -1 if the value was not found.
+ */
+protected int indexOfValue(Object value) {
+	final Object val[] = values;
+	final byte stat[] = state;
+
+	for (int i=stat.length; --i >= 0;) {
+		if (stat[i]==FULL && val[i]==value) return i;
+	}
+
+	return -1; // not found
+}
+/**
+ * Returns the first key the given value is associated with.
+ * It is often a good idea to first check with {@link #containsValue(Object)} whether there exists an association from a key to this value.
+ * Search order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(IntProcedure)}.
+ *
+ * @param value the value to search for.
+ * @return the first key for which holds <tt>get(key) == value</tt>; 
+ *		   returns <tt>Integer.MIN_VALUE</tt> if no such key exists.
+ */
+public int keyOf(Object value) {
+	//returns the first key found; there may be more matching keys, however.
+	int i = indexOfValue(value);
+	if (i<0) return Integer.MIN_VALUE;
+	return table[i];
+}
+/**
+ * Fills all keys contained in the receiver into the specified list.
+ * Fills the list, starting at index 0.
+ * After this call returns the specified list has a new size that equals <tt>this.size()</tt>.
+ * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(IntProcedure)}.
+ * <p>
+ * This method can be used to iterate over the keys of the receiver.
+ *
+ * @param list the list to be filled, can have any size.
+ */
+public void keys(IntArrayList list) {
+	list.setSize(distinct);
+	int[] elements = list.elements();
+	
+	int[] tab = table;
+	byte[] stat = state;
+	
+	int j=0;
+	for (int i = tab.length ; i-- > 0 ;) {
+		if (stat[i]==FULL) elements[j++]=tab[i];
+	}
+}
+/**
+Fills all pairs satisfying a given condition into the specified lists.
+Fills into the lists, starting at index 0.
+After this call returns the specified lists both have a new size, the number of pairs satisfying the condition.
+Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(IntProcedure)}.
+<p>
+<b>Example:</b>
+<br>
+<pre>
+IntObjectProcedure condition = new IntObjectProcedure() { // match even keys only
+	public boolean apply(int key, Object value) { return key%2==0; }
+}
+keys = (8,7,6), values = (1,2,2) --> keyList = (6,8), valueList = (2,1)</tt>
+</pre>
+
+@param condition    the condition to be matched. Takes the current key as first and the current value as second argument.
+@param keyList the list to be filled with keys, can have any size.
+@param valueList the list to be filled with values, can have any size.
+*/
+public void pairsMatching(final IntObjectProcedure condition, final IntArrayList keyList, final ObjectArrayList valueList) {
+	keyList.clear();
+	valueList.clear();
+	
+	for (int i = table.length ; i-- > 0 ;) {
+		if (state[i]==FULL && condition.apply(table[i],values[i])) {
+			keyList.add(table[i]);
+			valueList.add(values[i]);
+		}
+	}
+}
+/**
+ * Associates the given key with the given value.
+ * Replaces any old <tt>(key,someOtherValue)</tt> association, if existing.
+ *
+ * @param key the key the value shall be associated with.
+ * @param value the value to be associated.
+ * @return <tt>true</tt> if the receiver did not already contain such a key;
+ *         <tt>false</tt> if the receiver did already contain such a key - the new value has now replaced the formerly associated value.
+ */
+public boolean put(int key, Object value) {
+	int i = indexOfInsertion(key);	
+	if (i<0) { //already contained
+		i = -i -1;
+		this.values[i]=value;
+		return false;
+	}
+
+	if (this.distinct > this.highWaterMark) {
+		int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+		rehash(newCapacity);
+		return put(key, value);
+	}
+
+	this.table[i]=key;
+	this.values[i]=value;
+	if (this.state[i]==FREE) this.freeEntries--;
+	this.state[i]=FULL;
+	this.distinct++;
+
+	if (this.freeEntries < 1) { //delta
+		int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+		rehash(newCapacity);
+	}
+
+	return true;
+}
+/**
+ * Rehashes the contents of the receiver into a new table
+ * with a smaller or larger capacity.
+ * This method is called automatically when the
+ * number of keys in the receiver exceeds the high water mark or falls below the low water mark.
+ */
+protected void rehash(int newCapacity) {
+	int oldCapacity = table.length;
+	//if (oldCapacity == newCapacity) return;
+	
+	int oldTable[] = table;
+	Object oldValues[] = values;
+	byte oldState[] = state;
+
+	int newTable[] = new int[newCapacity];
+	Object newValues[] = new Object[newCapacity];
+	byte newState[] = new byte[newCapacity];
+
+	this.lowWaterMark  = chooseLowWaterMark(newCapacity,this.minLoadFactor);
+	this.highWaterMark = chooseHighWaterMark(newCapacity,this.maxLoadFactor);
+
+	this.table = newTable;
+	this.values = newValues;
+	this.state = newState;
+	this.freeEntries = newCapacity-this.distinct; // delta
+	
+	for (int i = oldCapacity ; i-- > 0 ;) {
+		if (oldState[i]==FULL) {
+			int element = oldTable[i];
+			int index = indexOfInsertion(element);
+			newTable[index]=element;
+			newValues[index]=oldValues[i];
+			newState[index]=FULL;
+		}
+	}
+}
+/**
+ * Removes the given key with its associated element from the receiver, if present.
+ *
+ * @param key the key to be removed from the receiver.
+ * @return <tt>true</tt> if the receiver contained the specified key, <tt>false</tt> otherwise.
+ */
+public boolean removeKey(int key) {
+	int i = indexOfKey(key);
+	if (i<0) return false; // key not contained
+
+	this.state[i]=REMOVED;
+	this.values[i]=null; // delta
+	this.distinct--;
+
+	if (this.distinct < this.lowWaterMark) {
+		int newCapacity = chooseShrinkCapacity(this.distinct,this.minLoadFactor, this.maxLoadFactor);
+		rehash(newCapacity);
+	}
+	
+	return true;	
+}
+/**
+ * Initializes the receiver.
+ *
+ * @param      initialCapacity   the initial capacity of the receiver.
+ * @param      minLoadFactor        the minLoadFactor of the receiver.
+ * @param      maxLoadFactor        the maxLoadFactor of the receiver.
+ * @throws	IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
+ */
+protected void setUp(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
+	int capacity = initialCapacity;
+	super.setUp(capacity, minLoadFactor, maxLoadFactor);
+	capacity = nextPrime(capacity);
+	if (capacity==0) capacity=1; // open addressing needs at least one FREE slot at any time.
+	
+	this.table = new int[capacity];
+	this.values = new Object[capacity];
+	this.state = new byte[capacity];
+
+	// memory will be exhausted long before this pathological case happens, anyway.
+	this.minLoadFactor = minLoadFactor;
+	if (capacity == PrimeFinder.largestPrime) this.maxLoadFactor = 1.0;
+	else this.maxLoadFactor = maxLoadFactor;
+
+	this.distinct = 0;
+	this.freeEntries = capacity; // delta
+	
+	// lowWaterMark will be established upon first expansion.
+	// establishing it now (upon instance construction) would immediately make the table shrink upon first put(...).
+	// After all the idea of an "initialCapacity" implies violating lowWaterMarks when an object is young.
+	// See ensureCapacity(...)
+	this.lowWaterMark = 0; 
+	this.highWaterMark = chooseHighWaterMark(capacity, this.maxLoadFactor);
+}
+/**
+ * Trims the capacity of the receiver to be the receiver's current 
+ * size. Releases any superfluous internal memory. An application can use this operation to minimize the 
+ * storage of the receiver.
+ */
+public void trimToSize() {
+	// * 1.2 because open addressing's performance exponentially degrades beyond that point
+	// so that even rehashing the table can take very long
+	int newCapacity = nextPrime((int)(1 + 1.2*size()));
+	if (table.length > newCapacity) {
+		rehash(newCapacity);
+	}
+}
+/**
+ * Fills all values contained in the receiver into the specified list.
+ * Fills the list, starting at index 0.
+ * After this call returns the specified list has a new size that equals <tt>this.size()</tt>.
+ * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(IntProcedure)}.
+ * <p>
+ * This method can be used to iterate over the values of the receiver.
+ *
+ * @param list the list to be filled, can have any size.
+ */
+public void values(ObjectArrayList list) {
+	list.setSize(distinct);
+	Object[] elements = list.elements();
+	
+	Object[] val = values;
+	byte[] stat = state;
+	
+	int j=0;
+	for (int i = stat.length ; i-- > 0 ;) {
+		if (stat[i]==FULL) elements[j++]=val[i];
+	}
+}
+}

Propchange: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntObjectHashMap.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenLongObjectHashMap.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenLongObjectHashMap.java?rev=883365&view=auto
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenLongObjectHashMap.java (added)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenLongObjectHashMap.java Mon Nov 23 15:14:26 2009
@@ -0,0 +1,502 @@
+/*
+Copyright � 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.colt.map;
+
+import org.apache.mahout.colt.function.LongObjectProcedure;
+import org.apache.mahout.colt.function.LongProcedure;
+import org.apache.mahout.colt.list.ByteArrayList;
+import org.apache.mahout.colt.list.LongArrayList;
+import org.apache.mahout.colt.list.ObjectArrayList;
+/**
+Hash map holding (key,value) associations of type <tt>(long-->Object)</tt>; Automatically grows and shrinks as needed; Implemented using open addressing with double hashing.
+First see the <a href="package-summary.html">package summary</a> and javadoc <a href="package-tree.html">tree view</a> to get the broad picture.
+
+Overrides many methods for performance reasons only.
+
+@author wolfgang.hoschek@cern.ch
+@version 1.0, 09/24/99
+@see	    java.util.HashMap
+*/
+/** 
+ * @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported.
+ */
+@Deprecated
+public class OpenLongObjectHashMap extends AbstractLongObjectMap {
+	 /**
+	 * The hash table keys.
+	 * @serial
+	 */
+	protected long table[];
+
+	 /**
+	 * The hash table values.
+	 * @serial
+	 */
+	protected Object values[];
+
+	/**
+	 * The state of each hash table entry (FREE, FULL, REMOVED).
+	 * @serial
+	 */
+	protected byte state[];
+
+	/**
+	 * The number of table entries in state==FREE.
+	 * @serial
+	 */
+	protected int freeEntries;
+	
+	
+	protected static final byte FREE = 0;
+	protected static final byte FULL = 1;
+	protected static final byte REMOVED = 2;
+
+/**
+ * Constructs an empty map with default capacity and default load factors.
+ */
+public OpenLongObjectHashMap() {
+	this(defaultCapacity);
+}
+/**
+ * Constructs an empty map with the specified initial capacity and default load factors.
+ *
+ * @param      initialCapacity   the initial capacity of the map.
+ * @throws     IllegalArgumentException if the initial capacity is less
+ *             than zero.
+ */
+public OpenLongObjectHashMap(int initialCapacity) {
+	this(initialCapacity, defaultMinLoadFactor, defaultMaxLoadFactor);
+}
+/**
+ * Constructs an empty map with
+ * the specified initial capacity and the specified minimum and maximum load factor.
+ *
+ * @param      initialCapacity   the initial capacity.
+ * @param      minLoadFactor        the minimum load factor.
+ * @param      maxLoadFactor        the maximum load factor.
+ * @throws	IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
+ */
+public OpenLongObjectHashMap(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
+	setUp(initialCapacity,minLoadFactor,maxLoadFactor);
+}
+/**
+ * Removes all (key,value) associations from the receiver.
+ * Implicitly calls <tt>trimToSize()</tt>.
+ */
+public void clear() {
+	new ByteArrayList(this.state).fillFromToWith(0, this.state.length-1, FREE);
+   	new ObjectArrayList(values).fillFromToWith(0, state.length-1, null); // delta
+
+	this.distinct = 0;
+	this.freeEntries = table.length; // delta
+	trimToSize();
+}
+/**
+ * Returns a deep copy of the receiver.
+ *
+ * @return  a deep copy of the receiver.
+ */
+public Object clone() {
+	OpenLongObjectHashMap copy = (OpenLongObjectHashMap) super.clone();
+	copy.table = (long[]) copy.table.clone();
+	copy.values = (Object[]) copy.values.clone();
+	copy.state = (byte[]) copy.state.clone();
+	return copy;
+}
+/**
+ * Returns <tt>true</tt> if the receiver contains the specified key.
+ *
+ * @return <tt>true</tt> if the receiver contains the specified key.
+ */
+public boolean containsKey(long key) {
+	return indexOfKey(key) >= 0;
+}
+/**
+ * Returns <tt>true</tt> if the receiver contains the specified value.
+ *
+ * @return <tt>true</tt> if the receiver contains the specified value.
+ */
+public boolean containsValue(Object value) {
+	return indexOfValue(value) >= 0;
+}
+/**
+ * Ensures that the receiver can hold at least the specified number of associations without needing to allocate new internal memory.
+ * If necessary, allocates new internal memory and increases the capacity of the receiver.
+ * <p>
+ * This method never need be called; it is for performance tuning only.
+ * Calling this method before <tt>put()</tt>ing a large number of associations boosts performance,
+ * because the receiver will grow only once instead of potentially many times and hash collisions get less probable.
+ *
+ * @param   minCapacity   the desired minimum capacity.
+ */
+public void ensureCapacity(int minCapacity) {
+	if (table.length < minCapacity) {
+		int newCapacity = nextPrime(minCapacity);
+		rehash(newCapacity);
+	}
+}
+/**
+ * Applies a procedure to each key of the receiver, if any.
+ * Note: Iterates over the keys in no particular order.
+ * Subclasses can define a particular order, for example, "sorted by key".
+ * All methods which <i>can</i> be expressed in terms of this method (most methods can) <i>must guarantee</i> to use the <i>same</i> order defined by this method, even if it is no particular order.
+ * This is necessary so that, for example, methods <tt>keys</tt> and <tt>values</tt> will yield association pairs, not two uncorrelated lists.
+ *
+ * @param procedure    the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise continues. 
+ * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise. 
+ */
+public boolean forEachKey(LongProcedure procedure) {
+	for (int i = table.length ; i-- > 0 ;) {
+		if (state[i]==FULL) if (! procedure.apply(table[i])) return false;
+	}
+	return true;
+}
+/**
+ * Applies a procedure to each (key,value) pair of the receiver, if any.
+ * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(LongProcedure)}.
+ *
+ * @param procedure    the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise continues. 
+ * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise. 
+ */
+public boolean forEachPair(final LongObjectProcedure procedure) {
+	for (int i = table.length ; i-- > 0 ;) {
+		if (state[i]==FULL) if (! procedure.apply(table[i],values[i])) return false;
+	}
+	return true;
+}
+/**
+ * Returns the value associated with the specified key.
+ * It is often a good idea to first check with {@link #containsKey(long)} whether the given key has a value associated or not, i.e. whether there exists an association for the given key or not.
+ *
+ * @param key the key to be searched for.
+ * @return the value associated with the specified key; <tt>null</tt> if no such key is present.
+ */
+public Object get(long key) {
+	int i = indexOfKey(key);
+	if (i<0) return null; //not contained
+	return values[i];
+}
+/**
+ * @param key the key to be added to the receiver.
+ * @return the index where the key would need to be inserted, if it is not already contained.
+ * Returns -index-1 if the key is already contained at slot index.
+ * Therefore, if the returned index < 0, then it is already contained at slot -index-1.
+ * If the returned index >= 0, then it is NOT already contained and should be inserted at slot index.
+ */
+protected int indexOfInsertion(long key) {
+	final long tab[] = table;
+	final byte stat[] = state;
+	final int length = tab.length;
+
+	final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+	int i = hash % length;
+	int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
+	//int decrement = (hash / length) % length;
+	if (decrement == 0) decrement = 1;
+
+	// stop if we find a removed or free slot, or if we find the key itself
+	// do NOT skip over removed slots (yes, open addressing is like that...)
+	while (stat[i] == FULL && tab[i] != key) {
+		i -= decrement;
+		//hashCollisions++;
+		if (i<0) i+=length;
+	}
+	
+	if (stat[i] == REMOVED) {
+		// stop if we find a free slot, or if we find the key itself.
+		// do skip over removed slots (yes, open addressing is like that...)
+		// assertion: there is at least one FREE slot.
+		int j = i;
+		while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
+			i -= decrement;
+			//hashCollisions++;
+			if (i<0) i+=length;
+		}
+		if (stat[i] == FREE) i = j;
+	}
+	
+	
+	if (stat[i] == FULL) {
+		// key already contained at slot i.
+		// return a negative number identifying the slot.
+		return -i-1;
+	}
+	// not already contained, should be inserted at slot i.
+	// return a number >= 0 identifying the slot.
+	return i; 
+}
+/**
+ * @param key the key to be searched in the receiver.
+ * @return the index where the key is contained in the receiver, returns -1 if the key was not found.
+ */
+protected int indexOfKey(long key) {
+	final long tab[] = table;
+	final byte stat[] = state;
+	final int length = tab.length;
+
+	final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+	int i = hash % length;
+	int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
+	//int decrement = (hash / length) % length;
+	if (decrement == 0) decrement = 1;
+
+	// stop if we find a free slot, or if we find the key itself.
+	// do skip over removed slots (yes, open addressing is like that...)
+	while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
+		i -= decrement;
+		//hashCollisions++;
+		if (i<0) i+=length;
+	}
+	
+	if (stat[i] == FREE) return -1; // not found
+	return i; //found, return index where key is contained
+}
+/**
+ * @param value the value to be searched in the receiver.
+ * @return the index where the value is contained in the receiver, returns -1 if the value was not found.
+ */
+protected int indexOfValue(Object value) {
+	final Object val[] = values;
+	final byte stat[] = state;
+
+	for (int i=stat.length; --i >= 0;) {
+		if (stat[i]==FULL && val[i]==value) return i;
+	}
+
+	return -1; // not found
+}
+/**
+ * Returns the first key the given value is associated with.
+ * It is often a good idea to first check with {@link #containsValue(Object)} whether there exists an association from a key to this value.
+ * Search order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(LongProcedure)}.
+ *
+ * @param value the value to search for.
+ * @return the first key for which holds <tt>get(key) == value</tt>; 
+ *		   returns <tt>Long.MIN_VALUE</tt> if no such key exists.
+ */
+public long keyOf(Object value) {
+	//returns the first key found; there may be more matching keys, however.
+	int i = indexOfValue(value);
+	if (i<0) return Long.MIN_VALUE;
+	return table[i];
+}
+/**
+ * Fills all keys contained in the receiver into the specified list.
+ * Fills the list, starting at index 0.
+ * After this call returns the specified list has a new size that equals <tt>this.size()</tt>.
+ * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(LongProcedure)}.
+ * <p>
+ * This method can be used to iterate over the keys of the receiver.
+ *
+ * @param list the list to be filled, can have any size.
+ */
+public void keys(LongArrayList list) {
+	list.setSize(distinct);
+	long[] elements = list.elements();
+	
+	long[] tab = table;
+	byte[] stat = state;
+	
+	int j=0;
+	for (int i = tab.length ; i-- > 0 ;) {
+		if (stat[i]==FULL) elements[j++]=tab[i];
+	}
+}
+/**
+Fills all pairs satisfying a given condition into the specified lists.
+Fills into the lists, starting at index 0.
+After this call returns the specified lists both have a new size, the number of pairs satisfying the condition.
+Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(LongProcedure)}.
+<p>
+<b>Example:</b>
+<br>
+<pre>
+LongObjectProcedure condition = new LongObjectProcedure() { // match even keys only
+	public boolean apply(long key, Object value) { return key%2==0; }
+}
+keys = (8,7,6), values = (1,2,2) --> keyList = (6,8), valueList = (2,1)</tt>
+</pre>
+
+@param condition    the condition to be matched. Takes the current key as first and the current value as second argument.
+@param keyList the list to be filled with keys, can have any size.
+@param valueList the list to be filled with values, can have any size.
+*/
+public void pairsMatching(final LongObjectProcedure condition, final LongArrayList keyList, final ObjectArrayList valueList) {
+	keyList.clear();
+	valueList.clear();
+	
+	for (int i = table.length ; i-- > 0 ;) {
+		if (state[i]==FULL && condition.apply(table[i],values[i])) {
+			keyList.add(table[i]);
+			valueList.add(values[i]);
+		}
+	}
+}
+/**
+ * Associates the given key with the given value.
+ * Replaces any old <tt>(key,someOtherValue)</tt> association, if existing.
+ *
+ * @param key the key the value shall be associated with.
+ * @param value the value to be associated.
+ * @return <tt>true</tt> if the receiver did not already contain such a key;
+ *         <tt>false</tt> if the receiver did already contain such a key - the new value has now replaced the formerly associated value.
+ */
+public boolean put(long key, Object value) {
+	int i = indexOfInsertion(key);	
+	if (i<0) { //already contained
+		i = -i -1;
+		this.values[i]=value;
+		return false;
+	}
+
+	if (this.distinct > this.highWaterMark) {
+		int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+		rehash(newCapacity);
+		return put(key, value);
+	}
+
+	this.table[i]=key;
+	this.values[i]=value;
+	if (this.state[i]==FREE) this.freeEntries--;
+	this.state[i]=FULL;
+	this.distinct++;
+
+	if (this.freeEntries < 1) { //delta
+		int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+		rehash(newCapacity);
+	}
+
+	return true;
+}
+/**
+ * Rehashes the contents of the receiver into a new table
+ * with a smaller or larger capacity.
+ * This method is called automatically when the
+ * number of keys in the receiver exceeds the high water mark or falls below the low water mark.
+ */
+protected void rehash(int newCapacity) {
+	int oldCapacity = table.length;
+	//if (oldCapacity == newCapacity) return;
+	
+	long oldTable[] = table;
+	Object oldValues[] = values;
+	byte oldState[] = state;
+
+	long newTable[] = new long[newCapacity];
+	Object newValues[] = new Object[newCapacity];
+	byte newState[] = new byte[newCapacity];
+
+	this.lowWaterMark  = chooseLowWaterMark(newCapacity,this.minLoadFactor);
+	this.highWaterMark = chooseHighWaterMark(newCapacity,this.maxLoadFactor);
+
+	this.table = newTable;
+	this.values = newValues;
+	this.state = newState;
+	this.freeEntries = newCapacity-this.distinct; // delta
+	
+	for (int i = oldCapacity ; i-- > 0 ;) {
+		if (oldState[i]==FULL) {
+			long element = oldTable[i];
+			int index = indexOfInsertion(element);
+			newTable[index]=element;
+			newValues[index]=oldValues[i];
+			newState[index]=FULL;
+		}
+	}
+}
+/**
+ * Removes the given key with its associated element from the receiver, if present.
+ *
+ * @param key the key to be removed from the receiver.
+ * @return <tt>true</tt> if the receiver contained the specified key, <tt>false</tt> otherwise.
+ */
+public boolean removeKey(long key) {
+	int i = indexOfKey(key);
+	if (i<0) return false; // key not contained
+
+	this.state[i]=REMOVED;
+	this.values[i]=null; // delta
+	this.distinct--;
+
+	if (this.distinct < this.lowWaterMark) {
+		int newCapacity = chooseShrinkCapacity(this.distinct,this.minLoadFactor, this.maxLoadFactor);
+		rehash(newCapacity);
+	}
+	
+	return true;	
+}
+/**
+ * Initializes the receiver.
+ *
+ * @param      initialCapacity   the initial capacity of the receiver.
+ * @param      minLoadFactor        the minLoadFactor of the receiver.
+ * @param      maxLoadFactor        the maxLoadFactor of the receiver.
+ * @throws	IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
+ */
+protected void setUp(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
+	int capacity = initialCapacity;
+	super.setUp(capacity, minLoadFactor, maxLoadFactor);
+	capacity = nextPrime(capacity);
+	if (capacity==0) capacity=1; // open addressing needs at least one FREE slot at any time.
+	
+	this.table = new long[capacity];
+	this.values = new Object[capacity];
+	this.state = new byte[capacity];
+
+	// memory will be exhausted long before this pathological case happens, anyway.
+	this.minLoadFactor = minLoadFactor;
+	if (capacity == PrimeFinder.largestPrime) this.maxLoadFactor = 1.0;
+	else this.maxLoadFactor = maxLoadFactor;
+
+	this.distinct = 0;
+	this.freeEntries = capacity; // delta
+	
+	// lowWaterMark will be established upon first expansion.
+	// establishing it now (upon instance construction) would immediately make the table shrink upon first put(...).
+	// After all the idea of an "initialCapacity" implies violating lowWaterMarks when an object is young.
+	// See ensureCapacity(...)
+	this.lowWaterMark = 0; 
+	this.highWaterMark = chooseHighWaterMark(capacity, this.maxLoadFactor);
+}
+/**
+ * Trims the capacity of the receiver to be the receiver's current 
+ * size. Releases any superfluous internal memory. An application can use this operation to minimize the 
+ * storage of the receiver.
+ */
+public void trimToSize() {
+	// * 1.2 because open addressing's performance exponentially degrades beyond that point
+	// so that even rehashing the table can take very long
+	int newCapacity = nextPrime((int)(1 + 1.2*size()));
+	if (table.length > newCapacity) {
+		rehash(newCapacity);
+	}
+}
+/**
+ * Fills all values contained in the receiver into the specified list.
+ * Fills the list, starting at index 0.
+ * After this call returns the specified list has a new size that equals <tt>this.size()</tt>.
+ * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(LongProcedure)}.
+ * <p>
+ * This method can be used to iterate over the values of the receiver.
+ *
+ * @param list the list to be filled, can have any size.
+ */
+public void values(ObjectArrayList list) {
+	list.setSize(distinct);
+	Object[] elements = list.elements();
+	
+	Object[] val = values;
+	byte[] stat = state;
+	
+	int j=0;
+	for (int i = stat.length ; i-- > 0 ;) {
+		if (stat[i]==FULL) elements[j++]=val[i];
+	}
+}
+}

Propchange: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenLongObjectHashMap.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/PrimeFinder.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/PrimeFinder.java?rev=883365&view=auto
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/PrimeFinder.java (added)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/PrimeFinder.java Mon Nov 23 15:14:26 2009
@@ -0,0 +1,198 @@
+package org.apache.mahout.colt.map;
+
+/**
+ * Not of interest for users; only for implementors of hashtables.
+ * Used to keep hash table capacities prime numbers.
+ *
+ * <p>Choosing prime numbers as hash table capacities is a good idea to keep them working fast,
+ * particularly under hash table expansions.
+ *
+ * <p>However, JDK 1.2, JGL 3.1 and many other toolkits do nothing to keep capacities prime.
+ * This class provides efficient means to choose prime capacities.
+ *
+ * <p>Choosing a prime is <tt>O(log 300)</tt> (binary search in a list of 300 int's).
+ * Memory requirements: 1 KB static memory.
+ *
+ * @author wolfgang.hoschek@cern.ch
+ * @version 1.0, 09/24/99
+ */
+/** 
+ * @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported.
+ */
+@Deprecated
+public class PrimeFinder extends Object {
+	/**
+	 * The largest prime this class can generate; currently equal to <tt>Integer.MAX_VALUE</tt>.
+	 */
+	public static final int largestPrime = Integer.MAX_VALUE; //yes, it is prime.
+
+	/**
+	 * The prime number list consists of 11 chunks.
+	 * Each chunk contains prime numbers.
+	 * A chunk starts with a prime P1. The next element is a prime P2. P2 is the smallest prime for which holds: P2 >= 2*P1.
+	 * The next element is P3, for which the same holds with respect to P2, and so on.
+	 *
+	 * Chunks are chosen such that for any desired capacity >= 1000 
+	 * the list includes a prime number <= desired capacity * 1.11 (11%).
+	 * For any desired capacity >= 200 
+	 * the list includes a prime number <= desired capacity * 1.16 (16%).
+	 * For any desired capacity >= 16
+	 * the list includes a prime number <= desired capacity * 1.21 (21%).
+	 * 
+	 * Therefore, primes can be retrieved which are quite close to any desired capacity,
+	 * which in turn avoids wasting memory.
+	 * For example, the list includes 1039,1117,1201,1277,1361,1439,1523,1597,1759,1907,2081.
+	 * So if you need a prime >= 1040, you will find a prime <= 1040*1.11=1154.
+	 *	
+	 * Chunks are chosen such that they are optimized for a hashtable growthfactor of 2.0;
+	 * If your hashtable has such a growthfactor then,  
+	 * after initially "rounding to a prime" upon hashtable construction, 
+	 * it will later expand to prime capacities such that there exist no better primes.
+	 *
+	 * In total these are about 32*10=320 numbers -> 1 KB of static memory needed.
+	 * If you are stingy, then delete every second or fourth chunk.
+	 */
+	
+	private static final int[] primeCapacities = {
+		//chunk #0
+		largestPrime,
+		
+		//chunk #1
+		5,11,23,47,97,197,397,797,1597,3203,6421,12853,25717,51437,102877,205759,
+		  411527,823117,1646237,3292489,6584983,13169977,26339969,52679969,105359939,
+		  210719881,421439783,842879579,1685759167,
+		  
+		//chunk #2
+		433,877,1759,3527,7057,14143,28289,56591,113189,226379,452759,905551,1811107,
+		  3622219,7244441,14488931,28977863,57955739,115911563,231823147,463646329,927292699,
+		  1854585413,
+		  
+		//chunk #3
+		953,1907,3821,7643,15287,30577,61169,122347,244703,489407,978821,1957651,3915341,
+		  7830701,15661423,31322867,62645741,125291483,250582987,501165979,1002331963,
+		  2004663929,
+		  
+		//chunk #4
+		1039,2081,4177,8363,16729,33461,66923,133853,267713,535481,1070981,2141977,4283963,
+		  8567929,17135863,34271747,68543509,137087021,274174111,548348231,1096696463,
+		  
+		//chunk #5
+		31,67,137,277,557,1117,2237,4481,8963,17929,35863,71741,143483,286973,573953,
+		  1147921,2295859,4591721,9183457,18366923,36733847,73467739,146935499,293871013,
+		  587742049,1175484103,
+		  
+		//chunk #6
+		599,1201,2411,4831,9677,19373,38747,77509,155027,310081,620171,1240361,2480729,
+		  4961459,9922933,19845871,39691759,79383533,158767069,317534141,635068283,1270136683,
+		  
+		//chunk #7
+		311,631,1277,2557,5119,10243,20507,41017,82037,164089,328213,656429,1312867,
+		  2625761,5251529,10503061,21006137,42012281,84024581,168049163,336098327,672196673,
+		  1344393353,
+		  
+		//chunk #8
+		3,7,17,37,79,163,331,673,1361,2729,5471,10949,21911,43853,87719,175447,350899,
+		  701819,1403641,2807303,5614657,11229331,22458671,44917381,89834777,179669557,
+		  359339171,718678369,1437356741,
+		  
+		//chunk #9
+		43,89,179,359,719,1439,2879,5779,11579,23159,46327,92657,185323,370661,741337,
+		  1482707,2965421,5930887,11861791,23723597,47447201,94894427,189788857,379577741,
+		  759155483,1518310967,
+		  
+		//chunk #10
+		379,761,1523,3049,6101,12203,24407,48817,97649,195311,390647,781301,1562611,
+		  3125257,6250537,12501169,25002389,50004791,100009607,200019221,400038451,800076929,
+		  1600153859
+		/*
+		// some more chunks for the low range [3..1000]
+		//chunk #11
+		13,29,59,127,257,521,1049,2099,4201,8419,16843,33703,67409,134837,269683,
+		539389,1078787,2157587,4315183,8630387,17260781,34521589,69043189,138086407,
+		276172823,552345671,1104691373,
+		
+		//chunk #12
+		19,41,83,167,337,677,
+		//1361,2729,5471,10949,21911,43853,87719,175447,350899,
+		//701819,1403641,2807303,5614657,11229331,22458671,44917381,89834777,179669557,
+		//359339171,718678369,1437356741,
+		
+		//chunk #13
+		53,107,223,449,907,1823,3659,7321,14653,29311,58631,117269,
+		234539,469099,938207,1876417,3752839,7505681,15011389,30022781,
+		60045577,120091177,240182359,480364727,960729461,1921458943
+		*/
+		};
+		
+
+	static { //initializer
+		// The above prime numbers are formatted for human readability.
+		// To find numbers fast, we sort them once and for all.
+		
+		java.util.Arrays.sort(primeCapacities);
+		//new org.apache.mahout.colt.list.IntArrayList(primeCapacities).mergeSort(); // for debug only, TODO
+	}
+	
+/**
+ * Makes this class non instantiable, but still let's others inherit from it.
+ */
+protected PrimeFinder() {}
+/**
+ * Tests correctness. Try 
+ * from=1000, to=10000
+ * from=200,  to=1000
+ * from=16,   to=1000
+ * from=1000, to=Integer.MAX_VALUE
+ */
+protected static void main(String args[]) {
+	int from = Integer.parseInt(args[0]);	
+	int to = Integer.parseInt(args[1]);
+	
+	statistics(from,to);
+}
+/**
+ * Returns a prime number which is <code>&gt;= desiredCapacity</code> and very close to <code>desiredCapacity</code> (within 11% if <code>desiredCapacity &gt;= 1000</code>).
+ * @param desiredCapacity the capacity desired by the user.
+ * @return the capacity which should be used for a hashtable.
+ */
+public static int nextPrime(int desiredCapacity) {
+	int i = java.util.Arrays.binarySearch(primeCapacities, desiredCapacity);
+	//int i = new org.apache.mahout.colt.list.IntArrayList(primeCapacities).binarySearch(desiredCapacity); // for debug only TODO
+	if (i<0) {
+		// desired capacity not found, choose next prime greater than desired capacity
+		i = -i -1; // remember the semantics of binarySearch...
+	}
+	return primeCapacities[i];
+}
+/**
+ * Tests correctness.
+ */
+protected static void statistics(int from, int to) {
+	// check that primes contain no accidental errors
+	for (int i=0; i<primeCapacities.length-1; i++) {
+		if (primeCapacities[i] >= primeCapacities[i+1]) throw new RuntimeException("primes are unsorted or contain duplicates; detected at "+i+"@"+primeCapacities[i]);
+	}
+	
+	double accDeviation = 0.0;
+	double maxDeviation = - 1.0;
+
+	for (int i=from; i<=to; i++) {
+		int primeCapacity = nextPrime(i);
+		//System.out.println(primeCapacity);
+		double deviation = (primeCapacity - i) / (double)i;
+		
+		if (deviation > maxDeviation) {
+			maxDeviation = deviation;
+			System.out.println("new maxdev @"+i+"@dev="+maxDeviation);
+		}
+
+		accDeviation += deviation;
+	}
+	long width = 1 + (long)to - (long)from;
+	
+	double meanDeviation = accDeviation/width;
+	System.out.println("Statistics for ["+ from + ","+to+"] are as follows");
+	System.out.println("meanDeviation = "+(float)meanDeviation*100+" %");
+	System.out.println("maxDeviation = "+(float)maxDeviation*100+" %");
+}
+}

Propchange: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/PrimeFinder.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/QuickOpenIntIntHashMap.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/QuickOpenIntIntHashMap.java?rev=883365&view=auto
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/QuickOpenIntIntHashMap.java (added)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/QuickOpenIntIntHashMap.java Mon Nov 23 15:14:26 2009
@@ -0,0 +1,210 @@
+/*
+Copyright � 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.colt.map;
+
+/**
+Status: Experimental; Do not use for production yet. Hash map holding (key,value) associations of type <tt>(int-->int)</tt>; Automatically grows and shrinks as needed; Implemented using open addressing with double hashing.
+First see the <a href="package-summary.html">package summary</a> and javadoc <a href="package-tree.html">tree view</a> to get the broad picture.
+
+Implements open addressing with double hashing, using "Brent's variation".
+Brent's variation slows insertions a bit down (not much) but reduces probes (collisions) for successful searches, in particular for large load factors.
+(It does not improve unsuccessful searches.)
+See D. Knuth, Searching and Sorting, 3rd ed., p.533-545
+ 
+@author wolfgang.hoschek@cern.ch
+@version 1.0, 09/24/99
+@see	    java.util.HashMap
+*/
+class QuickOpenIntIntHashMap extends OpenIntIntHashMap {
+	public int totalProbesSaved = 0; // benchmark only
+/**
+ * Constructs an empty map with default capacity and default load factors.
+ */
+public QuickOpenIntIntHashMap() {
+	this(defaultCapacity);
+}
+/**
+ * Constructs an empty map with the specified initial capacity and default load factors.
+ *
+ * @param      initialCapacity   the initial capacity of the map.
+ * @throws     IllegalArgumentException if the initial capacity is less
+ *             than zero.
+ */
+public QuickOpenIntIntHashMap(int initialCapacity) {
+	this(initialCapacity, defaultMinLoadFactor, defaultMaxLoadFactor);
+}
+/**
+ * Constructs an empty map with
+ * the specified initial capacity and the specified minimum and maximum load factor.
+ *
+ * @param      initialCapacity   the initial capacity.
+ * @param      minLoadFactor        the minimum load factor.
+ * @param      maxLoadFactor        the maximum load factor.
+ * @throws	IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
+ */
+public QuickOpenIntIntHashMap(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
+	setUp(initialCapacity,minLoadFactor,maxLoadFactor);
+}
+/**
+ * Associates the given key with the given value.
+ * Replaces any old <tt>(key,someOtherValue)</tt> association, if existing.
+ *
+ * @param key the key the value shall be associated with.
+ * @param value the value to be associated.
+ * @return <tt>true</tt> if the receiver did not already contain such a key;
+ *         <tt>false</tt> if the receiver did already contain such a key - the new value has now replaced the formerly associated value.
+ */
+public boolean put(int key, int value) {
+	/* 
+	   This is open addressing with double hashing, using "Brent's variation".
+	   Brent's variation slows insertions a bit down (not much) but reduces probes (collisions) for successful searches, in particular for large load factors.
+	   (It does not improve unsuccessful searches.)
+	   See D. Knuth, Searching and Sorting, 3rd ed., p.533-545
+	
+	   h1(key) = hash % M
+	   h2(key) = decrement = Max(1, hash/M % M)
+	   M is prime = capacity = table.length
+	   probing positions are table[(h1-j*h2) % M] for j=0,1,...
+	   (M and h2 could also be chosen differently, but h2 is required to be relative prime to M.)
+	*/
+	
+	int key0;
+	final int tab[] = table;
+	final byte stat[] = state;
+	final int length = tab.length;
+
+	int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+	int i = hash % length;
+	int decrement = (hash / length) % length;
+	if (decrement == 0) decrement = 1;
+	//System.out.println("insert search for (key,value)=("+key+","+value+") at i="+i+", dec="+decrement);
+
+	// stop if we find a removed or free slot, or if we find the key itself
+	// do NOT skip over removed slots (yes, open addressing is like that...)
+	//int comp = comparisons;
+	int t = 0;  // the number of probes
+	int p0 = i; // the first position to probe
+	while (stat[i] == FULL && tab[i] != key) {
+		t++;
+		i -= decrement;
+		//hashCollisions++;
+		if (i<0) i+=length;		
+	}
+	//if (comparisons-comp>0) System.out.println("probed "+(comparisons-comp)+" slots.");
+	if (stat[i] == FULL) {
+		// key already contained at slot i.
+		this.values[i]=value;
+		return false;
+	}
+	// not already contained, should be inserted at slot i.
+	
+	if (this.distinct > this.highWaterMark) {
+		int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+		
+		//System.out.print("grow rehashing ");
+		//System.out.println("at distinct="+distinct+", capacity="+table.length+" to newCapacity="+newCapacity+" ...");
+		
+		rehash(newCapacity);
+		return put(key, value);
+	}
+
+	/*
+	Brent's variation does a local reorganization to reduce probes. It essentially means:
+	We test whether it is possible to move the association we probed first (table[p0]) out of the way.
+	If this is possible, it will reduce probes for the key to be inserted, since it takes its place; it gets hit earlier.
+	However, future probes for the key that we move out of the way will increase.
+	Thus we only move it out of the way, if we have a net gain, that is, if we save more probes than we loose.
+	For the first probe we safe more than we loose if the number of probes we needed was >=2 (t>=2).
+	If the first probe cannot be moved out of the way, we try the next probe (p1).
+	Now we safe more than we loose if t>=3.
+	We repeat this until we find that we cannot gain or that we can indeed move p(x) out of the way.
+
+	Note: Under the great majority of insertions t<=1, so the loop is entered very infrequently.
+	*/
+	while (t>1) {
+		//System.out.println("t="+t);
+		key0 = tab[p0];
+		hash = HashFunctions.hash(key0) & 0x7FFFFFFF;
+		decrement = (hash / length) % length;
+		if (decrement == 0) decrement = 1;
+		int pc = p0-decrement; // pc = (p0-j*decrement) % M, j=1,2,..
+		if (pc<0) pc += length;
+		
+		if (stat[pc] != FREE) { // not a free slot, continue searching for free slot to move to, or break.
+			p0 = pc;
+			t--;
+		}
+		else { // free or removed slot found, now move...
+			//System.out.println("copying p0="+p0+" to pc="+pc+", (key,val)=("+tab[p0]+","+values[p0]+"), saving "+(t-1)+" probes.");
+			this.totalProbesSaved += (t-1);
+			tab[pc] = key0;
+			stat[pc] = FULL;
+			values[pc] = values[p0];
+			i = p0; // prepare to insert: table[p0]=key
+			t = 0; // break loop 
+		}
+	}
+
+	//System.out.println("inserting at i="+i);
+	this.table[i]=key;
+	this.values[i]=value;
+	if (this.state[i]==FREE) this.freeEntries--;
+	this.state[i]=FULL;
+	this.distinct++;
+
+	if (this.freeEntries < 1) { //delta
+		int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+		rehash(newCapacity);
+	}
+
+	return true;
+}
+/**
+ * Rehashes the contents of the receiver into a new table
+ * with a smaller or larger capacity.
+ * This method is called automatically when the
+ * number of keys in the receiver exceeds the high water mark or falls below the low water mark.
+ */
+protected void rehash(int newCapacity) {
+	int oldCapacity = table.length;
+	//if (oldCapacity == newCapacity) return;
+	
+	int oldTable[] = table;
+	int oldValues[] = values;
+	byte oldState[] = state;
+
+	int newTable[] = new int[newCapacity];
+	int newValues[] = new int[newCapacity];
+	byte newState[] = new byte[newCapacity];
+
+	this.lowWaterMark  = chooseLowWaterMark(newCapacity,this.minLoadFactor);
+	this.highWaterMark = chooseHighWaterMark(newCapacity,this.maxLoadFactor);
+
+	this.table = newTable;
+	this.values = newValues;
+	this.state = newState;
+	this.freeEntries = newCapacity-this.distinct; // delta
+
+	int tmp = this.distinct;
+	this.distinct = Integer.MIN_VALUE; // switch of watermarks
+	for (int i = oldCapacity ; i-- > 0 ;) {
+		if (oldState[i]==FULL) {
+			put(oldTable[i], oldValues[i]);
+			/*
+			int element = oldTable[i];
+			int index = indexOfInsertion(element);
+			newTable[index]=element;
+			newValues[index]=oldValues[i];
+			newState[index]=FULL;
+			*/
+		}
+	}
+	this.distinct = tmp;
+}
+}

Propchange: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/QuickOpenIntIntHashMap.java
------------------------------------------------------------------------------
    svn:eol-style = native