You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by gs...@apache.org on 2009/11/23 16:14:38 UTC

svn commit: r883365 [24/47] - in /lucene/mahout/trunk: ./ examples/ matrix/ matrix/src/ matrix/src/main/ matrix/src/main/java/ matrix/src/main/java/org/ matrix/src/main/java/org/apache/ matrix/src/main/java/org/apache/mahout/ matrix/src/main/java/org/a...

Added: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/AbstractLongObjectMap.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/AbstractLongObjectMap.java?rev=883365&view=auto
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/AbstractLongObjectMap.java (added)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/AbstractLongObjectMap.java Mon Nov 23 15:14:26 2009
@@ -0,0 +1,426 @@
+/*
+Copyright � 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.colt.map;
+
+import org.apache.mahout.colt.function.LongObjectProcedure;
+import org.apache.mahout.colt.function.LongProcedure;
+import org.apache.mahout.colt.list.LongArrayList;
+import org.apache.mahout.colt.list.ObjectArrayList;
+/**
+Abstract base class for hash maps holding (key,value) associations of type <tt>(long-->Object)</tt>.
+First see the <a href="package-summary.html">package summary</a> and javadoc <a href="package-tree.html">tree view</a> to get the broad picture.
+<p>
+<b>Implementation</b>:
+<p>
+Almost all methods are expressed in terms of {@link #forEachKey(LongProcedure)}. 
+As such they are fully functional, but inefficient. Override them in subclasses if necessary.
+
+@author wolfgang.hoschek@cern.ch
+@version 1.0, 09/24/99
+@see	    java.util.HashMap
+*/
+/** 
+ * @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported.
+ */
+@Deprecated
+public abstract class AbstractLongObjectMap extends AbstractMap {
+	//public static int hashCollisions = 0; // for debug only
+/**
+ * Makes this class non instantiable, but still let's others inherit from it.
+ */
+protected AbstractLongObjectMap() {}
+/**
+ * Returns <tt>true</tt> if the receiver contains the specified key.
+ *
+ * @return <tt>true</tt> if the receiver contains the specified key.
+ */
+public boolean containsKey(final long key) {
+	return ! forEachKey(
+		new LongProcedure() {
+			public boolean apply(long iterKey) {
+				return (key != iterKey);
+			}
+		}
+	);
+}
+/**
+ * Returns <tt>true</tt> if the receiver contains the specified value.
+ * Tests for identity.
+ *
+ * @return <tt>true</tt> if the receiver contains the specified value.
+ */
+public boolean containsValue(final Object value) {
+	return ! forEachPair( 
+		new LongObjectProcedure() {
+			public boolean apply(long iterKey, Object iterValue) {
+				return (value != iterValue);
+			}
+		}
+	);
+}
+/**
+ * Returns a deep copy of the receiver; uses <code>clone()</code> and casts the result.
+ *
+ * @return  a deep copy of the receiver.
+ */
+public AbstractLongObjectMap copy() {
+	return (AbstractLongObjectMap) clone();
+}
+/**
+ * Compares the specified object with this map for equality.  Returns
+ * <tt>true</tt> if the given object is also a map and the two maps
+ * represent the same mappings.  More formally, two maps <tt>m1</tt> and
+ * <tt>m2</tt> represent the same mappings iff
+ * <pre>
+ * m1.forEachPair(
+ *		new LongObjectProcedure() {
+ *			public boolean apply(long key, Object value) {
+ *				return m2.containsKey(key) && m2.get(key) == value;
+ *			}
+ *		}
+ *	)
+ * &&
+ * m2.forEachPair(
+ *		new LongObjectProcedure() {
+ *			public boolean apply(long key, Object value) {
+ *				return m1.containsKey(key) && m1.get(key) == value;
+ *			}
+ *		}
+ *	);
+ * </pre>
+ *
+ * This implementation first checks if the specified object is this map;
+ * if so it returns <tt>true</tt>.  Then, it checks if the specified
+ * object is a map whose size is identical to the size of this set; if
+ * not, it it returns <tt>false</tt>.  If so, it applies the iteration as described above.
+ *
+ * @param obj object to be compared for equality with this map.
+ * @return <tt>true</tt> if the specified object is equal to this map.
+ */
+public boolean equals(Object obj) {
+	if (obj == this) return true;
+
+	if (!(obj instanceof AbstractLongObjectMap)) return false;
+	final AbstractLongObjectMap other = (AbstractLongObjectMap) obj;
+	if (other.size() != size()) return false;
+
+	return 
+		forEachPair(
+			new LongObjectProcedure() {
+				public boolean apply(long key, Object value) {
+					return other.containsKey(key) && other.get(key) == value;
+				}
+			}
+		)
+		&&
+		other.forEachPair(
+			new LongObjectProcedure() {
+				public boolean apply(long key, Object value) {
+					return containsKey(key) && get(key) == value;
+				}
+			}
+		);
+}
+/**
+ * Applies a procedure to each key of the receiver, if any.
+ * Note: Iterates over the keys in no particular order.
+ * Subclasses can define a particular order, for example, "sorted by key".
+ * All methods which <i>can</i> be expressed in terms of this method (most methods can) <i>must guarantee</i> to use the <i>same</i> order defined by this method, even if it is no particular order.
+ * This is necessary so that, for example, methods <tt>keys</tt> and <tt>values</tt> will yield association pairs, not two uncorrelated lists.
+ *
+ * @param procedure    the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise continues. 
+ * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise. 
+ */
+public abstract boolean forEachKey(LongProcedure procedure);
+/**
+ * Applies a procedure to each (key,value) pair of the receiver, if any.
+ * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(LongProcedure)}.
+ *
+ * @param procedure    the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise continues. 
+ * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise. 
+ */
+public boolean forEachPair(final LongObjectProcedure procedure) {
+	return forEachKey(
+		new LongProcedure() {
+			public boolean apply(long key) {
+				return procedure.apply(key,get(key));
+			}
+		}
+	);
+}
+/**
+ * Returns the value associated with the specified key.
+ * It is often a good idea to first check with {@link #containsKey(long)} whether the given key has a value associated or not, i.e. whether there exists an association for the given key or not.
+ *
+ * @param key the key to be searched for.
+ * @return the value associated with the specified key; <tt>null</tt> if no such key is present.
+ */
+public abstract Object get(long key);
+/**
+ * Returns the first key the given value is associated with.
+ * It is often a good idea to first check with {@link #containsValue(Object)} whether there exists an association from a key to this value.
+ * Search order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(LongProcedure)}.
+ *
+ * @param value the value to search for.
+ * @return the first key for which holds <tt>get(key) == value</tt>; 
+ *		   returns <tt>Long.MIN_VALUE</tt> if no such key exists.
+ */
+public long keyOf(final Object value) {
+	final long[] foundKey = new long[1];
+	boolean notFound = forEachPair(
+		new LongObjectProcedure() {
+			public boolean apply(long iterKey, Object iterValue) {
+				boolean found = value == iterValue;
+				if (found) foundKey[0] = iterKey;
+				return !found;
+			}
+		}
+	);
+	if (notFound) return Long.MIN_VALUE;
+	return foundKey[0];
+}
+/**
+ * Returns a list filled with all keys contained in the receiver.
+ * The returned list has a size that equals <tt>this.size()</tt>.
+ * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(LongProcedure)}.
+ * <p>
+ * This method can be used to iterate over the keys of the receiver.
+ *
+ * @return the keys.
+ */
+public LongArrayList keys() {
+	LongArrayList list = new LongArrayList(size());
+	keys(list);
+	return list;
+}
+/**
+ * Fills all keys contained in the receiver into the specified list.
+ * Fills the list, starting at index 0.
+ * After this call returns the specified list has a new size that equals <tt>this.size()</tt>.
+ * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(LongProcedure)}.
+ * <p>
+ * This method can be used to iterate over the keys of the receiver.
+ *
+ * @param list the list to be filled, can have any size.
+ */
+public void keys(final LongArrayList list) {
+	list.clear();
+	forEachKey(
+		new LongProcedure() {
+			public boolean apply(long key) {
+				list.add(key);
+				return true;
+			}
+		}
+	);
+}
+/**
+ * Fills all keys <i>sorted ascending by their associated value</i> into the specified list.
+ * Fills into the list, starting at index 0.
+ * After this call returns the specified list has a new size that equals <tt>this.size()</tt>.
+ * Primary sort criterium is "value", secondary sort criterium is "key". 
+ * This means that if any two values are equal, the smaller key comes first.
+ * <p>
+ * <b>Example:</b>
+ * <br>
+ * <tt>keys = (8,7,6), values = (1,2,2) --> keyList = (8,6,7)</tt>
+ *
+ * @param keyList the list to be filled, can have any size.
+ */
+public void keysSortedByValue(final LongArrayList keyList) {
+	pairsSortedByValue(keyList, new ObjectArrayList(size()));
+}
+/**
+Fills all pairs satisfying a given condition into the specified lists.
+Fills into the lists, starting at index 0.
+After this call returns the specified lists both have a new size, the number of pairs satisfying the condition.
+Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(LongProcedure)}.
+<p>
+<b>Example:</b>
+<br>
+<pre>
+LongObjectProcedure condition = new LongObjectProcedure() { // match even keys only
+	public boolean apply(long key, Object value) { return key%2==0; }
+}
+keys = (8,7,6), values = (1,2,2) --> keyList = (6,8), valueList = (2,1)</tt>
+</pre>
+
+@param condition    the condition to be matched. Takes the current key as first and the current value as second argument.
+@param keyList the list to be filled with keys, can have any size.
+@param valueList the list to be filled with values, can have any size.
+*/
+public void pairsMatching(final LongObjectProcedure condition, final LongArrayList keyList, final ObjectArrayList valueList) {
+	keyList.clear();
+	valueList.clear();
+	
+	forEachPair(
+		new LongObjectProcedure() {
+			public boolean apply(long key, Object value) {
+				if (condition.apply(key,value)) {
+					keyList.add(key);
+					valueList.add(value);
+				}
+				return true;
+			}
+		}
+	);
+}
+/**
+ * Fills all keys and values <i>sorted ascending by key</i> into the specified lists.
+ * Fills into the lists, starting at index 0.
+ * After this call returns the specified lists both have a new size that equals <tt>this.size()</tt>.
+ * <p>
+ * <b>Example:</b>
+ * <br>
+ * <tt>keys = (8,7,6), values = (1,2,2) --> keyList = (6,7,8), valueList = (2,2,1)</tt>
+ *
+ * @param keyList the list to be filled with keys, can have any size.
+ * @param valueList the list to be filled with values, can have any size.
+ */
+public void pairsSortedByKey(final LongArrayList keyList, final ObjectArrayList valueList) {
+	keys(keyList);
+	keyList.sort();
+	valueList.setSize(keyList.size());
+	for (int i=keyList.size(); --i >= 0; ) {
+		valueList.setQuick(i,get(keyList.getQuick(i)));
+	}
+}
+/**
+ * Fills all keys and values <i>sorted ascending by value according to natural ordering</i> into the specified lists.
+ * Fills into the lists, starting at index 0.
+ * After this call returns the specified lists both have a new size that equals <tt>this.size()</tt>.
+ * Primary sort criterium is "value", secondary sort criterium is "key". 
+ * This means that if any two values are equal, the smaller key comes first.
+ * <p>
+ * <b>Example:</b>
+ * <br>
+ * <tt>keys = (8,7,6), values = (1,2,2) --> keyList = (8,6,7), valueList = (1,2,2)</tt>
+ *
+ * @param keyList the list to be filled with keys, can have any size.
+ * @param valueList the list to be filled with values, can have any size.
+ */
+public void pairsSortedByValue(final LongArrayList keyList, final ObjectArrayList valueList) {
+	keys(keyList);
+	values(valueList);
+	
+	final long[] k = keyList.elements();
+	final Object[] v = valueList.elements();
+	org.apache.mahout.colt.Swapper swapper = new org.apache.mahout.colt.Swapper() {
+		public void swap(int a, int b) {
+			long t2;	Object t1;
+			t1 = v[a]; v[a] = v[b]; v[b] = t1;
+			t2 = k[a]; k[a] = k[b];	k[b] = t2;
+		}
+	}; 
+
+	org.apache.mahout.colt.function.IntComparator comp = new org.apache.mahout.colt.function.IntComparator() {
+		public int compare(int a, int b) {
+			int ab = ((Comparable)v[a]).compareTo((Comparable)v[b]);
+			return ab<0 ? -1 : ab>0 ? 1 : (k[a]<k[b] ? -1 : (k[a]==k[b] ? 0 : 1));
+			//return v[a]<v[b] ? -1 : v[a]>v[b] ? 1 : (k[a]<k[b] ? -1 : (k[a]==k[b] ? 0 : 1));
+		}
+	};
+
+	org.apache.mahout.colt.GenericSorting.quickSort(0,keyList.size(),comp,swapper);
+}
+/**
+ * Associates the given key with the given value.
+ * Replaces any old <tt>(key,someOtherValue)</tt> association, if existing.
+ *
+ * @param key the key the value shall be associated with.
+ * @param value the value to be associated.
+ * @return <tt>true</tt> if the receiver did not already contain such a key;
+ *         <tt>false</tt> if the receiver did already contain such a key - the new value has now replaced the formerly associated value.
+ */
+public abstract boolean put(long key, Object value);
+/**
+ * Removes the given key with its associated element from the receiver, if present.
+ *
+ * @param key the key to be removed from the receiver.
+ * @return <tt>true</tt> if the receiver contained the specified key, <tt>false</tt> otherwise.
+ */
+public abstract boolean removeKey(long key);
+/**
+ * Returns a string representation of the receiver, containing
+ * the String representation of each key-value pair, sorted ascending by key.
+ */
+public String toString() {
+	LongArrayList theKeys = keys();
+	theKeys.sort();
+
+	StringBuffer buf = new StringBuffer();
+	buf.append("[");
+	int maxIndex = theKeys.size() - 1;
+	for (int i = 0; i <= maxIndex; i++) {
+		long key = theKeys.get(i);
+	    buf.append(String.valueOf(key));
+		buf.append("->");
+	    buf.append(String.valueOf(get(key)));
+		if (i < maxIndex) buf.append(", ");
+	}
+	buf.append("]");
+	return buf.toString();
+}
+/**
+ * Returns a string representation of the receiver, containing
+ * the String representation of each key-value pair, sorted ascending by value, according to natural ordering.
+ */
+public String toStringByValue() {
+	LongArrayList theKeys = new LongArrayList();
+	keysSortedByValue(theKeys);
+
+	StringBuffer buf = new StringBuffer();
+	buf.append("[");
+	int maxIndex = theKeys.size() - 1;
+	for (int i = 0; i <= maxIndex; i++) {
+		long key = theKeys.get(i);
+	    buf.append(String.valueOf(key));
+		buf.append("->");
+	    buf.append(String.valueOf(get(key)));
+		if (i < maxIndex) buf.append(", ");
+	}
+	buf.append("]");
+	return buf.toString();
+}
+/**
+ * Returns a list filled with all values contained in the receiver.
+ * The returned list has a size that equals <tt>this.size()</tt>.
+ * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(LongProcedure)}.
+ * <p>
+ * This method can be used to iterate over the values of the receiver.
+ *
+ * @return the values.
+ */
+public ObjectArrayList values() {
+	ObjectArrayList list = new ObjectArrayList(size());
+	values(list);
+	return list;
+}
+/**
+ * Fills all values contained in the receiver into the specified list.
+ * Fills the list, starting at index 0.
+ * After this call returns the specified list has a new size that equals <tt>this.size()</tt>.
+ * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(LongProcedure)}.
+ * <p>
+ * This method can be used to iterate over the values of the receiver.
+ *
+ * @param list the list to be filled, can have any size.
+ */
+public void values(final ObjectArrayList list) {
+	list.clear();
+	forEachKey(
+		new LongProcedure() {
+			public boolean apply(long key) {
+				list.add(get(key));
+				return true;
+			}
+		}
+	);
+}
+}

Propchange: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/AbstractLongObjectMap.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/AbstractMap.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/AbstractMap.java?rev=883365&view=auto
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/AbstractMap.java (added)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/AbstractMap.java Mon Nov 23 15:14:26 2009
@@ -0,0 +1,167 @@
+/*
+Copyright 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.colt.map;
+
+/**
+Abstract base class for hash maps holding objects or primitive data types such as <code>int</code>, <code>float</code>, etc. as keys and/or values.
+First see the <a href="package-summary.html">package summary</a> and javadoc <a href="package-tree.html">tree view</a> to get the broad picture.
+<p>
+Note that implementations are not synchronized.
+
+@author wolfgang.hoschek@cern.ch
+@version 1.0, 09/24/99
+@see	    java.util.HashMap
+*/
+/** 
+ * @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported.
+ */
+@Deprecated
+public abstract class AbstractMap extends org.apache.mahout.colt.PersistentObject {
+	//public static boolean debug = false; // debug only
+	
+	/**
+	 * The number of distinct associations in the map; its "size()".
+	 */
+	protected int distinct;
+
+	/**
+	 * The table capacity c=table.length always satisfies the invariant
+	 * <tt>c * minLoadFactor <= s <= c * maxLoadFactor</tt>, where s=size() is the number of associations currently contained.
+	 * The term "c * minLoadFactor" is called the "lowWaterMark", "c * maxLoadFactor" is called the "highWaterMark".
+	 * In other words, the table capacity (and proportionally the memory used by this class) oscillates within these constraints.
+	 * The terms are precomputed and cached to avoid recalculating them each time put(..) or removeKey(...) is called.
+	 */
+	protected int lowWaterMark;
+	protected int highWaterMark;
+
+	/**
+	 * The minimum load factor for the hashtable.
+	 */
+	protected double minLoadFactor;
+
+	/**
+	 * The maximum load factor for the hashtable.
+	 */
+	protected double maxLoadFactor;
+
+	protected static final int defaultCapacity = 277;
+	protected static final double defaultMinLoadFactor = 0.2;
+	protected static final double defaultMaxLoadFactor = 0.5;
+/**
+ * Makes this class non instantiable, but still let's others inherit from it.
+ */
+protected AbstractMap() {}
+/**
+ * Chooses a new prime table capacity optimized for growing that (approximately) satisfies the invariant
+ * <tt>c * minLoadFactor <= size <= c * maxLoadFactor</tt>
+ * and has at least one FREE slot for the given size.
+ */
+protected int chooseGrowCapacity(int size, double minLoad, double maxLoad) {
+	return nextPrime(Math.max(size+1, (int) ((4*size / (3*minLoad+maxLoad)))));
+}
+/**
+ * Returns new high water mark threshold based on current capacity and maxLoadFactor.
+ * @return int the new threshold.
+ */
+protected int chooseHighWaterMark(int capacity, double maxLoad) {
+	return Math.min(capacity-2, (int) (capacity * maxLoad)); //makes sure there is always at least one FREE slot
+}
+/**
+ * Returns new low water mark threshold based on current capacity and minLoadFactor.
+ * @return int the new threshold.
+ */
+protected int chooseLowWaterMark(int capacity, double minLoad) {
+	return (int) (capacity * minLoad);
+}
+/**
+ * Chooses a new prime table capacity neither favoring shrinking nor growing,
+ * that (approximately) satisfies the invariant
+ * <tt>c * minLoadFactor <= size <= c * maxLoadFactor</tt>
+ * and has at least one FREE slot for the given size.
+ */
+protected int chooseMeanCapacity(int size, double minLoad, double maxLoad) {
+	return nextPrime(Math.max(size+1, (int) ((2*size / (minLoad+maxLoad)))));
+}
+/**
+ * Chooses a new prime table capacity optimized for shrinking that (approximately) satisfies the invariant
+ * <tt>c * minLoadFactor <= size <= c * maxLoadFactor</tt>
+ * and has at least one FREE slot for the given size.
+ */
+protected int chooseShrinkCapacity(int size, double minLoad, double maxLoad) {
+	return nextPrime(Math.max(size+1, (int) ((4*size / (minLoad+3*maxLoad)))));
+}
+/**
+ * Removes all (key,value) associations from the receiver.
+ */
+public abstract void clear();
+/**
+ * Ensures that the receiver can hold at least the specified number of elements without needing to allocate new internal memory.
+ * If necessary, allocates new internal memory and increases the capacity of the receiver.
+ * <p>
+ * This method never need be called; it is for performance tuning only.
+ * Calling this method before <tt>put()</tt>ing a large number of associations boosts performance,
+ * because the receiver will grow only once instead of potentially many times.
+ * <p>
+ * <b>This default implementation does nothing.</b> Override this method if necessary.
+ *
+ * @param   minCapacity   the desired minimum capacity.
+ */
+public void ensureCapacity(int minCapacity) {}
+/**
+ * Returns <tt>true</tt> if the receiver contains no (key,value) associations.
+ *
+ * @return <tt>true</tt> if the receiver contains no (key,value) associations.
+ */
+public boolean isEmpty() {
+	return distinct == 0;
+}
+/**
+ * Returns a prime number which is <code>&gt;= desiredCapacity</code> and very close to <code>desiredCapacity</code> (within 11% if <code>desiredCapacity &gt;= 1000</code>).
+ * @param desiredCapacity the capacity desired by the user.
+ * @return the capacity which should be used for a hashtable.
+ */
+protected int nextPrime(int desiredCapacity) {
+	return PrimeFinder.nextPrime(desiredCapacity);
+}
+/**
+ * Initializes the receiver.
+ * You will almost certainly need to override this method in subclasses to initialize the hash table.
+ *
+ * @param      initialCapacity   the initial capacity of the receiver.
+ * @param      minLoadFactor     the minLoadFactor of the receiver.
+ * @param      maxLoadFactor     the maxLoadFactor of the receiver.
+ * @throws	IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
+ */
+protected void setUp(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
+	if (initialCapacity < 0)
+	    throw new IllegalArgumentException("Initial Capacity must not be less than zero: "+ initialCapacity);
+	if (minLoadFactor < 0.0 || minLoadFactor >= 1.0)
+		throw new IllegalArgumentException("Illegal minLoadFactor: "+ minLoadFactor);
+	if (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0)
+		throw new IllegalArgumentException("Illegal maxLoadFactor: "+ maxLoadFactor);
+	if (minLoadFactor >= maxLoadFactor)
+		throw new IllegalArgumentException("Illegal minLoadFactor: "+ minLoadFactor+" and maxLoadFactor: "+ maxLoadFactor);
+}
+/**
+ * Returns the number of (key,value) associations currently contained.
+ *
+ * @return the number of (key,value) associations currently contained.
+ */
+public int size() {
+	return distinct;
+}
+/**
+ * Trims the capacity of the receiver to be the receiver's current 
+ * size. Releases any superfluous internal memory. An application can use this operation to minimize the 
+ * storage of the receiver.
+ * <p>
+ * This default implementation does nothing. Override this method if necessary.
+ */
+public void trimToSize() {}
+}

Propchange: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/AbstractMap.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/Benchmark.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/Benchmark.java?rev=883365&view=auto
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/Benchmark.java (added)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/Benchmark.java Mon Nov 23 15:14:26 2009
@@ -0,0 +1,151 @@
+/*
+Copyright 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.colt.map;
+
+import org.apache.mahout.colt.Timer;
+/**
+ * Benchmarks the classes of this package.
+ *
+ * @author wolfgang.hoschek@cern.ch
+ * @version 1.0, 09/24/99
+ */
+/** 
+ * @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported.
+ */
+@Deprecated
+public class Benchmark extends Object {
+/**
+ * Makes this class non instantiable, but still let's others inherit from it.
+ */
+protected Benchmark() {}
+/**
+ */
+public static void benchmark(int runs, int size, String kind) {
+	QuickOpenIntIntHashMap map;
+
+	System.out.println("initializing...");
+	map = new QuickOpenIntIntHashMap();
+	
+	//for (int i=size; --i >=0; ) {
+	for (int i=0; i < size; i++) {
+		map.put(i,i);
+	}
+	Runtime.getRuntime().gc();
+	try { Thread.currentThread().sleep(1000); } catch (InterruptedException exc) {};
+	
+
+	System.out.println("Now benchmarking...");
+	int s=0;
+	Timer timer0 = new Timer();
+	Timer timer1 = new Timer();
+	Timer timer2 = new Timer();
+	//map.hashCollisions = 0;
+	for (int run=runs; --run >=0; ) {
+		if (kind.equals("add")) {
+			map.clear();
+			//map.ensureCapacity(size*3);
+			timer0.start();
+			for (int i=size; --i >=0; ) {
+				map.put(i,i);
+			}
+			timer0.stop();
+		}
+		if (kind.equals("get")) {
+			timer0.start();
+			for (int i=size; --i >=0; ) {
+				s += map.get(i);
+			}
+			timer0.stop();
+		}
+		else {
+			timer1.start();
+			map.rehash(PrimeFinder.nextPrime(size*2));
+			timer1.stop();
+
+			timer2.start();
+			map.rehash(PrimeFinder.nextPrime((int) (size*1.5)));
+			timer2.stop();
+		}
+	}
+
+	System.out.println("adding: "+timer0);
+	System.out.println("growing: "+timer1);
+	System.out.println("shrinking: "+timer2);
+	System.out.println("total: "+(timer1.plus(timer2)));
+	//System.out.println("collisions="+map.hashCollisions);
+	System.out.print(s);
+}
+/**
+ * Tests various methods of this class.
+ */
+public static void main(String args[]) {
+	int runs = Integer.parseInt(args[0]);
+	int size = Integer.parseInt(args[1]);
+	//boolean add = args[2].equals("add");
+	String kind = args[2];
+	benchmark(runs,size,kind);	 
+}
+/**
+ */
+public static void test2(int length) {
+org.apache.mahout.jet.random.Uniform uniform = new org.apache.mahout.jet.random.Uniform(new org.apache.mahout.jet.random.engine.MersenneTwister());
+// using a map
+//int[]    keys   = {0    , 3     , 277+3, 277*2+3, 100000, 9    };
+//double[] values = {100.0, 1000.0, 277+3, 277*2+3, 70.0  , 71.0 ,};
+//int[]    keys   = {0,1,3,4,5,6, 271,272,273,274,275,276,277+5, 277+6,277+7};
+int[]    keys   = new int[length];
+int to = 10000000;
+for (int i=0; i<length; i++) { 
+	keys[i] = uniform.nextIntFromTo(0,to);
+}
+int[] values = (int[]) keys.clone();
+
+int size = keys.length;
+//AbstractIntIntMap map = new OpenIntIntHashMap(size*2, 0.2, 0.5);
+AbstractIntIntMap map = new OpenIntIntHashMap();
+
+for (int i=0; i<keys.length; i++) {
+	map.put(keys[i], (int)values[i]);
+	//System.out.println(map);
+}
+
+/*
+System.out.println(map.containsKey(3));
+System.out.println(map.get(3));
+
+System.out.println(map.containsKey(4));
+System.out.println(map.get(4));
+
+System.out.println(map.containsValue((int)71.0));
+System.out.println(map.keyOf((int)71.0));
+*/
+
+//System.out.println(map);
+//System.out.println(map.keys());
+//System.out.println(map.values());
+/*
+if (map instanceof QuickOpenIntIntHashMap) {
+	System.out.println("totalProbesSaved="+((QuickOpenIntIntHashMap)map).totalProbesSaved);
+}
+System.out.println("probes="+map.hashCollisions);
+
+map.hashCollisions = 0;
+*/
+int sum=0;
+for (int i=0; i<keys.length; i++) {
+	sum += map.get(keys[i]);
+	//System.out.println(map);
+}
+//System.out.println("probes="+map.hashCollisions);
+
+System.out.println(map);
+System.out.println(sum);
+System.out.println("\n\n");
+}
+}

Propchange: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/Benchmark.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/HashFunctions.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/HashFunctions.java?rev=883365&view=auto
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/HashFunctions.java (added)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/HashFunctions.java Mon Nov 23 15:14:26 2009
@@ -0,0 +1,114 @@
+/*
+Copyright 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.colt.map;
+
+/**
+ * Provides various hash functions.
+ *
+ * @author wolfgang.hoschek@cern.ch
+ * @version 1.0, 09/24/99
+ */
+/** 
+ * @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported.
+ */
+@Deprecated
+public class HashFunctions extends Object {
+/**
+ * Makes this class non instantiable, but still let's others inherit from it.
+ */
+protected HashFunctions() {}
+/**
+ * Returns a hashcode for the specified value.
+ *
+ * @return  a hash code value for the specified value. 
+ */
+public static int hash(char value) {
+	return (int)value;
+}
+/**
+ * Returns a hashcode for the specified value.
+ *
+ * @return  a hash code value for the specified value. 
+ */
+public static int hash(double value) {
+	long bits = Double.doubleToLongBits(value);
+	return (int)(bits ^ (bits >>> 32));
+	
+	//return (int) Double.doubleToLongBits(value*663608941.737);
+	// this avoids excessive hashCollisions in the case values are of the form (1.0, 2.0, 3.0, ...)
+}
+/**
+ * Returns a hashcode for the specified value.
+ *
+ * @return  a hash code value for the specified value. 
+ */
+public static int hash(float value) {
+	return Float.floatToIntBits(value*663608941.737f);
+	// this avoids excessive hashCollisions in the case values are of the form (1.0, 2.0, 3.0, ...)
+}
+/**
+ * Returns a hashcode for the specified value.
+ *
+ * @return  a hash code value for the specified value. 
+ */
+public static int hash(int value) {
+	return value;
+	
+	//return value * 0x278DDE6D; // see org.apache.mahout.jet.random.engine.DRand
+	
+	/*
+	value &= 0x7FFFFFFF; // make it >=0
+	int hashCode = 0;
+	do hashCode = 31*hashCode + value%10;
+	while ((value /= 10) > 0);
+
+	return 28629151*hashCode; // spread even further; h*31^5
+	*/
+}
+/**
+ * Returns a hashcode for the specified value. 
+ *
+ * @return  a hash code value for the specified value. 
+ */
+public static int hash(long value) {
+	return (int)(value ^ (value >> 32));
+	/* 
+	value &= 0x7FFFFFFFFFFFFFFFL; // make it >=0 (0x7FFFFFFFFFFFFFFFL==Long.MAX_VALUE)
+	int hashCode = 0;
+	do hashCode = 31*hashCode + (int) (value%10);
+	while ((value /= 10) > 0);
+
+	return 28629151*hashCode; // spread even further; h*31^5
+	*/
+}
+/**
+ * Returns a hashcode for the specified object.
+ *
+ * @return  a hash code value for the specified object. 
+ */
+public static int hash(Object object) {
+	return object==null ? 0 : object.hashCode();
+}
+/**
+ * Returns a hashcode for the specified value.
+ *
+ * @return  a hash code value for the specified value. 
+ */
+public static int hash(short value) {
+	return (int)value;
+}
+/**
+ * Returns a hashcode for the specified value.
+ *
+ * @return  a hash code value for the specified value. 
+ */
+public static int hash(boolean value) {
+	return value ? 1231 : 1237;
+}
+}

Propchange: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/HashFunctions.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenDoubleIntHashMap.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenDoubleIntHashMap.java?rev=883365&view=auto
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenDoubleIntHashMap.java (added)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenDoubleIntHashMap.java Mon Nov 23 15:14:26 2009
@@ -0,0 +1,512 @@
+/*
+Copyright � 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.colt.map;
+
+import org.apache.mahout.colt.function.DoubleIntProcedure;
+import org.apache.mahout.colt.function.DoubleProcedure;
+import org.apache.mahout.colt.list.ByteArrayList;
+import org.apache.mahout.colt.list.DoubleArrayList;
+import org.apache.mahout.colt.list.IntArrayList;
+/**
+Hash map holding (key,value) associations of type <tt>(double-->int)</tt>; Automatically grows and shrinks as needed; Implemented using open addressing with double hashing.
+First see the <a href="package-summary.html">package summary</a> and javadoc <a href="package-tree.html">tree view</a> to get the broad picture.
+
+Overrides many methods for performance reasons only.
+
+@author wolfgang.hoschek@cern.ch
+@version 1.0, 09/24/99
+@see	    java.util.HashMap
+*/
+/** 
+ * @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported.
+ */
+@Deprecated
+public class OpenDoubleIntHashMap extends AbstractDoubleIntMap {
+	 /**
+	 * The hash table keys.
+	 * @serial
+	 */
+	protected double table[];
+
+	 /**
+	 * The hash table values.
+	 * @serial
+	 */
+	protected int values[];
+
+	/**
+	 * The state of each hash table entry (FREE, FULL, REMOVED).
+	 * @serial
+	 */
+	protected byte state[];
+
+	/**
+	 * The number of table entries in state==FREE.
+	 * @serial
+	 */
+	protected int freeEntries;
+
+	
+	protected static final byte FREE = 0;
+	protected static final byte FULL = 1;
+	protected static final byte REMOVED = 2;
+
+/**
+ * Constructs an empty map with default capacity and default load factors.
+ */
+public OpenDoubleIntHashMap() {
+	this(defaultCapacity);
+}
+/**
+ * Constructs an empty map with the specified initial capacity and default load factors.
+ *
+ * @param      initialCapacity   the initial capacity of the map.
+ * @throws     IllegalArgumentException if the initial capacity is less
+ *             than zero.
+ */
+public OpenDoubleIntHashMap(int initialCapacity) {
+	this(initialCapacity, defaultMinLoadFactor, defaultMaxLoadFactor);
+}
+/**
+ * Constructs an empty map with
+ * the specified initial capacity and the specified minimum and maximum load factor.
+ *
+ * @param      initialCapacity   the initial capacity.
+ * @param      minLoadFactor        the minimum load factor.
+ * @param      maxLoadFactor        the maximum load factor.
+ * @throws	IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
+ */
+public OpenDoubleIntHashMap(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
+	setUp(initialCapacity,minLoadFactor,maxLoadFactor);
+}
+/**
+ * Removes all (key,value) associations from the receiver.
+ * Implicitly calls <tt>trimToSize()</tt>.
+ */
+public void clear() {
+	new ByteArrayList(this.state).fillFromToWith(0, this.state.length-1, FREE);
+   	//new DoubleArrayList(values).fillFromToWith(0, state.length-1, 0); // delta
+
+	this.distinct = 0;
+	this.freeEntries = table.length; // delta
+	trimToSize();
+}
+/**
+ * Returns a deep copy of the receiver.
+ *
+ * @return  a deep copy of the receiver.
+ */
+public Object clone() {
+	OpenDoubleIntHashMap copy = (OpenDoubleIntHashMap) super.clone();
+	copy.table = (double[]) copy.table.clone();
+	copy.values = (int[]) copy.values.clone();
+	copy.state = (byte[]) copy.state.clone();
+	return copy;
+}
+/**
+ * Returns <tt>true</tt> if the receiver contains the specified key.
+ *
+ * @return <tt>true</tt> if the receiver contains the specified key.
+ */
+public boolean containsKey(double key) {
+	return indexOfKey(key) >= 0;
+}
+/**
+ * Returns <tt>true</tt> if the receiver contains the specified value.
+ *
+ * @return <tt>true</tt> if the receiver contains the specified value.
+ */
+public boolean containsValue(int value) {
+	return indexOfValue(value) >= 0;
+}
+/**
+ * Ensures that the receiver can hold at least the specified number of associations without needing to allocate new internal memory.
+ * If necessary, allocates new internal memory and increases the capacity of the receiver.
+ * <p>
+ * This method never need be called; it is for performance tuning only.
+ * Calling this method before <tt>put()</tt>ing a large number of associations boosts performance,
+ * because the receiver will grow only once instead of potentially many times and hash collisions get less probable.
+ *
+ * @param   minCapacity   the desired minimum capacity.
+ */
+public void ensureCapacity(int minCapacity) {
+	if (table.length < minCapacity) {
+		int newCapacity = nextPrime(minCapacity);
+		rehash(newCapacity);
+	}
+}
+/**
+ * Applies a procedure to each key of the receiver, if any.
+ * Note: Iterates over the keys in no particular order.
+ * Subclasses can define a particular order, for example, "sorted by key".
+ * All methods which <i>can</i> be expressed in terms of this method (most methods can) <i>must guarantee</i> to use the <i>same</i> order defined by this method, even if it is no particular order.
+ * This is necessary so that, for example, methods <tt>keys</tt> and <tt>values</tt> will yield association pairs, not two uncorrelated lists.
+ *
+ * @param procedure    the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise continues. 
+ * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise. 
+ */
+public boolean forEachKey(DoubleProcedure procedure) {
+	for (int i = table.length ; i-- > 0 ;) {
+		if (state[i]==FULL) if (! procedure.apply(table[i])) return false;
+	}
+	return true;
+}
+/**
+ * Applies a procedure to each (key,value) pair of the receiver, if any.
+ * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(DoubleProcedure)}.
+ *
+ * @param procedure    the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise continues. 
+ * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise. 
+ */
+public boolean forEachPair(final DoubleIntProcedure procedure) {
+	for (int i = table.length ; i-- > 0 ;) {
+		if (state[i]==FULL) if (! procedure.apply(table[i],values[i])) return false;
+	}
+	return true;
+}
+/**
+ * Returns the value associated with the specified key.
+ * It is often a good idea to first check with {@link #containsKey(double)} whether the given key has a value associated or not, i.e. whether there exists an association for the given key or not.
+ *
+ * @param key the key to be searched for.
+ * @return the value associated with the specified key; <tt>0</tt> if no such key is present.
+ */
+public int get(double key) {
+	int i = indexOfKey(key);
+	if (i<0) return 0; //not contained
+	return values[i];
+}
+/**
+ * @param key the key to be added to the receiver.
+ * @return the index where the key would need to be inserted, if it is not already contained.
+ * Returns -index-1 if the key is already contained at slot index.
+ * Therefore, if the returned index < 0, then it is already contained at slot -index-1.
+ * If the returned index >= 0, then it is NOT already contained and should be inserted at slot index.
+ */
+protected int indexOfInsertion(double key) {
+	final double tab[] = table;
+	final byte stat[] = state;
+	final int length = tab.length;
+
+	final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+	int i = hash % length;
+	int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
+	//int decrement = (hash / length) % length;
+	if (decrement == 0) decrement = 1;
+
+	// stop if we find a removed or free slot, or if we find the key itself
+	// do NOT skip over removed slots (yes, open addressing is like that...)
+	while (stat[i] == FULL && tab[i] != key) {
+		i -= decrement;
+		//hashCollisions++;
+		if (i<0) i+=length;
+	}
+	
+	if (stat[i] == REMOVED) {
+		// stop if we find a free slot, or if we find the key itself.
+		// do skip over removed slots (yes, open addressing is like that...)
+		// assertion: there is at least one FREE slot.
+		int j = i;
+		while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
+			i -= decrement;
+			//hashCollisions++;
+			if (i<0) i+=length;
+		}
+		if (stat[i] == FREE) i = j;
+	}
+	
+	
+	if (stat[i] == FULL) {
+		// key already contained at slot i.
+		// return a negative number identifying the slot.
+		return -i-1;
+	}
+	// not already contained, should be inserted at slot i.
+	// return a number >= 0 identifying the slot.
+	return i; 
+}
+/**
+ * @param key the key to be searched in the receiver.
+ * @return the index where the key is contained in the receiver, returns -1 if the key was not found.
+ */
+protected int indexOfKey(double key) {
+	final double tab[] = table;
+	final byte stat[] = state;
+	final int length = tab.length;
+
+	final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+	int i = hash % length;
+	int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
+	//int decrement = (hash / length) % length;
+	if (decrement == 0) decrement = 1;
+
+	// stop if we find a free slot, or if we find the key itself.
+	// do skip over removed slots (yes, open addressing is like that...)
+	while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
+		i -= decrement;
+		//hashCollisions++;
+		if (i<0) i+=length;
+	}
+	
+	if (stat[i] == FREE) return -1; // not found
+	return i; //found, return index where key is contained
+}
+/**
+ * @param value the value to be searched in the receiver.
+ * @return the index where the value is contained in the receiver, returns -1 if the value was not found.
+ */
+protected int indexOfValue(int value) {
+	final int val[] = values;
+	final byte stat[] = state;
+
+	for (int i=stat.length; --i >= 0;) {
+		if (stat[i]==FULL && val[i]==value) return i;
+	}
+
+	return -1; // not found
+}
+/**
+ * Returns the first key the given value is associated with.
+ * It is often a good idea to first check with {@link #containsValue(int)} whether there exists an association from a key to this value.
+ * Search order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(DoubleProcedure)}.
+ *
+ * @param value the value to search for.
+ * @return the first key for which holds <tt>get(key) == value</tt>; 
+ *		   returns <tt>Double.NaN</tt> if no such key exists.
+ */
+public double keyOf(int value) {
+	//returns the first key found; there may be more matching keys, however.
+	int i = indexOfValue(value);
+	if (i<0) return Double.NaN;
+	return table[i];
+}
+/**
+ * Fills all keys contained in the receiver into the specified list.
+ * Fills the list, starting at index 0.
+ * After this call returns the specified list has a new size that equals <tt>this.size()</tt>.
+ * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(DoubleProcedure)}.
+ * <p>
+ * This method can be used to iterate over the keys of the receiver.
+ *
+ * @param list the list to be filled, can have any size.
+ */
+public void keys(DoubleArrayList list) {
+	list.setSize(distinct);
+	double[] elements = list.elements();
+	
+	double[] tab = table;
+	byte[] stat = state;
+	
+	int j=0;
+	for (int i = tab.length ; i-- > 0 ;) {
+		if (stat[i]==FULL) elements[j++]=tab[i];
+	}
+}
+/**
+Fills all pairs satisfying a given condition into the specified lists.
+Fills into the lists, starting at index 0.
+After this call returns the specified lists both have a new size, the number of pairs satisfying the condition.
+Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(DoubleProcedure)}.
+<p>
+<b>Example:</b>
+<br>
+<pre>
+DoubleIntProcedure condition = new DoubleIntProcedure() { // match even values only
+	public boolean apply(double key, int value) { return value%2==0; }
+}
+keys = (8,7,6), values = (1,2,2) --> keyList = (6,8), valueList = (2,1)</tt>
+</pre>
+
+@param condition    the condition to be matched. Takes the current key as first and the current value as second argument.
+@param keyList the list to be filled with keys, can have any size.
+@param valueList the list to be filled with values, can have any size.
+*/
+public void pairsMatching(final DoubleIntProcedure condition, final DoubleArrayList keyList, final IntArrayList valueList) {
+	keyList.clear();
+	valueList.clear();
+	
+	for (int i = table.length ; i-- > 0 ;) {
+		if (state[i]==FULL && condition.apply(table[i],values[i])) {
+			keyList.add(table[i]);
+			valueList.add(values[i]);
+		}
+	}
+}
+/**
+ * Associates the given key with the given value.
+ * Replaces any old <tt>(key,someOtherValue)</tt> association, if existing.
+ *
+ * @param key the key the value shall be associated with.
+ * @param value the value to be associated.
+ * @return <tt>true</tt> if the receiver did not already contain such a key;
+ *         <tt>false</tt> if the receiver did already contain such a key - the new value has now replaced the formerly associated value.
+ */
+public boolean put(double key, int value) {
+	int i = indexOfInsertion(key);	
+	if (i<0) { //already contained
+		i = -i -1;
+		this.values[i]=value;
+		return false;
+	}
+
+	if (this.distinct > this.highWaterMark) {
+		int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+		/*
+		System.out.print("grow rehashing ");
+		System.out.println("at distinct="+distinct+", capacity="+table.length+" to newCapacity="+newCapacity+" ...");
+		*/
+		rehash(newCapacity);
+		return put(key, value);
+	}
+
+	this.table[i]=key;
+	this.values[i]=value;
+	if (this.state[i]==FREE) this.freeEntries--;
+	this.state[i]=FULL;
+	this.distinct++;
+
+	if (this.freeEntries < 1) { //delta
+		int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+		rehash(newCapacity);
+	}
+
+	return true;
+}
+/**
+ * Rehashes the contents of the receiver into a new table
+ * with a smaller or larger capacity.
+ * This method is called automatically when the
+ * number of keys in the receiver exceeds the high water mark or falls below the low water mark.
+ */
+protected void rehash(int newCapacity) {
+	int oldCapacity = table.length;
+	//if (oldCapacity == newCapacity) return;
+	
+	double oldTable[] = table;
+	int oldValues[] = values;
+	byte oldState[] = state;
+
+	double newTable[] = new double[newCapacity];
+	int newValues[] = new int[newCapacity];
+	byte newState[] = new byte[newCapacity];
+
+	this.lowWaterMark  = chooseLowWaterMark(newCapacity,this.minLoadFactor);
+	this.highWaterMark = chooseHighWaterMark(newCapacity,this.maxLoadFactor);
+
+	this.table = newTable;
+	this.values = newValues;
+	this.state = newState;
+	this.freeEntries = newCapacity-this.distinct; // delta
+	
+	for (int i = oldCapacity ; i-- > 0 ;) {
+		if (oldState[i]==FULL) {
+			double element = oldTable[i];
+			int index = indexOfInsertion(element);
+			newTable[index]=element;
+			newValues[index]=oldValues[i];
+			newState[index]=FULL;
+		}
+	}
+}
+/**
+ * Removes the given key with its associated element from the receiver, if present.
+ *
+ * @param key the key to be removed from the receiver.
+ * @return <tt>true</tt> if the receiver contained the specified key, <tt>false</tt> otherwise.
+ */
+public boolean removeKey(double key) {
+	int i = indexOfKey(key);
+	if (i<0) return false; // key not contained
+
+	this.state[i]=REMOVED;
+	//this.values[i]=0; // delta
+	this.distinct--;
+
+	if (this.distinct < this.lowWaterMark) {
+		int newCapacity = chooseShrinkCapacity(this.distinct,this.minLoadFactor, this.maxLoadFactor);
+		/*
+		if (table.length != newCapacity) {
+			System.out.print("shrink rehashing ");
+			System.out.println("at distinct="+distinct+", capacity="+table.length+" to newCapacity="+newCapacity+" ...");
+		}
+		*/
+		rehash(newCapacity);
+	}
+	
+	return true;	
+}
+/**
+ * Initializes the receiver.
+ *
+ * @param      initialCapacity   the initial capacity of the receiver.
+ * @param      minLoadFactor        the minLoadFactor of the receiver.
+ * @param      maxLoadFactor        the maxLoadFactor of the receiver.
+ * @throws	IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
+ */
+protected void setUp(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
+	int capacity = initialCapacity;
+	super.setUp(capacity, minLoadFactor, maxLoadFactor);
+	capacity = nextPrime(capacity);
+	if (capacity==0) capacity=1; // open addressing needs at least one FREE slot at any time.
+	
+	this.table = new double[capacity];
+	this.values = new int[capacity];
+	this.state = new byte[capacity];
+
+	// memory will be exhausted long before this pathological case happens, anyway.
+	this.minLoadFactor = minLoadFactor;
+	if (capacity == PrimeFinder.largestPrime) this.maxLoadFactor = 1.0;
+	else this.maxLoadFactor = maxLoadFactor;
+
+	this.distinct = 0;
+	this.freeEntries = capacity; // delta
+	
+	// lowWaterMark will be established upon first expansion.
+	// establishing it now (upon instance construction) would immediately make the table shrink upon first put(...).
+	// After all the idea of an "initialCapacity" implies violating lowWaterMarks when an object is young.
+	// See ensureCapacity(...)
+	this.lowWaterMark = 0; 
+	this.highWaterMark = chooseHighWaterMark(capacity, this.maxLoadFactor);
+}
+/**
+ * Trims the capacity of the receiver to be the receiver's current 
+ * size. Releases any superfluous internal memory. An application can use this operation to minimize the 
+ * storage of the receiver.
+ */
+public void trimToSize() {
+	// * 1.2 because open addressing's performance exponentially degrades beyond that point
+	// so that even rehashing the table can take very long
+	int newCapacity = nextPrime((int)(1 + 1.2*size()));
+	if (table.length > newCapacity) {
+		rehash(newCapacity);
+	}
+}
+/**
+ * Fills all values contained in the receiver into the specified list.
+ * Fills the list, starting at index 0.
+ * After this call returns the specified list has a new size that equals <tt>this.size()</tt>.
+ * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(DoubleProcedure)}.
+ * <p>
+ * This method can be used to iterate over the values of the receiver.
+ *
+ * @param list the list to be filled, can have any size.
+ */
+public void values(IntArrayList list) {
+	list.setSize(distinct);
+	int[] elements = list.elements();
+	
+	int[] val = values;
+	byte[] stat = state;
+	
+	int j=0;
+	for (int i = stat.length ; i-- > 0 ;) {
+		if (stat[i]==FULL) elements[j++]=val[i];
+	}
+}
+}

Propchange: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenDoubleIntHashMap.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntDoubleHashMap.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntDoubleHashMap.java?rev=883365&view=auto
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntDoubleHashMap.java (added)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntDoubleHashMap.java Mon Nov 23 15:14:26 2009
@@ -0,0 +1,583 @@
+/*
+Copyright � 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.colt.map;
+
+import org.apache.mahout.colt.function.IntDoubleProcedure;
+import org.apache.mahout.colt.function.IntProcedure;
+import org.apache.mahout.colt.list.ByteArrayList;
+import org.apache.mahout.colt.list.DoubleArrayList;
+import org.apache.mahout.colt.list.IntArrayList;
+/**
+Hash map holding (key,value) associations of type <tt>(int-->double)</tt>; Automatically grows and shrinks as needed; Implemented using open addressing with double hashing.
+First see the <a href="package-summary.html">package summary</a> and javadoc <a href="package-tree.html">tree view</a> to get the broad picture.
+
+Overrides many methods for performance reasons only.
+
+@author wolfgang.hoschek@cern.ch
+@version 1.0, 09/24/99
+@see	    java.util.HashMap
+*/
+/** 
+ * @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported.
+ */
+@Deprecated
+public class OpenIntDoubleHashMap extends AbstractIntDoubleMap {
+	//public static int hashCollisions = 0;
+	/**
+	 * The hash table keys.
+	 * @serial
+	 */
+	protected int table[];
+
+	 /**
+	 * The hash table values.
+	 * @serial
+	 */
+	protected double values[];
+
+	/**
+	 * The state of each hash table entry (FREE, FULL, REMOVED).
+	 * @serial
+	 */
+	protected byte state[];
+	
+	/**
+	 * The number of table entries in state==FREE.
+	 * @serial
+	 */
+	protected int freeEntries;
+
+	
+	protected static final byte FREE = 0;
+	protected static final byte FULL = 1;
+	protected static final byte REMOVED = 2;
+
+/**
+ * Constructs an empty map with default capacity and default load factors.
+ */
+public OpenIntDoubleHashMap() {
+	this(defaultCapacity);
+}
+/**
+ * Constructs an empty map with the specified initial capacity and default load factors.
+ *
+ * @param      initialCapacity   the initial capacity of the map.
+ * @throws     IllegalArgumentException if the initial capacity is less
+ *             than zero.
+ */
+public OpenIntDoubleHashMap(int initialCapacity) {
+	this(initialCapacity, defaultMinLoadFactor, defaultMaxLoadFactor);
+}
+/**
+ * Constructs an empty map with
+ * the specified initial capacity and the specified minimum and maximum load factor.
+ *
+ * @param      initialCapacity   the initial capacity.
+ * @param      minLoadFactor        the minimum load factor.
+ * @param      maxLoadFactor        the maximum load factor.
+ * @throws	IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
+ */
+public OpenIntDoubleHashMap(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
+	setUp(initialCapacity,minLoadFactor,maxLoadFactor);
+}
+/**
+Assigns the result of a function to each value; <tt>v[i] = function(v[i])</tt>.
+
+@param function a function object taking as argument the current association's value.
+*/
+public void assign(org.apache.mahout.colt.function.DoubleFunction function) {
+	// specialization for speed
+	if (function instanceof org.apache.mahout.jet.math.Mult) { // x[i] = mult*x[i]
+		double multiplicator = ((org.apache.mahout.jet.math.Mult)function).multiplicator;
+		if (multiplicator==1) return;
+		if (multiplicator==0) {
+			clear();
+			return;
+		}
+		for (int i = table.length ; i-- > 0 ;) {
+			if (state[i]==FULL) values[i] *= multiplicator;
+		}
+	}
+	else { // the general case x[i] = f(x[i])
+		for (int i = table.length ; i-- > 0 ;) {
+			if (state[i]==FULL) values[i] = function.apply(values[i]);
+		}
+	}
+}
+/**
+ * Clears the receiver, then adds all (key,value) pairs of <tt>other</tt>values to it.
+ *
+ * @param other the other map to be copied into the receiver.
+ */
+public void assign(AbstractIntDoubleMap other) {
+	if (!(other instanceof OpenIntDoubleHashMap)) {
+		super.assign(other);
+		return;
+	}
+	OpenIntDoubleHashMap source = (OpenIntDoubleHashMap) other;
+	OpenIntDoubleHashMap copy = (OpenIntDoubleHashMap) source.copy();
+	this.values = copy.values;
+	this.table = copy.table;
+	this.state = copy.state;
+	this.freeEntries = copy.freeEntries;
+	this.distinct = copy.distinct;
+	this.lowWaterMark = copy.lowWaterMark;
+	this.highWaterMark = copy.highWaterMark;
+	this.minLoadFactor = copy.minLoadFactor;
+	this.maxLoadFactor = copy.maxLoadFactor;
+}
+/**
+ * Removes all (key,value) associations from the receiver.
+ * Implicitly calls <tt>trimToSize()</tt>.
+ */
+public void clear() {
+	new ByteArrayList(this.state).fillFromToWith(0, this.state.length-1, FREE);
+	//new DoubleArrayList(values).fillFromToWith(0, state.length-1, 0); // delta
+	
+	/*
+	if (debug) {
+		for (int i=table.length; --i >= 0; ) {
+		    state[i] = FREE;
+		    table[i]= Integer.MAX_VALUE;
+		    values[i]= Double.NaN;
+		}
+	}
+	*/
+	
+	this.distinct = 0;
+	this.freeEntries = table.length; // delta
+	trimToSize();
+}
+/**
+ * Returns a deep copy of the receiver.
+ *
+ * @return  a deep copy of the receiver.
+ */
+public Object clone() {
+	OpenIntDoubleHashMap copy = (OpenIntDoubleHashMap) super.clone();
+	copy.table = (int[]) copy.table.clone();
+	copy.values = (double[]) copy.values.clone();
+	copy.state = (byte[]) copy.state.clone();
+	return copy;
+}
+/**
+ * Returns <tt>true</tt> if the receiver contains the specified key.
+ *
+ * @return <tt>true</tt> if the receiver contains the specified key.
+ */
+public boolean containsKey(int key) {
+	return indexOfKey(key) >= 0;
+}
+/**
+ * Returns <tt>true</tt> if the receiver contains the specified value.
+ *
+ * @return <tt>true</tt> if the receiver contains the specified value.
+ */
+public boolean containsValue(double value) {
+	return indexOfValue(value) >= 0;
+}
+/**
+ * Ensures that the receiver can hold at least the specified number of associations without needing to allocate new internal memory.
+ * If necessary, allocates new internal memory and increases the capacity of the receiver.
+ * <p>
+ * This method never need be called; it is for performance tuning only.
+ * Calling this method before <tt>put()</tt>ing a large number of associations boosts performance,
+ * because the receiver will grow only once instead of potentially many times and hash collisions get less probable.
+ *
+ * @param   minCapacity   the desired minimum capacity.
+ */
+public void ensureCapacity(int minCapacity) {
+	if (table.length < minCapacity) {
+		int newCapacity = nextPrime(minCapacity);
+		rehash(newCapacity);
+	}
+}
+/**
+ * Applies a procedure to each key of the receiver, if any.
+ * Note: Iterates over the keys in no particular order.
+ * Subclasses can define a particular order, for example, "sorted by key".
+ * All methods which <i>can</i> be expressed in terms of this method (most methods can) <i>must guarantee</i> to use the <i>same</i> order defined by this method, even if it is no particular order.
+ * This is necessary so that, for example, methods <tt>keys</tt> and <tt>values</tt> will yield association pairs, not two uncorrelated lists.
+ *
+ * @param procedure    the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise continues. 
+ * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise. 
+ */
+public boolean forEachKey(IntProcedure procedure) {
+	for (int i = table.length ; i-- > 0 ;) {
+		if (state[i]==FULL) if (! procedure.apply(table[i])) return false;
+	}
+	return true;
+}
+/**
+ * Applies a procedure to each (key,value) pair of the receiver, if any.
+ * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(IntProcedure)}.
+ *
+ * @param procedure    the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise continues. 
+ * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise. 
+ */
+public boolean forEachPair(final IntDoubleProcedure procedure) {
+	for (int i = table.length ; i-- > 0 ;) {
+		if (state[i]==FULL) if (! procedure.apply(table[i],values[i])) return false;
+	}
+	return true;
+}
+/**
+ * Returns the value associated with the specified key.
+ * It is often a good idea to first check with {@link #containsKey(int)} whether the given key has a value associated or not, i.e. whether there exists an association for the given key or not.
+ *
+ * @param key the key to be searched for.
+ * @return the value associated with the specified key; <tt>0</tt> if no such key is present.
+ */
+public double get(int key) {
+	int i = indexOfKey(key);
+	if (i<0) return 0; //not contained
+	return values[i];
+}
+/**
+ * @param key the key to be added to the receiver.
+ * @return the index where the key would need to be inserted, if it is not already contained.
+ * Returns -index-1 if the key is already contained at slot index.
+ * Therefore, if the returned index < 0, then it is already contained at slot -index-1.
+ * If the returned index >= 0, then it is NOT already contained and should be inserted at slot index.
+ */
+protected int indexOfInsertion(int key) {
+	final int tab[] = table;
+	final byte stat[] = state;
+	final int length = tab.length;
+
+	final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+	int i = hash % length;
+	int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
+	//int decrement = (hash / length) % length;
+	if (decrement == 0) decrement = 1;
+
+	// stop if we find a removed or free slot, or if we find the key itself
+	// do NOT skip over removed slots (yes, open addressing is like that...)
+	while (stat[i] == FULL && tab[i] != key) {
+		i -= decrement;
+		//hashCollisions++;
+		if (i<0) i+=length;
+	}
+	
+	if (stat[i] == REMOVED) {
+		// stop if we find a free slot, or if we find the key itself.
+		// do skip over removed slots (yes, open addressing is like that...)
+		// assertion: there is at least one FREE slot.
+		int j = i;
+		while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
+			i -= decrement;
+			//hashCollisions++;
+			if (i<0) i+=length;
+		}
+		if (stat[i] == FREE) i = j;
+	}
+	
+	
+	if (stat[i] == FULL) {
+		// key already contained at slot i.
+		// return a negative number identifying the slot.
+		return -i-1;
+	}
+	// not already contained, should be inserted at slot i.
+	// return a number >= 0 identifying the slot.
+	return i; 
+}
+/**
+ * @param key the key to be searched in the receiver.
+ * @return the index where the key is contained in the receiver, else returns -1.
+ */
+protected int indexOfKey(int key) {
+	final int tab[] = table;
+	final byte stat[] = state;
+	final int length = tab.length;
+
+	final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+	int i = hash % length;
+	int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
+	//int decrement = (hash / length) % length;
+	if (decrement == 0) decrement = 1;
+
+	// stop if we find a free slot, or if we find the key itself.
+	// do skip over removed slots (yes, open addressing is like that...)
+	// assertion: there is at least one FREE slot.
+	while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
+		i -= decrement;
+		//hashCollisions++;
+		if (i<0) i+=length;
+	}
+	
+	if (stat[i] == FREE) return -1; // not found
+	return i; //found, return index where key is contained
+}
+/**
+ * @param value the value to be searched in the receiver.
+ * @return the index where the value is contained in the receiver, returns -1 if the value was not found.
+ */
+protected int indexOfValue(double value) {
+	final double val[] = values;
+	final byte stat[] = state;
+
+	for (int i=stat.length; --i >= 0;) {
+		if (stat[i]==FULL && val[i]==value) return i;
+	}
+
+	return -1; // not found
+}
+/**
+ * Returns the first key the given value is associated with.
+ * It is often a good idea to first check with {@link #containsValue(double)} whether there exists an association from a key to this value.
+ * Search order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(IntProcedure)}.
+ *
+ * @param value the value to search for.
+ * @return the first key for which holds <tt>get(key) == value</tt>; 
+ *		   returns <tt>Integer.MIN_VALUE</tt> if no such key exists.
+ */
+public int keyOf(double value) {
+	//returns the first key found; there may be more matching keys, however.
+	int i = indexOfValue(value);
+	if (i<0) return Integer.MIN_VALUE;
+	return table[i];
+}
+/**
+ * Fills all keys contained in the receiver into the specified list.
+ * Fills the list, starting at index 0.
+ * After this call returns the specified list has a new size that equals <tt>this.size()</tt>.
+ * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(IntProcedure)}.
+ * <p>
+ * This method can be used to iterate over the keys of the receiver.
+ *
+ * @param list the list to be filled, can have any size.
+ */
+public void keys(IntArrayList list) {
+	list.setSize(distinct);
+	int[] elements = list.elements();
+	
+	int[] tab = table;
+	byte[] stat = state;
+	
+	int j=0;
+	for (int i = tab.length ; i-- > 0 ;) {
+		if (stat[i]==FULL) elements[j++]=tab[i];
+	}
+}
+/**
+Fills all pairs satisfying a given condition into the specified lists.
+Fills into the lists, starting at index 0.
+After this call returns the specified lists both have a new size, the number of pairs satisfying the condition.
+Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(IntProcedure)}.
+<p>
+<b>Example:</b>
+<br>
+<pre>
+IntDoubleProcedure condition = new IntDoubleProcedure() { // match even keys only
+	public boolean apply(int key, double value) { return key%2==0; }
+}
+keys = (8,7,6), values = (1,2,2) --> keyList = (6,8), valueList = (2,1)</tt>
+</pre>
+
+@param condition    the condition to be matched. Takes the current key as first and the current value as second argument.
+@param keyList the list to be filled with keys, can have any size.
+@param valueList the list to be filled with values, can have any size.
+*/
+public void pairsMatching(final IntDoubleProcedure condition, final IntArrayList keyList, final DoubleArrayList valueList) {
+	keyList.clear();
+	valueList.clear();
+	
+	for (int i = table.length ; i-- > 0 ;) {
+		if (state[i]==FULL && condition.apply(table[i],values[i])) {
+			keyList.add(table[i]);
+			valueList.add(values[i]);
+		}
+	}
+}
+/**
+ * Associates the given key with the given value.
+ * Replaces any old <tt>(key,someOtherValue)</tt> association, if existing.
+ *
+ * @param key the key the value shall be associated with.
+ * @param value the value to be associated.
+ * @return <tt>true</tt> if the receiver did not already contain such a key;
+ *         <tt>false</tt> if the receiver did already contain such a key - the new value has now replaced the formerly associated value.
+ */
+public boolean put(int key, double value) {
+	int i = indexOfInsertion(key);	
+	if (i<0) { //already contained
+		i = -i -1;
+		//if (debug) if (this.state[i] != FULL) throw new InternalError();
+		//if (debug) if (this.table[i] != key) throw new InternalError();
+		this.values[i]=value;
+		return false;
+	}
+
+	if (this.distinct > this.highWaterMark) {
+		int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+		/*
+		System.out.print("grow rehashing ");
+		System.out.println("at distinct="+distinct+", capacity="+table.length+" to newCapacity="+newCapacity+" ...");
+		*/
+		rehash(newCapacity);
+		return put(key, value);
+	}
+
+	this.table[i]=key;
+	this.values[i]=value;
+	if (this.state[i]==FREE) this.freeEntries--;
+	this.state[i]=FULL;
+	this.distinct++;
+
+	if (this.freeEntries < 1) { //delta
+		int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+		rehash(newCapacity);
+	}
+	
+	return true;
+}
+/**
+ * Rehashes the contents of the receiver into a new table
+ * with a smaller or larger capacity.
+ * This method is called automatically when the
+ * number of keys in the receiver exceeds the high water mark or falls below the low water mark.
+ */
+protected void rehash(int newCapacity) {
+	int oldCapacity = table.length;
+	//if (oldCapacity == newCapacity) return;
+
+	if (newCapacity<=this.distinct) throw new InternalError();	
+	//if (debug) check();
+
+	int oldTable[] = table;
+	double oldValues[] = values;
+	byte oldState[] = state;
+
+	int newTable[] = new int[newCapacity];
+	double newValues[] = new double[newCapacity];
+	byte newState[] = new byte[newCapacity];
+
+	this.lowWaterMark  = chooseLowWaterMark(newCapacity,this.minLoadFactor);
+	this.highWaterMark = chooseHighWaterMark(newCapacity,this.maxLoadFactor);
+
+	this.table = newTable;
+	this.values = newValues;
+	this.state = newState;
+	this.freeEntries = newCapacity-this.distinct; // delta
+	
+	for (int i = oldCapacity ; i-- > 0 ;) {
+		if (oldState[i]==FULL) {			
+			int element = oldTable[i];
+			int index = indexOfInsertion(element);
+			newTable[index]=element;
+			newValues[index]=oldValues[i];
+			newState[index]=FULL;
+			
+		}
+	}
+
+	//if (debug) check();
+}
+/**
+ * Removes the given key with its associated element from the receiver, if present.
+ *
+ * @param key the key to be removed from the receiver.
+ * @return <tt>true</tt> if the receiver contained the specified key, <tt>false</tt> otherwise.
+ */
+public boolean removeKey(int key) {
+	int i = indexOfKey(key);
+	if (i<0) return false; // key not contained
+
+	//if (debug) if (this.state[i] == FREE) throw new InternalError();
+	//if (debug) if (this.state[i] == REMOVED) throw new InternalError();
+	this.state[i]=REMOVED;
+	//this.values[i]=0; // delta
+	
+	//if (debug) this.table[i]=Integer.MAX_VALUE; // delta
+	//if (debug) this.values[i]=Double.NaN; // delta
+	this.distinct--;
+
+	if (this.distinct < this.lowWaterMark) {
+		int newCapacity = chooseShrinkCapacity(this.distinct,this.minLoadFactor, this.maxLoadFactor);
+		/*
+		if (table.length != newCapacity) {
+			System.out.print("shrink rehashing ");
+			System.out.println("at distinct="+distinct+", capacity="+table.length+" to newCapacity="+newCapacity+" ...");
+		}
+		*/
+		rehash(newCapacity);
+	}
+	
+	return true;	
+}
+/**
+ * Initializes the receiver.
+ *
+ * @param      initialCapacity   the initial capacity of the receiver.
+ * @param      minLoadFactor        the minLoadFactor of the receiver.
+ * @param      maxLoadFactor        the maxLoadFactor of the receiver.
+ * @throws	IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
+ */
+protected void setUp(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
+	int capacity = initialCapacity;
+	super.setUp(capacity, minLoadFactor, maxLoadFactor);
+	capacity = nextPrime(capacity);
+	if (capacity==0) capacity=1; // open addressing needs at least one FREE slot at any time.
+	
+	this.table = new int[capacity];
+	this.values = new double[capacity];
+	this.state = new byte[capacity];
+
+	// memory will be exhausted long before this pathological case happens, anyway.
+	this.minLoadFactor = minLoadFactor;
+	if (capacity == PrimeFinder.largestPrime) this.maxLoadFactor = 1.0;
+	else this.maxLoadFactor = maxLoadFactor;
+
+	this.distinct = 0;
+	this.freeEntries = capacity; // delta
+	
+	// lowWaterMark will be established upon first expansion.
+	// establishing it now (upon instance construction) would immediately make the table shrink upon first put(...).
+	// After all the idea of an "initialCapacity" implies violating lowWaterMarks when an object is young.
+	// See ensureCapacity(...)
+	this.lowWaterMark = 0; 
+	this.highWaterMark = chooseHighWaterMark(capacity, this.maxLoadFactor);
+}
+/**
+ * Trims the capacity of the receiver to be the receiver's current 
+ * size. Releases any superfluous internal memory. An application can use this operation to minimize the 
+ * storage of the receiver.
+ */
+public void trimToSize() {
+	// * 1.2 because open addressing's performance exponentially degrades beyond that point
+	// so that even rehashing the table can take very long
+	int newCapacity = nextPrime((int)(1 + 1.2*size()));
+	if (table.length > newCapacity) {
+		rehash(newCapacity);
+	}
+}
+/**
+ * Fills all values contained in the receiver into the specified list.
+ * Fills the list, starting at index 0.
+ * After this call returns the specified list has a new size that equals <tt>this.size()</tt>.
+ * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(IntProcedure)}.
+ * <p>
+ * This method can be used to iterate over the values of the receiver.
+ *
+ * @param list the list to be filled, can have any size.
+ */
+public void values(DoubleArrayList list) {
+	list.setSize(distinct);
+	double[] elements = list.elements();
+	
+	double[] val = values;
+	byte[] stat = state;
+	
+	int j=0;
+	for (int i = stat.length ; i-- > 0 ;) {
+		if (stat[i]==FULL) elements[j++]=val[i];
+	}
+}
+}

Propchange: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntDoubleHashMap.java
------------------------------------------------------------------------------
    svn:eol-style = native