You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by gs...@apache.org on 2009/11/23 16:14:38 UTC
svn commit: r883365 [25/47] - in /lucene/mahout/trunk: ./ examples/ matrix/
matrix/src/ matrix/src/main/ matrix/src/main/java/
matrix/src/main/java/org/ matrix/src/main/java/org/apache/
matrix/src/main/java/org/apache/mahout/ matrix/src/main/java/org/a...
Added: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntIntHashMap.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntIntHashMap.java?rev=883365&view=auto
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntIntHashMap.java (added)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntIntHashMap.java Mon Nov 23 15:14:26 2009
@@ -0,0 +1,512 @@
+/*
+Copyright � 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose
+is hereby granted without fee, provided that the above copyright notice appear in all copies and
+that both that copyright notice and this permission notice appear in supporting documentation.
+CERN makes no representations about the suitability of this software for any purpose.
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.colt.map;
+
+import org.apache.mahout.colt.function.IntIntProcedure;
+import org.apache.mahout.colt.function.IntProcedure;
+import org.apache.mahout.colt.list.ByteArrayList;
+import org.apache.mahout.colt.list.IntArrayList;
+/**
+Hash map holding (key,value) associations of type <tt>(int-->int)</tt>; Automatically grows and shrinks as needed; Implemented using open addressing with double hashing.
+First see the <a href="package-summary.html">package summary</a> and javadoc <a href="package-tree.html">tree view</a> to get the broad picture.
+
+Overrides many methods for performance reasons only.
+
+@author wolfgang.hoschek@cern.ch
+@version 1.0, 09/24/99
+@see java.util.HashMap
+*/
+/**
+ * @deprecated until unit tests are in place. Until this time, this class/interface is unsupported.
+ */
+@Deprecated
+public class OpenIntIntHashMap extends AbstractIntIntMap {
+ /**
+ * The hash table keys.
+ * @serial
+ */
+ protected int table[];
+
+ /**
+ * The hash table values.
+ * @serial
+ */
+ protected int values[];
+
+ /**
+ * The state of each hash table entry (FREE, FULL, REMOVED).
+ * @serial
+ */
+ protected byte state[];
+
+ /**
+ * The number of table entries in state==FREE.
+ * @serial
+ */
+ protected int freeEntries;
+
+
+ protected static final byte FREE = 0;
+ protected static final byte FULL = 1;
+ protected static final byte REMOVED = 2;
+
+/**
+ * Constructs an empty map with default capacity and default load factors.
+ */
+public OpenIntIntHashMap() {
+ this(defaultCapacity);
+}
+/**
+ * Constructs an empty map with the specified initial capacity and default load factors.
+ *
+ * @param initialCapacity the initial capacity of the map.
+ * @throws IllegalArgumentException if the initial capacity is less
+ * than zero.
+ */
+public OpenIntIntHashMap(int initialCapacity) {
+ this(initialCapacity, defaultMinLoadFactor, defaultMaxLoadFactor);
+}
+/**
+ * Constructs an empty map with
+ * the specified initial capacity and the specified minimum and maximum load factor.
+ *
+ * @param initialCapacity the initial capacity.
+ * @param minLoadFactor the minimum load factor.
+ * @param maxLoadFactor the maximum load factor.
+ * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
+ */
+public OpenIntIntHashMap(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
+ setUp(initialCapacity,minLoadFactor,maxLoadFactor);
+}
+/**
+ * Removes all (key,value) associations from the receiver.
+ * Implicitly calls <tt>trimToSize()</tt>.
+ */
+public void clear() {
+ new ByteArrayList(this.state).fillFromToWith(0, this.state.length-1, FREE);
+ //new IntArrayList(values).fillFromToWith(0, state.length-1, 0); // delta
+
+ this.distinct = 0;
+ this.freeEntries = table.length; // delta
+ trimToSize();
+}
+/**
+ * Returns a deep copy of the receiver.
+ *
+ * @return a deep copy of the receiver.
+ */
+public Object clone() {
+ OpenIntIntHashMap copy = (OpenIntIntHashMap) super.clone();
+ copy.table = (int[]) copy.table.clone();
+ copy.values = (int[]) copy.values.clone();
+ copy.state = (byte[]) copy.state.clone();
+ return copy;
+}
+/**
+ * Returns <tt>true</tt> if the receiver contains the specified key.
+ *
+ * @return <tt>true</tt> if the receiver contains the specified key.
+ */
+public boolean containsKey(int key) {
+ return indexOfKey(key) >= 0;
+}
+/**
+ * Returns <tt>true</tt> if the receiver contains the specified value.
+ *
+ * @return <tt>true</tt> if the receiver contains the specified value.
+ */
+public boolean containsValue(int value) {
+ return indexOfValue(value) >= 0;
+}
+/**
+ * Ensures that the receiver can hold at least the specified number of associations without needing to allocate new internal memory.
+ * If necessary, allocates new internal memory and increases the capacity of the receiver.
+ * <p>
+ * This method never need be called; it is for performance tuning only.
+ * Calling this method before <tt>put()</tt>ing a large number of associations boosts performance,
+ * because the receiver will grow only once instead of potentially many times and hash collisions get less probable.
+ *
+ * @param minCapacity the desired minimum capacity.
+ */
+public void ensureCapacity(int minCapacity) {
+ if (table.length < minCapacity) {
+ int newCapacity = nextPrime(minCapacity);
+ rehash(newCapacity);
+ }
+}
+/**
+ * Applies a procedure to each key of the receiver, if any.
+ * Note: Iterates over the keys in no particular order.
+ * Subclasses can define a particular order, for example, "sorted by key".
+ * All methods which <i>can</i> be expressed in terms of this method (most methods can) <i>must guarantee</i> to use the <i>same</i> order defined by this method, even if it is no particular order.
+ * This is necessary so that, for example, methods <tt>keys</tt> and <tt>values</tt> will yield association pairs, not two uncorrelated lists.
+ *
+ * @param procedure the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise continues.
+ * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise.
+ */
+public boolean forEachKey(IntProcedure procedure) {
+ for (int i = table.length ; i-- > 0 ;) {
+ if (state[i]==FULL) if (! procedure.apply(table[i])) return false;
+ }
+ return true;
+}
+/**
+ * Applies a procedure to each (key,value) pair of the receiver, if any.
+ * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(IntProcedure)}.
+*
+ * @param procedure the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise continues.
+ * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise.
+ */
+public boolean forEachPair(final IntIntProcedure procedure) {
+ for (int i = table.length ; i-- > 0 ;) {
+ if (state[i]==FULL) if (! procedure.apply(table[i],values[i])) return false;
+ }
+ return true;
+}
+/**
+ * Returns the value associated with the specified key.
+ * It is often a good idea to first check with {@link #containsKey(int)} whether the given key has a value associated or not, i.e. whether there exists an association for the given key or not.
+ *
+ * @param key the key to be searched for.
+ * @return the value associated with the specified key; <tt>0</tt> if no such key is present.
+ */
+public int get(int key) {
+ int i = indexOfKey(key);
+ if (i<0) return 0; //not contained
+ return values[i];
+}
+/**
+ * @param key the key to be added to the receiver.
+ * @return the index where the key would need to be inserted, if it is not already contained.
+ * Returns -index-1 if the key is already contained at slot index.
+ * Therefore, if the returned index < 0, then it is already contained at slot -index-1.
+ * If the returned index >= 0, then it is NOT already contained and should be inserted at slot index.
+ */
+protected int indexOfInsertion(int key) {
+ //System.out.println("key="+key);
+ final int tab[] = table;
+ final byte stat[] = state;
+ final int length = tab.length;
+
+ final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+ int i = hash % length;
+ int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
+ //int decrement = (hash / length) % length;
+ if (decrement == 0) decrement = 1;
+
+ // stop if we find a removed or free slot, or if we find the key itself
+ // do NOT skip over removed slots (yes, open addressing is like that...)
+ while (stat[i] == FULL && tab[i] != key) {
+ i -= decrement;
+ //hashCollisions++;
+ if (i<0) i+=length;
+ }
+
+ if (stat[i] == REMOVED) {
+ // stop if we find a free slot, or if we find the key itself.
+ // do skip over removed slots (yes, open addressing is like that...)
+ // assertion: there is at least one FREE slot.
+ int j = i;
+ while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
+ i -= decrement;
+ //hashCollisions++;
+ if (i<0) i+=length;
+ }
+ if (stat[i] == FREE) i = j;
+ }
+
+
+ if (stat[i] == FULL) {
+ // key already contained at slot i.
+ // return a negative number identifying the slot.
+ return -i-1;
+ }
+ // not already contained, should be inserted at slot i.
+ // return a number >= 0 identifying the slot.
+ return i;
+}
+/**
+ * @param key the key to be searched in the receiver.
+ * @return the index where the key is contained in the receiver, returns -1 if the key was not found.
+ */
+protected int indexOfKey(int key) {
+ final int tab[] = table;
+ final byte stat[] = state;
+ final int length = tab.length;
+
+ final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+ int i = hash % length;
+ int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
+ //int decrement = (hash / length) % length;
+ if (decrement == 0) decrement = 1;
+
+ // stop if we find a free slot, or if we find the key itself.
+ // do skip over removed slots (yes, open addressing is like that...)
+ while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
+ i -= decrement;
+ //hashCollisions++;
+ if (i<0) i+=length;
+ }
+
+ if (stat[i] == FREE) return -1; // not found
+ return i; //found, return index where key is contained
+}
+/**
+ * @param value the value to be searched in the receiver.
+ * @return the index where the value is contained in the receiver, returns -1 if the value was not found.
+ */
+protected int indexOfValue(int value) {
+ final int val[] = values;
+ final byte stat[] = state;
+
+ for (int i=stat.length; --i >= 0;) {
+ if (stat[i]==FULL && val[i]==value) return i;
+ }
+
+ return -1; // not found
+}
+/**
+ * Returns the first key the given value is associated with.
+ * It is often a good idea to first check with {@link #containsValue(int)} whether there exists an association from a key to this value.
+ * Search order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(IntProcedure)}.
+ *
+ * @param value the value to search for.
+ * @return the first key for which holds <tt>get(key) == value</tt>;
+ * returns <tt>Integer.MIN_VALUE</tt> if no such key exists.
+ */
+public int keyOf(int value) {
+ //returns the first key found; there may be more matching keys, however.
+ int i = indexOfValue(value);
+ if (i<0) return Integer.MIN_VALUE;
+ return table[i];
+}
+/**
+ * Fills all keys contained in the receiver into the specified list.
+ * Fills the list, starting at index 0.
+ * After this call returns the specified list has a new size that equals <tt>this.size()</tt>.
+ * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(IntProcedure)}.
+ * <p>
+ * This method can be used to iterate over the keys of the receiver.
+ *
+ * @param list the list to be filled, can have any size.
+ */
+public void keys(IntArrayList list) {
+ list.setSize(distinct);
+ int[] elements = list.elements();
+
+ int[] tab = table;
+ byte[] stat = state;
+
+ int j=0;
+ for (int i = tab.length ; i-- > 0 ;) {
+ if (stat[i]==FULL) elements[j++]=tab[i];
+ }
+}
+/**
+Fills all pairs satisfying a given condition into the specified lists.
+Fills into the lists, starting at index 0.
+After this call returns the specified lists both have a new size, the number of pairs satisfying the condition.
+Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(IntProcedure)}.
+<p>
+<b>Example:</b>
+<br>
+<pre>
+IntIntProcedure condition = new IntIntProcedure() { // match even keys only
+ public boolean apply(int key, int value) { return key%2==0; }
+}
+keys = (8,7,6), values = (1,2,2) --> keyList = (6,8), valueList = (2,1)</tt>
+</pre>
+
+@param condition the condition to be matched. Takes the current key as first and the current value as second argument.
+@param keyList the list to be filled with keys, can have any size.
+@param valueList the list to be filled with values, can have any size.
+*/
+public void pairsMatching(final IntIntProcedure condition, final IntArrayList keyList, final IntArrayList valueList) {
+ keyList.clear();
+ valueList.clear();
+
+ for (int i = table.length ; i-- > 0 ;) {
+ if (state[i]==FULL && condition.apply(table[i],values[i])) {
+ keyList.add(table[i]);
+ valueList.add(values[i]);
+ }
+ }
+}
+/**
+ * Associates the given key with the given value.
+ * Replaces any old <tt>(key,someOtherValue)</tt> association, if existing.
+ *
+ * @param key the key the value shall be associated with.
+ * @param value the value to be associated.
+ * @return <tt>true</tt> if the receiver did not already contain such a key;
+ * <tt>false</tt> if the receiver did already contain such a key - the new value has now replaced the formerly associated value.
+ */
+public boolean put(int key, int value) {
+ int i = indexOfInsertion(key);
+ if (i<0) { //already contained
+ i = -i -1;
+ this.values[i]=value;
+ return false;
+ }
+
+ if (this.distinct > this.highWaterMark) {
+ int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+
+ //System.out.print("grow rehashing ");
+ //System.out.println("at distinct="+distinct+", capacity="+table.length+" to newCapacity="+newCapacity+" ...");
+
+ rehash(newCapacity);
+ return put(key, value);
+ }
+
+ this.table[i]=key;
+ this.values[i]=value;
+ if (this.state[i]==FREE) this.freeEntries--;
+ this.state[i]=FULL;
+ this.distinct++;
+
+ if (this.freeEntries < 1) { //delta
+ int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+ rehash(newCapacity);
+ }
+
+ return true;
+}
+/**
+ * Rehashes the contents of the receiver into a new table
+ * with a smaller or larger capacity.
+ * This method is called automatically when the
+ * number of keys in the receiver exceeds the high water mark or falls below the low water mark.
+ */
+protected void rehash(int newCapacity) {
+ int oldCapacity = table.length;
+ //if (oldCapacity == newCapacity) return;
+
+ int oldTable[] = table;
+ int oldValues[] = values;
+ byte oldState[] = state;
+
+ int newTable[] = new int[newCapacity];
+ int newValues[] = new int[newCapacity];
+ byte newState[] = new byte[newCapacity];
+
+ this.lowWaterMark = chooseLowWaterMark(newCapacity,this.minLoadFactor);
+ this.highWaterMark = chooseHighWaterMark(newCapacity,this.maxLoadFactor);
+
+ this.table = newTable;
+ this.values = newValues;
+ this.state = newState;
+ this.freeEntries = newCapacity-this.distinct; // delta
+
+ for (int i = oldCapacity ; i-- > 0 ;) {
+ if (oldState[i]==FULL) {
+ int element = oldTable[i];
+ int index = indexOfInsertion(element);
+ newTable[index]=element;
+ newValues[index]=oldValues[i];
+ newState[index]=FULL;
+ }
+ }
+}
+/**
+ * Removes the given key with its associated element from the receiver, if present.
+ *
+ * @param key the key to be removed from the receiver.
+ * @return <tt>true</tt> if the receiver contained the specified key, <tt>false</tt> otherwise.
+ */
+public boolean removeKey(int key) {
+ int i = indexOfKey(key);
+ if (i<0) return false; // key not contained
+
+ this.state[i]=REMOVED;
+ //this.values[i]=0; // delta
+ this.distinct--;
+
+ if (this.distinct < this.lowWaterMark) {
+ int newCapacity = chooseShrinkCapacity(this.distinct,this.minLoadFactor, this.maxLoadFactor);
+ /*
+ if (table.length != newCapacity) {
+ System.out.print("shrink rehashing ");
+ System.out.println("at distinct="+distinct+", capacity="+table.length+" to newCapacity="+newCapacity+" ...");
+ }
+ */
+ rehash(newCapacity);
+ }
+
+ return true;
+}
+/**
+ * Initializes the receiver.
+ *
+ * @param initialCapacity the initial capacity of the receiver.
+ * @param minLoadFactor the minLoadFactor of the receiver.
+ * @param maxLoadFactor the maxLoadFactor of the receiver.
+ * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
+ */
+protected void setUp(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
+ int capacity = initialCapacity;
+ super.setUp(capacity, minLoadFactor, maxLoadFactor);
+ capacity = nextPrime(capacity);
+ if (capacity==0) capacity=1; // open addressing needs at least one FREE slot at any time.
+
+ this.table = new int[capacity];
+ this.values = new int[capacity];
+ this.state = new byte[capacity];
+
+ // memory will be exhausted long before this pathological case happens, anyway.
+ this.minLoadFactor = minLoadFactor;
+ if (capacity == PrimeFinder.largestPrime) this.maxLoadFactor = 1.0;
+ else this.maxLoadFactor = maxLoadFactor;
+
+ this.distinct = 0;
+ this.freeEntries = capacity; // delta
+
+ // lowWaterMark will be established upon first expansion.
+ // establishing it now (upon instance construction) would immediately make the table shrink upon first put(...).
+ // After all the idea of an "initialCapacity" implies violating lowWaterMarks when an object is young.
+ // See ensureCapacity(...)
+ this.lowWaterMark = 0;
+ this.highWaterMark = chooseHighWaterMark(capacity, this.maxLoadFactor);
+}
+/**
+ * Trims the capacity of the receiver to be the receiver's current
+ * size. Releases any superfluous internal memory. An application can use this operation to minimize the
+ * storage of the receiver.
+ */
+public void trimToSize() {
+ // * 1.2 because open addressing's performance exponentially degrades beyond that point
+ // so that even rehashing the table can take very long
+ int newCapacity = nextPrime((int)(1 + 1.2*size()));
+ if (table.length > newCapacity) {
+ rehash(newCapacity);
+ }
+}
+/**
+ * Fills all values contained in the receiver into the specified list.
+ * Fills the list, starting at index 0.
+ * After this call returns the specified list has a new size that equals <tt>this.size()</tt>.
+ * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(IntProcedure)}.
+ * <p>
+ * This method can be used to iterate over the values of the receiver.
+ *
+ * @param list the list to be filled, can have any size.
+ */
+public void values(IntArrayList list) {
+ list.setSize(distinct);
+ int[] elements = list.elements();
+
+ int[] val = values;
+ byte[] stat = state;
+
+ int j=0;
+ for (int i = stat.length ; i-- > 0 ;) {
+ if (stat[i]==FULL) elements[j++]=val[i];
+ }
+}
+}
Propchange: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntIntHashMap.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntObjectHashMap.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntObjectHashMap.java?rev=883365&view=auto
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntObjectHashMap.java (added)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntObjectHashMap.java Mon Nov 23 15:14:26 2009
@@ -0,0 +1,502 @@
+/*
+Copyright � 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose
+is hereby granted without fee, provided that the above copyright notice appear in all copies and
+that both that copyright notice and this permission notice appear in supporting documentation.
+CERN makes no representations about the suitability of this software for any purpose.
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.colt.map;
+
+import org.apache.mahout.colt.function.IntObjectProcedure;
+import org.apache.mahout.colt.function.IntProcedure;
+import org.apache.mahout.colt.list.ByteArrayList;
+import org.apache.mahout.colt.list.IntArrayList;
+import org.apache.mahout.colt.list.ObjectArrayList;
+/**
+Hash map holding (key,value) associations of type <tt>(int-->Object)</tt>; Automatically grows and shrinks as needed; Implemented using open addressing with double hashing.
+First see the <a href="package-summary.html">package summary</a> and javadoc <a href="package-tree.html">tree view</a> to get the broad picture.
+
+Overrides many methods for performance reasons only.
+
+@author wolfgang.hoschek@cern.ch
+@version 1.0, 09/24/99
+@see java.util.HashMap
+*/
+/**
+ * @deprecated until unit tests are in place. Until this time, this class/interface is unsupported.
+ */
+@Deprecated
+public class OpenIntObjectHashMap extends AbstractIntObjectMap {
+ /**
+ * The hash table keys.
+ * @serial
+ */
+ protected int table[];
+
+ /**
+ * The hash table values.
+ * @serial
+ */
+ protected Object values[];
+
+ /**
+ * The state of each hash table entry (FREE, FULL, REMOVED).
+ * @serial
+ */
+ protected byte state[];
+
+ /**
+ * The number of table entries in state==FREE.
+ * @serial
+ */
+ protected int freeEntries;
+
+
+ protected static final byte FREE = 0;
+ protected static final byte FULL = 1;
+ protected static final byte REMOVED = 2;
+
+/**
+ * Constructs an empty map with default capacity and default load factors.
+ */
+public OpenIntObjectHashMap() {
+ this(defaultCapacity);
+}
+/**
+ * Constructs an empty map with the specified initial capacity and default load factors.
+ *
+ * @param initialCapacity the initial capacity of the map.
+ * @throws IllegalArgumentException if the initial capacity is less
+ * than zero.
+ */
+public OpenIntObjectHashMap(int initialCapacity) {
+ this(initialCapacity, defaultMinLoadFactor, defaultMaxLoadFactor);
+}
+/**
+ * Constructs an empty map with
+ * the specified initial capacity and the specified minimum and maximum load factor.
+ *
+ * @param initialCapacity the initial capacity.
+ * @param minLoadFactor the minimum load factor.
+ * @param maxLoadFactor the maximum load factor.
+ * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
+ */
+public OpenIntObjectHashMap(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
+ setUp(initialCapacity,minLoadFactor,maxLoadFactor);
+}
+/**
+ * Removes all (key,value) associations from the receiver.
+ * Implicitly calls <tt>trimToSize()</tt>.
+ */
+public void clear() {
+ new ByteArrayList(this.state).fillFromToWith(0, this.state.length-1, FREE);
+ new ObjectArrayList(values).fillFromToWith(0, state.length-1, null); // delta
+
+ this.distinct = 0;
+ this.freeEntries = table.length; // delta
+ trimToSize();
+}
+/**
+ * Returns a deep copy of the receiver.
+ *
+ * @return a deep copy of the receiver.
+ */
+public Object clone() {
+ OpenIntObjectHashMap copy = (OpenIntObjectHashMap) super.clone();
+ copy.table = (int[]) copy.table.clone();
+ copy.values = (Object[]) copy.values.clone();
+ copy.state = (byte[]) copy.state.clone();
+ return copy;
+}
+/**
+ * Returns <tt>true</tt> if the receiver contains the specified key.
+ *
+ * @return <tt>true</tt> if the receiver contains the specified key.
+ */
+public boolean containsKey(int key) {
+ return indexOfKey(key) >= 0;
+}
+/**
+ * Returns <tt>true</tt> if the receiver contains the specified value.
+ *
+ * @return <tt>true</tt> if the receiver contains the specified value.
+ */
+public boolean containsValue(Object value) {
+ return indexOfValue(value) >= 0;
+}
+/**
+ * Ensures that the receiver can hold at least the specified number of associations without needing to allocate new internal memory.
+ * If necessary, allocates new internal memory and increases the capacity of the receiver.
+ * <p>
+ * This method never need be called; it is for performance tuning only.
+ * Calling this method before <tt>put()</tt>ing a large number of associations boosts performance,
+ * because the receiver will grow only once instead of potentially many times and hash collisions get less probable.
+ *
+ * @param minCapacity the desired minimum capacity.
+ */
+public void ensureCapacity(int minCapacity) {
+ if (table.length < minCapacity) {
+ int newCapacity = nextPrime(minCapacity);
+ rehash(newCapacity);
+ }
+}
+/**
+ * Applies a procedure to each key of the receiver, if any.
+ * Note: Iterates over the keys in no particular order.
+ * Subclasses can define a particular order, for example, "sorted by key".
+ * All methods which <i>can</i> be expressed in terms of this method (most methods can) <i>must guarantee</i> to use the <i>same</i> order defined by this method, even if it is no particular order.
+ * This is necessary so that, for example, methods <tt>keys</tt> and <tt>values</tt> will yield association pairs, not two uncorrelated lists.
+ *
+ * @param procedure the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise continues.
+ * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise.
+ */
+public boolean forEachKey(IntProcedure procedure) {
+ for (int i = table.length ; i-- > 0 ;) {
+ if (state[i]==FULL) if (! procedure.apply(table[i])) return false;
+ }
+ return true;
+}
+/**
+ * Applies a procedure to each (key,value) pair of the receiver, if any.
+ * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(IntProcedure)}.
+ *
+ * @param procedure the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise continues.
+ * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise.
+ */
+public boolean forEachPair(final IntObjectProcedure procedure) {
+ for (int i = table.length ; i-- > 0 ;) {
+ if (state[i]==FULL) if (! procedure.apply(table[i],values[i])) return false;
+ }
+ return true;
+}
+/**
+ * Returns the value associated with the specified key.
+ * It is often a good idea to first check with {@link #containsKey(int)} whether the given key has a value associated or not, i.e. whether there exists an association for the given key or not.
+ *
+ * @param key the key to be searched for.
+ * @return the value associated with the specified key; <tt>null</tt> if no such key is present.
+ */
+public Object get(int key) {
+ int i = indexOfKey(key);
+ if (i<0) return null; //not contained
+ return values[i];
+}
+/**
+ * @param key the key to be added to the receiver.
+ * @return the index where the key would need to be inserted, if it is not already contained.
+ * Returns -index-1 if the key is already contained at slot index.
+ * Therefore, if the returned index < 0, then it is already contained at slot -index-1.
+ * If the returned index >= 0, then it is NOT already contained and should be inserted at slot index.
+ */
+protected int indexOfInsertion(int key) {
+ final int tab[] = table;
+ final byte stat[] = state;
+ final int length = tab.length;
+
+ final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+ int i = hash % length;
+ int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
+ //int decrement = (hash / length) % length;
+ if (decrement == 0) decrement = 1;
+
+ // stop if we find a removed or free slot, or if we find the key itself
+ // do NOT skip over removed slots (yes, open addressing is like that...)
+ while (stat[i] == FULL && tab[i] != key) {
+ i -= decrement;
+ //hashCollisions++;
+ if (i<0) i+=length;
+ }
+
+ if (stat[i] == REMOVED) {
+ // stop if we find a free slot, or if we find the key itself.
+ // do skip over removed slots (yes, open addressing is like that...)
+ // assertion: there is at least one FREE slot.
+ int j = i;
+ while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
+ i -= decrement;
+ //hashCollisions++;
+ if (i<0) i+=length;
+ }
+ if (stat[i] == FREE) i = j;
+ }
+
+
+ if (stat[i] == FULL) {
+ // key already contained at slot i.
+ // return a negative number identifying the slot.
+ return -i-1;
+ }
+ // not already contained, should be inserted at slot i.
+ // return a number >= 0 identifying the slot.
+ return i;
+}
+/**
+ * @param key the key to be searched in the receiver.
+ * @return the index where the key is contained in the receiver, returns -1 if the key was not found.
+ */
+protected int indexOfKey(int key) {
+ final int tab[] = table;
+ final byte stat[] = state;
+ final int length = tab.length;
+
+ final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+ int i = hash % length;
+ int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
+ //int decrement = (hash / length) % length;
+ if (decrement == 0) decrement = 1;
+
+ // stop if we find a free slot, or if we find the key itself.
+ // do skip over removed slots (yes, open addressing is like that...)
+ while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
+ i -= decrement;
+ //hashCollisions++;
+ if (i<0) i+=length;
+ }
+
+ if (stat[i] == FREE) return -1; // not found
+ return i; //found, return index where key is contained
+}
+/**
+ * @param value the value to be searched in the receiver.
+ * @return the index where the value is contained in the receiver, returns -1 if the value was not found.
+ */
+protected int indexOfValue(Object value) {
+ final Object val[] = values;
+ final byte stat[] = state;
+
+ for (int i=stat.length; --i >= 0;) {
+ if (stat[i]==FULL && val[i]==value) return i;
+ }
+
+ return -1; // not found
+}
+/**
+ * Returns the first key the given value is associated with.
+ * It is often a good idea to first check with {@link #containsValue(Object)} whether there exists an association from a key to this value.
+ * Search order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(IntProcedure)}.
+ *
+ * @param value the value to search for.
+ * @return the first key for which holds <tt>get(key) == value</tt>;
+ * returns <tt>Integer.MIN_VALUE</tt> if no such key exists.
+ */
+public int keyOf(Object value) {
+ //returns the first key found; there may be more matching keys, however.
+ int i = indexOfValue(value);
+ if (i<0) return Integer.MIN_VALUE;
+ return table[i];
+}
+/**
+ * Fills all keys contained in the receiver into the specified list.
+ * Fills the list, starting at index 0.
+ * After this call returns the specified list has a new size that equals <tt>this.size()</tt>.
+ * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(IntProcedure)}.
+ * <p>
+ * This method can be used to iterate over the keys of the receiver.
+ *
+ * @param list the list to be filled, can have any size.
+ */
+public void keys(IntArrayList list) {
+ list.setSize(distinct);
+ int[] elements = list.elements();
+
+ int[] tab = table;
+ byte[] stat = state;
+
+ int j=0;
+ for (int i = tab.length ; i-- > 0 ;) {
+ if (stat[i]==FULL) elements[j++]=tab[i];
+ }
+}
+/**
+Fills all pairs satisfying a given condition into the specified lists.
+Fills into the lists, starting at index 0.
+After this call returns the specified lists both have a new size, the number of pairs satisfying the condition.
+Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(IntProcedure)}.
+<p>
+<b>Example:</b>
+<br>
+<pre>
+IntObjectProcedure condition = new IntObjectProcedure() { // match even keys only
+ public boolean apply(int key, Object value) { return key%2==0; }
+}
+keys = (8,7,6), values = (1,2,2) --> keyList = (6,8), valueList = (2,1)</tt>
+</pre>
+
+@param condition the condition to be matched. Takes the current key as first and the current value as second argument.
+@param keyList the list to be filled with keys, can have any size.
+@param valueList the list to be filled with values, can have any size.
+*/
+public void pairsMatching(final IntObjectProcedure condition, final IntArrayList keyList, final ObjectArrayList valueList) {
+ keyList.clear();
+ valueList.clear();
+
+ for (int i = table.length ; i-- > 0 ;) {
+ if (state[i]==FULL && condition.apply(table[i],values[i])) {
+ keyList.add(table[i]);
+ valueList.add(values[i]);
+ }
+ }
+}
+/**
+ * Associates the given key with the given value.
+ * Replaces any old <tt>(key,someOtherValue)</tt> association, if existing.
+ *
+ * @param key the key the value shall be associated with.
+ * @param value the value to be associated.
+ * @return <tt>true</tt> if the receiver did not already contain such a key;
+ * <tt>false</tt> if the receiver did already contain such a key - the new value has now replaced the formerly associated value.
+ */
+public boolean put(int key, Object value) {
+ int i = indexOfInsertion(key);
+ if (i<0) { //already contained
+ i = -i -1;
+ this.values[i]=value;
+ return false;
+ }
+
+ if (this.distinct > this.highWaterMark) {
+ int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+ rehash(newCapacity);
+ return put(key, value);
+ }
+
+ this.table[i]=key;
+ this.values[i]=value;
+ if (this.state[i]==FREE) this.freeEntries--;
+ this.state[i]=FULL;
+ this.distinct++;
+
+ if (this.freeEntries < 1) { //delta
+ int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+ rehash(newCapacity);
+ }
+
+ return true;
+}
+/**
+ * Rehashes the contents of the receiver into a new table
+ * with a smaller or larger capacity.
+ * This method is called automatically when the
+ * number of keys in the receiver exceeds the high water mark or falls below the low water mark.
+ */
+protected void rehash(int newCapacity) {
+ int oldCapacity = table.length;
+ //if (oldCapacity == newCapacity) return;
+
+ int oldTable[] = table;
+ Object oldValues[] = values;
+ byte oldState[] = state;
+
+ int newTable[] = new int[newCapacity];
+ Object newValues[] = new Object[newCapacity];
+ byte newState[] = new byte[newCapacity];
+
+ this.lowWaterMark = chooseLowWaterMark(newCapacity,this.minLoadFactor);
+ this.highWaterMark = chooseHighWaterMark(newCapacity,this.maxLoadFactor);
+
+ this.table = newTable;
+ this.values = newValues;
+ this.state = newState;
+ this.freeEntries = newCapacity-this.distinct; // delta
+
+ for (int i = oldCapacity ; i-- > 0 ;) {
+ if (oldState[i]==FULL) {
+ int element = oldTable[i];
+ int index = indexOfInsertion(element);
+ newTable[index]=element;
+ newValues[index]=oldValues[i];
+ newState[index]=FULL;
+ }
+ }
+}
+/**
+ * Removes the given key with its associated element from the receiver, if present.
+ *
+ * @param key the key to be removed from the receiver.
+ * @return <tt>true</tt> if the receiver contained the specified key, <tt>false</tt> otherwise.
+ */
+public boolean removeKey(int key) {
+ int i = indexOfKey(key);
+ if (i<0) return false; // key not contained
+
+ this.state[i]=REMOVED;
+ this.values[i]=null; // delta
+ this.distinct--;
+
+ if (this.distinct < this.lowWaterMark) {
+ int newCapacity = chooseShrinkCapacity(this.distinct,this.minLoadFactor, this.maxLoadFactor);
+ rehash(newCapacity);
+ }
+
+ return true;
+}
+/**
+ * Initializes the receiver.
+ *
+ * @param initialCapacity the initial capacity of the receiver.
+ * @param minLoadFactor the minLoadFactor of the receiver.
+ * @param maxLoadFactor the maxLoadFactor of the receiver.
+ * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
+ */
+protected void setUp(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
+ int capacity = initialCapacity;
+ super.setUp(capacity, minLoadFactor, maxLoadFactor);
+ capacity = nextPrime(capacity);
+ if (capacity==0) capacity=1; // open addressing needs at least one FREE slot at any time.
+
+ this.table = new int[capacity];
+ this.values = new Object[capacity];
+ this.state = new byte[capacity];
+
+ // memory will be exhausted long before this pathological case happens, anyway.
+ this.minLoadFactor = minLoadFactor;
+ if (capacity == PrimeFinder.largestPrime) this.maxLoadFactor = 1.0;
+ else this.maxLoadFactor = maxLoadFactor;
+
+ this.distinct = 0;
+ this.freeEntries = capacity; // delta
+
+ // lowWaterMark will be established upon first expansion.
+ // establishing it now (upon instance construction) would immediately make the table shrink upon first put(...).
+ // After all the idea of an "initialCapacity" implies violating lowWaterMarks when an object is young.
+ // See ensureCapacity(...)
+ this.lowWaterMark = 0;
+ this.highWaterMark = chooseHighWaterMark(capacity, this.maxLoadFactor);
+}
+/**
+ * Trims the capacity of the receiver to be the receiver's current
+ * size. Releases any superfluous internal memory. An application can use this operation to minimize the
+ * storage of the receiver.
+ */
+public void trimToSize() {
+ // * 1.2 because open addressing's performance exponentially degrades beyond that point
+ // so that even rehashing the table can take very long
+ int newCapacity = nextPrime((int)(1 + 1.2*size()));
+ if (table.length > newCapacity) {
+ rehash(newCapacity);
+ }
+}
+/**
+ * Fills all values contained in the receiver into the specified list.
+ * Fills the list, starting at index 0.
+ * After this call returns the specified list has a new size that equals <tt>this.size()</tt>.
+ * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(IntProcedure)}.
+ * <p>
+ * This method can be used to iterate over the values of the receiver.
+ *
+ * @param list the list to be filled, can have any size.
+ */
+public void values(ObjectArrayList list) {
+ list.setSize(distinct);
+ Object[] elements = list.elements();
+
+ Object[] val = values;
+ byte[] stat = state;
+
+ int j=0;
+ for (int i = stat.length ; i-- > 0 ;) {
+ if (stat[i]==FULL) elements[j++]=val[i];
+ }
+}
+}
Propchange: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntObjectHashMap.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenLongObjectHashMap.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenLongObjectHashMap.java?rev=883365&view=auto
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenLongObjectHashMap.java (added)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenLongObjectHashMap.java Mon Nov 23 15:14:26 2009
@@ -0,0 +1,502 @@
+/*
+Copyright � 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose
+is hereby granted without fee, provided that the above copyright notice appear in all copies and
+that both that copyright notice and this permission notice appear in supporting documentation.
+CERN makes no representations about the suitability of this software for any purpose.
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.colt.map;
+
+import org.apache.mahout.colt.function.LongObjectProcedure;
+import org.apache.mahout.colt.function.LongProcedure;
+import org.apache.mahout.colt.list.ByteArrayList;
+import org.apache.mahout.colt.list.LongArrayList;
+import org.apache.mahout.colt.list.ObjectArrayList;
+/**
+Hash map holding (key,value) associations of type <tt>(long-->Object)</tt>; Automatically grows and shrinks as needed; Implemented using open addressing with double hashing.
+First see the <a href="package-summary.html">package summary</a> and javadoc <a href="package-tree.html">tree view</a> to get the broad picture.
+
+Overrides many methods for performance reasons only.
+
+@author wolfgang.hoschek@cern.ch
+@version 1.0, 09/24/99
+@see java.util.HashMap
+*/
+/**
+ * @deprecated until unit tests are in place. Until this time, this class/interface is unsupported.
+ */
+@Deprecated
+public class OpenLongObjectHashMap extends AbstractLongObjectMap {
+ /**
+ * The hash table keys.
+ * @serial
+ */
+ protected long table[];
+
+ /**
+ * The hash table values.
+ * @serial
+ */
+ protected Object values[];
+
+ /**
+ * The state of each hash table entry (FREE, FULL, REMOVED).
+ * @serial
+ */
+ protected byte state[];
+
+ /**
+ * The number of table entries in state==FREE.
+ * @serial
+ */
+ protected int freeEntries;
+
+
+ protected static final byte FREE = 0;
+ protected static final byte FULL = 1;
+ protected static final byte REMOVED = 2;
+
+/**
+ * Constructs an empty map with default capacity and default load factors.
+ */
+public OpenLongObjectHashMap() {
+ this(defaultCapacity);
+}
+/**
+ * Constructs an empty map with the specified initial capacity and default load factors.
+ *
+ * @param initialCapacity the initial capacity of the map.
+ * @throws IllegalArgumentException if the initial capacity is less
+ * than zero.
+ */
+public OpenLongObjectHashMap(int initialCapacity) {
+ this(initialCapacity, defaultMinLoadFactor, defaultMaxLoadFactor);
+}
+/**
+ * Constructs an empty map with
+ * the specified initial capacity and the specified minimum and maximum load factor.
+ *
+ * @param initialCapacity the initial capacity.
+ * @param minLoadFactor the minimum load factor.
+ * @param maxLoadFactor the maximum load factor.
+ * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
+ */
+public OpenLongObjectHashMap(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
+ setUp(initialCapacity,minLoadFactor,maxLoadFactor);
+}
+/**
+ * Removes all (key,value) associations from the receiver.
+ * Implicitly calls <tt>trimToSize()</tt>.
+ */
+public void clear() {
+ new ByteArrayList(this.state).fillFromToWith(0, this.state.length-1, FREE);
+ new ObjectArrayList(values).fillFromToWith(0, state.length-1, null); // delta
+
+ this.distinct = 0;
+ this.freeEntries = table.length; // delta
+ trimToSize();
+}
+/**
+ * Returns a deep copy of the receiver.
+ *
+ * @return a deep copy of the receiver.
+ */
+public Object clone() {
+ OpenLongObjectHashMap copy = (OpenLongObjectHashMap) super.clone();
+ copy.table = (long[]) copy.table.clone();
+ copy.values = (Object[]) copy.values.clone();
+ copy.state = (byte[]) copy.state.clone();
+ return copy;
+}
+/**
+ * Returns <tt>true</tt> if the receiver contains the specified key.
+ *
+ * @return <tt>true</tt> if the receiver contains the specified key.
+ */
+public boolean containsKey(long key) {
+ return indexOfKey(key) >= 0;
+}
+/**
+ * Returns <tt>true</tt> if the receiver contains the specified value.
+ *
+ * @return <tt>true</tt> if the receiver contains the specified value.
+ */
+public boolean containsValue(Object value) {
+ return indexOfValue(value) >= 0;
+}
+/**
+ * Ensures that the receiver can hold at least the specified number of associations without needing to allocate new internal memory.
+ * If necessary, allocates new internal memory and increases the capacity of the receiver.
+ * <p>
+ * This method never need be called; it is for performance tuning only.
+ * Calling this method before <tt>put()</tt>ing a large number of associations boosts performance,
+ * because the receiver will grow only once instead of potentially many times and hash collisions get less probable.
+ *
+ * @param minCapacity the desired minimum capacity.
+ */
+public void ensureCapacity(int minCapacity) {
+ if (table.length < minCapacity) {
+ int newCapacity = nextPrime(minCapacity);
+ rehash(newCapacity);
+ }
+}
+/**
+ * Applies a procedure to each key of the receiver, if any.
+ * Note: Iterates over the keys in no particular order.
+ * Subclasses can define a particular order, for example, "sorted by key".
+ * All methods which <i>can</i> be expressed in terms of this method (most methods can) <i>must guarantee</i> to use the <i>same</i> order defined by this method, even if it is no particular order.
+ * This is necessary so that, for example, methods <tt>keys</tt> and <tt>values</tt> will yield association pairs, not two uncorrelated lists.
+ *
+ * @param procedure the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise continues.
+ * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise.
+ */
+public boolean forEachKey(LongProcedure procedure) {
+ for (int i = table.length ; i-- > 0 ;) {
+ if (state[i]==FULL) if (! procedure.apply(table[i])) return false;
+ }
+ return true;
+}
+/**
+ * Applies a procedure to each (key,value) pair of the receiver, if any.
+ * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(LongProcedure)}.
+ *
+ * @param procedure the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise continues.
+ * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise.
+ */
+public boolean forEachPair(final LongObjectProcedure procedure) {
+ for (int i = table.length ; i-- > 0 ;) {
+ if (state[i]==FULL) if (! procedure.apply(table[i],values[i])) return false;
+ }
+ return true;
+}
+/**
+ * Returns the value associated with the specified key.
+ * It is often a good idea to first check with {@link #containsKey(long)} whether the given key has a value associated or not, i.e. whether there exists an association for the given key or not.
+ *
+ * @param key the key to be searched for.
+ * @return the value associated with the specified key; <tt>null</tt> if no such key is present.
+ */
+public Object get(long key) {
+ int i = indexOfKey(key);
+ if (i<0) return null; //not contained
+ return values[i];
+}
+/**
+ * @param key the key to be added to the receiver.
+ * @return the index where the key would need to be inserted, if it is not already contained.
+ * Returns -index-1 if the key is already contained at slot index.
+ * Therefore, if the returned index < 0, then it is already contained at slot -index-1.
+ * If the returned index >= 0, then it is NOT already contained and should be inserted at slot index.
+ */
+protected int indexOfInsertion(long key) {
+ final long tab[] = table;
+ final byte stat[] = state;
+ final int length = tab.length;
+
+ final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+ int i = hash % length;
+ int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
+ //int decrement = (hash / length) % length;
+ if (decrement == 0) decrement = 1;
+
+ // stop if we find a removed or free slot, or if we find the key itself
+ // do NOT skip over removed slots (yes, open addressing is like that...)
+ while (stat[i] == FULL && tab[i] != key) {
+ i -= decrement;
+ //hashCollisions++;
+ if (i<0) i+=length;
+ }
+
+ if (stat[i] == REMOVED) {
+ // stop if we find a free slot, or if we find the key itself.
+ // do skip over removed slots (yes, open addressing is like that...)
+ // assertion: there is at least one FREE slot.
+ int j = i;
+ while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
+ i -= decrement;
+ //hashCollisions++;
+ if (i<0) i+=length;
+ }
+ if (stat[i] == FREE) i = j;
+ }
+
+
+ if (stat[i] == FULL) {
+ // key already contained at slot i.
+ // return a negative number identifying the slot.
+ return -i-1;
+ }
+ // not already contained, should be inserted at slot i.
+ // return a number >= 0 identifying the slot.
+ return i;
+}
+/**
+ * @param key the key to be searched in the receiver.
+ * @return the index where the key is contained in the receiver, returns -1 if the key was not found.
+ */
+protected int indexOfKey(long key) {
+ final long tab[] = table;
+ final byte stat[] = state;
+ final int length = tab.length;
+
+ final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+ int i = hash % length;
+ int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
+ //int decrement = (hash / length) % length;
+ if (decrement == 0) decrement = 1;
+
+ // stop if we find a free slot, or if we find the key itself.
+ // do skip over removed slots (yes, open addressing is like that...)
+ while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
+ i -= decrement;
+ //hashCollisions++;
+ if (i<0) i+=length;
+ }
+
+ if (stat[i] == FREE) return -1; // not found
+ return i; //found, return index where key is contained
+}
+/**
+ * @param value the value to be searched in the receiver.
+ * @return the index where the value is contained in the receiver, returns -1 if the value was not found.
+ */
+protected int indexOfValue(Object value) {
+ final Object val[] = values;
+ final byte stat[] = state;
+
+ for (int i=stat.length; --i >= 0;) {
+ if (stat[i]==FULL && val[i]==value) return i;
+ }
+
+ return -1; // not found
+}
+/**
+ * Returns the first key the given value is associated with.
+ * It is often a good idea to first check with {@link #containsValue(Object)} whether there exists an association from a key to this value.
+ * Search order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(LongProcedure)}.
+ *
+ * @param value the value to search for.
+ * @return the first key for which holds <tt>get(key) == value</tt>;
+ * returns <tt>Long.MIN_VALUE</tt> if no such key exists.
+ */
+public long keyOf(Object value) {
+ //returns the first key found; there may be more matching keys, however.
+ int i = indexOfValue(value);
+ if (i<0) return Long.MIN_VALUE;
+ return table[i];
+}
+/**
+ * Fills all keys contained in the receiver into the specified list.
+ * Fills the list, starting at index 0.
+ * After this call returns the specified list has a new size that equals <tt>this.size()</tt>.
+ * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(LongProcedure)}.
+ * <p>
+ * This method can be used to iterate over the keys of the receiver.
+ *
+ * @param list the list to be filled, can have any size.
+ */
+public void keys(LongArrayList list) {
+ list.setSize(distinct);
+ long[] elements = list.elements();
+
+ long[] tab = table;
+ byte[] stat = state;
+
+ int j=0;
+ for (int i = tab.length ; i-- > 0 ;) {
+ if (stat[i]==FULL) elements[j++]=tab[i];
+ }
+}
+/**
+Fills all pairs satisfying a given condition into the specified lists.
+Fills into the lists, starting at index 0.
+After this call returns the specified lists both have a new size, the number of pairs satisfying the condition.
+Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(LongProcedure)}.
+<p>
+<b>Example:</b>
+<br>
+<pre>
+LongObjectProcedure condition = new LongObjectProcedure() { // match even keys only
+ public boolean apply(long key, Object value) { return key%2==0; }
+}
+keys = (8,7,6), values = (1,2,2) --> keyList = (6,8), valueList = (2,1)</tt>
+</pre>
+
+@param condition the condition to be matched. Takes the current key as first and the current value as second argument.
+@param keyList the list to be filled with keys, can have any size.
+@param valueList the list to be filled with values, can have any size.
+*/
+public void pairsMatching(final LongObjectProcedure condition, final LongArrayList keyList, final ObjectArrayList valueList) {
+ keyList.clear();
+ valueList.clear();
+
+ for (int i = table.length ; i-- > 0 ;) {
+ if (state[i]==FULL && condition.apply(table[i],values[i])) {
+ keyList.add(table[i]);
+ valueList.add(values[i]);
+ }
+ }
+}
+/**
+ * Associates the given key with the given value.
+ * Replaces any old <tt>(key,someOtherValue)</tt> association, if existing.
+ *
+ * @param key the key the value shall be associated with.
+ * @param value the value to be associated.
+ * @return <tt>true</tt> if the receiver did not already contain such a key;
+ * <tt>false</tt> if the receiver did already contain such a key - the new value has now replaced the formerly associated value.
+ */
+public boolean put(long key, Object value) {
+ int i = indexOfInsertion(key);
+ if (i<0) { //already contained
+ i = -i -1;
+ this.values[i]=value;
+ return false;
+ }
+
+ if (this.distinct > this.highWaterMark) {
+ int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+ rehash(newCapacity);
+ return put(key, value);
+ }
+
+ this.table[i]=key;
+ this.values[i]=value;
+ if (this.state[i]==FREE) this.freeEntries--;
+ this.state[i]=FULL;
+ this.distinct++;
+
+ if (this.freeEntries < 1) { //delta
+ int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+ rehash(newCapacity);
+ }
+
+ return true;
+}
+/**
+ * Rehashes the contents of the receiver into a new table
+ * with a smaller or larger capacity.
+ * This method is called automatically when the
+ * number of keys in the receiver exceeds the high water mark or falls below the low water mark.
+ */
+protected void rehash(int newCapacity) {
+ int oldCapacity = table.length;
+ //if (oldCapacity == newCapacity) return;
+
+ long oldTable[] = table;
+ Object oldValues[] = values;
+ byte oldState[] = state;
+
+ long newTable[] = new long[newCapacity];
+ Object newValues[] = new Object[newCapacity];
+ byte newState[] = new byte[newCapacity];
+
+ this.lowWaterMark = chooseLowWaterMark(newCapacity,this.minLoadFactor);
+ this.highWaterMark = chooseHighWaterMark(newCapacity,this.maxLoadFactor);
+
+ this.table = newTable;
+ this.values = newValues;
+ this.state = newState;
+ this.freeEntries = newCapacity-this.distinct; // delta
+
+ for (int i = oldCapacity ; i-- > 0 ;) {
+ if (oldState[i]==FULL) {
+ long element = oldTable[i];
+ int index = indexOfInsertion(element);
+ newTable[index]=element;
+ newValues[index]=oldValues[i];
+ newState[index]=FULL;
+ }
+ }
+}
+/**
+ * Removes the given key with its associated element from the receiver, if present.
+ *
+ * @param key the key to be removed from the receiver.
+ * @return <tt>true</tt> if the receiver contained the specified key, <tt>false</tt> otherwise.
+ */
+public boolean removeKey(long key) {
+ int i = indexOfKey(key);
+ if (i<0) return false; // key not contained
+
+ this.state[i]=REMOVED;
+ this.values[i]=null; // delta
+ this.distinct--;
+
+ if (this.distinct < this.lowWaterMark) {
+ int newCapacity = chooseShrinkCapacity(this.distinct,this.minLoadFactor, this.maxLoadFactor);
+ rehash(newCapacity);
+ }
+
+ return true;
+}
+/**
+ * Initializes the receiver.
+ *
+ * @param initialCapacity the initial capacity of the receiver.
+ * @param minLoadFactor the minLoadFactor of the receiver.
+ * @param maxLoadFactor the maxLoadFactor of the receiver.
+ * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
+ */
+protected void setUp(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
+ int capacity = initialCapacity;
+ super.setUp(capacity, minLoadFactor, maxLoadFactor);
+ capacity = nextPrime(capacity);
+ if (capacity==0) capacity=1; // open addressing needs at least one FREE slot at any time.
+
+ this.table = new long[capacity];
+ this.values = new Object[capacity];
+ this.state = new byte[capacity];
+
+ // memory will be exhausted long before this pathological case happens, anyway.
+ this.minLoadFactor = minLoadFactor;
+ if (capacity == PrimeFinder.largestPrime) this.maxLoadFactor = 1.0;
+ else this.maxLoadFactor = maxLoadFactor;
+
+ this.distinct = 0;
+ this.freeEntries = capacity; // delta
+
+ // lowWaterMark will be established upon first expansion.
+ // establishing it now (upon instance construction) would immediately make the table shrink upon first put(...).
+ // After all the idea of an "initialCapacity" implies violating lowWaterMarks when an object is young.
+ // See ensureCapacity(...)
+ this.lowWaterMark = 0;
+ this.highWaterMark = chooseHighWaterMark(capacity, this.maxLoadFactor);
+}
+/**
+ * Trims the capacity of the receiver to be the receiver's current
+ * size. Releases any superfluous internal memory. An application can use this operation to minimize the
+ * storage of the receiver.
+ */
+public void trimToSize() {
+ // * 1.2 because open addressing's performance exponentially degrades beyond that point
+ // so that even rehashing the table can take very long
+ int newCapacity = nextPrime((int)(1 + 1.2*size()));
+ if (table.length > newCapacity) {
+ rehash(newCapacity);
+ }
+}
+/**
+ * Fills all values contained in the receiver into the specified list.
+ * Fills the list, starting at index 0.
+ * After this call returns the specified list has a new size that equals <tt>this.size()</tt>.
+ * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(LongProcedure)}.
+ * <p>
+ * This method can be used to iterate over the values of the receiver.
+ *
+ * @param list the list to be filled, can have any size.
+ */
+public void values(ObjectArrayList list) {
+ list.setSize(distinct);
+ Object[] elements = list.elements();
+
+ Object[] val = values;
+ byte[] stat = state;
+
+ int j=0;
+ for (int i = stat.length ; i-- > 0 ;) {
+ if (stat[i]==FULL) elements[j++]=val[i];
+ }
+}
+}
Propchange: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenLongObjectHashMap.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/PrimeFinder.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/PrimeFinder.java?rev=883365&view=auto
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/PrimeFinder.java (added)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/PrimeFinder.java Mon Nov 23 15:14:26 2009
@@ -0,0 +1,198 @@
+package org.apache.mahout.colt.map;
+
+/**
+ * Not of interest for users; only for implementors of hashtables.
+ * Used to keep hash table capacities prime numbers.
+ *
+ * <p>Choosing prime numbers as hash table capacities is a good idea to keep them working fast,
+ * particularly under hash table expansions.
+ *
+ * <p>However, JDK 1.2, JGL 3.1 and many other toolkits do nothing to keep capacities prime.
+ * This class provides efficient means to choose prime capacities.
+ *
+ * <p>Choosing a prime is <tt>O(log 300)</tt> (binary search in a list of 300 int's).
+ * Memory requirements: 1 KB static memory.
+ *
+ * @author wolfgang.hoschek@cern.ch
+ * @version 1.0, 09/24/99
+ */
+/**
+ * @deprecated until unit tests are in place. Until this time, this class/interface is unsupported.
+ */
+@Deprecated
+public class PrimeFinder extends Object {
+ /**
+ * The largest prime this class can generate; currently equal to <tt>Integer.MAX_VALUE</tt>.
+ */
+ public static final int largestPrime = Integer.MAX_VALUE; //yes, it is prime.
+
+ /**
+ * The prime number list consists of 11 chunks.
+ * Each chunk contains prime numbers.
+ * A chunk starts with a prime P1. The next element is a prime P2. P2 is the smallest prime for which holds: P2 >= 2*P1.
+ * The next element is P3, for which the same holds with respect to P2, and so on.
+ *
+ * Chunks are chosen such that for any desired capacity >= 1000
+ * the list includes a prime number <= desired capacity * 1.11 (11%).
+ * For any desired capacity >= 200
+ * the list includes a prime number <= desired capacity * 1.16 (16%).
+ * For any desired capacity >= 16
+ * the list includes a prime number <= desired capacity * 1.21 (21%).
+ *
+ * Therefore, primes can be retrieved which are quite close to any desired capacity,
+ * which in turn avoids wasting memory.
+ * For example, the list includes 1039,1117,1201,1277,1361,1439,1523,1597,1759,1907,2081.
+ * So if you need a prime >= 1040, you will find a prime <= 1040*1.11=1154.
+ *
+ * Chunks are chosen such that they are optimized for a hashtable growthfactor of 2.0;
+ * If your hashtable has such a growthfactor then,
+ * after initially "rounding to a prime" upon hashtable construction,
+ * it will later expand to prime capacities such that there exist no better primes.
+ *
+ * In total these are about 32*10=320 numbers -> 1 KB of static memory needed.
+ * If you are stingy, then delete every second or fourth chunk.
+ */
+
+ private static final int[] primeCapacities = {
+ //chunk #0
+ largestPrime,
+
+ //chunk #1
+ 5,11,23,47,97,197,397,797,1597,3203,6421,12853,25717,51437,102877,205759,
+ 411527,823117,1646237,3292489,6584983,13169977,26339969,52679969,105359939,
+ 210719881,421439783,842879579,1685759167,
+
+ //chunk #2
+ 433,877,1759,3527,7057,14143,28289,56591,113189,226379,452759,905551,1811107,
+ 3622219,7244441,14488931,28977863,57955739,115911563,231823147,463646329,927292699,
+ 1854585413,
+
+ //chunk #3
+ 953,1907,3821,7643,15287,30577,61169,122347,244703,489407,978821,1957651,3915341,
+ 7830701,15661423,31322867,62645741,125291483,250582987,501165979,1002331963,
+ 2004663929,
+
+ //chunk #4
+ 1039,2081,4177,8363,16729,33461,66923,133853,267713,535481,1070981,2141977,4283963,
+ 8567929,17135863,34271747,68543509,137087021,274174111,548348231,1096696463,
+
+ //chunk #5
+ 31,67,137,277,557,1117,2237,4481,8963,17929,35863,71741,143483,286973,573953,
+ 1147921,2295859,4591721,9183457,18366923,36733847,73467739,146935499,293871013,
+ 587742049,1175484103,
+
+ //chunk #6
+ 599,1201,2411,4831,9677,19373,38747,77509,155027,310081,620171,1240361,2480729,
+ 4961459,9922933,19845871,39691759,79383533,158767069,317534141,635068283,1270136683,
+
+ //chunk #7
+ 311,631,1277,2557,5119,10243,20507,41017,82037,164089,328213,656429,1312867,
+ 2625761,5251529,10503061,21006137,42012281,84024581,168049163,336098327,672196673,
+ 1344393353,
+
+ //chunk #8
+ 3,7,17,37,79,163,331,673,1361,2729,5471,10949,21911,43853,87719,175447,350899,
+ 701819,1403641,2807303,5614657,11229331,22458671,44917381,89834777,179669557,
+ 359339171,718678369,1437356741,
+
+ //chunk #9
+ 43,89,179,359,719,1439,2879,5779,11579,23159,46327,92657,185323,370661,741337,
+ 1482707,2965421,5930887,11861791,23723597,47447201,94894427,189788857,379577741,
+ 759155483,1518310967,
+
+ //chunk #10
+ 379,761,1523,3049,6101,12203,24407,48817,97649,195311,390647,781301,1562611,
+ 3125257,6250537,12501169,25002389,50004791,100009607,200019221,400038451,800076929,
+ 1600153859
+ /*
+ // some more chunks for the low range [3..1000]
+ //chunk #11
+ 13,29,59,127,257,521,1049,2099,4201,8419,16843,33703,67409,134837,269683,
+ 539389,1078787,2157587,4315183,8630387,17260781,34521589,69043189,138086407,
+ 276172823,552345671,1104691373,
+
+ //chunk #12
+ 19,41,83,167,337,677,
+ //1361,2729,5471,10949,21911,43853,87719,175447,350899,
+ //701819,1403641,2807303,5614657,11229331,22458671,44917381,89834777,179669557,
+ //359339171,718678369,1437356741,
+
+ //chunk #13
+ 53,107,223,449,907,1823,3659,7321,14653,29311,58631,117269,
+ 234539,469099,938207,1876417,3752839,7505681,15011389,30022781,
+ 60045577,120091177,240182359,480364727,960729461,1921458943
+ */
+ };
+
+
+ static { //initializer
+ // The above prime numbers are formatted for human readability.
+ // To find numbers fast, we sort them once and for all.
+
+ java.util.Arrays.sort(primeCapacities);
+ //new org.apache.mahout.colt.list.IntArrayList(primeCapacities).mergeSort(); // for debug only, TODO
+ }
+
+/**
+ * Makes this class non instantiable, but still let's others inherit from it.
+ */
+protected PrimeFinder() {}
+/**
+ * Tests correctness. Try
+ * from=1000, to=10000
+ * from=200, to=1000
+ * from=16, to=1000
+ * from=1000, to=Integer.MAX_VALUE
+ */
+protected static void main(String args[]) {
+ int from = Integer.parseInt(args[0]);
+ int to = Integer.parseInt(args[1]);
+
+ statistics(from,to);
+}
+/**
+ * Returns a prime number which is <code>>= desiredCapacity</code> and very close to <code>desiredCapacity</code> (within 11% if <code>desiredCapacity >= 1000</code>).
+ * @param desiredCapacity the capacity desired by the user.
+ * @return the capacity which should be used for a hashtable.
+ */
+public static int nextPrime(int desiredCapacity) {
+ int i = java.util.Arrays.binarySearch(primeCapacities, desiredCapacity);
+ //int i = new org.apache.mahout.colt.list.IntArrayList(primeCapacities).binarySearch(desiredCapacity); // for debug only TODO
+ if (i<0) {
+ // desired capacity not found, choose next prime greater than desired capacity
+ i = -i -1; // remember the semantics of binarySearch...
+ }
+ return primeCapacities[i];
+}
+/**
+ * Tests correctness.
+ */
+protected static void statistics(int from, int to) {
+ // check that primes contain no accidental errors
+ for (int i=0; i<primeCapacities.length-1; i++) {
+ if (primeCapacities[i] >= primeCapacities[i+1]) throw new RuntimeException("primes are unsorted or contain duplicates; detected at "+i+"@"+primeCapacities[i]);
+ }
+
+ double accDeviation = 0.0;
+ double maxDeviation = - 1.0;
+
+ for (int i=from; i<=to; i++) {
+ int primeCapacity = nextPrime(i);
+ //System.out.println(primeCapacity);
+ double deviation = (primeCapacity - i) / (double)i;
+
+ if (deviation > maxDeviation) {
+ maxDeviation = deviation;
+ System.out.println("new maxdev @"+i+"@dev="+maxDeviation);
+ }
+
+ accDeviation += deviation;
+ }
+ long width = 1 + (long)to - (long)from;
+
+ double meanDeviation = accDeviation/width;
+ System.out.println("Statistics for ["+ from + ","+to+"] are as follows");
+ System.out.println("meanDeviation = "+(float)meanDeviation*100+" %");
+ System.out.println("maxDeviation = "+(float)maxDeviation*100+" %");
+}
+}
Propchange: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/PrimeFinder.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/QuickOpenIntIntHashMap.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/QuickOpenIntIntHashMap.java?rev=883365&view=auto
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/QuickOpenIntIntHashMap.java (added)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/QuickOpenIntIntHashMap.java Mon Nov 23 15:14:26 2009
@@ -0,0 +1,210 @@
+/*
+Copyright � 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose
+is hereby granted without fee, provided that the above copyright notice appear in all copies and
+that both that copyright notice and this permission notice appear in supporting documentation.
+CERN makes no representations about the suitability of this software for any purpose.
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.colt.map;
+
+/**
+Status: Experimental; Do not use for production yet. Hash map holding (key,value) associations of type <tt>(int-->int)</tt>; Automatically grows and shrinks as needed; Implemented using open addressing with double hashing.
+First see the <a href="package-summary.html">package summary</a> and javadoc <a href="package-tree.html">tree view</a> to get the broad picture.
+
+Implements open addressing with double hashing, using "Brent's variation".
+Brent's variation slows insertions a bit down (not much) but reduces probes (collisions) for successful searches, in particular for large load factors.
+(It does not improve unsuccessful searches.)
+See D. Knuth, Searching and Sorting, 3rd ed., p.533-545
+
+@author wolfgang.hoschek@cern.ch
+@version 1.0, 09/24/99
+@see java.util.HashMap
+*/
+class QuickOpenIntIntHashMap extends OpenIntIntHashMap {
+ public int totalProbesSaved = 0; // benchmark only
+/**
+ * Constructs an empty map with default capacity and default load factors.
+ */
+public QuickOpenIntIntHashMap() {
+ this(defaultCapacity);
+}
+/**
+ * Constructs an empty map with the specified initial capacity and default load factors.
+ *
+ * @param initialCapacity the initial capacity of the map.
+ * @throws IllegalArgumentException if the initial capacity is less
+ * than zero.
+ */
+public QuickOpenIntIntHashMap(int initialCapacity) {
+ this(initialCapacity, defaultMinLoadFactor, defaultMaxLoadFactor);
+}
+/**
+ * Constructs an empty map with
+ * the specified initial capacity and the specified minimum and maximum load factor.
+ *
+ * @param initialCapacity the initial capacity.
+ * @param minLoadFactor the minimum load factor.
+ * @param maxLoadFactor the maximum load factor.
+ * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
+ */
+public QuickOpenIntIntHashMap(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
+ setUp(initialCapacity,minLoadFactor,maxLoadFactor);
+}
+/**
+ * Associates the given key with the given value.
+ * Replaces any old <tt>(key,someOtherValue)</tt> association, if existing.
+ *
+ * @param key the key the value shall be associated with.
+ * @param value the value to be associated.
+ * @return <tt>true</tt> if the receiver did not already contain such a key;
+ * <tt>false</tt> if the receiver did already contain such a key - the new value has now replaced the formerly associated value.
+ */
+public boolean put(int key, int value) {
+ /*
+ This is open addressing with double hashing, using "Brent's variation".
+ Brent's variation slows insertions a bit down (not much) but reduces probes (collisions) for successful searches, in particular for large load factors.
+ (It does not improve unsuccessful searches.)
+ See D. Knuth, Searching and Sorting, 3rd ed., p.533-545
+
+ h1(key) = hash % M
+ h2(key) = decrement = Max(1, hash/M % M)
+ M is prime = capacity = table.length
+ probing positions are table[(h1-j*h2) % M] for j=0,1,...
+ (M and h2 could also be chosen differently, but h2 is required to be relative prime to M.)
+ */
+
+ int key0;
+ final int tab[] = table;
+ final byte stat[] = state;
+ final int length = tab.length;
+
+ int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+ int i = hash % length;
+ int decrement = (hash / length) % length;
+ if (decrement == 0) decrement = 1;
+ //System.out.println("insert search for (key,value)=("+key+","+value+") at i="+i+", dec="+decrement);
+
+ // stop if we find a removed or free slot, or if we find the key itself
+ // do NOT skip over removed slots (yes, open addressing is like that...)
+ //int comp = comparisons;
+ int t = 0; // the number of probes
+ int p0 = i; // the first position to probe
+ while (stat[i] == FULL && tab[i] != key) {
+ t++;
+ i -= decrement;
+ //hashCollisions++;
+ if (i<0) i+=length;
+ }
+ //if (comparisons-comp>0) System.out.println("probed "+(comparisons-comp)+" slots.");
+ if (stat[i] == FULL) {
+ // key already contained at slot i.
+ this.values[i]=value;
+ return false;
+ }
+ // not already contained, should be inserted at slot i.
+
+ if (this.distinct > this.highWaterMark) {
+ int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+
+ //System.out.print("grow rehashing ");
+ //System.out.println("at distinct="+distinct+", capacity="+table.length+" to newCapacity="+newCapacity+" ...");
+
+ rehash(newCapacity);
+ return put(key, value);
+ }
+
+ /*
+ Brent's variation does a local reorganization to reduce probes. It essentially means:
+ We test whether it is possible to move the association we probed first (table[p0]) out of the way.
+ If this is possible, it will reduce probes for the key to be inserted, since it takes its place; it gets hit earlier.
+ However, future probes for the key that we move out of the way will increase.
+ Thus we only move it out of the way, if we have a net gain, that is, if we save more probes than we loose.
+ For the first probe we safe more than we loose if the number of probes we needed was >=2 (t>=2).
+ If the first probe cannot be moved out of the way, we try the next probe (p1).
+ Now we safe more than we loose if t>=3.
+ We repeat this until we find that we cannot gain or that we can indeed move p(x) out of the way.
+
+ Note: Under the great majority of insertions t<=1, so the loop is entered very infrequently.
+ */
+ while (t>1) {
+ //System.out.println("t="+t);
+ key0 = tab[p0];
+ hash = HashFunctions.hash(key0) & 0x7FFFFFFF;
+ decrement = (hash / length) % length;
+ if (decrement == 0) decrement = 1;
+ int pc = p0-decrement; // pc = (p0-j*decrement) % M, j=1,2,..
+ if (pc<0) pc += length;
+
+ if (stat[pc] != FREE) { // not a free slot, continue searching for free slot to move to, or break.
+ p0 = pc;
+ t--;
+ }
+ else { // free or removed slot found, now move...
+ //System.out.println("copying p0="+p0+" to pc="+pc+", (key,val)=("+tab[p0]+","+values[p0]+"), saving "+(t-1)+" probes.");
+ this.totalProbesSaved += (t-1);
+ tab[pc] = key0;
+ stat[pc] = FULL;
+ values[pc] = values[p0];
+ i = p0; // prepare to insert: table[p0]=key
+ t = 0; // break loop
+ }
+ }
+
+ //System.out.println("inserting at i="+i);
+ this.table[i]=key;
+ this.values[i]=value;
+ if (this.state[i]==FREE) this.freeEntries--;
+ this.state[i]=FULL;
+ this.distinct++;
+
+ if (this.freeEntries < 1) { //delta
+ int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+ rehash(newCapacity);
+ }
+
+ return true;
+}
+/**
+ * Rehashes the contents of the receiver into a new table
+ * with a smaller or larger capacity.
+ * This method is called automatically when the
+ * number of keys in the receiver exceeds the high water mark or falls below the low water mark.
+ */
+protected void rehash(int newCapacity) {
+ int oldCapacity = table.length;
+ //if (oldCapacity == newCapacity) return;
+
+ int oldTable[] = table;
+ int oldValues[] = values;
+ byte oldState[] = state;
+
+ int newTable[] = new int[newCapacity];
+ int newValues[] = new int[newCapacity];
+ byte newState[] = new byte[newCapacity];
+
+ this.lowWaterMark = chooseLowWaterMark(newCapacity,this.minLoadFactor);
+ this.highWaterMark = chooseHighWaterMark(newCapacity,this.maxLoadFactor);
+
+ this.table = newTable;
+ this.values = newValues;
+ this.state = newState;
+ this.freeEntries = newCapacity-this.distinct; // delta
+
+ int tmp = this.distinct;
+ this.distinct = Integer.MIN_VALUE; // switch of watermarks
+ for (int i = oldCapacity ; i-- > 0 ;) {
+ if (oldState[i]==FULL) {
+ put(oldTable[i], oldValues[i]);
+ /*
+ int element = oldTable[i];
+ int index = indexOfInsertion(element);
+ newTable[index]=element;
+ newValues[index]=oldValues[i];
+ newState[index]=FULL;
+ */
+ }
+ }
+ this.distinct = tmp;
+}
+}
Propchange: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/QuickOpenIntIntHashMap.java
------------------------------------------------------------------------------
svn:eol-style = native