You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/11/25 04:36:02 UTC
svn commit: r883973 [14/14] - in
/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix: ./
bitvector/ buffer/ function/ list/ list/adapter/ map/
Modified: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntObjectHashMap.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntObjectHashMap.java?rev=883973&r1=883972&r2=883973&view=diff
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntObjectHashMap.java (original)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenIntObjectHashMap.java Wed Nov 25 03:35:59 2009
@@ -21,47 +21,47 @@
@author wolfgang.hoschek@cern.ch
@version 1.0, 09/24/99
-@see java.util.HashMap
+@see java.util.HashMap
*/
/**
* @deprecated until unit tests are in place. Until this time, this class/interface is unsupported.
*/
@Deprecated
public class OpenIntObjectHashMap extends AbstractIntObjectMap {
- /**
- * The hash table keys.
- * @serial
- */
- protected int table[];
-
- /**
- * The hash table values.
- * @serial
- */
- protected Object values[];
-
- /**
- * The state of each hash table entry (FREE, FULL, REMOVED).
- * @serial
- */
- protected byte state[];
-
- /**
- * The number of table entries in state==FREE.
- * @serial
- */
- protected int freeEntries;
-
-
- protected static final byte FREE = 0;
- protected static final byte FULL = 1;
- protected static final byte REMOVED = 2;
+ /**
+ * The hash table keys.
+ * @serial
+ */
+ protected int table[];
+
+ /**
+ * The hash table values.
+ * @serial
+ */
+ protected Object values[];
+
+ /**
+ * The state of each hash table entry (FREE, FULL, REMOVED).
+ * @serial
+ */
+ protected byte state[];
+
+ /**
+ * The number of table entries in state==FREE.
+ * @serial
+ */
+ protected int freeEntries;
+
+
+ protected static final byte FREE = 0;
+ protected static final byte FULL = 1;
+ protected static final byte REMOVED = 2;
/**
* Constructs an empty map with default capacity and default load factors.
*/
public OpenIntObjectHashMap() {
- this(defaultCapacity);
+ this(defaultCapacity);
}
/**
* Constructs an empty map with the specified initial capacity and default load factors.
@@ -71,7 +71,7 @@
* than zero.
*/
public OpenIntObjectHashMap(int initialCapacity) {
- this(initialCapacity, defaultMinLoadFactor, defaultMaxLoadFactor);
+ this(initialCapacity, defaultMinLoadFactor, defaultMaxLoadFactor);
}
/**
* Constructs an empty map with
@@ -80,22 +80,22 @@
* @param initialCapacity the initial capacity.
* @param minLoadFactor the minimum load factor.
* @param maxLoadFactor the maximum load factor.
- * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
+ * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
*/
public OpenIntObjectHashMap(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
- setUp(initialCapacity,minLoadFactor,maxLoadFactor);
+ setUp(initialCapacity,minLoadFactor,maxLoadFactor);
}
/**
* Removes all (key,value) associations from the receiver.
* Implicitly calls <tt>trimToSize()</tt>.
*/
public void clear() {
- new ByteArrayList(this.state).fillFromToWith(0, this.state.length-1, FREE);
- new ObjectArrayList(values).fillFromToWith(0, state.length-1, null); // delta
+ new ByteArrayList(this.state).fillFromToWith(0, this.state.length-1, FREE);
+ new ObjectArrayList(values).fillFromToWith(0, state.length-1, null); // delta
- this.distinct = 0;
- this.freeEntries = table.length; // delta
- trimToSize();
+ this.distinct = 0;
+ this.freeEntries = table.length; // delta
+ trimToSize();
}
/**
* Returns a deep copy of the receiver.
@@ -103,11 +103,11 @@
* @return a deep copy of the receiver.
*/
public Object clone() {
- OpenIntObjectHashMap copy = (OpenIntObjectHashMap) super.clone();
- copy.table = (int[]) copy.table.clone();
- copy.values = (Object[]) copy.values.clone();
- copy.state = (byte[]) copy.state.clone();
- return copy;
+ OpenIntObjectHashMap copy = (OpenIntObjectHashMap) super.clone();
+ copy.table = (int[]) copy.table.clone();
+ copy.values = (Object[]) copy.values.clone();
+ copy.state = (byte[]) copy.state.clone();
+ return copy;
}
/**
* Returns <tt>true</tt> if the receiver contains the specified key.
@@ -115,7 +115,7 @@
* @return <tt>true</tt> if the receiver contains the specified key.
*/
public boolean containsKey(int key) {
- return indexOfKey(key) >= 0;
+ return indexOfKey(key) >= 0;
}
/**
* Returns <tt>true</tt> if the receiver contains the specified value.
@@ -123,7 +123,7 @@
* @return <tt>true</tt> if the receiver contains the specified value.
*/
public boolean containsValue(Object value) {
- return indexOfValue(value) >= 0;
+ return indexOfValue(value) >= 0;
}
/**
* Ensures that the receiver can hold at least the specified number of associations without needing to allocate new internal memory.
@@ -136,10 +136,10 @@
* @param minCapacity the desired minimum capacity.
*/
public void ensureCapacity(int minCapacity) {
- if (table.length < minCapacity) {
- int newCapacity = nextPrime(minCapacity);
- rehash(newCapacity);
- }
+ if (table.length < minCapacity) {
+ int newCapacity = nextPrime(minCapacity);
+ rehash(newCapacity);
+ }
}
/**
* Applies a procedure to each key of the receiver, if any.
@@ -152,10 +152,10 @@
* @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise.
*/
public boolean forEachKey(IntProcedure procedure) {
- for (int i = table.length ; i-- > 0 ;) {
- if (state[i]==FULL) if (! procedure.apply(table[i])) return false;
- }
- return true;
+ for (int i = table.length ; i-- > 0 ;) {
+ if (state[i]==FULL) if (! procedure.apply(table[i])) return false;
+ }
+ return true;
}
/**
* Applies a procedure to each (key,value) pair of the receiver, if any.
@@ -165,10 +165,10 @@
* @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise.
*/
public boolean forEachPair(final IntObjectProcedure procedure) {
- for (int i = table.length ; i-- > 0 ;) {
- if (state[i]==FULL) if (! procedure.apply(table[i],values[i])) return false;
- }
- return true;
+ for (int i = table.length ; i-- > 0 ;) {
+ if (state[i]==FULL) if (! procedure.apply(table[i],values[i])) return false;
+ }
+ return true;
}
/**
* Returns the value associated with the specified key.
@@ -178,9 +178,9 @@
* @return the value associated with the specified key; <tt>null</tt> if no such key is present.
*/
public Object get(int key) {
- int i = indexOfKey(key);
- if (i<0) return null; //not contained
- return values[i];
+ int i = indexOfKey(key);
+ if (i<0) return null; //not contained
+ return values[i];
}
/**
* @param key the key to be added to the receiver.
@@ -190,86 +190,86 @@
* If the returned index >= 0, then it is NOT already contained and should be inserted at slot index.
*/
protected int indexOfInsertion(int key) {
- final int tab[] = table;
- final byte stat[] = state;
- final int length = tab.length;
-
- final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
- int i = hash % length;
- int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
- //int decrement = (hash / length) % length;
- if (decrement == 0) decrement = 1;
-
- // stop if we find a removed or free slot, or if we find the key itself
- // do NOT skip over removed slots (yes, open addressing is like that...)
- while (stat[i] == FULL && tab[i] != key) {
- i -= decrement;
- //hashCollisions++;
- if (i<0) i+=length;
- }
-
- if (stat[i] == REMOVED) {
- // stop if we find a free slot, or if we find the key itself.
- // do skip over removed slots (yes, open addressing is like that...)
- // assertion: there is at least one FREE slot.
- int j = i;
- while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
- i -= decrement;
- //hashCollisions++;
- if (i<0) i+=length;
- }
- if (stat[i] == FREE) i = j;
- }
-
-
- if (stat[i] == FULL) {
- // key already contained at slot i.
- // return a negative number identifying the slot.
- return -i-1;
- }
- // not already contained, should be inserted at slot i.
- // return a number >= 0 identifying the slot.
- return i;
+ final int tab[] = table;
+ final byte stat[] = state;
+ final int length = tab.length;
+
+ final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+ int i = hash % length;
+ int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
+ //int decrement = (hash / length) % length;
+ if (decrement == 0) decrement = 1;
+
+ // stop if we find a removed or free slot, or if we find the key itself
+ // do NOT skip over removed slots (yes, open addressing is like that...)
+ while (stat[i] == FULL && tab[i] != key) {
+ i -= decrement;
+ //hashCollisions++;
+ if (i<0) i+=length;
+ }
+
+ if (stat[i] == REMOVED) {
+ // stop if we find a free slot, or if we find the key itself.
+ // do skip over removed slots (yes, open addressing is like that...)
+ // assertion: there is at least one FREE slot.
+ int j = i;
+ while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
+ i -= decrement;
+ //hashCollisions++;
+ if (i<0) i+=length;
+ }
+ if (stat[i] == FREE) i = j;
+ }
+
+
+ if (stat[i] == FULL) {
+ // key already contained at slot i.
+ // return a negative number identifying the slot.
+ return -i-1;
+ }
+ // not already contained, should be inserted at slot i.
+ // return a number >= 0 identifying the slot.
+ return i;
}
/**
* @param key the key to be searched in the receiver.
* @return the index where the key is contained in the receiver, returns -1 if the key was not found.
*/
protected int indexOfKey(int key) {
- final int tab[] = table;
- final byte stat[] = state;
- final int length = tab.length;
-
- final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
- int i = hash % length;
- int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
- //int decrement = (hash / length) % length;
- if (decrement == 0) decrement = 1;
-
- // stop if we find a free slot, or if we find the key itself.
- // do skip over removed slots (yes, open addressing is like that...)
- while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
- i -= decrement;
- //hashCollisions++;
- if (i<0) i+=length;
- }
-
- if (stat[i] == FREE) return -1; // not found
- return i; //found, return index where key is contained
+ final int tab[] = table;
+ final byte stat[] = state;
+ final int length = tab.length;
+
+ final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+ int i = hash % length;
+ int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
+ //int decrement = (hash / length) % length;
+ if (decrement == 0) decrement = 1;
+
+ // stop if we find a free slot, or if we find the key itself.
+ // do skip over removed slots (yes, open addressing is like that...)
+ while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
+ i -= decrement;
+ //hashCollisions++;
+ if (i<0) i+=length;
+ }
+
+ if (stat[i] == FREE) return -1; // not found
+ return i; //found, return index where key is contained
}
/**
* @param value the value to be searched in the receiver.
* @return the index where the value is contained in the receiver, returns -1 if the value was not found.
*/
protected int indexOfValue(Object value) {
- final Object val[] = values;
- final byte stat[] = state;
+ final Object val[] = values;
+ final byte stat[] = state;
- for (int i=stat.length; --i >= 0;) {
- if (stat[i]==FULL && val[i]==value) return i;
- }
+ for (int i=stat.length; --i >= 0;) {
+ if (stat[i]==FULL && val[i]==value) return i;
+ }
- return -1; // not found
+ return -1; // not found
}
/**
* Returns the first key the given value is associated with.
@@ -278,13 +278,13 @@
*
* @param value the value to search for.
* @return the first key for which holds <tt>get(key) == value</tt>;
- * returns <tt>Integer.MIN_VALUE</tt> if no such key exists.
+ * returns <tt>Integer.MIN_VALUE</tt> if no such key exists.
*/
public int keyOf(Object value) {
- //returns the first key found; there may be more matching keys, however.
- int i = indexOfValue(value);
- if (i<0) return Integer.MIN_VALUE;
- return table[i];
+ //returns the first key found; there may be more matching keys, however.
+ int i = indexOfValue(value);
+ if (i<0) return Integer.MIN_VALUE;
+ return table[i];
}
/**
* Fills all keys contained in the receiver into the specified list.
@@ -297,16 +297,16 @@
* @param list the list to be filled, can have any size.
*/
public void keys(IntArrayList list) {
- list.setSize(distinct);
- int[] elements = list.elements();
-
- int[] tab = table;
- byte[] stat = state;
-
- int j=0;
- for (int i = tab.length ; i-- > 0 ;) {
- if (stat[i]==FULL) elements[j++]=tab[i];
- }
+ list.setSize(distinct);
+ int[] elements = list.elements();
+
+ int[] tab = table;
+ byte[] stat = state;
+
+ int j=0;
+ for (int i = tab.length ; i-- > 0 ;) {
+ if (stat[i]==FULL) elements[j++]=tab[i];
+ }
}
/**
Fills all pairs satisfying a given condition into the specified lists.
@@ -318,7 +318,7 @@
<br>
<pre>
IntObjectProcedure condition = new IntObjectProcedure() { // match even keys only
- public boolean apply(int key, Object value) { return key%2==0; }
+ public boolean apply(int key, Object value) { return key%2==0; }
}
keys = (8,7,6), values = (1,2,2) --> keyList = (6,8), valueList = (2,1)</tt>
</pre>
@@ -328,15 +328,15 @@
@param valueList the list to be filled with values, can have any size.
*/
public void pairsMatching(final IntObjectProcedure condition, final IntArrayList keyList, final ObjectArrayList valueList) {
- keyList.clear();
- valueList.clear();
-
- for (int i = table.length ; i-- > 0 ;) {
- if (state[i]==FULL && condition.apply(table[i],values[i])) {
- keyList.add(table[i]);
- valueList.add(values[i]);
- }
- }
+ keyList.clear();
+ valueList.clear();
+
+ for (int i = table.length ; i-- > 0 ;) {
+ if (state[i]==FULL && condition.apply(table[i],values[i])) {
+ keyList.add(table[i]);
+ valueList.add(values[i]);
+ }
+ }
}
/**
* Associates the given key with the given value.
@@ -348,31 +348,31 @@
* <tt>false</tt> if the receiver did already contain such a key - the new value has now replaced the formerly associated value.
*/
public boolean put(int key, Object value) {
- int i = indexOfInsertion(key);
- if (i<0) { //already contained
- i = -i -1;
- this.values[i]=value;
- return false;
- }
-
- if (this.distinct > this.highWaterMark) {
- int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
- rehash(newCapacity);
- return put(key, value);
- }
-
- this.table[i]=key;
- this.values[i]=value;
- if (this.state[i]==FREE) this.freeEntries--;
- this.state[i]=FULL;
- this.distinct++;
-
- if (this.freeEntries < 1) { //delta
- int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
- rehash(newCapacity);
- }
+ int i = indexOfInsertion(key);
+ if (i<0) { //already contained
+ i = -i -1;
+ this.values[i]=value;
+ return false;
+ }
+
+ if (this.distinct > this.highWaterMark) {
+ int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+ rehash(newCapacity);
+ return put(key, value);
+ }
+
+ this.table[i]=key;
+ this.values[i]=value;
+ if (this.state[i]==FREE) this.freeEntries--;
+ this.state[i]=FULL;
+ this.distinct++;
+
+ if (this.freeEntries < 1) { //delta
+ int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+ rehash(newCapacity);
+ }
- return true;
+ return true;
}
/**
* Rehashes the contents of the receiver into a new table
@@ -381,34 +381,34 @@
* number of keys in the receiver exceeds the high water mark or falls below the low water mark.
*/
protected void rehash(int newCapacity) {
- int oldCapacity = table.length;
- //if (oldCapacity == newCapacity) return;
-
- int oldTable[] = table;
- Object oldValues[] = values;
- byte oldState[] = state;
-
- int newTable[] = new int[newCapacity];
- Object newValues[] = new Object[newCapacity];
- byte newState[] = new byte[newCapacity];
-
- this.lowWaterMark = chooseLowWaterMark(newCapacity,this.minLoadFactor);
- this.highWaterMark = chooseHighWaterMark(newCapacity,this.maxLoadFactor);
-
- this.table = newTable;
- this.values = newValues;
- this.state = newState;
- this.freeEntries = newCapacity-this.distinct; // delta
-
- for (int i = oldCapacity ; i-- > 0 ;) {
- if (oldState[i]==FULL) {
- int element = oldTable[i];
- int index = indexOfInsertion(element);
- newTable[index]=element;
- newValues[index]=oldValues[i];
- newState[index]=FULL;
- }
- }
+ int oldCapacity = table.length;
+ //if (oldCapacity == newCapacity) return;
+
+ int oldTable[] = table;
+ Object oldValues[] = values;
+ byte oldState[] = state;
+
+ int newTable[] = new int[newCapacity];
+ Object newValues[] = new Object[newCapacity];
+ byte newState[] = new byte[newCapacity];
+
+ this.lowWaterMark = chooseLowWaterMark(newCapacity,this.minLoadFactor);
+ this.highWaterMark = chooseHighWaterMark(newCapacity,this.maxLoadFactor);
+
+ this.table = newTable;
+ this.values = newValues;
+ this.state = newState;
+ this.freeEntries = newCapacity-this.distinct; // delta
+
+ for (int i = oldCapacity ; i-- > 0 ;) {
+ if (oldState[i]==FULL) {
+ int element = oldTable[i];
+ int index = indexOfInsertion(element);
+ newTable[index]=element;
+ newValues[index]=oldValues[i];
+ newState[index]=FULL;
+ }
+ }
}
/**
* Removes the given key with its associated element from the receiver, if present.
@@ -417,19 +417,19 @@
* @return <tt>true</tt> if the receiver contained the specified key, <tt>false</tt> otherwise.
*/
public boolean removeKey(int key) {
- int i = indexOfKey(key);
- if (i<0) return false; // key not contained
+ int i = indexOfKey(key);
+ if (i<0) return false; // key not contained
- this.state[i]=REMOVED;
- this.values[i]=null; // delta
- this.distinct--;
-
- if (this.distinct < this.lowWaterMark) {
- int newCapacity = chooseShrinkCapacity(this.distinct,this.minLoadFactor, this.maxLoadFactor);
- rehash(newCapacity);
- }
-
- return true;
+ this.state[i]=REMOVED;
+ this.values[i]=null; // delta
+ this.distinct--;
+
+ if (this.distinct < this.lowWaterMark) {
+ int newCapacity = chooseShrinkCapacity(this.distinct,this.minLoadFactor, this.maxLoadFactor);
+ rehash(newCapacity);
+ }
+
+ return true;
}
/**
* Initializes the receiver.
@@ -437,32 +437,32 @@
* @param initialCapacity the initial capacity of the receiver.
* @param minLoadFactor the minLoadFactor of the receiver.
* @param maxLoadFactor the maxLoadFactor of the receiver.
- * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
+ * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
*/
protected void setUp(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
- int capacity = initialCapacity;
- super.setUp(capacity, minLoadFactor, maxLoadFactor);
- capacity = nextPrime(capacity);
- if (capacity==0) capacity=1; // open addressing needs at least one FREE slot at any time.
-
- this.table = new int[capacity];
- this.values = new Object[capacity];
- this.state = new byte[capacity];
-
- // memory will be exhausted long before this pathological case happens, anyway.
- this.minLoadFactor = minLoadFactor;
- if (capacity == PrimeFinder.largestPrime) this.maxLoadFactor = 1.0;
- else this.maxLoadFactor = maxLoadFactor;
-
- this.distinct = 0;
- this.freeEntries = capacity; // delta
-
- // lowWaterMark will be established upon first expansion.
- // establishing it now (upon instance construction) would immediately make the table shrink upon first put(...).
- // After all the idea of an "initialCapacity" implies violating lowWaterMarks when an object is young.
- // See ensureCapacity(...)
- this.lowWaterMark = 0;
- this.highWaterMark = chooseHighWaterMark(capacity, this.maxLoadFactor);
+ int capacity = initialCapacity;
+ super.setUp(capacity, minLoadFactor, maxLoadFactor);
+ capacity = nextPrime(capacity);
+ if (capacity==0) capacity=1; // open addressing needs at least one FREE slot at any time.
+
+ this.table = new int[capacity];
+ this.values = new Object[capacity];
+ this.state = new byte[capacity];
+
+ // memory will be exhausted long before this pathological case happens, anyway.
+ this.minLoadFactor = minLoadFactor;
+ if (capacity == PrimeFinder.largestPrime) this.maxLoadFactor = 1.0;
+ else this.maxLoadFactor = maxLoadFactor;
+
+ this.distinct = 0;
+ this.freeEntries = capacity; // delta
+
+ // lowWaterMark will be established upon first expansion.
+ // establishing it now (upon instance construction) would immediately make the table shrink upon first put(...).
+ // After all the idea of an "initialCapacity" implies violating lowWaterMarks when an object is young.
+ // See ensureCapacity(...)
+ this.lowWaterMark = 0;
+ this.highWaterMark = chooseHighWaterMark(capacity, this.maxLoadFactor);
}
/**
* Trims the capacity of the receiver to be the receiver's current
@@ -470,12 +470,12 @@
* storage of the receiver.
*/
public void trimToSize() {
- // * 1.2 because open addressing's performance exponentially degrades beyond that point
- // so that even rehashing the table can take very long
- int newCapacity = nextPrime((int)(1 + 1.2*size()));
- if (table.length > newCapacity) {
- rehash(newCapacity);
- }
+ // * 1.2 because open addressing's performance exponentially degrades beyond that point
+ // so that even rehashing the table can take very long
+ int newCapacity = nextPrime((int)(1 + 1.2*size()));
+ if (table.length > newCapacity) {
+ rehash(newCapacity);
+ }
}
/**
* Fills all values contained in the receiver into the specified list.
@@ -488,15 +488,15 @@
* @param list the list to be filled, can have any size.
*/
public void values(ObjectArrayList list) {
- list.setSize(distinct);
- Object[] elements = list.elements();
-
- Object[] val = values;
- byte[] stat = state;
-
- int j=0;
- for (int i = stat.length ; i-- > 0 ;) {
- if (stat[i]==FULL) elements[j++]=val[i];
- }
+ list.setSize(distinct);
+ Object[] elements = list.elements();
+
+ Object[] val = values;
+ byte[] stat = state;
+
+ int j=0;
+ for (int i = stat.length ; i-- > 0 ;) {
+ if (stat[i]==FULL) elements[j++]=val[i];
+ }
}
}
Modified: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenLongObjectHashMap.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenLongObjectHashMap.java?rev=883973&r1=883972&r2=883973&view=diff
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenLongObjectHashMap.java (original)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/OpenLongObjectHashMap.java Wed Nov 25 03:35:59 2009
@@ -21,47 +21,47 @@
@author wolfgang.hoschek@cern.ch
@version 1.0, 09/24/99
-@see java.util.HashMap
+@see java.util.HashMap
*/
/**
* @deprecated until unit tests are in place. Until this time, this class/interface is unsupported.
*/
@Deprecated
public class OpenLongObjectHashMap extends AbstractLongObjectMap {
- /**
- * The hash table keys.
- * @serial
- */
- protected long table[];
-
- /**
- * The hash table values.
- * @serial
- */
- protected Object values[];
-
- /**
- * The state of each hash table entry (FREE, FULL, REMOVED).
- * @serial
- */
- protected byte state[];
-
- /**
- * The number of table entries in state==FREE.
- * @serial
- */
- protected int freeEntries;
-
-
- protected static final byte FREE = 0;
- protected static final byte FULL = 1;
- protected static final byte REMOVED = 2;
+ /**
+ * The hash table keys.
+ * @serial
+ */
+ protected long table[];
+
+ /**
+ * The hash table values.
+ * @serial
+ */
+ protected Object values[];
+
+ /**
+ * The state of each hash table entry (FREE, FULL, REMOVED).
+ * @serial
+ */
+ protected byte state[];
+
+ /**
+ * The number of table entries in state==FREE.
+ * @serial
+ */
+ protected int freeEntries;
+
+
+ protected static final byte FREE = 0;
+ protected static final byte FULL = 1;
+ protected static final byte REMOVED = 2;
/**
* Constructs an empty map with default capacity and default load factors.
*/
public OpenLongObjectHashMap() {
- this(defaultCapacity);
+ this(defaultCapacity);
}
/**
* Constructs an empty map with the specified initial capacity and default load factors.
@@ -71,7 +71,7 @@
* than zero.
*/
public OpenLongObjectHashMap(int initialCapacity) {
- this(initialCapacity, defaultMinLoadFactor, defaultMaxLoadFactor);
+ this(initialCapacity, defaultMinLoadFactor, defaultMaxLoadFactor);
}
/**
* Constructs an empty map with
@@ -80,22 +80,22 @@
* @param initialCapacity the initial capacity.
* @param minLoadFactor the minimum load factor.
* @param maxLoadFactor the maximum load factor.
- * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
+ * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
*/
public OpenLongObjectHashMap(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
- setUp(initialCapacity,minLoadFactor,maxLoadFactor);
+ setUp(initialCapacity,minLoadFactor,maxLoadFactor);
}
/**
* Removes all (key,value) associations from the receiver.
* Implicitly calls <tt>trimToSize()</tt>.
*/
public void clear() {
- new ByteArrayList(this.state).fillFromToWith(0, this.state.length-1, FREE);
- new ObjectArrayList(values).fillFromToWith(0, state.length-1, null); // delta
+ new ByteArrayList(this.state).fillFromToWith(0, this.state.length-1, FREE);
+ new ObjectArrayList(values).fillFromToWith(0, state.length-1, null); // delta
- this.distinct = 0;
- this.freeEntries = table.length; // delta
- trimToSize();
+ this.distinct = 0;
+ this.freeEntries = table.length; // delta
+ trimToSize();
}
/**
* Returns a deep copy of the receiver.
@@ -103,11 +103,11 @@
* @return a deep copy of the receiver.
*/
public Object clone() {
- OpenLongObjectHashMap copy = (OpenLongObjectHashMap) super.clone();
- copy.table = (long[]) copy.table.clone();
- copy.values = (Object[]) copy.values.clone();
- copy.state = (byte[]) copy.state.clone();
- return copy;
+ OpenLongObjectHashMap copy = (OpenLongObjectHashMap) super.clone();
+ copy.table = (long[]) copy.table.clone();
+ copy.values = (Object[]) copy.values.clone();
+ copy.state = (byte[]) copy.state.clone();
+ return copy;
}
/**
* Returns <tt>true</tt> if the receiver contains the specified key.
@@ -115,7 +115,7 @@
* @return <tt>true</tt> if the receiver contains the specified key.
*/
public boolean containsKey(long key) {
- return indexOfKey(key) >= 0;
+ return indexOfKey(key) >= 0;
}
/**
* Returns <tt>true</tt> if the receiver contains the specified value.
@@ -123,7 +123,7 @@
* @return <tt>true</tt> if the receiver contains the specified value.
*/
public boolean containsValue(Object value) {
- return indexOfValue(value) >= 0;
+ return indexOfValue(value) >= 0;
}
/**
* Ensures that the receiver can hold at least the specified number of associations without needing to allocate new internal memory.
@@ -136,10 +136,10 @@
* @param minCapacity the desired minimum capacity.
*/
public void ensureCapacity(int minCapacity) {
- if (table.length < minCapacity) {
- int newCapacity = nextPrime(minCapacity);
- rehash(newCapacity);
- }
+ if (table.length < minCapacity) {
+ int newCapacity = nextPrime(minCapacity);
+ rehash(newCapacity);
+ }
}
/**
* Applies a procedure to each key of the receiver, if any.
@@ -152,10 +152,10 @@
* @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise.
*/
public boolean forEachKey(LongProcedure procedure) {
- for (int i = table.length ; i-- > 0 ;) {
- if (state[i]==FULL) if (! procedure.apply(table[i])) return false;
- }
- return true;
+ for (int i = table.length ; i-- > 0 ;) {
+ if (state[i]==FULL) if (! procedure.apply(table[i])) return false;
+ }
+ return true;
}
/**
* Applies a procedure to each (key,value) pair of the receiver, if any.
@@ -165,10 +165,10 @@
* @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise.
*/
public boolean forEachPair(final LongObjectProcedure procedure) {
- for (int i = table.length ; i-- > 0 ;) {
- if (state[i]==FULL) if (! procedure.apply(table[i],values[i])) return false;
- }
- return true;
+ for (int i = table.length ; i-- > 0 ;) {
+ if (state[i]==FULL) if (! procedure.apply(table[i],values[i])) return false;
+ }
+ return true;
}
/**
* Returns the value associated with the specified key.
@@ -178,9 +178,9 @@
* @return the value associated with the specified key; <tt>null</tt> if no such key is present.
*/
public Object get(long key) {
- int i = indexOfKey(key);
- if (i<0) return null; //not contained
- return values[i];
+ int i = indexOfKey(key);
+ if (i<0) return null; //not contained
+ return values[i];
}
/**
* @param key the key to be added to the receiver.
@@ -190,86 +190,86 @@
* If the returned index >= 0, then it is NOT already contained and should be inserted at slot index.
*/
protected int indexOfInsertion(long key) {
- final long tab[] = table;
- final byte stat[] = state;
- final int length = tab.length;
-
- final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
- int i = hash % length;
- int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
- //int decrement = (hash / length) % length;
- if (decrement == 0) decrement = 1;
-
- // stop if we find a removed or free slot, or if we find the key itself
- // do NOT skip over removed slots (yes, open addressing is like that...)
- while (stat[i] == FULL && tab[i] != key) {
- i -= decrement;
- //hashCollisions++;
- if (i<0) i+=length;
- }
-
- if (stat[i] == REMOVED) {
- // stop if we find a free slot, or if we find the key itself.
- // do skip over removed slots (yes, open addressing is like that...)
- // assertion: there is at least one FREE slot.
- int j = i;
- while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
- i -= decrement;
- //hashCollisions++;
- if (i<0) i+=length;
- }
- if (stat[i] == FREE) i = j;
- }
-
-
- if (stat[i] == FULL) {
- // key already contained at slot i.
- // return a negative number identifying the slot.
- return -i-1;
- }
- // not already contained, should be inserted at slot i.
- // return a number >= 0 identifying the slot.
- return i;
+ final long tab[] = table;
+ final byte stat[] = state;
+ final int length = tab.length;
+
+ final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+ int i = hash % length;
+ int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
+ //int decrement = (hash / length) % length;
+ if (decrement == 0) decrement = 1;
+
+ // stop if we find a removed or free slot, or if we find the key itself
+ // do NOT skip over removed slots (yes, open addressing is like that...)
+ while (stat[i] == FULL && tab[i] != key) {
+ i -= decrement;
+ //hashCollisions++;
+ if (i<0) i+=length;
+ }
+
+ if (stat[i] == REMOVED) {
+ // stop if we find a free slot, or if we find the key itself.
+ // do skip over removed slots (yes, open addressing is like that...)
+ // assertion: there is at least one FREE slot.
+ int j = i;
+ while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
+ i -= decrement;
+ //hashCollisions++;
+ if (i<0) i+=length;
+ }
+ if (stat[i] == FREE) i = j;
+ }
+
+
+ if (stat[i] == FULL) {
+ // key already contained at slot i.
+ // return a negative number identifying the slot.
+ return -i-1;
+ }
+ // not already contained, should be inserted at slot i.
+ // return a number >= 0 identifying the slot.
+ return i;
}
/**
* @param key the key to be searched in the receiver.
* @return the index where the key is contained in the receiver, returns -1 if the key was not found.
*/
protected int indexOfKey(long key) {
- final long tab[] = table;
- final byte stat[] = state;
- final int length = tab.length;
-
- final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
- int i = hash % length;
- int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
- //int decrement = (hash / length) % length;
- if (decrement == 0) decrement = 1;
-
- // stop if we find a free slot, or if we find the key itself.
- // do skip over removed slots (yes, open addressing is like that...)
- while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
- i -= decrement;
- //hashCollisions++;
- if (i<0) i+=length;
- }
-
- if (stat[i] == FREE) return -1; // not found
- return i; //found, return index where key is contained
+ final long tab[] = table;
+ final byte stat[] = state;
+ final int length = tab.length;
+
+ final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+ int i = hash % length;
+ int decrement = hash % (length-2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
+ //int decrement = (hash / length) % length;
+ if (decrement == 0) decrement = 1;
+
+ // stop if we find a free slot, or if we find the key itself.
+ // do skip over removed slots (yes, open addressing is like that...)
+ while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
+ i -= decrement;
+ //hashCollisions++;
+ if (i<0) i+=length;
+ }
+
+ if (stat[i] == FREE) return -1; // not found
+ return i; //found, return index where key is contained
}
/**
* @param value the value to be searched in the receiver.
* @return the index where the value is contained in the receiver, returns -1 if the value was not found.
*/
protected int indexOfValue(Object value) {
- final Object val[] = values;
- final byte stat[] = state;
+ final Object val[] = values;
+ final byte stat[] = state;
- for (int i=stat.length; --i >= 0;) {
- if (stat[i]==FULL && val[i]==value) return i;
- }
+ for (int i=stat.length; --i >= 0;) {
+ if (stat[i]==FULL && val[i]==value) return i;
+ }
- return -1; // not found
+ return -1; // not found
}
/**
* Returns the first key the given value is associated with.
@@ -278,13 +278,13 @@
*
* @param value the value to search for.
* @return the first key for which holds <tt>get(key) == value</tt>;
- * returns <tt>Long.MIN_VALUE</tt> if no such key exists.
+ * returns <tt>Long.MIN_VALUE</tt> if no such key exists.
*/
public long keyOf(Object value) {
- //returns the first key found; there may be more matching keys, however.
- int i = indexOfValue(value);
- if (i<0) return Long.MIN_VALUE;
- return table[i];
+ //returns the first key found; there may be more matching keys, however.
+ int i = indexOfValue(value);
+ if (i<0) return Long.MIN_VALUE;
+ return table[i];
}
/**
* Fills all keys contained in the receiver into the specified list.
@@ -297,16 +297,16 @@
* @param list the list to be filled, can have any size.
*/
public void keys(LongArrayList list) {
- list.setSize(distinct);
- long[] elements = list.elements();
-
- long[] tab = table;
- byte[] stat = state;
-
- int j=0;
- for (int i = tab.length ; i-- > 0 ;) {
- if (stat[i]==FULL) elements[j++]=tab[i];
- }
+ list.setSize(distinct);
+ long[] elements = list.elements();
+
+ long[] tab = table;
+ byte[] stat = state;
+
+ int j=0;
+ for (int i = tab.length ; i-- > 0 ;) {
+ if (stat[i]==FULL) elements[j++]=tab[i];
+ }
}
/**
Fills all pairs satisfying a given condition into the specified lists.
@@ -318,7 +318,7 @@
<br>
<pre>
LongObjectProcedure condition = new LongObjectProcedure() { // match even keys only
- public boolean apply(long key, Object value) { return key%2==0; }
+ public boolean apply(long key, Object value) { return key%2==0; }
}
keys = (8,7,6), values = (1,2,2) --> keyList = (6,8), valueList = (2,1)</tt>
</pre>
@@ -328,15 +328,15 @@
@param valueList the list to be filled with values, can have any size.
*/
public void pairsMatching(final LongObjectProcedure condition, final LongArrayList keyList, final ObjectArrayList valueList) {
- keyList.clear();
- valueList.clear();
-
- for (int i = table.length ; i-- > 0 ;) {
- if (state[i]==FULL && condition.apply(table[i],values[i])) {
- keyList.add(table[i]);
- valueList.add(values[i]);
- }
- }
+ keyList.clear();
+ valueList.clear();
+
+ for (int i = table.length ; i-- > 0 ;) {
+ if (state[i]==FULL && condition.apply(table[i],values[i])) {
+ keyList.add(table[i]);
+ valueList.add(values[i]);
+ }
+ }
}
/**
* Associates the given key with the given value.
@@ -348,31 +348,31 @@
* <tt>false</tt> if the receiver did already contain such a key - the new value has now replaced the formerly associated value.
*/
public boolean put(long key, Object value) {
- int i = indexOfInsertion(key);
- if (i<0) { //already contained
- i = -i -1;
- this.values[i]=value;
- return false;
- }
-
- if (this.distinct > this.highWaterMark) {
- int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
- rehash(newCapacity);
- return put(key, value);
- }
-
- this.table[i]=key;
- this.values[i]=value;
- if (this.state[i]==FREE) this.freeEntries--;
- this.state[i]=FULL;
- this.distinct++;
-
- if (this.freeEntries < 1) { //delta
- int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
- rehash(newCapacity);
- }
+ int i = indexOfInsertion(key);
+ if (i<0) { //already contained
+ i = -i -1;
+ this.values[i]=value;
+ return false;
+ }
+
+ if (this.distinct > this.highWaterMark) {
+ int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+ rehash(newCapacity);
+ return put(key, value);
+ }
+
+ this.table[i]=key;
+ this.values[i]=value;
+ if (this.state[i]==FREE) this.freeEntries--;
+ this.state[i]=FULL;
+ this.distinct++;
+
+ if (this.freeEntries < 1) { //delta
+ int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+ rehash(newCapacity);
+ }
- return true;
+ return true;
}
/**
* Rehashes the contents of the receiver into a new table
@@ -381,34 +381,34 @@
* number of keys in the receiver exceeds the high water mark or falls below the low water mark.
*/
protected void rehash(int newCapacity) {
- int oldCapacity = table.length;
- //if (oldCapacity == newCapacity) return;
-
- long oldTable[] = table;
- Object oldValues[] = values;
- byte oldState[] = state;
-
- long newTable[] = new long[newCapacity];
- Object newValues[] = new Object[newCapacity];
- byte newState[] = new byte[newCapacity];
-
- this.lowWaterMark = chooseLowWaterMark(newCapacity,this.minLoadFactor);
- this.highWaterMark = chooseHighWaterMark(newCapacity,this.maxLoadFactor);
-
- this.table = newTable;
- this.values = newValues;
- this.state = newState;
- this.freeEntries = newCapacity-this.distinct; // delta
-
- for (int i = oldCapacity ; i-- > 0 ;) {
- if (oldState[i]==FULL) {
- long element = oldTable[i];
- int index = indexOfInsertion(element);
- newTable[index]=element;
- newValues[index]=oldValues[i];
- newState[index]=FULL;
- }
- }
+ int oldCapacity = table.length;
+ //if (oldCapacity == newCapacity) return;
+
+ long oldTable[] = table;
+ Object oldValues[] = values;
+ byte oldState[] = state;
+
+ long newTable[] = new long[newCapacity];
+ Object newValues[] = new Object[newCapacity];
+ byte newState[] = new byte[newCapacity];
+
+ this.lowWaterMark = chooseLowWaterMark(newCapacity,this.minLoadFactor);
+ this.highWaterMark = chooseHighWaterMark(newCapacity,this.maxLoadFactor);
+
+ this.table = newTable;
+ this.values = newValues;
+ this.state = newState;
+ this.freeEntries = newCapacity-this.distinct; // delta
+
+ for (int i = oldCapacity ; i-- > 0 ;) {
+ if (oldState[i]==FULL) {
+ long element = oldTable[i];
+ int index = indexOfInsertion(element);
+ newTable[index]=element;
+ newValues[index]=oldValues[i];
+ newState[index]=FULL;
+ }
+ }
}
/**
* Removes the given key with its associated element from the receiver, if present.
@@ -417,19 +417,19 @@
* @return <tt>true</tt> if the receiver contained the specified key, <tt>false</tt> otherwise.
*/
public boolean removeKey(long key) {
- int i = indexOfKey(key);
- if (i<0) return false; // key not contained
+ int i = indexOfKey(key);
+ if (i<0) return false; // key not contained
- this.state[i]=REMOVED;
- this.values[i]=null; // delta
- this.distinct--;
-
- if (this.distinct < this.lowWaterMark) {
- int newCapacity = chooseShrinkCapacity(this.distinct,this.minLoadFactor, this.maxLoadFactor);
- rehash(newCapacity);
- }
-
- return true;
+ this.state[i]=REMOVED;
+ this.values[i]=null; // delta
+ this.distinct--;
+
+ if (this.distinct < this.lowWaterMark) {
+ int newCapacity = chooseShrinkCapacity(this.distinct,this.minLoadFactor, this.maxLoadFactor);
+ rehash(newCapacity);
+ }
+
+ return true;
}
/**
* Initializes the receiver.
@@ -437,32 +437,32 @@
* @param initialCapacity the initial capacity of the receiver.
* @param minLoadFactor the minLoadFactor of the receiver.
* @param maxLoadFactor the maxLoadFactor of the receiver.
- * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
+ * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
*/
protected void setUp(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
- int capacity = initialCapacity;
- super.setUp(capacity, minLoadFactor, maxLoadFactor);
- capacity = nextPrime(capacity);
- if (capacity==0) capacity=1; // open addressing needs at least one FREE slot at any time.
-
- this.table = new long[capacity];
- this.values = new Object[capacity];
- this.state = new byte[capacity];
-
- // memory will be exhausted long before this pathological case happens, anyway.
- this.minLoadFactor = minLoadFactor;
- if (capacity == PrimeFinder.largestPrime) this.maxLoadFactor = 1.0;
- else this.maxLoadFactor = maxLoadFactor;
-
- this.distinct = 0;
- this.freeEntries = capacity; // delta
-
- // lowWaterMark will be established upon first expansion.
- // establishing it now (upon instance construction) would immediately make the table shrink upon first put(...).
- // After all the idea of an "initialCapacity" implies violating lowWaterMarks when an object is young.
- // See ensureCapacity(...)
- this.lowWaterMark = 0;
- this.highWaterMark = chooseHighWaterMark(capacity, this.maxLoadFactor);
+ int capacity = initialCapacity;
+ super.setUp(capacity, minLoadFactor, maxLoadFactor);
+ capacity = nextPrime(capacity);
+ if (capacity==0) capacity=1; // open addressing needs at least one FREE slot at any time.
+
+ this.table = new long[capacity];
+ this.values = new Object[capacity];
+ this.state = new byte[capacity];
+
+ // memory will be exhausted long before this pathological case happens, anyway.
+ this.minLoadFactor = minLoadFactor;
+ if (capacity == PrimeFinder.largestPrime) this.maxLoadFactor = 1.0;
+ else this.maxLoadFactor = maxLoadFactor;
+
+ this.distinct = 0;
+ this.freeEntries = capacity; // delta
+
+ // lowWaterMark will be established upon first expansion.
+ // establishing it now (upon instance construction) would immediately make the table shrink upon first put(...).
+ // After all the idea of an "initialCapacity" implies violating lowWaterMarks when an object is young.
+ // See ensureCapacity(...)
+ this.lowWaterMark = 0;
+ this.highWaterMark = chooseHighWaterMark(capacity, this.maxLoadFactor);
}
/**
* Trims the capacity of the receiver to be the receiver's current
@@ -470,12 +470,12 @@
* storage of the receiver.
*/
public void trimToSize() {
- // * 1.2 because open addressing's performance exponentially degrades beyond that point
- // so that even rehashing the table can take very long
- int newCapacity = nextPrime((int)(1 + 1.2*size()));
- if (table.length > newCapacity) {
- rehash(newCapacity);
- }
+ // * 1.2 because open addressing's performance exponentially degrades beyond that point
+ // so that even rehashing the table can take very long
+ int newCapacity = nextPrime((int)(1 + 1.2*size()));
+ if (table.length > newCapacity) {
+ rehash(newCapacity);
+ }
}
/**
* Fills all values contained in the receiver into the specified list.
@@ -488,15 +488,15 @@
* @param list the list to be filled, can have any size.
*/
public void values(ObjectArrayList list) {
- list.setSize(distinct);
- Object[] elements = list.elements();
-
- Object[] val = values;
- byte[] stat = state;
-
- int j=0;
- for (int i = stat.length ; i-- > 0 ;) {
- if (stat[i]==FULL) elements[j++]=val[i];
- }
+ list.setSize(distinct);
+ Object[] elements = list.elements();
+
+ Object[] val = values;
+ byte[] stat = state;
+
+ int j=0;
+ for (int i = stat.length ; i-- > 0 ;) {
+ if (stat[i]==FULL) elements[j++]=val[i];
+ }
}
}
Modified: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/PrimeFinder.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/PrimeFinder.java?rev=883973&r1=883972&r2=883973&view=diff
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/PrimeFinder.java (original)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/PrimeFinder.java Wed Nov 25 03:35:59 2009
@@ -13,126 +13,124 @@
* <p>Choosing a prime is <tt>O(log 300)</tt> (binary search in a list of 300 int's).
* Memory requirements: 1 KB static memory.
*
- * @author wolfgang.hoschek@cern.ch
- * @version 1.0, 09/24/99
*/
/**
* @deprecated until unit tests are in place. Until this time, this class/interface is unsupported.
*/
@Deprecated
public class PrimeFinder extends Object {
- /**
- * The largest prime this class can generate; currently equal to <tt>Integer.MAX_VALUE</tt>.
- */
- public static final int largestPrime = Integer.MAX_VALUE; //yes, it is prime.
+ /**
+ * The largest prime this class can generate; currently equal to <tt>Integer.MAX_VALUE</tt>.
+ */
+ public static final int largestPrime = Integer.MAX_VALUE; //yes, it is prime.
- /**
- * The prime number list consists of 11 chunks.
- * Each chunk contains prime numbers.
- * A chunk starts with a prime P1. The next element is a prime P2. P2 is the smallest prime for which holds: P2 >= 2*P1.
- * The next element is P3, for which the same holds with respect to P2, and so on.
- *
- * Chunks are chosen such that for any desired capacity >= 1000
- * the list includes a prime number <= desired capacity * 1.11 (11%).
- * For any desired capacity >= 200
- * the list includes a prime number <= desired capacity * 1.16 (16%).
- * For any desired capacity >= 16
- * the list includes a prime number <= desired capacity * 1.21 (21%).
- *
- * Therefore, primes can be retrieved which are quite close to any desired capacity,
- * which in turn avoids wasting memory.
- * For example, the list includes 1039,1117,1201,1277,1361,1439,1523,1597,1759,1907,2081.
- * So if you need a prime >= 1040, you will find a prime <= 1040*1.11=1154.
- *
- * Chunks are chosen such that they are optimized for a hashtable growthfactor of 2.0;
- * If your hashtable has such a growthfactor then,
- * after initially "rounding to a prime" upon hashtable construction,
- * it will later expand to prime capacities such that there exist no better primes.
- *
- * In total these are about 32*10=320 numbers -> 1 KB of static memory needed.
- * If you are stingy, then delete every second or fourth chunk.
- */
-
- private static final int[] primeCapacities = {
- //chunk #0
- largestPrime,
-
- //chunk #1
- 5,11,23,47,97,197,397,797,1597,3203,6421,12853,25717,51437,102877,205759,
- 411527,823117,1646237,3292489,6584983,13169977,26339969,52679969,105359939,
- 210719881,421439783,842879579,1685759167,
-
- //chunk #2
- 433,877,1759,3527,7057,14143,28289,56591,113189,226379,452759,905551,1811107,
- 3622219,7244441,14488931,28977863,57955739,115911563,231823147,463646329,927292699,
- 1854585413,
-
- //chunk #3
- 953,1907,3821,7643,15287,30577,61169,122347,244703,489407,978821,1957651,3915341,
- 7830701,15661423,31322867,62645741,125291483,250582987,501165979,1002331963,
- 2004663929,
-
- //chunk #4
- 1039,2081,4177,8363,16729,33461,66923,133853,267713,535481,1070981,2141977,4283963,
- 8567929,17135863,34271747,68543509,137087021,274174111,548348231,1096696463,
-
- //chunk #5
- 31,67,137,277,557,1117,2237,4481,8963,17929,35863,71741,143483,286973,573953,
- 1147921,2295859,4591721,9183457,18366923,36733847,73467739,146935499,293871013,
- 587742049,1175484103,
-
- //chunk #6
- 599,1201,2411,4831,9677,19373,38747,77509,155027,310081,620171,1240361,2480729,
- 4961459,9922933,19845871,39691759,79383533,158767069,317534141,635068283,1270136683,
-
- //chunk #7
- 311,631,1277,2557,5119,10243,20507,41017,82037,164089,328213,656429,1312867,
- 2625761,5251529,10503061,21006137,42012281,84024581,168049163,336098327,672196673,
- 1344393353,
-
- //chunk #8
- 3,7,17,37,79,163,331,673,1361,2729,5471,10949,21911,43853,87719,175447,350899,
- 701819,1403641,2807303,5614657,11229331,22458671,44917381,89834777,179669557,
- 359339171,718678369,1437356741,
-
- //chunk #9
- 43,89,179,359,719,1439,2879,5779,11579,23159,46327,92657,185323,370661,741337,
- 1482707,2965421,5930887,11861791,23723597,47447201,94894427,189788857,379577741,
- 759155483,1518310967,
-
- //chunk #10
- 379,761,1523,3049,6101,12203,24407,48817,97649,195311,390647,781301,1562611,
- 3125257,6250537,12501169,25002389,50004791,100009607,200019221,400038451,800076929,
- 1600153859
- /*
- // some more chunks for the low range [3..1000]
- //chunk #11
- 13,29,59,127,257,521,1049,2099,4201,8419,16843,33703,67409,134837,269683,
- 539389,1078787,2157587,4315183,8630387,17260781,34521589,69043189,138086407,
- 276172823,552345671,1104691373,
-
- //chunk #12
- 19,41,83,167,337,677,
- //1361,2729,5471,10949,21911,43853,87719,175447,350899,
- //701819,1403641,2807303,5614657,11229331,22458671,44917381,89834777,179669557,
- //359339171,718678369,1437356741,
-
- //chunk #13
- 53,107,223,449,907,1823,3659,7321,14653,29311,58631,117269,
- 234539,469099,938207,1876417,3752839,7505681,15011389,30022781,
- 60045577,120091177,240182359,480364727,960729461,1921458943
- */
- };
-
+ /**
+ * The prime number list consists of 11 chunks.
+ * Each chunk contains prime numbers.
+ * A chunk starts with a prime P1. The next element is a prime P2. P2 is the smallest prime for which holds: P2 >= 2*P1.
+ * The next element is P3, for which the same holds with respect to P2, and so on.
+ *
+ * Chunks are chosen such that for any desired capacity >= 1000
+ * the list includes a prime number <= desired capacity * 1.11 (11%).
+ * For any desired capacity >= 200
+ * the list includes a prime number <= desired capacity * 1.16 (16%).
+ * For any desired capacity >= 16
+ * the list includes a prime number <= desired capacity * 1.21 (21%).
+ *
+ * Therefore, primes can be retrieved which are quite close to any desired capacity,
+ * which in turn avoids wasting memory.
+ * For example, the list includes 1039,1117,1201,1277,1361,1439,1523,1597,1759,1907,2081.
+ * So if you need a prime >= 1040, you will find a prime <= 1040*1.11=1154.
+ *
+ * Chunks are chosen such that they are optimized for a hashtable growthfactor of 2.0;
+ * If your hashtable has such a growthfactor then,
+ * after initially "rounding to a prime" upon hashtable construction,
+ * it will later expand to prime capacities such that there exist no better primes.
+ *
+ * In total these are about 32*10=320 numbers -> 1 KB of static memory needed.
+ * If you are stingy, then delete every second or fourth chunk.
+ */
+
+ private static final int[] primeCapacities = {
+ //chunk #0
+ largestPrime,
+
+ //chunk #1
+ 5,11,23,47,97,197,397,797,1597,3203,6421,12853,25717,51437,102877,205759,
+ 411527,823117,1646237,3292489,6584983,13169977,26339969,52679969,105359939,
+ 210719881,421439783,842879579,1685759167,
+
+ //chunk #2
+ 433,877,1759,3527,7057,14143,28289,56591,113189,226379,452759,905551,1811107,
+ 3622219,7244441,14488931,28977863,57955739,115911563,231823147,463646329,927292699,
+ 1854585413,
+
+ //chunk #3
+ 953,1907,3821,7643,15287,30577,61169,122347,244703,489407,978821,1957651,3915341,
+ 7830701,15661423,31322867,62645741,125291483,250582987,501165979,1002331963,
+ 2004663929,
+
+ //chunk #4
+ 1039,2081,4177,8363,16729,33461,66923,133853,267713,535481,1070981,2141977,4283963,
+ 8567929,17135863,34271747,68543509,137087021,274174111,548348231,1096696463,
+
+ //chunk #5
+ 31,67,137,277,557,1117,2237,4481,8963,17929,35863,71741,143483,286973,573953,
+ 1147921,2295859,4591721,9183457,18366923,36733847,73467739,146935499,293871013,
+ 587742049,1175484103,
+
+ //chunk #6
+ 599,1201,2411,4831,9677,19373,38747,77509,155027,310081,620171,1240361,2480729,
+ 4961459,9922933,19845871,39691759,79383533,158767069,317534141,635068283,1270136683,
+
+ //chunk #7
+ 311,631,1277,2557,5119,10243,20507,41017,82037,164089,328213,656429,1312867,
+ 2625761,5251529,10503061,21006137,42012281,84024581,168049163,336098327,672196673,
+ 1344393353,
+
+ //chunk #8
+ 3,7,17,37,79,163,331,673,1361,2729,5471,10949,21911,43853,87719,175447,350899,
+ 701819,1403641,2807303,5614657,11229331,22458671,44917381,89834777,179669557,
+ 359339171,718678369,1437356741,
+
+ //chunk #9
+ 43,89,179,359,719,1439,2879,5779,11579,23159,46327,92657,185323,370661,741337,
+ 1482707,2965421,5930887,11861791,23723597,47447201,94894427,189788857,379577741,
+ 759155483,1518310967,
+
+ //chunk #10
+ 379,761,1523,3049,6101,12203,24407,48817,97649,195311,390647,781301,1562611,
+ 3125257,6250537,12501169,25002389,50004791,100009607,200019221,400038451,800076929,
+ 1600153859
+ /*
+ // some more chunks for the low range [3..1000]
+ //chunk #11
+ 13,29,59,127,257,521,1049,2099,4201,8419,16843,33703,67409,134837,269683,
+ 539389,1078787,2157587,4315183,8630387,17260781,34521589,69043189,138086407,
+ 276172823,552345671,1104691373,
+
+ //chunk #12
+ 19,41,83,167,337,677,
+ //1361,2729,5471,10949,21911,43853,87719,175447,350899,
+ //701819,1403641,2807303,5614657,11229331,22458671,44917381,89834777,179669557,
+ //359339171,718678369,1437356741,
+
+ //chunk #13
+ 53,107,223,449,907,1823,3659,7321,14653,29311,58631,117269,
+ 234539,469099,938207,1876417,3752839,7505681,15011389,30022781,
+ 60045577,120091177,240182359,480364727,960729461,1921458943
+ */
+ };
+
- static { //initializer
- // The above prime numbers are formatted for human readability.
- // To find numbers fast, we sort them once and for all.
-
- java.util.Arrays.sort(primeCapacities);
- //new org.apache.mahout.matrix.list.IntArrayList(primeCapacities).mergeSort(); // for debug only, TODO
- }
-
+ static { //initializer
+ // The above prime numbers are formatted for human readability.
+ // To find numbers fast, we sort them once and for all.
+
+ java.util.Arrays.sort(primeCapacities);
+ //new org.apache.mahout.matrix.list.IntArrayList(primeCapacities).mergeSort(); // for debug only, TODO
+ }
+
/**
* Makes this class non instantiable, but still let's others inherit from it.
*/
@@ -145,10 +143,10 @@
* from=1000, to=Integer.MAX_VALUE
*/
protected static void main(String args[]) {
- int from = Integer.parseInt(args[0]);
- int to = Integer.parseInt(args[1]);
-
- statistics(from,to);
+ int from = Integer.parseInt(args[0]);
+ int to = Integer.parseInt(args[1]);
+
+ statistics(from,to);
}
/**
* Returns a prime number which is <code>>= desiredCapacity</code> and very close to <code>desiredCapacity</code> (within 11% if <code>desiredCapacity >= 1000</code>).
@@ -156,43 +154,43 @@
* @return the capacity which should be used for a hashtable.
*/
public static int nextPrime(int desiredCapacity) {
- int i = java.util.Arrays.binarySearch(primeCapacities, desiredCapacity);
- //int i = new org.apache.mahout.matrix.list.IntArrayList(primeCapacities).binarySearch(desiredCapacity); // for debug only TODO
- if (i<0) {
- // desired capacity not found, choose next prime greater than desired capacity
- i = -i -1; // remember the semantics of binarySearch...
- }
- return primeCapacities[i];
+ int i = java.util.Arrays.binarySearch(primeCapacities, desiredCapacity);
+ //int i = new org.apache.mahout.matrix.list.IntArrayList(primeCapacities).binarySearch(desiredCapacity); // for debug only TODO
+ if (i<0) {
+ // desired capacity not found, choose next prime greater than desired capacity
+ i = -i -1; // remember the semantics of binarySearch...
+ }
+ return primeCapacities[i];
}
/**
* Tests correctness.
*/
protected static void statistics(int from, int to) {
- // check that primes contain no accidental errors
- for (int i=0; i<primeCapacities.length-1; i++) {
- if (primeCapacities[i] >= primeCapacities[i+1]) throw new RuntimeException("primes are unsorted or contain duplicates; detected at "+i+"@"+primeCapacities[i]);
- }
-
- double accDeviation = 0.0;
- double maxDeviation = - 1.0;
+ // check that primes contain no accidental errors
+ for (int i=0; i<primeCapacities.length-1; i++) {
+ if (primeCapacities[i] >= primeCapacities[i+1]) throw new RuntimeException("primes are unsorted or contain duplicates; detected at "+i+"@"+primeCapacities[i]);
+ }
+
+ double accDeviation = 0.0;
+ double maxDeviation = - 1.0;
- for (int i=from; i<=to; i++) {
- int primeCapacity = nextPrime(i);
- //System.out.println(primeCapacity);
- double deviation = (primeCapacity - i) / (double)i;
-
- if (deviation > maxDeviation) {
- maxDeviation = deviation;
- System.out.println("new maxdev @"+i+"@dev="+maxDeviation);
- }
+ for (int i=from; i<=to; i++) {
+ int primeCapacity = nextPrime(i);
+ //System.out.println(primeCapacity);
+ double deviation = (primeCapacity - i) / (double)i;
+
+ if (deviation > maxDeviation) {
+ maxDeviation = deviation;
+ System.out.println("new maxdev @"+i+"@dev="+maxDeviation);
+ }
- accDeviation += deviation;
- }
- long width = 1 + (long)to - (long)from;
-
- double meanDeviation = accDeviation/width;
- System.out.println("Statistics for ["+ from + ","+to+"] are as follows");
- System.out.println("meanDeviation = "+(float)meanDeviation*100+" %");
- System.out.println("maxDeviation = "+(float)maxDeviation*100+" %");
+ accDeviation += deviation;
+ }
+ long width = 1 + (long)to - (long)from;
+
+ double meanDeviation = accDeviation/width;
+ System.out.println("Statistics for ["+ from + ","+to+"] are as follows");
+ System.out.println("meanDeviation = "+(float)meanDeviation*100+" %");
+ System.out.println("maxDeviation = "+(float)maxDeviation*100+" %");
}
}
Modified: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/QuickOpenIntIntHashMap.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/QuickOpenIntIntHashMap.java?rev=883973&r1=883972&r2=883973&view=diff
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/QuickOpenIntIntHashMap.java (original)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/QuickOpenIntIntHashMap.java Wed Nov 25 03:35:59 2009
@@ -19,15 +19,15 @@
@author wolfgang.hoschek@cern.ch
@version 1.0, 09/24/99
-@see java.util.HashMap
+@see java.util.HashMap
*/
class QuickOpenIntIntHashMap extends OpenIntIntHashMap {
- public int totalProbesSaved = 0; // benchmark only
+ public int totalProbesSaved = 0; // benchmark only
/**
* Constructs an empty map with default capacity and default load factors.
*/
public QuickOpenIntIntHashMap() {
- this(defaultCapacity);
+ this(defaultCapacity);
}
/**
* Constructs an empty map with the specified initial capacity and default load factors.
@@ -37,7 +37,7 @@
* than zero.
*/
public QuickOpenIntIntHashMap(int initialCapacity) {
- this(initialCapacity, defaultMinLoadFactor, defaultMaxLoadFactor);
+ this(initialCapacity, defaultMinLoadFactor, defaultMaxLoadFactor);
}
/**
* Constructs an empty map with
@@ -46,10 +46,10 @@
* @param initialCapacity the initial capacity.
* @param minLoadFactor the minimum load factor.
* @param maxLoadFactor the maximum load factor.
- * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
+ * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= maxLoadFactor)</tt>.
*/
public QuickOpenIntIntHashMap(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
- setUp(initialCapacity,minLoadFactor,maxLoadFactor);
+ setUp(initialCapacity,minLoadFactor,maxLoadFactor);
}
/**
* Associates the given key with the given value.
@@ -61,109 +61,109 @@
* <tt>false</tt> if the receiver did already contain such a key - the new value has now replaced the formerly associated value.
*/
public boolean put(int key, int value) {
- /*
- This is open addressing with double hashing, using "Brent's variation".
- Brent's variation slows insertions a bit down (not much) but reduces probes (collisions) for successful searches, in particular for large load factors.
- (It does not improve unsuccessful searches.)
- See D. Knuth, Searching and Sorting, 3rd ed., p.533-545
-
- h1(key) = hash % M
- h2(key) = decrement = Max(1, hash/M % M)
- M is prime = capacity = table.length
- probing positions are table[(h1-j*h2) % M] for j=0,1,...
- (M and h2 could also be chosen differently, but h2 is required to be relative prime to M.)
- */
-
- int key0;
- final int tab[] = table;
- final byte stat[] = state;
- final int length = tab.length;
-
- int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
- int i = hash % length;
- int decrement = (hash / length) % length;
- if (decrement == 0) decrement = 1;
- //System.out.println("insert search for (key,value)=("+key+","+value+") at i="+i+", dec="+decrement);
-
- // stop if we find a removed or free slot, or if we find the key itself
- // do NOT skip over removed slots (yes, open addressing is like that...)
- //int comp = comparisons;
- int t = 0; // the number of probes
- int p0 = i; // the first position to probe
- while (stat[i] == FULL && tab[i] != key) {
- t++;
- i -= decrement;
- //hashCollisions++;
- if (i<0) i+=length;
- }
- //if (comparisons-comp>0) System.out.println("probed "+(comparisons-comp)+" slots.");
- if (stat[i] == FULL) {
- // key already contained at slot i.
- this.values[i]=value;
- return false;
- }
- // not already contained, should be inserted at slot i.
-
- if (this.distinct > this.highWaterMark) {
- int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
-
- //System.out.print("grow rehashing ");
- //System.out.println("at distinct="+distinct+", capacity="+table.length+" to newCapacity="+newCapacity+" ...");
-
- rehash(newCapacity);
- return put(key, value);
- }
-
- /*
- Brent's variation does a local reorganization to reduce probes. It essentially means:
- We test whether it is possible to move the association we probed first (table[p0]) out of the way.
- If this is possible, it will reduce probes for the key to be inserted, since it takes its place; it gets hit earlier.
- However, future probes for the key that we move out of the way will increase.
- Thus we only move it out of the way, if we have a net gain, that is, if we save more probes than we loose.
- For the first probe we safe more than we loose if the number of probes we needed was >=2 (t>=2).
- If the first probe cannot be moved out of the way, we try the next probe (p1).
- Now we safe more than we loose if t>=3.
- We repeat this until we find that we cannot gain or that we can indeed move p(x) out of the way.
-
- Note: Under the great majority of insertions t<=1, so the loop is entered very infrequently.
- */
- while (t>1) {
- //System.out.println("t="+t);
- key0 = tab[p0];
- hash = HashFunctions.hash(key0) & 0x7FFFFFFF;
- decrement = (hash / length) % length;
- if (decrement == 0) decrement = 1;
- int pc = p0-decrement; // pc = (p0-j*decrement) % M, j=1,2,..
- if (pc<0) pc += length;
-
- if (stat[pc] != FREE) { // not a free slot, continue searching for free slot to move to, or break.
- p0 = pc;
- t--;
- }
- else { // free or removed slot found, now move...
- //System.out.println("copying p0="+p0+" to pc="+pc+", (key,val)=("+tab[p0]+","+values[p0]+"), saving "+(t-1)+" probes.");
- this.totalProbesSaved += (t-1);
- tab[pc] = key0;
- stat[pc] = FULL;
- values[pc] = values[p0];
- i = p0; // prepare to insert: table[p0]=key
- t = 0; // break loop
- }
- }
-
- //System.out.println("inserting at i="+i);
- this.table[i]=key;
- this.values[i]=value;
- if (this.state[i]==FREE) this.freeEntries--;
- this.state[i]=FULL;
- this.distinct++;
-
- if (this.freeEntries < 1) { //delta
- int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
- rehash(newCapacity);
- }
+ /*
+ This is open addressing with double hashing, using "Brent's variation".
+ Brent's variation slows insertions a bit down (not much) but reduces probes (collisions) for successful searches, in particular for large load factors.
+ (It does not improve unsuccessful searches.)
+ See D. Knuth, Searching and Sorting, 3rd ed., p.533-545
+
+ h1(key) = hash % M
+ h2(key) = decrement = Max(1, hash/M % M)
+ M is prime = capacity = table.length
+ probing positions are table[(h1-j*h2) % M] for j=0,1,...
+ (M and h2 could also be chosen differently, but h2 is required to be relative prime to M.)
+ */
+
+ int key0;
+ final int tab[] = table;
+ final byte stat[] = state;
+ final int length = tab.length;
+
+ int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+ int i = hash % length;
+ int decrement = (hash / length) % length;
+ if (decrement == 0) decrement = 1;
+ //System.out.println("insert search for (key,value)=("+key+","+value+") at i="+i+", dec="+decrement);
+
+ // stop if we find a removed or free slot, or if we find the key itself
+ // do NOT skip over removed slots (yes, open addressing is like that...)
+ //int comp = comparisons;
+ int t = 0; // the number of probes
+ int p0 = i; // the first position to probe
+ while (stat[i] == FULL && tab[i] != key) {
+ t++;
+ i -= decrement;
+ //hashCollisions++;
+ if (i<0) i+=length;
+ }
+ //if (comparisons-comp>0) System.out.println("probed "+(comparisons-comp)+" slots.");
+ if (stat[i] == FULL) {
+ // key already contained at slot i.
+ this.values[i]=value;
+ return false;
+ }
+ // not already contained, should be inserted at slot i.
+
+ if (this.distinct > this.highWaterMark) {
+ int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+
+ //System.out.print("grow rehashing ");
+ //System.out.println("at distinct="+distinct+", capacity="+table.length+" to newCapacity="+newCapacity+" ...");
+
+ rehash(newCapacity);
+ return put(key, value);
+ }
+
+ /*
+ Brent's variation does a local reorganization to reduce probes. It essentially means:
+ We test whether it is possible to move the association we probed first (table[p0]) out of the way.
+ If this is possible, it will reduce probes for the key to be inserted, since it takes its place; it gets hit earlier.
+ However, future probes for the key that we move out of the way will increase.
+ Thus we only move it out of the way, if we have a net gain, that is, if we save more probes than we loose.
+ For the first probe we safe more than we loose if the number of probes we needed was >=2 (t>=2).
+ If the first probe cannot be moved out of the way, we try the next probe (p1).
+ Now we safe more than we loose if t>=3.
+ We repeat this until we find that we cannot gain or that we can indeed move p(x) out of the way.
+
+ Note: Under the great majority of insertions t<=1, so the loop is entered very infrequently.
+ */
+ while (t>1) {
+ //System.out.println("t="+t);
+ key0 = tab[p0];
+ hash = HashFunctions.hash(key0) & 0x7FFFFFFF;
+ decrement = (hash / length) % length;
+ if (decrement == 0) decrement = 1;
+ int pc = p0-decrement; // pc = (p0-j*decrement) % M, j=1,2,..
+ if (pc<0) pc += length;
+
+ if (stat[pc] != FREE) { // not a free slot, continue searching for free slot to move to, or break.
+ p0 = pc;
+ t--;
+ }
+ else { // free or removed slot found, now move...
+ //System.out.println("copying p0="+p0+" to pc="+pc+", (key,val)=("+tab[p0]+","+values[p0]+"), saving "+(t-1)+" probes.");
+ this.totalProbesSaved += (t-1);
+ tab[pc] = key0;
+ stat[pc] = FULL;
+ values[pc] = values[p0];
+ i = p0; // prepare to insert: table[p0]=key
+ t = 0; // break loop
+ }
+ }
+
+ //System.out.println("inserting at i="+i);
+ this.table[i]=key;
+ this.values[i]=value;
+ if (this.state[i]==FREE) this.freeEntries--;
+ this.state[i]=FULL;
+ this.distinct++;
+
+ if (this.freeEntries < 1) { //delta
+ int newCapacity = chooseGrowCapacity(this.distinct+1,this.minLoadFactor, this.maxLoadFactor);
+ rehash(newCapacity);
+ }
- return true;
+ return true;
}
/**
* Rehashes the contents of the receiver into a new table
@@ -172,39 +172,39 @@
* number of keys in the receiver exceeds the high water mark or falls below the low water mark.
*/
protected void rehash(int newCapacity) {
- int oldCapacity = table.length;
- //if (oldCapacity == newCapacity) return;
-
- int oldTable[] = table;
- int oldValues[] = values;
- byte oldState[] = state;
-
- int newTable[] = new int[newCapacity];
- int newValues[] = new int[newCapacity];
- byte newState[] = new byte[newCapacity];
-
- this.lowWaterMark = chooseLowWaterMark(newCapacity,this.minLoadFactor);
- this.highWaterMark = chooseHighWaterMark(newCapacity,this.maxLoadFactor);
-
- this.table = newTable;
- this.values = newValues;
- this.state = newState;
- this.freeEntries = newCapacity-this.distinct; // delta
-
- int tmp = this.distinct;
- this.distinct = Integer.MIN_VALUE; // switch of watermarks
- for (int i = oldCapacity ; i-- > 0 ;) {
- if (oldState[i]==FULL) {
- put(oldTable[i], oldValues[i]);
- /*
- int element = oldTable[i];
- int index = indexOfInsertion(element);
- newTable[index]=element;
- newValues[index]=oldValues[i];
- newState[index]=FULL;
- */
- }
- }
- this.distinct = tmp;
+ int oldCapacity = table.length;
+ //if (oldCapacity == newCapacity) return;
+
+ int oldTable[] = table;
+ int oldValues[] = values;
+ byte oldState[] = state;
+
+ int newTable[] = new int[newCapacity];
+ int newValues[] = new int[newCapacity];
+ byte newState[] = new byte[newCapacity];
+
+ this.lowWaterMark = chooseLowWaterMark(newCapacity,this.minLoadFactor);
+ this.highWaterMark = chooseHighWaterMark(newCapacity,this.maxLoadFactor);
+
+ this.table = newTable;
+ this.values = newValues;
+ this.state = newState;
+ this.freeEntries = newCapacity-this.distinct; // delta
+
+ int tmp = this.distinct;
+ this.distinct = Integer.MIN_VALUE; // switch of watermarks
+ for (int i = oldCapacity ; i-- > 0 ;) {
+ if (oldState[i]==FULL) {
+ put(oldTable[i], oldValues[i]);
+ /*
+ int element = oldTable[i];
+ int index = indexOfInsertion(element);
+ newTable[index]=element;
+ newValues[index]=oldValues[i];
+ newState[index]=FULL;
+ */
+ }
+ }
+ this.distinct = tmp;
}
}
Modified: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/package.html
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/package.html?rev=883973&r1=883972&r2=883973&view=diff
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/package.html (original)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/map/package.html Wed Nov 25 03:35:59 2009
@@ -88,17 +88,17 @@
<li><b>Open</b> uses extendible open addressing with double hashing.
</ul>
<p>Class naming follows the schema <tt><Implementation><KeyType><ValueType>HashMap</tt>.
- For example, a {@link cern.colt.map.OpenIntDoubleHashMap} holds <tt>(int-->double)</tt>
- associations and is implemented with open addressing. A {@link cern.colt.map.OpenIntObjectHashMap}
+ For example, a {@link org.apache.mahout.matrix.map.OpenIntDoubleHashMap} holds <tt>(int-->double)</tt>
+ associations and is implemented with open addressing. A {@link org.apache.mahout.matrix.map.OpenIntObjectHashMap}
holds <tt>(int-->Object)</tt> associations and is implemented with open addressing.
</p>
<p>The classes for maps of a given (key,value) type are derived from a common
abstract base class tagged <tt>Abstract<KeyType><ValueType></tt><tt>Map</tt>.
For example, all maps operating on <tt>(int-->double)</tt> associations are
- derived from {@link cern.colt.map.AbstractIntDoubleMap}, which in turn is derived
+ derived from {@link org.apache.mahout.matrix.map.AbstractIntDoubleMap}, which in turn is derived
from an abstract base class tying together all maps regardless of assocation
- type, {@link cern.colt.map.AbstractMap}. The abstract base classes provide skeleton
+ type, {@link org.apache.mahout.matrix.map.AbstractMap}. The abstract base classes provide skeleton
implementations for all but few methods. Experimental layouts (such as chaining,
open addressing, extensible hashing, red-black-trees, etc.) can easily be implemented
and inherit a rich set of functionality. Have a look at the javadoc <a href="package-tree.html">tree