You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by bi...@apache.org on 2010/01/16 18:51:20 UTC
svn commit: r900006 - in /lucene/mahout/trunk/math/src:
main/java-templates/org/apache/mahout/math/map/
main/java-templates/org/apache/mahout/math/set/
main/java/org/apache/mahout/math/map/
main/java/org/apache/mahout/math/matrix/impl/ main/java/org/ap...
Author: bimargulies
Date: Sat Jan 16 17:51:18 2010
New Revision: 900006
URL: http://svn.apache.org/viewvc?rev=900006&view=rev
Log:
MAHOUT-252: primitive sets
Added:
lucene/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/set/
lucene/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/set/AbstractKeyTypeSet.java.t
lucene/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/set/OpenKeyTypeHashSet.java.t
lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/set/
lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/set/AbstractSet.java (with props)
lucene/mahout/trunk/math/src/test/java-templates/org/apache/mahout/math/set/
Removed:
lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/map/AbstractMap.java
Modified:
lucene/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeObjectMap.java.t
lucene/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeValueTypeMap.java.t
lucene/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractObjectValueTypeMap.java.t
lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SelectedSparseObjectMatrix1D.java
lucene/mahout/trunk/math/src/test/java-templates/org/apache/mahout/math/map/OpenKeyTypeObjectHashMapTest.java.t
lucene/mahout/trunk/math/src/test/java-templates/org/apache/mahout/math/map/OpenKeyTypeValueTypeHashMapTest.java.t
lucene/mahout/trunk/math/src/test/java-templates/org/apache/mahout/math/map/OpenObjectValueTypeHashMapTest.java.t
Modified: lucene/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeObjectMap.java.t
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeObjectMap.java.t?rev=900006&r1=900005&r2=900006&view=diff
==============================================================================
--- lucene/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeObjectMap.java.t (original)
+++ lucene/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeObjectMap.java.t Sat Jan 16 17:51:18 2010
@@ -36,8 +36,9 @@
import org.apache.mahout.math.function.${keyTypeCap}ObjectProcedure;
import org.apache.mahout.math.function.${keyTypeCap}Procedure;
import org.apache.mahout.math.list.${keyTypeCap}ArrayList;
+import org.apache.mahout.math.set.AbstractSet;
-public abstract class Abstract${keyTypeCap}ObjectMap<T> extends AbstractMap {
+public abstract class Abstract${keyTypeCap}ObjectMap<T> extends AbstractSet {
/**
* Returns <tt>true</tt> if the receiver contains the specified key.
Modified: lucene/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeValueTypeMap.java.t
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeValueTypeMap.java.t?rev=900006&r1=900005&r2=900006&view=diff
==============================================================================
--- lucene/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeValueTypeMap.java.t (original)
+++ lucene/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeValueTypeMap.java.t Sat Jan 16 17:51:18 2010
@@ -40,7 +40,9 @@
import org.apache.mahout.math.function.${valueTypeCap}Function;
#end
-public abstract class Abstract${keyTypeCap}${valueTypeCap}Map extends AbstractMap {
+import org.apache.mahout.math.set.AbstractSet;
+
+public abstract class Abstract${keyTypeCap}${valueTypeCap}Map extends AbstractSet {
/**
* Returns <tt>true</tt> if the receiver contains the specified key.
Modified: lucene/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractObjectValueTypeMap.java.t
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractObjectValueTypeMap.java.t?rev=900006&r1=900005&r2=900006&view=diff
==============================================================================
--- lucene/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractObjectValueTypeMap.java.t (original)
+++ lucene/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractObjectValueTypeMap.java.t Sat Jan 16 17:51:18 2010
@@ -40,8 +40,9 @@
#if (${valueTypeFloating} == 'true')
import org.apache.mahout.math.function.${valueTypeCap}Function;
#end
+import org.apache.mahout.math.set.AbstractSet;
-public abstract class AbstractObject${valueTypeCap}Map<T> extends AbstractMap {
+public abstract class AbstractObject${valueTypeCap}Map<T> extends AbstractSet {
/**
* Returns <tt>true</tt> if the receiver contains the specified key.
Added: lucene/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/set/AbstractKeyTypeSet.java.t
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/set/AbstractKeyTypeSet.java.t?rev=900006&view=auto
==============================================================================
--- lucene/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/set/AbstractKeyTypeSet.java.t (added)
+++ lucene/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/set/AbstractKeyTypeSet.java.t Sat Jan 16 17:51:18 2010
@@ -0,0 +1,162 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.mahout.math.set;
+
+import org.apache.mahout.math.function.${keyTypeCap}Procedure;
+import org.apache.mahout.math.list.${keyTypeCap}ArrayList;
+
+public abstract class Abstract${keyTypeCap}Set extends AbstractSet {
+
+ /**
+ * Returns <tt>true</tt> if the receiver contains the specified key.
+ *
+ * @return <tt>true</tt> if the receiver contains the specified key.
+ */
+ public boolean contains(final ${keyType} key) {
+ return !forEachKey(
+ new ${keyTypeCap}Procedure() {
+ @Override
+ public boolean apply(${keyType} iterKey) {
+ return (key != iterKey);
+ }
+ }
+ );
+ }
+
+ /**
+ * Returns a deep copy of the receiver; uses <code>clone()</code> and casts the result.
+ *
+ * @return a deep copy of the receiver.
+ */
+ public Abstract${keyTypeCap}Set copy() {
+ return (Abstract${keyTypeCap}Set) clone();
+ }
+
+ public boolean equals(Object obj) {
+ if (obj == this) {
+ return true;
+ }
+
+ if (!(obj instanceof Abstract${keyTypeCap}Set)) {
+ return false;
+ }
+ final Abstract${keyTypeCap}Set other = (Abstract${keyTypeCap}Set) obj;
+ if (other.size() != size()) {
+ return false;
+ }
+
+ return
+ forEachKey(
+ new ${keyTypeCap}Procedure() {
+ @Override
+ public boolean apply(${keyType} key) {
+ return other.contains(key);
+ }
+ }
+ );
+ }
+
+ /**
+ * Applies a procedure to each key of the receiver, if any. Note: Iterates over the keys in no particular order.
+ * Subclasses can define a particular order, for example, "sorted by key". All methods which <i>can</i> be expressed
+ * in terms of this method (most methods can) <i>must guarantee</i> to use the <i>same</i> order defined by this
+ * method, even if it is no particular order. This is necessary so that, for example, methods <tt>keys</tt> and
+ * <tt>values</tt> will yield association pairs, not two uncorrelated lists.
+ *
+ * @param procedure the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise
+ * continues.
+ * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise.
+ */
+ public abstract boolean forEachKey(${keyTypeCap}Procedure procedure);
+
+ /**
+ * Returns a list filled with all keys contained in the receiver. The returned list has a size that equals
+ * <tt>this.size()</tt>. Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link
+ * #forEachKey(IntProcedure)}. <p> This method can be used to iterate over the keys of the receiver.
+ *
+ * @return the keys.
+ */
+ public ${keyTypeCap}ArrayList keys() {
+ ${keyTypeCap}ArrayList list = new ${keyTypeCap}ArrayList(size());
+ keys(list);
+ return list;
+ }
+
+ /**
+ * Fills all keys contained in the receiver into the specified list. Fills the list, starting at index 0. After this
+ * call returns the specified list has a new size that equals <tt>this.size()</tt>. Iteration order is guaranteed to
+ * be <i>identical</i> to the order used by method {@link #forEachKey(IntProcedure)}. <p> This method can be used to
+ * iterate over the keys of the receiver.
+ *
+ * @param list the list to be filled, can have any size.
+ */
+ public void keys(final ${keyTypeCap}ArrayList list) {
+ list.clear();
+ forEachKey(
+ new ${keyTypeCap}Procedure() {
+ @Override
+ public boolean apply(${keyType} key) {
+ list.add(key);
+ return true;
+ }
+ }
+ );
+ }
+ /**
+ * Associates the given key with the given value. Replaces any old <tt>(key,someOtherValue)</tt> association, if
+ * existing.
+ *
+ * @param key the key the value shall be associated with.
+ * @param value the value to be associated.
+ * @return <tt>true</tt> if the receiver did not already contain such a key; <tt>false</tt> if the receiver did
+ * already contain such a key - the new value has now replaced the formerly associated value.
+ */
+ public abstract boolean add(${keyType} key);
+
+ /**
+ * Removes the given key with its associated element from the receiver, if present.
+ *
+ * @param key the key to be removed from the receiver.
+ * @return <tt>true</tt> if the receiver contained the specified key, <tt>false</tt> otherwise.
+ */
+ public abstract boolean remove(${keyType} key);
+
+ /**
+ * Returns a string representation of the receiver, containing the String representation of each key-value pair,
+ * sorted ascending by key.
+ */
+ public String toString() {
+ ${keyTypeCap}ArrayList theKeys = keys();
+ //theKeys.sort();
+
+ StringBuilder buf = new StringBuilder();
+ buf.append('[');
+ int maxIndex = theKeys.size() - 1;
+ for (int i = 0; i <= maxIndex; i++) {
+ ${keyType} key = theKeys.get(i);
+ buf.append(String.valueOf(key));
+ if (i < maxIndex) {
+ buf.append(", ");
+ }
+ }
+ buf.append(']');
+ return buf.toString();
+ }
+}
Added: lucene/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/set/OpenKeyTypeHashSet.java.t
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/set/OpenKeyTypeHashSet.java.t?rev=900006&view=auto
==============================================================================
--- lucene/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/set/OpenKeyTypeHashSet.java.t (added)
+++ lucene/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/set/OpenKeyTypeHashSet.java.t Sat Jan 16 17:51:18 2010
@@ -0,0 +1,443 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.mahout.math.set;
+
+import java.util.Arrays;
+
+import org.apache.mahout.math.function.${keyTypeCap}Procedure;
+import org.apache.mahout.math.list.${keyTypeCap}ArrayList;
+import org.apache.mahout.math.map.HashFunctions;
+import org.apache.mahout.math.map.PrimeFinder;
+
+/**
+ * Open hash set of ${keyType} items;
+ **/
+public class Open${keyTypeCap}HashSet extends Abstract${keyTypeCap}Set {
+ protected static final byte FREE = 0;
+ protected static final byte FULL = 1;
+ protected static final byte REMOVED = 2;
+#if (${keyTypeFloating} == 'true')
+#set ($noKeyComment = "${keyTypeCap}.NaN")
+ protected static final ${keyType} NO_KEY_VALUE = ${keyTypeCap}.NaN;
+#else
+#set ($noKeyComment = "0")
+ protected static final ${keyType} NO_KEY_VALUE = 0;
+#end
+
+ /** The hash table keys. */
+ protected ${keyType}[] table;
+
+ /** The state of each hash table entry (FREE, FULL, REMOVED). */
+ protected byte[] state;
+
+ /** The number of table entries in state==FREE. */
+ protected int freeEntries;
+
+
+ /** Constructs an empty map with default capacity and default load factors. */
+ public Open${keyTypeCap}HashSet() {
+ this(defaultCapacity);
+ }
+
+ /**
+ * Constructs an empty map with the specified initial capacity and default load factors.
+ *
+ * @param initialCapacity the initial capacity of the map.
+ * @throws IllegalArgumentException if the initial capacity is less than zero.
+ */
+ public Open${keyTypeCap}HashSet(int initialCapacity) {
+ this(initialCapacity, defaultMinLoadFactor, defaultMaxLoadFactor);
+ }
+
+ /**
+ * Constructs an empty map with the specified initial capacity and the specified minimum and maximum load factor.
+ *
+ * @param initialCapacity the initial capacity.
+ * @param minLoadFactor the minimum load factor.
+ * @param maxLoadFactor the maximum load factor.
+ * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) ||
+ * (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >=
+ * maxLoadFactor)</tt>.
+ */
+ public Open${keyTypeCap}HashSet(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
+ setUp(initialCapacity, minLoadFactor, maxLoadFactor);
+ }
+
+ /** Removes all values associations from the receiver. Implicitly calls <tt>trimToSize()</tt>. */
+ @Override
+ public void clear() {
+ Arrays.fill(this.state, 0, state.length - 1, FREE);
+ distinct = 0;
+ freeEntries = table.length; // delta
+ trimToSize();
+ }
+
+ /**
+ * Returns a deep copy of the receiver.
+ *
+ * @return a deep copy of the receiver.
+ */
+ @Override
+ public Object clone() {
+ Open${keyTypeCap}HashSet copy = (Open${keyTypeCap}HashSet) super.clone();
+ copy.table = copy.table.clone();
+ copy.state = copy.state.clone();
+ return copy;
+ }
+
+ /**
+ * Returns <tt>true</tt> if the receiver contains the specified key.
+ *
+ * @return <tt>true</tt> if the receiver contains the specified key.
+ */
+ @Override
+ public boolean contains(${keyType} key) {
+ return indexOfKey(key) >= 0;
+ }
+
+ /**
+ * Ensures that the receiver can hold at least the specified number of associations without needing to allocate new
+ * internal memory. If necessary, allocates new internal memory and increases the capacity of the receiver. <p> This
+ * method never need be called; it is for performance tuning only. Calling this method before <tt>add()</tt>ing a
+ * large number of associations boosts performance, because the receiver will grow only once instead of potentially
+ * many times and hash collisions get less probable.
+ *
+ * @param minCapacity the desired minimum capacity.
+ */
+ @Override
+ public void ensureCapacity(int minCapacity) {
+ if (table.length < minCapacity) {
+ int newCapacity = nextPrime(minCapacity);
+ rehash(newCapacity);
+ }
+ }
+
+ /**
+ * Applies a procedure to each key of the receiver, if any. Note: Iterates over the keys in no particular order.
+ * Subclasses can define a particular order, for example, "sorted by key". All methods which <i>can</i> be expressed
+ * in terms of this method (most methods can) <i>must guarantee</i> to use the <i>same</i> order defined by this
+ * method, even if it is no particular order. This is necessary so that, for example, methods <tt>keys</tt> and
+ * <tt>values</tt> will yield association pairs, not two uncorrelated lists.
+ *
+ * @param procedure the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise
+ * continues.
+ * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise.
+ */
+ @Override
+ public boolean forEachKey(${keyTypeCap}Procedure procedure) {
+ for (int i = table.length; i-- > 0;) {
+ if (state[i] == FULL) {
+ if (!procedure.apply(table[i])) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ /**
+ * @param key the key to be added to the receiver.
+ * @return the index where the key would need to be inserted, if it is not already contained. Returns -index-1 if the
+ * key is already contained at slot index. Therefore, if the returned index < 0, then it is already contained
+ * at slot -index-1. If the returned index >= 0, then it is NOT already contained and should be inserted at
+ * slot index.
+ */
+ protected int indexOfInsertion(${keyType} key) {
+ ${keyType}[] tab = table;
+ byte[] stat = state;
+ int length = tab.length;
+
+ int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+ int i = hash % length;
+ int decrement = hash % (length - 2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
+ //int decrement = (hash / length) % length;
+ if (decrement == 0) {
+ decrement = 1;
+ }
+
+ // stop if we find a removed or free slot, or if we find the key itself
+ // do NOT skip over removed slots (yes, open addressing is like that...)
+ while (stat[i] == FULL && tab[i] != key) {
+ i -= decrement;
+ //hashCollisions++;
+ if (i < 0) {
+ i += length;
+ }
+ }
+
+ if (stat[i] == REMOVED) {
+ // stop if we find a free slot, or if we find the key itself.
+ // do skip over removed slots (yes, open addressing is like that...)
+ // assertion: there is at least one FREE slot.
+ int j = i;
+ while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
+ i -= decrement;
+ //hashCollisions++;
+ if (i < 0) {
+ i += length;
+ }
+ }
+ if (stat[i] == FREE) {
+ i = j;
+ }
+ }
+
+
+ if (stat[i] == FULL) {
+ // key already contained at slot i.
+ // return a negative number identifying the slot.
+ return -i - 1;
+ }
+ // not already contained, should be inserted at slot i.
+ // return a number >= 0 identifying the slot.
+ return i;
+ }
+
+ /**
+ * @param key the key to be searched in the receiver.
+ * @return the index where the key is contained in the receiver, returns -1 if the key was not found.
+ */
+ protected int indexOfKey(${keyType} key) {
+ ${keyType}[] tab = table;
+ byte[] stat = state;
+ int length = tab.length;
+
+ int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
+ int i = hash % length;
+ int decrement = hash % (length - 2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
+ //int decrement = (hash / length) % length;
+ if (decrement == 0) {
+ decrement = 1;
+ }
+
+ // stop if we find a free slot, or if we find the key itself.
+ // do skip over removed slots (yes, open addressing is like that...)
+ while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
+ i -= decrement;
+ //hashCollisions++;
+ if (i < 0) {
+ i += length;
+ }
+ }
+
+ if (stat[i] == FREE) {
+ return -1;
+ } // not found
+ return i; //found, return index where key is contained
+ }
+
+ /**
+ * Fills all keys contained in the receiver into the specified list. Fills the list, starting at index 0. After this
+ * call returns the specified list has a new size that equals <tt>this.size()</tt>. Iteration order is guaranteed to
+ * be <i>identical</i> to the order used by method {@link #forEachKey(${keyTypeCap}Procedure)}.
+ * <p> This method can be used
+ * to iterate over the keys of the receiver.
+ *
+ * @param list the list to be filled, can have any size.
+ */
+ @Override
+ public void keys(${keyTypeCap}ArrayList list) {
+ list.setSize(distinct);
+ ${keyType} [] elements = list.elements();
+
+ ${keyType} [] tab = table;
+ byte[] stat = state;
+
+ int j = 0;
+ for (int i = tab.length; i-- > 0;) {
+ if (stat[i] == FULL) {
+ elements[j++] = tab[i];
+ }
+ }
+ }
+
+ /**
+ * Associates the given key with the given value. Replaces any old <tt>(key,someOtherValue)</tt> association, if
+ * existing.
+ *
+ * @param key the key the value shall be associated with.
+ * @param value the value to be associated.
+ * @return <tt>true</tt> if the receiver did not already contain such a key; <tt>false</tt> if the receiver did
+ * already contain such a key - the new value has now replaced the formerly associated value.
+ */
+ @Override
+ public boolean add(${keyType} key) {
+ int i = indexOfInsertion(key);
+ if (i < 0) { //already contained
+ i = -i - 1;
+ return false;
+ }
+
+ if (this.distinct > this.highWaterMark) {
+ int newCapacity = chooseGrowCapacity(this.distinct + 1, this.minLoadFactor, this.maxLoadFactor);
+ /*
+ log.info("grow rehashing ");
+ log.info("at distinct="+distinct+", capacity="+table.length+" to newCapacity="+newCapacity+" ...");
+ */
+ rehash(newCapacity);
+ return add(key);
+ }
+
+ this.table[i] = key;
+ if (this.state[i] == FREE) {
+ this.freeEntries--;
+ }
+ this.state[i] = FULL;
+ this.distinct++;
+
+ if (this.freeEntries < 1) { //delta
+ int newCapacity = chooseGrowCapacity(this.distinct + 1, this.minLoadFactor, this.maxLoadFactor);
+ rehash(newCapacity);
+ }
+
+ return true;
+ }
+
+ /**
+ * Rehashes the contents of the receiver into a new table with a smaller or larger capacity. This method is called
+ * automatically when the number of keys in the receiver exceeds the high water mark or falls below the low water
+ * mark.
+ */
+ protected void rehash(int newCapacity) {
+ int oldCapacity = table.length;
+ //if (oldCapacity == newCapacity) return;
+
+ ${keyType}[] oldTable = table;
+ byte[] oldState = state;
+
+ ${keyType}[] newTable = new ${keyType}[newCapacity];
+ byte[] newState = new byte[newCapacity];
+
+ this.lowWaterMark = chooseLowWaterMark(newCapacity, this.minLoadFactor);
+ this.highWaterMark = chooseHighWaterMark(newCapacity, this.maxLoadFactor);
+
+ this.table = newTable;
+ this.state = newState;
+ this.freeEntries = newCapacity - this.distinct; // delta
+
+ for (int i = oldCapacity; i-- > 0;) {
+ if (oldState[i] == FULL) {
+ ${keyType} element = oldTable[i];
+ int index = indexOfInsertion(element);
+ newTable[index] = element;
+ newState[index] = FULL;
+ }
+ }
+ }
+
+ /**
+ * Removes the given key with its associated element from the receiver, if present.
+ *
+ * @param key the key to be removed from the receiver.
+ * @return <tt>true</tt> if the receiver contained the specified key, <tt>false</tt> otherwise.
+ */
+ @Override
+ public boolean remove(${keyType} key) {
+ int i = indexOfKey(key);
+ if (i < 0) {
+ return false;
+ } // key not contained
+
+ this.state[i] = REMOVED;
+ this.distinct--;
+
+ if (this.distinct < this.lowWaterMark) {
+ int newCapacity = chooseShrinkCapacity(this.distinct, this.minLoadFactor, this.maxLoadFactor);
+ /*
+ if (table.length != newCapacity) {
+ log.info("shrink rehashing ");
+ log.info("at distinct="+distinct+", capacity="+table.length+" to newCapacity="+newCapacity+" ...");
+ }
+ */
+ rehash(newCapacity);
+ }
+
+ return true;
+ }
+
+ /**
+ * Initializes the receiver.
+ *
+ * @param initialCapacity the initial capacity of the receiver.
+ * @param minLoadFactor the minLoadFactor of the receiver.
+ * @param maxLoadFactor the maxLoadFactor of the receiver.
+ * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) ||
+ * (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >=
+ * maxLoadFactor)</tt>.
+ */
+ @Override
+ protected void setUp(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
+ int capacity = initialCapacity;
+ super.setUp(capacity, minLoadFactor, maxLoadFactor);
+ capacity = nextPrime(capacity);
+ if (capacity == 0) {
+ capacity = 1;
+ } // open addressing needs at least one FREE slot at any time.
+
+ this.table = new ${keyType}[capacity];
+ this.state = new byte[capacity];
+
+ // memory will be exhausted long before this pathological case happens, anyway.
+ this.minLoadFactor = minLoadFactor;
+ if (capacity == PrimeFinder.largestPrime) {
+ this.maxLoadFactor = 1.0;
+ } else {
+ this.maxLoadFactor = maxLoadFactor;
+ }
+
+ this.distinct = 0;
+ this.freeEntries = capacity; // delta
+
+ // lowWaterMark will be established upon first expansion.
+ // establishing it now (upon instance construction) would immediately make the table shrink upon first put(...).
+ // After all the idea of an "initialCapacity" implies violating lowWaterMarks when an object is young.
+ // See ensureCapacity(...)
+ this.lowWaterMark = 0;
+ this.highWaterMark = chooseHighWaterMark(capacity, this.maxLoadFactor);
+ }
+
+ /**
+ * Trims the capacity of the receiver to be the receiver's current size. Releases any superfluous internal memory. An
+ * application can use this operation to minimize the storage of the receiver.
+ */
+ @Override
+ public void trimToSize() {
+ // * 1.2 because open addressing's performance exponentially degrades beyond that point
+ // so that even rehashing the table can take very long
+ int newCapacity = nextPrime((int) (1 + 1.2 * size()));
+ if (table.length > newCapacity) {
+ rehash(newCapacity);
+ }
+ }
+
+ /**
+ * Access for unit tests.
+ * @param capacity
+ * @param minLoadFactor
+ * @param maxLoadFactor
+ */
+ void getInternalFactors(int[] capacity,
+ double[] minLoadFactor,
+ double[] maxLoadFactor) {
+ capacity[0] = table.length;
+ minLoadFactor[0] = this.minLoadFactor;
+ maxLoadFactor[0] = this.maxLoadFactor;
+ }
+}
Modified: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SelectedSparseObjectMatrix1D.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SelectedSparseObjectMatrix1D.java?rev=900006&r1=900005&r2=900006&view=diff
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SelectedSparseObjectMatrix1D.java (original)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SelectedSparseObjectMatrix1D.java Sat Jan 16 17:51:18 2010
@@ -1,3 +1,22 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
/*
Copyright � 1999 CERN - European Organization for Nuclear Research.
Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose
Added: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/set/AbstractSet.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/set/AbstractSet.java?rev=900006&view=auto
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/set/AbstractSet.java (added)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/set/AbstractSet.java Sat Jan 16 17:51:18 2010
@@ -0,0 +1,178 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+Copyright 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose
+is hereby granted without fee, provided that the above copyright notice appear in all copies and
+that both that copyright notice and this permission notice appear in supporting documentation.
+CERN makes no representations about the suitability of this software for any purpose.
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.math.set;
+
+import org.apache.mahout.math.PersistentObject;
+import org.apache.mahout.math.map.PrimeFinder;
+
+public abstract class AbstractSet extends PersistentObject {
+ //public static boolean debug = false; // debug only
+
+ /** The number of distinct associations in the map; its "size()". */
+ protected int distinct;
+
+ /**
+ * The table capacity c=table.length always satisfies the invariant <tt>c * minLoadFactor <= s <= c *
+ * maxLoadFactor</tt>, where s=size() is the number of associations currently contained. The term "c * minLoadFactor"
+ * is called the "lowWaterMark", "c * maxLoadFactor" is called the "highWaterMark". In other words, the table capacity
+ * (and proportionally the memory used by this class) oscillates within these constraints. The terms are precomputed
+ * and cached to avoid recalculating them each time put(..) or removeKey(...) is called.
+ */
+ protected int lowWaterMark;
+ protected int highWaterMark;
+
+ /** The minimum load factor for the hashtable. */
+ protected double minLoadFactor;
+
+ /** The maximum load factor for the hashtable. */
+ protected double maxLoadFactor;
+
+ // these are public access for unit tests.
+ static public final int defaultCapacity = 277;
+ static public final double defaultMinLoadFactor = 0.2;
+ static public final double defaultMaxLoadFactor = 0.5;
+
+ /**
+ * Chooses a new prime table capacity optimized for growing that (approximately) satisfies the invariant <tt>c *
+ * minLoadFactor <= size <= c * maxLoadFactor</tt> and has at least one FREE slot for the given size.
+ */
+ protected int chooseGrowCapacity(int size, double minLoad, double maxLoad) {
+ return nextPrime(Math.max(size + 1, (int) ((4 * size / (3 * minLoad + maxLoad)))));
+ }
+
+ /**
+ * Returns new high water mark threshold based on current capacity and maxLoadFactor.
+ *
+ * @return int the new threshold.
+ */
+ protected int chooseHighWaterMark(int capacity, double maxLoad) {
+ return Math.min(capacity - 2, (int) (capacity * maxLoad)); //makes sure there is always at least one FREE slot
+ }
+
+ /**
+ * Returns new low water mark threshold based on current capacity and minLoadFactor.
+ *
+ * @return int the new threshold.
+ */
+ protected int chooseLowWaterMark(int capacity, double minLoad) {
+ return (int) (capacity * minLoad);
+ }
+
+ /**
+ * Chooses a new prime table capacity neither favoring shrinking nor growing, that (approximately) satisfies the
+ * invariant <tt>c * minLoadFactor <= size <= c * maxLoadFactor</tt> and has at least one FREE slot for the given
+ * size.
+ */
+ protected int chooseMeanCapacity(int size, double minLoad, double maxLoad) {
+ return nextPrime(Math.max(size + 1, (int) ((2 * size / (minLoad + maxLoad)))));
+ }
+
+ /**
+ * Chooses a new prime table capacity optimized for shrinking that (approximately) satisfies the invariant <tt>c *
+ * minLoadFactor <= size <= c * maxLoadFactor</tt> and has at least one FREE slot for the given size.
+ */
+ protected int chooseShrinkCapacity(int size, double minLoad, double maxLoad) {
+ return nextPrime(Math.max(size + 1, (int) ((4 * size / (minLoad + 3 * maxLoad)))));
+ }
+
+ /** Removes all (key,value) associations from the receiver. */
+ public abstract void clear();
+
+ /**
+ * Ensures that the receiver can hold at least the specified number of elements without needing to allocate new
+ * internal memory. If necessary, allocates new internal memory and increases the capacity of the receiver. <p> This
+ * method never need be called; it is for performance tuning only. Calling this method before <tt>put()</tt>ing a
+ * large number of associations boosts performance, because the receiver will grow only once instead of potentially
+ * many times. <p> <b>This default implementation does nothing.</b> Override this method if necessary.
+ *
+ * @param minCapacity the desired minimum capacity.
+ */
+ public void ensureCapacity(int minCapacity) {
+ }
+
+ /**
+ * Returns <tt>true</tt> if the receiver contains no (key,value) associations.
+ *
+ * @return <tt>true</tt> if the receiver contains no (key,value) associations.
+ */
+ public boolean isEmpty() {
+ return distinct == 0;
+ }
+
+ /**
+ * Returns a prime number which is <code>>= desiredCapacity</code> and very close to <code>desiredCapacity</code>
+ * (within 11% if <code>desiredCapacity >= 1000</code>).
+ *
+ * @param desiredCapacity the capacity desired by the user.
+ * @return the capacity which should be used for a hashtable.
+ */
+protected int nextPrime(int desiredCapacity) {
+ return PrimeFinder.nextPrime(desiredCapacity);
+ }
+
+ /**
+ * Initializes the receiver. You will almost certainly need to override this method in subclasses to initialize the
+ * hash table.
+ *
+ * @param initialCapacity the initial capacity of the receiver.
+ * @param minLoadFactor the minLoadFactor of the receiver.
+ * @param maxLoadFactor the maxLoadFactor of the receiver.
+ * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) ||
+ * (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >=
+ * maxLoadFactor)</tt>.
+ */
+ protected void setUp(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
+ if (initialCapacity < 0) {
+ throw new IllegalArgumentException("Initial Capacity must not be less than zero: " + initialCapacity);
+ }
+ if (minLoadFactor < 0.0 || minLoadFactor >= 1.0) {
+ throw new IllegalArgumentException("Illegal minLoadFactor: " + minLoadFactor);
+ }
+ if (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) {
+ throw new IllegalArgumentException("Illegal maxLoadFactor: " + maxLoadFactor);
+ }
+ if (minLoadFactor >= maxLoadFactor) {
+ throw new IllegalArgumentException(
+ "Illegal minLoadFactor: " + minLoadFactor + " and maxLoadFactor: " + maxLoadFactor);
+ }
+ }
+
+ /**
+ * Returns the number of (key,value) associations currently contained.
+ *
+ * @return the number of (key,value) associations currently contained.
+ */
+ public int size() {
+ return distinct;
+ }
+
+ /**
+ * Trims the capacity of the receiver to be the receiver's current size. Releases any superfluous internal memory. An
+ * application can use this operation to minimize the storage of the receiver. <p> This default implementation does
+ * nothing. Override this method if necessary.
+ */
+ public void trimToSize() {
+ }
+}
Propchange: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/set/AbstractSet.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/mahout/trunk/math/src/test/java-templates/org/apache/mahout/math/map/OpenKeyTypeObjectHashMapTest.java.t
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/test/java-templates/org/apache/mahout/math/map/OpenKeyTypeObjectHashMapTest.java.t?rev=900006&r1=900005&r2=900006&view=diff
==============================================================================
--- lucene/mahout/trunk/math/src/test/java-templates/org/apache/mahout/math/map/OpenKeyTypeObjectHashMapTest.java.t (original)
+++ lucene/mahout/trunk/math/src/test/java-templates/org/apache/mahout/math/map/OpenKeyTypeObjectHashMapTest.java.t Sat Jan 16 17:51:18 2010
@@ -33,6 +33,8 @@
import org.apache.mahout.math.function.${keyTypeCap}ObjectProcedure;
import org.apache.mahout.math.function.${keyTypeCap}Procedure;
import org.apache.mahout.math.list.${keyTypeCap}ArrayList;
+import org.apache.mahout.math.set.AbstractSet;
+
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
@@ -108,16 +110,16 @@
double[] maxLoadFactor = new double[1];
map.getInternalFactors(capacity, minLoadFactor, maxLoadFactor);
- assertEquals(AbstractMap.defaultCapacity, capacity[0]);
- assertEquals(AbstractMap.defaultMaxLoadFactor, maxLoadFactor[0], 0.001);
- assertEquals(AbstractMap.defaultMinLoadFactor, minLoadFactor[0], 0.001);
+ assertEquals(AbstractSet.defaultCapacity, capacity[0]);
+ assertEquals(AbstractSet.defaultMaxLoadFactor, maxLoadFactor[0], 0.001);
+ assertEquals(AbstractSet.defaultMinLoadFactor, minLoadFactor[0], 0.001);
int prime = PrimeFinder.nextPrime(907);
map = new Open${keyTypeCap}ObjectHashMap<TestClass>(prime);
map.getInternalFactors(capacity, minLoadFactor, maxLoadFactor);
assertEquals(prime, capacity[0]);
- assertEquals(AbstractMap.defaultMaxLoadFactor, maxLoadFactor[0], 0.001);
- assertEquals(AbstractMap.defaultMinLoadFactor, minLoadFactor[0], 0.001);
+ assertEquals(AbstractSet.defaultMaxLoadFactor, maxLoadFactor[0], 0.001);
+ assertEquals(AbstractSet.defaultMinLoadFactor, minLoadFactor[0], 0.001);
map = new Open${keyTypeCap}ObjectHashMap<TestClass>(prime, 0.4, 0.8);
map.getInternalFactors(capacity, minLoadFactor, maxLoadFactor);
Modified: lucene/mahout/trunk/math/src/test/java-templates/org/apache/mahout/math/map/OpenKeyTypeValueTypeHashMapTest.java.t
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/test/java-templates/org/apache/mahout/math/map/OpenKeyTypeValueTypeHashMapTest.java.t?rev=900006&r1=900005&r2=900006&view=diff
==============================================================================
--- lucene/mahout/trunk/math/src/test/java-templates/org/apache/mahout/math/map/OpenKeyTypeValueTypeHashMapTest.java.t (original)
+++ lucene/mahout/trunk/math/src/test/java-templates/org/apache/mahout/math/map/OpenKeyTypeValueTypeHashMapTest.java.t Sat Jan 16 17:51:18 2010
@@ -41,6 +41,8 @@
#if (${keyType} != ${valueType})
import org.apache.mahout.math.list.${valueTypeCap}ArrayList;
#end
+import org.apache.mahout.math.set.AbstractSet;
+
import org.junit.Assert;
import org.junit.Test;
@@ -55,16 +57,16 @@
double[] maxLoadFactor = new double[1];
map.getInternalFactors(capacity, minLoadFactor, maxLoadFactor);
- assertEquals(AbstractMap.defaultCapacity, capacity[0]);
- assertEquals(AbstractMap.defaultMaxLoadFactor, maxLoadFactor[0], 0.001);
- assertEquals(AbstractMap.defaultMinLoadFactor, minLoadFactor[0], 0.001);
+ assertEquals(AbstractSet.defaultCapacity, capacity[0]);
+ assertEquals(AbstractSet.defaultMaxLoadFactor, maxLoadFactor[0], 0.001);
+ assertEquals(AbstractSet.defaultMinLoadFactor, minLoadFactor[0], 0.001);
int prime = PrimeFinder.nextPrime(907);
map = new Open${keyTypeCap}${valueTypeCap}HashMap(prime);
map.getInternalFactors(capacity, minLoadFactor, maxLoadFactor);
assertEquals(prime, capacity[0]);
- assertEquals(AbstractMap.defaultMaxLoadFactor, maxLoadFactor[0], 0.001);
- assertEquals(AbstractMap.defaultMinLoadFactor, minLoadFactor[0], 0.001);
+ assertEquals(AbstractSet.defaultMaxLoadFactor, maxLoadFactor[0], 0.001);
+ assertEquals(AbstractSet.defaultMinLoadFactor, minLoadFactor[0], 0.001);
map = new Open${keyTypeCap}${valueTypeCap}HashMap(prime, 0.4, 0.8);
map.getInternalFactors(capacity, minLoadFactor, maxLoadFactor);
Modified: lucene/mahout/trunk/math/src/test/java-templates/org/apache/mahout/math/map/OpenObjectValueTypeHashMapTest.java.t
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/test/java-templates/org/apache/mahout/math/map/OpenObjectValueTypeHashMapTest.java.t?rev=900006&r1=900005&r2=900006&view=diff
==============================================================================
--- lucene/mahout/trunk/math/src/test/java-templates/org/apache/mahout/math/map/OpenObjectValueTypeHashMapTest.java.t (original)
+++ lucene/mahout/trunk/math/src/test/java-templates/org/apache/mahout/math/map/OpenObjectValueTypeHashMapTest.java.t Sat Jan 16 17:51:18 2010
@@ -31,6 +31,7 @@
import org.apache.mahout.math.function.Object${valueTypeCap}Procedure;
import org.apache.mahout.math.function.ObjectProcedure;
import org.apache.mahout.math.list.${valueTypeCap}ArrayList;
+import org.apache.mahout.math.set.AbstractSet;
import org.junit.Assert;
import org.junit.Test;
@@ -112,16 +113,16 @@
double[] maxLoadFactor = new double[1];
map.getInternalFactors(capacity, minLoadFactor, maxLoadFactor);
- assertEquals(AbstractMap.defaultCapacity, capacity[0]);
- assertEquals(AbstractMap.defaultMaxLoadFactor, maxLoadFactor[0], 0.001);
- assertEquals(AbstractMap.defaultMinLoadFactor, minLoadFactor[0], 0.001);
+ assertEquals(AbstractSet.defaultCapacity, capacity[0]);
+ assertEquals(AbstractSet.defaultMaxLoadFactor, maxLoadFactor[0], 0.001);
+ assertEquals(AbstractSet.defaultMinLoadFactor, minLoadFactor[0], 0.001);
int prime = PrimeFinder.nextPrime(907);
map = new OpenObject${valueTypeCap}HashMap<ComparableKey>(prime);
map.getInternalFactors(capacity, minLoadFactor, maxLoadFactor);
assertEquals(prime, capacity[0]);
- assertEquals(AbstractMap.defaultMaxLoadFactor, maxLoadFactor[0], 0.001);
- assertEquals(AbstractMap.defaultMinLoadFactor, minLoadFactor[0], 0.001);
+ assertEquals(AbstractSet.defaultMaxLoadFactor, maxLoadFactor[0], 0.001);
+ assertEquals(AbstractSet.defaultMinLoadFactor, minLoadFactor[0], 0.001);
map = new OpenObject${valueTypeCap}HashMap<ComparableKey>(prime, 0.4, 0.8);
map.getInternalFactors(capacity, minLoadFactor, maxLoadFactor);