You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2012/04/06 16:44:28 UTC
svn commit: r1310359 - in
/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common:
FastByIDMap.java FastIDSet.java FastMap.java
Author: srowen
Date: Fri Apr 6 14:44:27 2012
New Revision: 1310359
URL: http://svn.apache.org/viewvc?rev=1310359&view=rev
Log:
Allow choice of load factor in custom maps
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java?rev=1310359&r1=1310358&r2=1310359&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java Fri Apr 6 14:44:27 2012
@@ -37,7 +37,7 @@ import com.google.common.base.Preconditi
public final class FastByIDMap<V> implements Serializable, Cloneable {
public static final int NO_MAX_SIZE = Integer.MAX_VALUE;
- private static final double ALLOWED_LOAD_FACTOR = 1.5;
+ private static final float DEFAULT_LOAD_FACTOR = 1.5f;
/** Dummy object used to represent a key that has been removed. */
private static final long REMOVED = Long.MAX_VALUE;
@@ -45,6 +45,7 @@ public final class FastByIDMap<V> implem
private long[] keys;
private V[] values;
+ private float loadFactor;
private int numEntries;
private int numSlotsUsed;
private final int maxSize;
@@ -59,25 +60,33 @@ public final class FastByIDMap<V> implem
public FastByIDMap(int size) {
this(size, NO_MAX_SIZE);
}
-
+
+ public FastByIDMap(int size, float loadFactor) {
+ this(size, NO_MAX_SIZE, loadFactor);
+ }
+
+ public FastByIDMap(int size, int maxSize) {
+ this(size, maxSize, DEFAULT_LOAD_FACTOR);
+ }
+
/**
- * Creates a new whose capacity can accommodate the given number of entries without
- * rehash.</p>
+ * Creates a new whose capacity can accommodate the given number of entries without rehash.
*
- * @param size
- * desired capacity
- * @param maxSize
- * max capacity
- * @throws IllegalArgumentException
- * if size is less than 0, maxSize is less than 1, or at least half of
- * {@link RandomUtils#MAX_INT_SMALLER_TWIN_PRIME}
+ * @param size desired capacity
+ * @param maxSize max capacity
+ * @param loadFactor ratio of internal hash table size to current size
+ * @throws IllegalArgumentException if size is less than 0, maxSize is less than 1
+ * or at least half of {@link RandomUtils#MAX_INT_SMALLER_TWIN_PRIME}, or
+ * loadFactor is less than 1
*/
- public FastByIDMap(int size, int maxSize) {
+ public FastByIDMap(int size, int maxSize, float loadFactor) {
Preconditions.checkArgument(size >= 0, "size must be at least 0");
- int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / ALLOWED_LOAD_FACTOR);
+ Preconditions.checkArgument(loadFactor >= 1.0f, "loadFactor must be at least 1.0");
+ this.loadFactor = loadFactor;
+ int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / loadFactor);
Preconditions.checkArgument(size < max, "size must be less than " + max);
Preconditions.checkArgument(maxSize >= 1, "maxSize must be at least 1");
- int hashSize = RandomUtils.nextTwinPrime((int) (ALLOWED_LOAD_FACTOR * size));
+ int hashSize = RandomUtils.nextTwinPrime((int) (loadFactor * size));
keys = new long[hashSize];
Arrays.fill(keys, NULL);
values = (V[]) new Object[hashSize];
@@ -170,9 +179,9 @@ public final class FastByIDMap<V> implem
throw new NullPointerException();
}
// If less than half the slots are open, let's clear it up
- if (numSlotsUsed * ALLOWED_LOAD_FACTOR >= keys.length) {
+ if (numSlotsUsed * loadFactor >= keys.length) {
// If over half the slots used are actual entries, let's grow
- if (numEntries * ALLOWED_LOAD_FACTOR >= numSlotsUsed) {
+ if (numEntries * loadFactor >= numSlotsUsed) {
growAndRehash();
} else {
// Otherwise just rehash to clear REMOVED entries and don't grow
@@ -262,14 +271,14 @@ public final class FastByIDMap<V> implem
}
public void rehash() {
- rehash(RandomUtils.nextTwinPrime((int) (ALLOWED_LOAD_FACTOR * numEntries)));
+ rehash(RandomUtils.nextTwinPrime((int) (loadFactor * numEntries)));
}
private void growAndRehash() {
- if (keys.length * ALLOWED_LOAD_FACTOR >= RandomUtils.MAX_INT_SMALLER_TWIN_PRIME) {
+ if (keys.length * loadFactor >= RandomUtils.MAX_INT_SMALLER_TWIN_PRIME) {
throw new IllegalStateException("Can't grow any more");
}
- rehash(RandomUtils.nextTwinPrime((int) (ALLOWED_LOAD_FACTOR * keys.length)));
+ rehash(RandomUtils.nextTwinPrime((int) (loadFactor * keys.length)));
}
private void rehash(int newHashSize) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java?rev=1310359&r1=1310358&r2=1310359&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java Fri Apr 6 14:44:27 2012
@@ -31,13 +31,14 @@ import com.google.common.base.Preconditi
*/
public final class FastIDSet implements Serializable, Cloneable, Iterable<Long> {
- private static final double ALLOWED_LOAD_FACTOR = 1.5;
+ private static final float DEFAULT_LOAD_FACTOR = 1.5f;
/** Dummy object used to represent a key that has been removed. */
private static final long REMOVED = Long.MAX_VALUE;
private static final long NULL = Long.MIN_VALUE;
private long[] keys;
+ private float loadFactor;
private int numEntries;
private int numSlotsUsed;
@@ -52,13 +53,18 @@ public final class FastIDSet implements
}
public FastIDSet(int size) {
+ this(size, DEFAULT_LOAD_FACTOR);
+ }
+
+ public FastIDSet(int size, float loadFactor) {
Preconditions.checkArgument(size >= 0, "size must be at least 0");
- int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / ALLOWED_LOAD_FACTOR);
+ Preconditions.checkArgument(loadFactor >= 1.0f, "loadFactor must be at least 1.0");
+ this.loadFactor = loadFactor;
+ int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / loadFactor);
Preconditions.checkArgument(size < max, "size must be less than %d", max);
- int hashSize = RandomUtils.nextTwinPrime((int) (ALLOWED_LOAD_FACTOR * size));
+ int hashSize = RandomUtils.nextTwinPrime((int) (loadFactor * size));
keys = new long[hashSize];
- Arrays.fill(keys, NULL);
- }
+ Arrays.fill(keys, NULL); }
/**
* @see #findForAdd(long)
@@ -118,9 +124,9 @@ public final class FastIDSet implements
Preconditions.checkArgument(key != NULL && key != REMOVED);
// If less than half the slots are open, let's clear it up
- if (numSlotsUsed * ALLOWED_LOAD_FACTOR >= keys.length) {
+ if (numSlotsUsed * loadFactor >= keys.length) {
// If over half the slots used are actual entries, let's grow
- if (numEntries * ALLOWED_LOAD_FACTOR >= numSlotsUsed) {
+ if (numEntries * loadFactor >= numSlotsUsed) {
growAndRehash();
} else {
// Otherwise just rehash to clear REMOVED entries and don't grow
@@ -231,14 +237,14 @@ public final class FastIDSet implements
}
private void growAndRehash() {
- if (keys.length * ALLOWED_LOAD_FACTOR >= RandomUtils.MAX_INT_SMALLER_TWIN_PRIME) {
+ if (keys.length * loadFactor >= RandomUtils.MAX_INT_SMALLER_TWIN_PRIME) {
throw new IllegalStateException("Can't grow any more");
}
- rehash(RandomUtils.nextTwinPrime((int) (ALLOWED_LOAD_FACTOR * keys.length)));
+ rehash(RandomUtils.nextTwinPrime((int) (loadFactor * keys.length)));
}
public void rehash() {
- rehash(RandomUtils.nextTwinPrime((int) (ALLOWED_LOAD_FACTOR * numEntries)));
+ rehash(RandomUtils.nextTwinPrime((int) (loadFactor * numEntries)));
}
private void rehash(int newHashSize) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java?rev=1310359&r1=1310358&r2=1310359&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java Fri Apr 6 14:44:27 2012
@@ -54,13 +54,14 @@ import com.google.common.base.Preconditi
public final class FastMap<K,V> implements Map<K,V>, Serializable, Cloneable {
public static final int NO_MAX_SIZE = Integer.MAX_VALUE;
- private static final double ALLOWED_LOAD_FACTOR = 1.5;
+ private static final float DEFAULT_LOAD_FACTOR = 1.5f;
/** Dummy object used to represent a key that has been removed. */
private static final Object REMOVED = new Object();
private K[] keys;
private V[] values;
+ private float loadFactor;
private int numEntries;
private int numSlotsUsed;
private final int maxSize;
@@ -80,25 +81,32 @@ public final class FastMap<K,V> implemen
this(other.size());
putAll(other);
}
+
+ public FastMap(int size, float loadFactor) {
+ this(size, NO_MAX_SIZE, loadFactor);
+ }
+
+ public FastMap(int size, int maxSize) {
+ this(size, maxSize, DEFAULT_LOAD_FACTOR);
+ }
/**
- * Creates a new whose capacity can accommodate the given number of entries without
- * rehash.</p>
+ * Creates a new whose capacity can accommodate the given number of entries without rehash.
*
- * @param size
- * desired capacity
- * @param maxSize
- * max capacity
- * @throws IllegalArgumentException
- * if size is less than 0, maxSize is less than 1, or at least half of
- * {@link RandomUtils#MAX_INT_SMALLER_TWIN_PRIME}
+ * @param size desired capacity
+ * @param maxSize max capacity
+ * @throws IllegalArgumentException if size is less than 0, maxSize is less than 1
+ * or at least half of {@link RandomUtils#MAX_INT_SMALLER_TWIN_PRIME}, or
+ * loadFactor is less than 1
*/
- public FastMap(int size, int maxSize) {
+ public FastMap(int size, int maxSize, float loadFactor) {
Preconditions.checkArgument(size >= 0, "size must be at least 0");
- int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / ALLOWED_LOAD_FACTOR);
+ Preconditions.checkArgument(loadFactor >= 1.0f, "loadFactor must be at least 1.0");
+ this.loadFactor = loadFactor;
+ int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / loadFactor);
Preconditions.checkArgument(size < max, "size must be less than " + max);
Preconditions.checkArgument(maxSize >= 1, "maxSize must be at least 1");
- int hashSize = RandomUtils.nextTwinPrime((int) (ALLOWED_LOAD_FACTOR * size));
+ int hashSize = RandomUtils.nextTwinPrime((int) (loadFactor * size));
keys = (K[]) new Object[hashSize];
values = (V[]) new Object[hashSize];
this.maxSize = maxSize;
@@ -174,9 +182,9 @@ public final class FastMap<K,V> implemen
throw new NullPointerException();
}
// If less than half the slots are open, let's clear it up
- if (numSlotsUsed * ALLOWED_LOAD_FACTOR >= keys.length) {
+ if (numSlotsUsed * loadFactor >= keys.length) {
// If over half the slots used are actual entries, let's grow
- if (numEntries * ALLOWED_LOAD_FACTOR >= numSlotsUsed) {
+ if (numEntries * loadFactor >= numSlotsUsed) {
growAndRehash();
} else {
// Otherwise just rehash to clear REMOVED entries and don't grow
@@ -279,14 +287,14 @@ public final class FastMap<K,V> implemen
}
public void rehash() {
- rehash(RandomUtils.nextTwinPrime((int) (ALLOWED_LOAD_FACTOR * numEntries)));
+ rehash(RandomUtils.nextTwinPrime((int) (loadFactor * numEntries)));
}
private void growAndRehash() {
- if (keys.length * ALLOWED_LOAD_FACTOR >= RandomUtils.MAX_INT_SMALLER_TWIN_PRIME) {
+ if (keys.length * loadFactor >= RandomUtils.MAX_INT_SMALLER_TWIN_PRIME) {
throw new IllegalStateException("Can't grow any more");
}
- rehash(RandomUtils.nextTwinPrime((int) (ALLOWED_LOAD_FACTOR * keys.length)));
+ rehash(RandomUtils.nextTwinPrime((int) (loadFactor * keys.length)));
}
private void rehash(int newHashSize) {