You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2012/04/06 16:44:28 UTC

svn commit: r1310359 - in /mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common: FastByIDMap.java FastIDSet.java FastMap.java

Author: srowen
Date: Fri Apr  6 14:44:27 2012
New Revision: 1310359

URL: http://svn.apache.org/viewvc?rev=1310359&view=rev
Log:
Allow choice of load factor in custom maps

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java?rev=1310359&r1=1310358&r2=1310359&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java Fri Apr  6 14:44:27 2012
@@ -37,7 +37,7 @@ import com.google.common.base.Preconditi
 public final class FastByIDMap<V> implements Serializable, Cloneable {
   
   public static final int NO_MAX_SIZE = Integer.MAX_VALUE;
-  private static final double ALLOWED_LOAD_FACTOR = 1.5;
+  private static final float DEFAULT_LOAD_FACTOR = 1.5f;
   
   /** Dummy object used to represent a key that has been removed. */
   private static final long REMOVED = Long.MAX_VALUE;
@@ -45,6 +45,7 @@ public final class FastByIDMap<V> implem
   
   private long[] keys;
   private V[] values;
+  private float loadFactor;
   private int numEntries;
   private int numSlotsUsed;
   private final int maxSize;
@@ -59,25 +60,33 @@ public final class FastByIDMap<V> implem
   public FastByIDMap(int size) {
     this(size, NO_MAX_SIZE);
   }
-  
+
+  public FastByIDMap(int size, float loadFactor) {
+    this(size, NO_MAX_SIZE, loadFactor);
+  }
+
+  public FastByIDMap(int size, int maxSize) {
+    this(size, maxSize, DEFAULT_LOAD_FACTOR);
+  }
+
   /**
-   * Creates a new  whose capacity can accommodate the given number of entries without
-   * rehash.</p>
+   * Creates a new  whose capacity can accommodate the given number of entries without rehash.
    * 
-   * @param size
-   *          desired capacity
-   * @param maxSize
-   *          max capacity
-   * @throws IllegalArgumentException
-   *           if size is less than 0, maxSize is less than 1, or at least half of
-   *           {@link RandomUtils#MAX_INT_SMALLER_TWIN_PRIME}
+   * @param size desired capacity
+   * @param maxSize max capacity
+   * @param loadFactor ratio of internal hash table size to current size
+   * @throws IllegalArgumentException if size is less than 0, maxSize is less than 1
+   *  or at least half of {@link RandomUtils#MAX_INT_SMALLER_TWIN_PRIME}, or
+   *  loadFactor is less than 1
    */
-  public FastByIDMap(int size, int maxSize) {
+  public FastByIDMap(int size, int maxSize, float loadFactor) {
     Preconditions.checkArgument(size >= 0, "size must be at least 0");
-    int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / ALLOWED_LOAD_FACTOR);
+    Preconditions.checkArgument(loadFactor >= 1.0f, "loadFactor must be at least 1.0");
+    this.loadFactor = loadFactor;
+    int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / loadFactor);
     Preconditions.checkArgument(size < max, "size must be less than " + max);
     Preconditions.checkArgument(maxSize >= 1, "maxSize must be at least 1");
-    int hashSize = RandomUtils.nextTwinPrime((int) (ALLOWED_LOAD_FACTOR * size));
+    int hashSize = RandomUtils.nextTwinPrime((int) (loadFactor * size));
     keys = new long[hashSize];
     Arrays.fill(keys, NULL);
     values = (V[]) new Object[hashSize];
@@ -170,9 +179,9 @@ public final class FastByIDMap<V> implem
       throw new NullPointerException();
     }
     // If less than half the slots are open, let's clear it up
-    if (numSlotsUsed * ALLOWED_LOAD_FACTOR >= keys.length) {
+    if (numSlotsUsed * loadFactor >= keys.length) {
       // If over half the slots used are actual entries, let's grow
-      if (numEntries * ALLOWED_LOAD_FACTOR >= numSlotsUsed) {
+      if (numEntries * loadFactor >= numSlotsUsed) {
         growAndRehash();
       } else {
         // Otherwise just rehash to clear REMOVED entries and don't grow
@@ -262,14 +271,14 @@ public final class FastByIDMap<V> implem
   }
   
   public void rehash() {
-    rehash(RandomUtils.nextTwinPrime((int) (ALLOWED_LOAD_FACTOR * numEntries)));
+    rehash(RandomUtils.nextTwinPrime((int) (loadFactor * numEntries)));
   }
   
   private void growAndRehash() {
-    if (keys.length * ALLOWED_LOAD_FACTOR >= RandomUtils.MAX_INT_SMALLER_TWIN_PRIME) {
+    if (keys.length * loadFactor >= RandomUtils.MAX_INT_SMALLER_TWIN_PRIME) {
       throw new IllegalStateException("Can't grow any more");
     }
-    rehash(RandomUtils.nextTwinPrime((int) (ALLOWED_LOAD_FACTOR * keys.length)));
+    rehash(RandomUtils.nextTwinPrime((int) (loadFactor * keys.length)));
   }
   
   private void rehash(int newHashSize) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java?rev=1310359&r1=1310358&r2=1310359&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java Fri Apr  6 14:44:27 2012
@@ -31,13 +31,14 @@ import com.google.common.base.Preconditi
  */
 public final class FastIDSet implements Serializable, Cloneable, Iterable<Long> {
   
-  private static final double ALLOWED_LOAD_FACTOR = 1.5;
+  private static final float DEFAULT_LOAD_FACTOR = 1.5f;
   
   /** Dummy object used to represent a key that has been removed. */
   private static final long REMOVED = Long.MAX_VALUE;
   private static final long NULL = Long.MIN_VALUE;
   
   private long[] keys;
+  private float loadFactor;
   private int numEntries;
   private int numSlotsUsed;
   
@@ -52,13 +53,18 @@ public final class FastIDSet implements 
   }
 
   public FastIDSet(int size) {
+    this(size, DEFAULT_LOAD_FACTOR);
+  }
+
+  public FastIDSet(int size, float loadFactor) {
     Preconditions.checkArgument(size >= 0, "size must be at least 0");
-    int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / ALLOWED_LOAD_FACTOR);
+    Preconditions.checkArgument(loadFactor >= 1.0f, "loadFactor must be at least 1.0");
+    this.loadFactor = loadFactor;
+    int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / loadFactor);
     Preconditions.checkArgument(size < max, "size must be less than %d", max);
-    int hashSize = RandomUtils.nextTwinPrime((int) (ALLOWED_LOAD_FACTOR * size));
+    int hashSize = RandomUtils.nextTwinPrime((int) (loadFactor * size));
     keys = new long[hashSize];
-    Arrays.fill(keys, NULL);
-  }
+    Arrays.fill(keys, NULL);  }
   
   /**
    * @see #findForAdd(long)
@@ -118,9 +124,9 @@ public final class FastIDSet implements 
     Preconditions.checkArgument(key != NULL && key != REMOVED);
 
     // If less than half the slots are open, let's clear it up
-    if (numSlotsUsed * ALLOWED_LOAD_FACTOR >= keys.length) {
+    if (numSlotsUsed * loadFactor >= keys.length) {
       // If over half the slots used are actual entries, let's grow
-      if (numEntries * ALLOWED_LOAD_FACTOR >= numSlotsUsed) {
+      if (numEntries * loadFactor >= numSlotsUsed) {
         growAndRehash();
       } else {
         // Otherwise just rehash to clear REMOVED entries and don't grow
@@ -231,14 +237,14 @@ public final class FastIDSet implements 
   }
   
   private void growAndRehash() {
-    if (keys.length * ALLOWED_LOAD_FACTOR >= RandomUtils.MAX_INT_SMALLER_TWIN_PRIME) {
+    if (keys.length * loadFactor >= RandomUtils.MAX_INT_SMALLER_TWIN_PRIME) {
       throw new IllegalStateException("Can't grow any more");
     }
-    rehash(RandomUtils.nextTwinPrime((int) (ALLOWED_LOAD_FACTOR * keys.length)));
+    rehash(RandomUtils.nextTwinPrime((int) (loadFactor * keys.length)));
   }
   
   public void rehash() {
-    rehash(RandomUtils.nextTwinPrime((int) (ALLOWED_LOAD_FACTOR * numEntries)));
+    rehash(RandomUtils.nextTwinPrime((int) (loadFactor * numEntries)));
   }
   
   private void rehash(int newHashSize) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java?rev=1310359&r1=1310358&r2=1310359&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java Fri Apr  6 14:44:27 2012
@@ -54,13 +54,14 @@ import com.google.common.base.Preconditi
 public final class FastMap<K,V> implements Map<K,V>, Serializable, Cloneable {
   
   public static final int NO_MAX_SIZE = Integer.MAX_VALUE;
-  private static final double ALLOWED_LOAD_FACTOR = 1.5;
+  private static final float DEFAULT_LOAD_FACTOR = 1.5f;
   
   /** Dummy object used to represent a key that has been removed. */
   private static final Object REMOVED = new Object();
   
   private K[] keys;
   private V[] values;
+  private float loadFactor;
   private int numEntries;
   private int numSlotsUsed;
   private final int maxSize;
@@ -80,25 +81,32 @@ public final class FastMap<K,V> implemen
     this(other.size());
     putAll(other);
   }
+
+  public FastMap(int size, float loadFactor) {
+    this(size, NO_MAX_SIZE, loadFactor);
+  }
+
+  public FastMap(int size, int maxSize) {
+    this(size, maxSize, DEFAULT_LOAD_FACTOR);
+  }
   
   /**
-   * Creates a new  whose capacity can accommodate the given number of entries without
-   * rehash.</p>
+   * Creates a new  whose capacity can accommodate the given number of entries without rehash.
    * 
-   * @param size
-   *          desired capacity
-   * @param maxSize
-   *          max capacity
-   * @throws IllegalArgumentException
-   *           if size is less than 0, maxSize is less than 1, or at least half of
-   *           {@link RandomUtils#MAX_INT_SMALLER_TWIN_PRIME}
+   * @param size desired capacity
+   * @param maxSize max capacity
+   * @throws IllegalArgumentException if size is less than 0, maxSize is less than 1
+   *  or at least half of {@link RandomUtils#MAX_INT_SMALLER_TWIN_PRIME}, or
+   *  loadFactor is less than 1
    */
-  public FastMap(int size, int maxSize) {
+  public FastMap(int size, int maxSize, float loadFactor) {
     Preconditions.checkArgument(size >= 0, "size must be at least 0");
-    int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / ALLOWED_LOAD_FACTOR);
+    Preconditions.checkArgument(loadFactor >= 1.0f, "loadFactor must be at least 1.0");
+    this.loadFactor = loadFactor;
+    int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / loadFactor);
     Preconditions.checkArgument(size < max, "size must be less than " + max);
     Preconditions.checkArgument(maxSize >= 1, "maxSize must be at least 1");
-    int hashSize = RandomUtils.nextTwinPrime((int) (ALLOWED_LOAD_FACTOR * size));
+    int hashSize = RandomUtils.nextTwinPrime((int) (loadFactor * size));
     keys = (K[]) new Object[hashSize];
     values = (V[]) new Object[hashSize];
     this.maxSize = maxSize;
@@ -174,9 +182,9 @@ public final class FastMap<K,V> implemen
       throw new NullPointerException();
     }
     // If less than half the slots are open, let's clear it up
-    if (numSlotsUsed * ALLOWED_LOAD_FACTOR >= keys.length) {
+    if (numSlotsUsed * loadFactor >= keys.length) {
       // If over half the slots used are actual entries, let's grow
-      if (numEntries * ALLOWED_LOAD_FACTOR >= numSlotsUsed) {
+      if (numEntries * loadFactor >= numSlotsUsed) {
         growAndRehash();
       } else {
         // Otherwise just rehash to clear REMOVED entries and don't grow
@@ -279,14 +287,14 @@ public final class FastMap<K,V> implemen
   }
   
   public void rehash() {
-    rehash(RandomUtils.nextTwinPrime((int) (ALLOWED_LOAD_FACTOR * numEntries)));
+    rehash(RandomUtils.nextTwinPrime((int) (loadFactor * numEntries)));
   }
   
   private void growAndRehash() {
-    if (keys.length * ALLOWED_LOAD_FACTOR >= RandomUtils.MAX_INT_SMALLER_TWIN_PRIME) {
+    if (keys.length * loadFactor >= RandomUtils.MAX_INT_SMALLER_TWIN_PRIME) {
       throw new IllegalStateException("Can't grow any more");
     }
-    rehash(RandomUtils.nextTwinPrime((int) (ALLOWED_LOAD_FACTOR * keys.length)));
+    rehash(RandomUtils.nextTwinPrime((int) (loadFactor * keys.length)));
   }
   
   private void rehash(int newHashSize) {