You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2012/11/18 13:01:20 UTC

svn commit: r1410879 - in /mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common: FastByIDMap.java FastIDSet.java FastMap.java

Author: srowen
Date: Sun Nov 18 12:01:18 2012
New Revision: 1410879

URL: http://svn.apache.org/viewvc?rev=1410879&view=rev
Log:
Back-port from Myrrix optimization for FastMap, and, important bug fix for Fast*Map classes where a sequence of insertions and deletions could lead to a key in two places in the map

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java?rev=1410879&r1=1410878&r2=1410879&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java Sun Nov 18 12:01:18 2012
@@ -107,11 +107,7 @@ public final class FastByIDMap<V> implem
     int index = theHashCode % hashSize;
     long currentKey = keys[index];
     while (currentKey != NULL && key != currentKey) {
-      if (index < jump) {
-        index += hashSize - jump;
-      } else {
-        index -= jump;
-      }
+      index -= index < jump ? jump - hashSize : jump;
       currentKey = keys[index];
     }
     return index;
@@ -127,16 +123,20 @@ public final class FastByIDMap<V> implem
     int jump = 1 + theHashCode % (hashSize - 2);
     int index = theHashCode % hashSize;
     long currentKey = keys[index];
-    while (currentKey != NULL && currentKey != REMOVED && key != currentKey) { // Different
-                                                                                                             // here
-      if (index < jump) {
-        index += hashSize - jump;
-      } else {
-        index -= jump;
-      }
+    while (currentKey != NULL && currentKey != REMOVED && key != currentKey) {
+      index -= index < jump ? jump - hashSize : jump;
       currentKey = keys[index];
     }
-    return index;
+    if (currentKey != REMOVED) {
+      return index;
+    }
+    // If we're adding, it's here, but, the key might have a value already later
+    int addIndex = index;
+    while (currentKey != NULL && key != currentKey) {
+      index -= index < jump ? jump - hashSize : jump;
+      currentKey = keys[index];
+    }
+    return key == currentKey ? index : addIndex;
   }
   
   public V get(long key) {
@@ -194,20 +194,19 @@ public final class FastByIDMap<V> implem
       V oldValue = values[index];
       values[index] = value;
       return oldValue;
-    } else {
-      // If size is limited,
-      if (countingAccesses && numEntries >= maxSize) {
-        // and we're too large, clear some old-ish entry
-        clearStaleEntry(index);
-      }
-      keys[index] = key;
-      values[index] = value;
-      numEntries++;
-      if (keyIndex == NULL) {
-        numSlotsUsed++;
-      }
-      return null;
     }
+    // If size is limited,
+    if (countingAccesses && numEntries >= maxSize) {
+      // and we're too large, clear some old-ish entry
+      clearStaleEntry(index);
+    }
+    keys[index] = key;
+    values[index] = value;
+    numEntries++;
+    if (keyIndex == NULL) {
+      numSlotsUsed++;
+    }
+    return null;
   }
   
   private void clearStaleEntry(int index) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java?rev=1410879&r1=1410878&r2=1410879&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java Sun Nov 18 12:01:18 2012
@@ -78,11 +78,7 @@ public final class FastIDSet implements 
     int index = theHashCode % hashSize;
     long currentKey = keys[index];
     while (currentKey != NULL && key != currentKey) { // note: true when currentKey == REMOVED
-      if (index < jump) {
-        index += hashSize - jump;
-      } else {
-        index -= jump;
-      }
+      index -= index < jump ? jump - hashSize : jump;
       currentKey = keys[index];
     }
     return index;
@@ -98,15 +94,20 @@ public final class FastIDSet implements 
     int jump = 1 + theHashCode % (hashSize - 2);
     int index = theHashCode % hashSize;
     long currentKey = keys[index];
-    while (currentKey != NULL && currentKey != REMOVED && key != currentKey) { // Different here
-      if (index < jump) {
-        index += hashSize - jump;
-      } else {
-        index -= jump;
-      }
+    while (currentKey != NULL && currentKey != REMOVED && key != currentKey) {
+      index -= index < jump ? jump - hashSize : jump;
       currentKey = keys[index];
     }
-    return index;
+    if (currentKey != REMOVED) {
+      return index;
+    }
+    // If we're adding, it's here, but, the key might have a value already later
+    int addIndex = index;
+    while (currentKey != NULL && key != currentKey) {
+      index -= index < jump ? jump - hashSize : jump;
+      currentKey = keys[index];
+    }
+    return key == currentKey ? index : addIndex;
   }
   
   public int size() {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java?rev=1410879&r1=1410878&r2=1410879&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java Sun Nov 18 12:01:18 2012
@@ -121,16 +121,35 @@ public final class FastMap<K,V> implemen
     int jump = 1 + theHashCode % (hashSize - 2);
     int index = theHashCode % hashSize;
     K currentKey = keys[index];
-    while (currentKey != null && (currentKey == REMOVED || !key.equals(currentKey))) {
-      if (index < jump) {
-        index += hashSize - jump;
-      } else {
-        index -= jump;
-      }
+    while (currentKey != null && !key.equals(currentKey)) {
+      index -= index < jump ? jump - hashSize : jump;
       currentKey = keys[index];
     }
     return index;
   }
+
+  private int findForAdd(Object key) {
+    int theHashCode = key.hashCode() & 0x7FFFFFFF; // make sure it's positive
+    K[] keys = this.keys;
+    int hashSize = keys.length;
+    int jump = 1 + theHashCode % (hashSize - 2);
+    int index = theHashCode % hashSize;
+    K currentKey = keys[index];
+    while (currentKey != null && currentKey != REMOVED && key != currentKey) {
+      index -= index < jump ? jump - hashSize : jump;
+      currentKey = keys[index];
+    }
+    if (currentKey != REMOVED) {
+      return index;
+    }
+    // If we're adding, it's here, but, the key might have a value already later
+    int addIndex = index;
+    while (currentKey != null && key != currentKey) {
+      index -= index < jump ? jump - hashSize : jump;
+      currentKey = keys[index];
+    }
+    return key == currentKey ? index : addIndex;
+  }
   
   @Override
   public V get(Object key) {
@@ -191,23 +210,22 @@ public final class FastMap<K,V> implemen
       }
     }
     // Here we may later consider implementing Brent's variation described on page 532
-    int index = find(key);
-    if (keys[index] == null) {
-      // If size is limited,
-      if (countingAccesses && numEntries >= maxSize) {
-        // and we're too large, clear some old-ish entry
-        clearStaleEntry(index);
-      }
-      keys[index] = key;
-      values[index] = value;
-      numEntries++;
-      numSlotsUsed++;
-      return null;
-    } else {
+    int index = findForAdd(key);
+    if (keys[index] == key) {
       V oldValue = values[index];
       values[index] = value;
       return oldValue;
     }
+    // If size is limited,
+    if (countingAccesses && numEntries >= maxSize) {
+      // and we're too large, clear some old-ish entry
+      clearStaleEntry(index);
+    }
+    keys[index] = key;
+    values[index] = value;
+    numEntries++;
+    numSlotsUsed++;
+    return null;
   }
   
   private void clearStaleEntry(int index) {