You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2008/08/12 00:04:55 UTC

svn commit: r684957 - in /lucene/mahout/trunk/core/src: main/examples/org/apache/mahout/ga/watchmaker/cd/ main/java/org/apache/mahout/cf/taste/hadoop/ main/java/org/apache/mahout/cf/taste/impl/eval/ main/java/org/apache/mahout/cf/taste/impl/model/file/...

Author: srowen
Date: Mon Aug 11 15:04:54 2008
New Revision: 684957

URL: http://svn.apache.org/viewvc?rev=684957&view=rev
Log:
More fun IntelliJ-suggested refactorings and small tweaks

Modified:
    lucene/mahout/trunk/core/src/main/examples/org/apache/mahout/ga/watchmaker/cd/CDRule.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ByItemIDComparator.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/AbstractDifferenceRecommenderEvaluator.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/web/RecommenderServlet.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/RefreshHelperTest.java

Modified: lucene/mahout/trunk/core/src/main/examples/org/apache/mahout/ga/watchmaker/cd/CDRule.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/examples/org/apache/mahout/ga/watchmaker/cd/CDRule.java?rev=684957&r1=684956&r2=684957&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/examples/org/apache/mahout/ga/watchmaker/cd/CDRule.java (original)
+++ lucene/mahout/trunk/core/src/main/examples/org/apache/mahout/ga/watchmaker/cd/CDRule.java Mon Aug 11 15:04:54 2008
@@ -189,8 +189,8 @@
         if (!empty)
           buffer.append(" && ");
 
-        buffer.append("attr" + attributeIndex(condInd) + " "
-            + (getO(condInd) ? ">=" : "<") + " " + getV(condInd));
+        buffer.append("attr").append(attributeIndex(condInd)).append(" ").append(getO(condInd) ? ">=" : "<");
+        buffer.append(" ").append(getV(condInd));
 
         empty = false;
       }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ByItemIDComparator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ByItemIDComparator.java?rev=684957&r1=684956&r2=684957&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ByItemIDComparator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ByItemIDComparator.java Mon Aug 11 15:04:54 2008
@@ -18,8 +18,9 @@
 package org.apache.mahout.cf.taste.hadoop;
 
 import java.util.Comparator;
+import java.io.Serializable;
 
-final class ByItemIDComparator implements Comparator<ItemPrefWritable> {
+final class ByItemIDComparator implements Comparator<ItemPrefWritable>, Serializable {
 
   private static final ByItemIDComparator instance = new ByItemIDComparator();
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/AbstractDifferenceRecommenderEvaluator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/AbstractDifferenceRecommenderEvaluator.java?rev=684957&r1=684956&r2=684957&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/AbstractDifferenceRecommenderEvaluator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/AbstractDifferenceRecommenderEvaluator.java Mon Aug 11 15:04:54 2008
@@ -80,28 +80,7 @@
 
     for (User user : dataModel.getUsers()) {
       if (random.nextDouble() < evaluationPercentage) {
-        List<Preference> trainingPrefs = new ArrayList<Preference>();
-        List<Preference> testPrefs = new ArrayList<Preference>();
-        Preference[] prefs = user.getPreferencesAsArray();
-        for (int i = 0; i < prefs.length; i++) {
-          Preference pref = prefs[i];
-          Item itemCopy = new GenericItem<String>(pref.getItem().getID().toString());
-          Preference newPref = new GenericPreference(null, itemCopy, pref.getValue());
-          if (random.nextDouble() < trainingPercentage) {
-            trainingPrefs.add(newPref);
-          } else {
-            testPrefs.add(newPref);
-          }
-        }
-        log.debug("Training against {} preferences", trainingPrefs.size());
-        log.debug("Evaluating accuracy of {} preferences", testPrefs.size());
-        if (!trainingPrefs.isEmpty()) {
-          User trainingUser = new GenericUser<String>(user.getID().toString(), trainingPrefs);
-          trainingUsers.add(trainingUser);
-          if (!testPrefs.isEmpty()) {
-            testUserPrefs.put(trainingUser, testPrefs);
-          }
-        }
+        processOneUser(trainingPercentage, trainingUsers, testUserPrefs, user);
       }
     }
 
@@ -114,8 +93,35 @@
     return result;
   }
 
-  abstract double getEvaluation(Map<User, Collection<Preference>> testUserPrefs,
-                                Recommender recommender)
-          throws TasteException;
+  private void processOneUser(double trainingPercentage,
+                              Collection<User> trainingUsers,
+                              Map<User, Collection<Preference>> testUserPrefs,
+                              User user) {
+    List<Preference> trainingPrefs = new ArrayList<Preference>();
+    List<Preference> testPrefs = new ArrayList<Preference>();
+    Preference[] prefs = user.getPreferencesAsArray();
+    for (int i = 0; i < prefs.length; i++) {
+      Preference pref = prefs[i];
+      Item itemCopy = new GenericItem<String>(pref.getItem().getID().toString());
+      Preference newPref = new GenericPreference(null, itemCopy, pref.getValue());
+      if (random.nextDouble() < trainingPercentage) {
+        trainingPrefs.add(newPref);
+      } else {
+        testPrefs.add(newPref);
+      }
+    }
+    log.debug("Training against {} preferences", trainingPrefs.size());
+    log.debug("Evaluating accuracy of {} preferences", testPrefs.size());
+    if (!trainingPrefs.isEmpty()) {
+      User trainingUser = new GenericUser<String>(user.getID().toString(), trainingPrefs);
+      trainingUsers.add(trainingUser);
+      if (!testPrefs.isEmpty()) {
+        testUserPrefs.put(trainingUser, testPrefs);
+      }
+    }
+  }
+
+  abstract double getEvaluation(Map<User, Collection<Preference>> testUserPrefs, Recommender recommender)
+      throws TasteException;
 
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java?rev=684957&r1=684956&r2=684957&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java Mon Aug 11 15:04:54 2008
@@ -80,8 +80,8 @@
     RunningAverage precision = new FullRunningAverage();
     RunningAverage recall = new FullRunningAverage();
     for (User user : dataModel.getUsers()) {
-      Object id = user.getID();
       if (random.nextDouble() < evaluationPercentage) {
+        Object id = user.getID();
         Collection<Item> relevantItems = new HashSet<Item>(at);
         Preference[] prefs = user.getPreferencesAsArray();
         for (int i = 0; i < prefs.length; i++) {
@@ -94,23 +94,7 @@
         if (numRelevantItems > 0) {
           List<User> trainingUsers = new ArrayList<User>(dataModel.getNumUsers());
           for (User user2 : dataModel.getUsers()) {
-            if (id.equals(user2.getID())) {
-              List<Preference> trainingPrefs = new ArrayList<Preference>();
-              Preference[] prefs2 = user2.getPreferencesAsArray();
-              for (int i = 0; i < prefs2.length; i++) {
-                Preference pref = prefs2[i];
-                if (!relevantItems.contains(pref.getItem())) {
-                  trainingPrefs.add(pref);
-                }
-              }
-              if (!trainingPrefs.isEmpty()) {
-                User trainingUser = new GenericUser<String>(id.toString(), trainingPrefs);
-                trainingUsers.add(trainingUser);
-              }
-            } else {
-              trainingUsers.add(user2);
-            }
-
+            processOtherUser(id, relevantItems, trainingUsers, user2);
           }
           DataModel trainingModel = new GenericDataModel(trainingUsers);
           Recommender recommender = recommenderBuilder.buildRecommender(trainingModel);
@@ -136,4 +120,23 @@
     return new IRStatisticsImpl(precision.getAverage(), recall.getAverage());
   }
 
+  private void processOtherUser(Object id, Collection<Item> relevantItems, List<User> trainingUsers, User user2) {
+    if (id.equals(user2.getID())) {
+      List<Preference> trainingPrefs = new ArrayList<Preference>();
+      Preference[] prefs2 = user2.getPreferencesAsArray();
+      for (int i = 0; i < prefs2.length; i++) {
+        Preference pref = prefs2[i];
+        if (!relevantItems.contains(pref.getItem())) {
+          trainingPrefs.add(pref);
+        }
+      }
+      if (!trainingPrefs.isEmpty()) {
+        User trainingUser = new GenericUser<String>(id.toString(), trainingPrefs);
+        trainingUsers.add(trainingUser);
+      }
+    } else {
+      trainingUsers.add(user2);
+    }
+  }
+
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java?rev=684957&r1=684956&r2=684957&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java Mon Aug 11 15:04:54 2008
@@ -137,7 +137,7 @@
     prefs.add(buildPreference(null, item, preferenceValue));
   }
 
-  private void checkLoaded() throws TasteException {
+  private void checkLoaded() {
     if (!loaded) {
       reload();
     }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java?rev=684957&r1=684956&r2=684957&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java Mon Aug 11 15:04:54 2008
@@ -49,7 +49,7 @@
    */
   public NearestNUserNeighborhood(int n,
                                   UserCorrelation userCorrelation,
-                                  DataModel dataModel) throws TasteException {
+                                  DataModel dataModel) {
     this(n, userCorrelation, dataModel, 1.0);
   }
 
@@ -65,7 +65,7 @@
   public NearestNUserNeighborhood(int n,
                                   UserCorrelation userCorrelation,
                                   DataModel dataModel,
-                                  double samplingRate) throws TasteException {
+                                  double samplingRate) {
     super(userCorrelation, dataModel, samplingRate);
     if (n < 1) {
       throw new IllegalArgumentException("n must be at least 1");

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java?rev=684957&r1=684956&r2=684957&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java Mon Aug 11 15:04:54 2008
@@ -51,7 +51,7 @@
    */
   public ThresholdUserNeighborhood(double threshold,
                                    UserCorrelation userCorrelation,
-                                   DataModel dataModel) throws TasteException {
+                                   DataModel dataModel) {
     this(threshold, userCorrelation, dataModel, 1.0);
   }
 
@@ -68,7 +68,7 @@
   public ThresholdUserNeighborhood(double threshold,
                                    UserCorrelation userCorrelation,
                                    DataModel dataModel,
-                                   double samplingRate) throws TasteException {
+                                   double samplingRate) {
     super(userCorrelation, dataModel, samplingRate);
     if (Double.isNaN(threshold)) {
       throw new IllegalArgumentException("threshold must not be NaN");

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java?rev=684957&r1=684956&r2=684957&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java Mon Aug 11 15:04:54 2008
@@ -22,11 +22,12 @@
 import org.apache.mahout.cf.taste.recommender.Rescorer;
 
 import java.util.Comparator;
+import java.io.Serializable;
 
 /**
  * <p>A simple {@link org.apache.mahout.cf.taste.recommender.Rescorer} which always returns the original score.</p>
  */
-final class ByRescoreComparator implements Comparator<RecommendedItem> {
+final class ByRescoreComparator implements Comparator<RecommendedItem>, Serializable {
 
   private final Rescorer<Item> rescorer;
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java?rev=684957&r1=684956&r2=684957&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java Mon Aug 11 15:04:54 2008
@@ -309,127 +309,130 @@
 
         boolean done = false;
         while (!done) {
+          done = mergeClosestClusters(numUsers, clusters, done);
+        }
 
-          // We find a certain number of closest clusters...
-          boolean full = false;
-          LinkedList<ClusterClusterPair> queue = new LinkedList<ClusterClusterPair>();
-          int i = 0;
-          for (Collection<User> cluster1 : clusters) {
-            i++;
-            ListIterator<Collection<User>> it2 = clusters.listIterator(i);
-            while (it2.hasNext()) {
-              Collection<User> cluster2 = it2.next();
-              double similarity = clusterSimilarity.getSimilarity(cluster1, cluster2);
-              if (!Double.isNaN(similarity) &&
-                  (!full || similarity > queue.getLast().getSimilarity())) {
-                ListIterator<ClusterClusterPair> queueIterator =
-                        queue.listIterator(queue.size());
-                while (queueIterator.hasPrevious()) {
-                  if (similarity <= queueIterator.previous().getSimilarity()) {
-                    queueIterator.next();
-                    break;
-                  }
-                }
-                queueIterator.add(new ClusterClusterPair(cluster1, cluster2, similarity));
-                if (full) {
-                  queue.removeLast();
-                } else if (queue.size() > numUsers) { // use numUsers as queue size limit
-                  full = true;
-                  queue.removeLast();
-                }
-              }
-            }
-          }
+        topRecsByUserID = computeTopRecsPerUserID(clusters);
+        clustersByUserID = computeClustersPerUserID(clusters);
+        allClusters = clusters;
 
-          // The first one is definitely the closest pair in existence so we can cluster
-          // the two together, put it back into the set of clusters, and start again. Instead
-          // we assume everything else in our list of closest cluster pairs is still pretty good,
-          // and we cluster them too.
+      }
 
-          while (!queue.isEmpty()) {
+      clustersBuilt = true;
+    } finally {
+      buildClustersLock.unlock();
+    }
+  }
 
-            if (!clusteringByThreshold && clusters.size() <= numClusters) {
-              done = true;
+  private boolean mergeClosestClusters(int numUsers, List<Collection<User>> clusters, boolean done) throws TasteException {
+    // We find a certain number of closest clusters...
+    boolean full = false;
+    LinkedList<ClusterClusterPair> queue = new LinkedList<ClusterClusterPair>();
+    int i = 0;
+    for (Collection<User> cluster1 : clusters) {
+      i++;
+      ListIterator<Collection<User>> it2 = clusters.listIterator(i);
+      while (it2.hasNext()) {
+        Collection<User> cluster2 = it2.next();
+        double similarity = clusterSimilarity.getSimilarity(cluster1, cluster2);
+        if (!Double.isNaN(similarity) &&
+            (!full || similarity > queue.getLast().getSimilarity())) {
+          ListIterator<ClusterClusterPair> queueIterator =
+                  queue.listIterator(queue.size());
+          while (queueIterator.hasPrevious()) {
+            if (similarity <= queueIterator.previous().getSimilarity()) {
+              queueIterator.next();
               break;
             }
+          }
+          queueIterator.add(new ClusterClusterPair(cluster1, cluster2, similarity));
+          if (full) {
+            queue.removeLast();
+          } else if (queue.size() > numUsers) { // use numUsers as queue size limit
+            full = true;
+            queue.removeLast();
+          }
+        }
+      }
+    }
 
-            ClusterClusterPair top = queue.removeFirst();
-
-            if (clusteringByThreshold && top.getSimilarity() < clusteringThreshold) {
-              done = true;
-              break;
-            }
+    // The first one is definitely the closest pair in existence so we can cluster
+    // the two together, put it back into the set of clusters, and start again. Instead
+    // we assume everything else in our list of closest cluster pairs is still pretty good,
+    // and we cluster them too.
+
+    while (!queue.isEmpty()) {
+
+      if (!clusteringByThreshold && clusters.size() <= numClusters) {
+        done = true;
+        break;
+      }
 
-            Collection<User> cluster1 = top.getCluster1();
-            Collection<User> cluster2 = top.getCluster2();
+      ClusterClusterPair top = queue.removeFirst();
 
-            // Pull out current two clusters from clusters
-            Iterator<Collection<User>> clusterIterator = clusters.iterator();
-            boolean removed1 = false;
-            boolean removed2 = false;
-            while (clusterIterator.hasNext() && !(removed1 && removed2)) {
-              Collection<User> current = clusterIterator.next();
-              // Yes, use == here
-              if (!removed1 && cluster1 == current) {
-                clusterIterator.remove();
-                removed1 = true;
-              } else if (!removed2 && cluster2 == current) {
-                clusterIterator.remove();
-                removed2 = true;
-              }
-            }
+      if (clusteringByThreshold && top.getSimilarity() < clusteringThreshold) {
+        done = true;
+        break;
+      }
 
-            // The only catch is if a cluster showed it twice in the list of best cluster pairs;
-            // have to remove the others. Pull out anything referencing these clusters from queue
-            for (Iterator<ClusterClusterPair> queueIterator = queue.iterator();
-                 queueIterator.hasNext();) {
-              ClusterClusterPair pair = queueIterator.next();
-              Collection<User> pair1 = pair.getCluster1();
-              Collection<User> pair2 = pair.getCluster2();
-              if (pair1 == cluster1 || pair1 == cluster2 || pair2 == cluster1 || pair2 == cluster2) {
-                queueIterator.remove();
-              }
-            }
+      Collection<User> cluster1 = top.getCluster1();
+      Collection<User> cluster2 = top.getCluster2();
 
-            // Make new merged cluster
-            Collection<User> merged = new HashSet<User>(cluster1.size() + cluster2.size());
-            merged.addAll(cluster1);
-            merged.addAll(cluster2);
-
-            // Compare against other clusters; update queue if needed
-            // That new pair we're just adding might be pretty close to something else, so
-            // catch that case here and put it back into our queue
-            for (Collection<User> cluster : clusters) {
-              double similarity = clusterSimilarity.getSimilarity(merged, cluster);
-              if (similarity > queue.getLast().getSimilarity()) {
-                ListIterator<ClusterClusterPair> queueIterator = queue.listIterator();
-                while (queueIterator.hasNext()) {
-                  if (similarity > queueIterator.next().getSimilarity()) {
-                    queueIterator.previous();
-                    break;
-                  }
-                }
-                queueIterator.add(new ClusterClusterPair(merged, cluster, similarity));
-              }
-            }
+      // Pull out current two clusters from clusters
+      Iterator<Collection<User>> clusterIterator = clusters.iterator();
+      boolean removed1 = false;
+      boolean removed2 = false;
+      while (clusterIterator.hasNext() && !(removed1 && removed2)) {
+        Collection<User> current = clusterIterator.next();
+        // Yes, use == here
+        if (!removed1 && cluster1 == current) {
+          clusterIterator.remove();
+          removed1 = true;
+        } else if (!removed2 && cluster2 == current) {
+          clusterIterator.remove();
+          removed2 = true;
+        }
+      }
 
-            // Finally add new cluster to list
-            clusters.add(merged);
+      // The only catch is if a cluster showed it twice in the list of best cluster pairs;
+      // have to remove the others. Pull out anything referencing these clusters from queue
+      for (Iterator<ClusterClusterPair> queueIterator = queue.iterator();
+           queueIterator.hasNext();) {
+        ClusterClusterPair pair = queueIterator.next();
+        Collection<User> pair1 = pair.getCluster1();
+        Collection<User> pair2 = pair.getCluster2();
+        if (pair1 == cluster1 || pair1 == cluster2 || pair2 == cluster1 || pair2 == cluster2) {
+          queueIterator.remove();
+        }
+      }
 
+      // Make new merged cluster
+      Collection<User> merged = new HashSet<User>(cluster1.size() + cluster2.size());
+      merged.addAll(cluster1);
+      merged.addAll(cluster2);
+
+      // Compare against other clusters; update queue if needed
+      // That new pair we're just adding might be pretty close to something else, so
+      // catch that case here and put it back into our queue
+      for (Collection<User> cluster : clusters) {
+        double similarity = clusterSimilarity.getSimilarity(merged, cluster);
+        if (similarity > queue.getLast().getSimilarity()) {
+          ListIterator<ClusterClusterPair> queueIterator = queue.listIterator();
+          while (queueIterator.hasNext()) {
+            if (similarity > queueIterator.next().getSimilarity()) {
+              queueIterator.previous();
+              break;
+            }
           }
-
+          queueIterator.add(new ClusterClusterPair(merged, cluster, similarity));
         }
-
-        topRecsByUserID = computeTopRecsPerUserID(clusters);
-        clustersByUserID = computeClustersPerUserID(clusters);
-        allClusters = clusters;
-
       }
 
-      clustersBuilt = true;
-    } finally {
-      buildClustersLock.unlock();
+      // Finally add new cluster to list
+      clusters.add(merged);
+
     }
+    return done;
   }
 
   private static Map<Object, List<RecommendedItem>> computeTopRecsPerUserID(Iterable<Collection<User>> clusters)

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/web/RecommenderServlet.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/web/RecommenderServlet.java?rev=684957&r1=684956&r2=684957&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/web/RecommenderServlet.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/web/RecommenderServlet.java Mon Aug 11 15:04:54 2008
@@ -90,7 +90,7 @@
     }
     String howManyString = request.getParameter("howMany");
     int howMany = howManyString == null ? DEFAULT_HOW_MANY : Integer.parseInt(howManyString);
-    boolean debug = Boolean.valueOf(request.getParameter("debug"));
+    boolean debug = Boolean.parseBoolean(request.getParameter("debug"));
     String format = request.getParameter("format");
     if (format == null) {
       format = "text";
@@ -156,41 +156,50 @@
     response.setHeader("Cache-Control", "no-cache");
     PrintWriter writer = response.getWriter();
     if (debug) {
-      DataModel dataModel = recommender.getDataModel();
-      writer.print("User:");
-      writer.println(dataModel.getUser(userID));
-      writer.print("Recommender: ");
-      writer.println(recommender);
-      writer.println();
-      writer.print("Top ");
-      writer.print(NUM_TOP_PREFERENCES);
-      writer.println(" Preferences:");
-      Preference[] rawPrefs = dataModel.getUser(userID).getPreferencesAsArray();
-      int length = rawPrefs.length;
-      Preference[] sortedPrefs = new Preference[length];
-      System.arraycopy(rawPrefs, 0, sortedPrefs, 0, length);
-      Arrays.sort(sortedPrefs, Collections.reverseOrder(ByValuePreferenceComparator.getInstance()));
-      // Cap this at 20 just to be brief
-      int max = Math.min(NUM_TOP_PREFERENCES, length);
-      for (int i = 0; i < max; i++) {
-        Preference pref = sortedPrefs[i];
-        writer.print(pref.getValue());
-        writer.print('\t');
-        writer.println(pref.getItem());
-      }
-      writer.println();
-      writer.println("Recommendations:");
-      for (RecommendedItem recommendedItem : items) {
-        writer.print(recommendedItem.getValue());
-        writer.print('\t');
-        writer.println(recommendedItem.getItem());
-      }
+      writeDebugRecommendations(userID, items, writer);
     } else {
-      for (RecommendedItem recommendedItem : items) {
-        writer.print(recommendedItem.getValue());
-        writer.print('\t');
-        writer.println(recommendedItem.getItem().getID());
-      }
+      writeRecommendations(items, writer);
+    }
+  }
+
+  private void writeRecommendations(Iterable<RecommendedItem> items, PrintWriter writer) {
+    for (RecommendedItem recommendedItem : items) {
+      writer.print(recommendedItem.getValue());
+      writer.print('\t');
+      writer.println(recommendedItem.getItem().getID());
+    }
+  }
+
+  private void writeDebugRecommendations(String userID, Iterable<RecommendedItem> items, PrintWriter writer)
+      throws TasteException {
+    DataModel dataModel = recommender.getDataModel();
+    writer.print("User:");
+    writer.println(dataModel.getUser(userID));
+    writer.print("Recommender: ");
+    writer.println(recommender);
+    writer.println();
+    writer.print("Top ");
+    writer.print(NUM_TOP_PREFERENCES);
+    writer.println(" Preferences:");
+    Preference[] rawPrefs = dataModel.getUser(userID).getPreferencesAsArray();
+    int length = rawPrefs.length;
+    Preference[] sortedPrefs = new Preference[length];
+    System.arraycopy(rawPrefs, 0, sortedPrefs, 0, length);
+    Arrays.sort(sortedPrefs, Collections.reverseOrder(ByValuePreferenceComparator.getInstance()));
+    // Cap this at NUM_TOP_PREFERENCES just to be brief
+    int max = Math.min(NUM_TOP_PREFERENCES, length);
+    for (int i = 0; i < max; i++) {
+      Preference pref = sortedPrefs[i];
+      writer.print(pref.getValue());
+      writer.print('\t');
+      writer.println(pref.getItem());
+    }
+    writer.println();
+    writer.println("Recommendations:");
+    for (RecommendedItem recommendedItem : items) {
+      writer.print(recommendedItem.getValue());
+      writer.print('\t');
+      writer.println(recommendedItem.getItem());
     }
   }
 

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/RefreshHelperTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/RefreshHelperTest.java?rev=684957&r1=684956&r2=684957&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/RefreshHelperTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/RefreshHelperTest.java Mon Aug 11 15:04:54 2008
@@ -17,11 +17,9 @@
 
 package org.apache.mahout.cf.taste.impl.common;
 
-import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
 import org.apache.mahout.cf.taste.common.Refreshable;
 import junit.framework.TestCase;
 
-import java.util.Collections;
 import java.util.HashSet;
 import java.util.Collection;