You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/10/16 17:20:05 UTC

svn commit: r825933 - in /lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl: eval/LoadEvaluator.java model/file/FileDataModel.java

Author: srowen
Date: Fri Oct 16 15:20:04 2009
New Revision: 825933

URL: http://svn.apache.org/viewvc?rev=825933&view=rev
Log:
Make FileDataModel input more flexible; small tweak to LoadEvaluator for clarity

Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/LoadEvaluator.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/LoadEvaluator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/LoadEvaluator.java?rev=825933&r1=825932&r2=825933&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/LoadEvaluator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/LoadEvaluator.java Fri Oct 16 15:20:04 2009
@@ -19,7 +19,7 @@
 
 import org.apache.mahout.cf.taste.common.TasteException;
 import org.apache.mahout.cf.taste.impl.common.FullRunningAverageAndStdDev;
-import org.apache.mahout.cf.taste.impl.common.RunningAverageAndStdDev;
+import org.apache.mahout.cf.taste.impl.common.RunningAverage;
 import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
 import org.apache.mahout.cf.taste.impl.common.SamplingLongPrimitiveIterator;
 import org.apache.mahout.cf.taste.model.DataModel;
@@ -39,8 +39,9 @@
     DataModel dataModel = recommender.getDataModel();
     int numUsers = dataModel.getNumUsers();
     double sampleRate = 1000.0 / numUsers;
-    LongPrimitiveIterator userSampler = SamplingLongPrimitiveIterator.maybeWrapIterator(dataModel.getUserIDs(), sampleRate);
-    RunningAverageAndStdDev recommendationTime = new FullRunningAverageAndStdDev();
+    LongPrimitiveIterator userSampler =
+        SamplingLongPrimitiveIterator.maybeWrapIterator(dataModel.getUserIDs(), sampleRate);
+    RunningAverage recommendationTime = new FullRunningAverageAndStdDev();
     int count = 0;
     while (userSampler.hasNext()) {
       long start = System.currentTimeMillis();
@@ -50,11 +51,10 @@
         recommendationTime.addDatum(end - start);
       }
       if (++count % 10 == 0) {
-        log.info(recommendationTime.toString());
+        log.info("Average time per recommendation: " + recommendationTime.getAverage());
       }
     }
-    log.info(recommendationTime.toString());
-
+    log.info("Average time per recommendation: " + recommendationTime.getAverage());
   }
 
 }
\ No newline at end of file

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java?rev=825933&r1=825932&r2=825933&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java Fri Oct 16 15:20:04 2009
@@ -64,6 +64,8 @@
  * and is only useful in the context of an update delta file (see above). Note that if the line is empty or begins with
  * '#' it will be ignored as a comment.</p>
  *
+ * <p>It is also acceptable for the lines to contain additional fields. Fields beyond the third will be ignored.</p>
+ *
  * <p>Finally, for application that have no notion of a preference value (that is, the user simply expresses a
  * preference for an item, but no degree of preference), the caller can simply omit the third token in each line
  * altogether -- for example, "123,ABC".</p>
@@ -246,14 +248,27 @@
     }
 
     int delimiterOne = line.indexOf((int) delimiter);
+    if (delimiterOne < 0) {
+      throw new IllegalArgumentException("Bad line: " + line);
+    }
     int delimiterTwo = line.indexOf((int) delimiter, delimiterOne + 1);
-    if (delimiterOne < 0 || delimiterTwo < 0) {
+    if (delimiterTwo < 0) {
       throw new IllegalArgumentException("Bad line: " + line);
     }
+    // Look for beginning of additional, ignored fields:
+    int delimiterThree = line.indexOf((int) delimiter, delimiterTwo + 1);    
 
-    long userID = readUserIDFromString(line.substring(0, delimiterOne));
-    long itemID = readItemIDFromString(line.substring(delimiterOne + 1, delimiterTwo));
-    String preferenceValueString = line.substring(delimiterTwo + 1);
+    String userIDString = line.substring(0, delimiterOne);
+    String itemIDString = line.substring(delimiterOne + 1, delimiterTwo);
+    String preferenceValueString;
+    if (delimiterThree > delimiterTwo) {
+      preferenceValueString = line.substring(delimiterTwo + 1, delimiterThree);
+    } else {
+      preferenceValueString = line.substring(delimiterTwo + 1);
+    }
+
+    long userID = readUserIDFromString(userIDString);
+    long itemID = readItemIDFromString(itemIDString);
 
     if (transpose) {
       long tmp = userID;
@@ -439,6 +454,4 @@
     return "FileDataModel[dataFile:" + dataFile + ']';
   }
 
-
-
 }