You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/02/17 13:52:25 UTC

svn commit: r910951 - in /lucene/mahout/trunk: core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterDataModel.java

Author: srowen
Date: Wed Feb 17 12:52:25 2010
New Revision: 910951

URL: http://svn.apache.org/viewvc?rev=910951&view=rev
Log:
Fixed a possible bug that would mix up PreferenceArray / Collection in an obscure situation when reusing previously loaded file data

Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterDataModel.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java?rev=910951&r1=910950&r2=910951&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java Wed Feb 17 12:52:25 2010
@@ -98,7 +98,7 @@
  * 
  * <p>
  * It is possible and likely useful to subclass this class and customize its behavior to accommodate
- * application-specific needs and input formats. See {@link #processLine(String, FastByIDMap)} and
+ * application-specific needs and input formats. See {@link #processLine(String, FastByIDMap, boolean)} and
  * {@link #processLineWithoutID(String, FastByIDMap)}
  */
 public class FileDataModel implements DataModel {
@@ -203,10 +203,10 @@
         
         FastByIDMap<Collection<Preference>> data = new FastByIDMap<Collection<Preference>>();
         FileLineIterator iterator = new FileLineIterator(dataFile, false);
-        processFile(iterator, data);
+        processFile(iterator, data, false);
         
         for (File updateFile : findUpdateFiles()) {
-          processFile(new FileLineIterator(updateFile, false), data);
+          processFile(new FileLineIterator(updateFile, false), data, false);
         }
         
         return new GenericDataModel(GenericDataModel.toDataMap(data, true));
@@ -216,7 +216,7 @@
         FastByIDMap<PreferenceArray> rawData = ((GenericDataModel) delegate).getRawUserData();
         
         for (File updateFile : findUpdateFiles()) {
-          processFile(new FileLineIterator(updateFile, false), rawData);
+          processFile(new FileLineIterator(updateFile, false), rawData, true);
         }
         
         return new GenericDataModel(rawData);
@@ -308,13 +308,15 @@
     return delimiter;
   }
   
-  protected void processFile(FileLineIterator dataOrUpdateFileIterator, FastByIDMap<?> data) {
+  protected void processFile(FileLineIterator dataOrUpdateFileIterator,
+                             FastByIDMap<?> data,
+                             boolean fromPriorData) {
     log.info("Reading file info...");
     AtomicInteger count = new AtomicInteger();
     while (dataOrUpdateFileIterator.hasNext()) {
       String line = dataOrUpdateFileIterator.next();
       if (line.length() > 0) {
-        processLine(line, data);
+        processLine(line, data, fromPriorData);
         int currentCount = count.incrementAndGet();
         if (currentCount % 1000000 == 0) {
           log.info("Processed {} lines", currentCount);
@@ -341,8 +343,13 @@
    *          line from input data file
    * @param data
    *          all data read so far, as a mapping from user IDs to preferences
+   * @param fromPriorData an implementation detail -- if true, data will map IDs to
+   *  {@link PreferenceArray} since the framework is attempting to read and update raw
+   *  data that is already in memory. Otherwise it maps to {@link Collection}s of
+   *  {@link Preference}s, since it's reading fresh data. Subclasses must be prepared
+   *  to handle this wrinkle.
    */
-  protected void processLine(String line, FastByIDMap<?> data) {
+  protected void processLine(String line, FastByIDMap<?> data, boolean fromPriorData) {
     
     if ((line.length() == 0) || (line.charAt(0) == COMMENT_CHAR)) {
       return;
@@ -379,7 +386,7 @@
     
     // This is kind of gross but need to handle two types of storage
     Object maybePrefs = data.get(userID);
-    if (maybePrefs instanceof PreferenceArray) {
+    if (fromPriorData) {
       
       PreferenceArray prefs = (PreferenceArray) maybePrefs;
       if (preferenceValueString.length() == 0) {

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterDataModel.java?rev=910951&r1=910950&r2=910951&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterDataModel.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterDataModel.java Wed Feb 17 12:52:25 2010
@@ -57,12 +57,12 @@
   protected DataModel buildModel() throws IOException {
     FastByIDMap<Collection<Preference>> data = new FastByIDMap<Collection<Preference>>();
     FileLineIterator iterator = new FileLineIterator(getDataFile(), false);
-    processFile(iterator, data);
+    processFile(iterator, data, false);
     return new GenericDataModel(GenericDataModel.toDataMap(data, true));
   }
   
   @Override
-  protected void processLine(String line, FastByIDMap<?> rawData) {
+  protected void processLine(String line, FastByIDMap<?> rawData, boolean fromPriorData) {
     FastByIDMap<Collection<Preference>> data = (FastByIDMap<Collection<Preference>>) rawData;
     String[] jokePrefs = line.split(",");
     int count = Integer.parseInt(jokePrefs[0]);