You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/02/17 13:52:25 UTC
svn commit: r910951 - in /lucene/mahout/trunk:
core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterDataModel.java
Author: srowen
Date: Wed Feb 17 12:52:25 2010
New Revision: 910951
URL: http://svn.apache.org/viewvc?rev=910951&view=rev
Log:
Fixed a possible bug that would mix up PreferenceArray / Collection in an obscure situation when reusing previously loaded file data
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterDataModel.java
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java?rev=910951&r1=910950&r2=910951&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java Wed Feb 17 12:52:25 2010
@@ -98,7 +98,7 @@
*
* <p>
* It is possible and likely useful to subclass this class and customize its behavior to accommodate
- * application-specific needs and input formats. See {@link #processLine(String, FastByIDMap)} and
+ * application-specific needs and input formats. See {@link #processLine(String, FastByIDMap, boolean)} and
* {@link #processLineWithoutID(String, FastByIDMap)}
*/
public class FileDataModel implements DataModel {
@@ -203,10 +203,10 @@
FastByIDMap<Collection<Preference>> data = new FastByIDMap<Collection<Preference>>();
FileLineIterator iterator = new FileLineIterator(dataFile, false);
- processFile(iterator, data);
+ processFile(iterator, data, false);
for (File updateFile : findUpdateFiles()) {
- processFile(new FileLineIterator(updateFile, false), data);
+ processFile(new FileLineIterator(updateFile, false), data, false);
}
return new GenericDataModel(GenericDataModel.toDataMap(data, true));
@@ -216,7 +216,7 @@
FastByIDMap<PreferenceArray> rawData = ((GenericDataModel) delegate).getRawUserData();
for (File updateFile : findUpdateFiles()) {
- processFile(new FileLineIterator(updateFile, false), rawData);
+ processFile(new FileLineIterator(updateFile, false), rawData, true);
}
return new GenericDataModel(rawData);
@@ -308,13 +308,15 @@
return delimiter;
}
- protected void processFile(FileLineIterator dataOrUpdateFileIterator, FastByIDMap<?> data) {
+ protected void processFile(FileLineIterator dataOrUpdateFileIterator,
+ FastByIDMap<?> data,
+ boolean fromPriorData) {
log.info("Reading file info...");
AtomicInteger count = new AtomicInteger();
while (dataOrUpdateFileIterator.hasNext()) {
String line = dataOrUpdateFileIterator.next();
if (line.length() > 0) {
- processLine(line, data);
+ processLine(line, data, fromPriorData);
int currentCount = count.incrementAndGet();
if (currentCount % 1000000 == 0) {
log.info("Processed {} lines", currentCount);
@@ -341,8 +343,13 @@
* line from input data file
* @param data
* all data read so far, as a mapping from user IDs to preferences
+ * @param fromPriorData an implementation detail -- if true, data will map IDs to
+ * {@link PreferenceArray} since the framework is attempting to read and update raw
+ * data that is already in memory. Otherwise it maps to {@link Collection}s of
+ * {@link Preference}s, since it's reading fresh data. Subclasses must be prepared
+ * to handle this wrinkle.
*/
- protected void processLine(String line, FastByIDMap<?> data) {
+ protected void processLine(String line, FastByIDMap<?> data, boolean fromPriorData) {
if ((line.length() == 0) || (line.charAt(0) == COMMENT_CHAR)) {
return;
@@ -379,7 +386,7 @@
// This is kind of gross but need to handle two types of storage
Object maybePrefs = data.get(userID);
- if (maybePrefs instanceof PreferenceArray) {
+ if (fromPriorData) {
PreferenceArray prefs = (PreferenceArray) maybePrefs;
if (preferenceValueString.length() == 0) {
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterDataModel.java?rev=910951&r1=910950&r2=910951&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterDataModel.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterDataModel.java Wed Feb 17 12:52:25 2010
@@ -57,12 +57,12 @@
protected DataModel buildModel() throws IOException {
FastByIDMap<Collection<Preference>> data = new FastByIDMap<Collection<Preference>>();
FileLineIterator iterator = new FileLineIterator(getDataFile(), false);
- processFile(iterator, data);
+ processFile(iterator, data, false);
return new GenericDataModel(GenericDataModel.toDataMap(data, true));
}
@Override
- protected void processLine(String line, FastByIDMap<?> rawData) {
+ protected void processLine(String line, FastByIDMap<?> rawData, boolean fromPriorData) {
FastByIDMap<Collection<Preference>> data = (FastByIDMap<Collection<Preference>>) rawData;
String[] jokePrefs = line.split(",");
int count = Integer.parseInt(jokePrefs[0]);