You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/11/16 23:07:25 UTC
svn commit: r1035832 - /mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
Author: srowen
Date: Tue Nov 16 22:07:25 2010
New Revision: 1035832
URL: http://svn.apache.org/viewvc?rev=1035832&view=rev
Log:
Improvement to reload logic
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java?rev=1035832&r1=1035831&r2=1035832&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java Tue Nov 16 22:07:25 2010
@@ -22,9 +22,10 @@ import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.Collections;
import java.util.Iterator;
import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
import java.util.concurrent.locks.ReentrantLock;
import java.util.regex.Pattern;
@@ -214,6 +215,7 @@ public class FileDataModel extends Abstr
boolean loadFreshData = (delegate == null) || (newLastModified > lastModified + minReloadIntervalMS);
+ long oldLastUpdateFileModifieid = lastUpdateFileModified;
lastModified = newLastModified;
lastUpdateFileModified = newLastUpdateFileModified;
@@ -227,7 +229,7 @@ public class FileDataModel extends Abstr
FileLineIterator iterator = new FileLineIterator(dataFile, false);
processFile(iterator, data, timestamps, false);
- for (File updateFile : findUpdateFiles()) {
+ for (File updateFile : findUpdateFilesAfter(newLastModified)) {
processFile(new FileLineIterator(updateFile, false), data, timestamps, false);
}
@@ -237,7 +239,7 @@ public class FileDataModel extends Abstr
FastByIDMap<PreferenceArray> rawData = ((GenericDataModel) delegate).getRawUserData();
- for (File updateFile : findUpdateFiles()) {
+ for (File updateFile : findUpdateFilesAfter(Math.max(oldLastUpdateFileModifieid, newLastModified))) {
processFile(new FileLineIterator(updateFile, false), rawData, timestamps, true);
}
@@ -253,7 +255,7 @@ public class FileDataModel extends Abstr
FileLineIterator iterator = new FileLineIterator(dataFile, false);
processFileWithoutID(iterator, data, timestamps);
- for (File updateFile : findUpdateFiles()) {
+ for (File updateFile : findUpdateFilesAfter(newLastModified)) {
processFileWithoutID(new FileLineIterator(updateFile, false), data, timestamps);
}
@@ -263,7 +265,7 @@ public class FileDataModel extends Abstr
FastByIDMap<FastIDSet> rawData = ((GenericBooleanPrefDataModel) delegate).getRawUserData();
- for (File updateFile : findUpdateFiles()) {
+ for (File updateFile : findUpdateFilesAfter(Math.max(oldLastUpdateFileModifieid, newLastModified))) {
processFileWithoutID(new FileLineIterator(updateFile, false), rawData, timestamps);
}
@@ -280,25 +282,26 @@ public class FileDataModel extends Abstr
* data file is /foo/data.txt.gz, you might place update files at /foo/data.1.txt.gz, /foo/data.2.txt.gz,
* etc.
*/
- private Iterable<File> findUpdateFiles() {
+ private Iterable<File> findUpdateFilesAfter(long minimumLastModified) {
String dataFileName = dataFile.getName();
int period = dataFileName.indexOf('.');
String startName = period < 0 ? dataFileName : dataFileName.substring(0, period);
File parentDir = dataFile.getParentFile();
- List<File> updateFiles = new ArrayList<File>();
+ Map<Long, File> modTimeToUpdateFile = new TreeMap<Long,File>();
for (File updateFile : parentDir.listFiles()) {
String updateFileName = updateFile.getName();
- if (updateFileName.startsWith(startName) && !updateFileName.equals(dataFileName)) {
- updateFiles.add(updateFile);
+ if (updateFileName.startsWith(startName)
+ && !updateFileName.equals(dataFileName)
+ && updateFile.lastModified() >= minimumLastModified) {
+ modTimeToUpdateFile.put(updateFile.lastModified(), updateFile);
}
}
- Collections.sort(updateFiles);
- return updateFiles;
+ return modTimeToUpdateFile.values();
}
private long readLastUpdateFileModified() {
long mostRecentModification = Long.MIN_VALUE;
- for (File updateFile : findUpdateFiles()) {
+ for (File updateFile : findUpdateFilesAfter(0L)) {
mostRecentModification = Math.max(mostRecentModification, updateFile.lastModified());
}
return mostRecentModification;