You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2008/07/19 19:16:57 UTC
svn commit: r678186 - in /lucene/mahout/trunk/core/src/main:
examples/org/apache/mahout/cf/taste/example/grouplens/
java/org/apache/mahout/cf/taste/impl/common/
java/org/apache/mahout/cf/taste/impl/model/file/
Author: srowen
Date: Sat Jul 19 10:16:57 2008
New Revision: 678186
URL: http://svn.apache.org/viewvc?rev=678186&view=rev
Log:
Added FileLineIterable/Iterator abstraction
Added:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterable.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java
Modified:
lucene/mahout/trunk/core/src/main/examples/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
Modified: lucene/mahout/trunk/core/src/main/examples/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/examples/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java?rev=678186&r1=678185&r2=678186&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/examples/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/examples/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java Sat Jul 19 10:16:57 2008
@@ -19,15 +19,13 @@
import org.apache.mahout.cf.taste.impl.common.FastMap;
import org.apache.mahout.cf.taste.impl.common.IOUtils;
+import org.apache.mahout.cf.taste.impl.common.FileLineIterable;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.model.Item;
-import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
-import java.io.FileNotFoundException;
import java.io.FileOutputStream;
-import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
@@ -53,16 +51,11 @@
public GroupLensDataModel(File ratingsFile, File moviesFile) throws IOException {
super(convertGLFile(ratingsFile, true));
File convertedMoviesFile = convertGLFile(moviesFile, false);
- BufferedReader reader = new BufferedReader(new FileReader(convertedMoviesFile));
movieMap = new FastMap<String, Movie>(5001);
- try {
- for (String line; (line = reader.readLine()) != null;) {
- String[] tokens = line.split(",");
- String id = tokens[0];
- movieMap.put(id, new Movie(id, tokens[1], tokens[2]));
- }
- } finally {
- IOUtils.quietClose(reader);
+ for (String line : new FileLineIterable(convertedMoviesFile)) {
+ String[] tokens = line.split(",");
+ String id = tokens[0];
+ movieMap.put(id, new Movie(id, tokens[1], tokens[2]));
}
}
@@ -80,12 +73,10 @@
File resultFile = new File(new File(System.getProperty("java.io.tmpdir")),
"taste." + (ratings ? "ratings" : "movies") + ".txt");
if (!resultFile.exists()) {
- BufferedReader reader = null;
PrintWriter writer = null;
try {
- reader = new BufferedReader(new FileReader(originalFile), 32768);
writer = new PrintWriter(new FileWriter(resultFile));
- for (String line; (line = reader.readLine()) != null;) {
+ for (String line : new FileLineIterable(originalFile)) {
String convertedLine;
if (ratings) {
// toss the last column of data, which is a timestamp we don't want
@@ -96,15 +87,11 @@
writer.println(convertedLine);
}
writer.flush();
- } catch (FileNotFoundException fnfe) {
- resultFile.delete();
- throw fnfe;
} catch (IOException ioe) {
resultFile.delete();
throw ioe;
} finally {
IOUtils.quietClose(writer);
- IOUtils.quietClose(reader);
}
}
return resultFile;
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterable.java?rev=678186&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterable.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterable.java Sat Jul 19 10:16:57 2008
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.Iterator;
+import java.io.File;
+import java.io.IOException;
+
+/**
+ * Iterable representing the lines of a text file. It can produce an {@link Iterator} over those lines.
+ * This assumes the text file is UTF-8 encoded and that its lines are delimited in a manner
+ * consistent with how {@link java.io.BufferedReader} defines lines.
+ *
+ * <p>Each call to {@link #iterator()} constructs a new {@link FileLineIterator} over the file
+ * given at construction time. {@link Iterable#iterator()} cannot throw a checked exception, so an
+ * {@link IOException} raised while constructing that iterator is rethrown wrapped in an
+ * {@link IllegalStateException}.</p>
+ */
+public final class FileLineIterable implements Iterable<String> {
+
+ private final File file;
+
+ public FileLineIterable(File file) {
+ this.file = file;
+ }
+
+ public Iterator<String> iterator() {
+ try {
+ return new FileLineIterator(file);
+ } catch (IOException ioe) {
+ throw new IllegalStateException(ioe);
+ }
+ }
+
+}
\ No newline at end of file
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java?rev=678186&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java Sat Jul 19 10:16:57 2008
@@ -0,0 +1,93 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+import java.io.File;
+import java.io.InputStreamReader;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.InputStream;
+import java.io.UnsupportedEncodingException;
+import java.io.BufferedReader;
+import java.io.Reader;
+import java.io.IOException;
+import java.io.Closeable;
+
+/**
+ * Iterates over the lines of a text file. This assumes the text file is UTF-8 encoded
+ * and that its lines are delimited in a manner consistent with how {@link BufferedReader}
+ * defines lines.
+ *
+ * <p>The iterator reads one line ahead: the constructor reads the first line, and each call to
+ * {@link #next()} returns the buffered line and then reads the following one. {@link #next()}
+ * closes the underlying reader as soon as that read-ahead finds no further line.
+ * {@link #close()} may also be called directly to stop early; afterwards {@link #hasNext()}
+ * returns false.</p>
+ *
+ * <p>Note that the reader is closed automatically only from within {@link #next()}. If the file
+ * yields no lines at all, or iteration is abandoned before the last line has been returned,
+ * the caller must invoke {@link #close()} to release the reader.</p>
+ *
+ * <p>If the read-ahead inside {@link #next()} fails with an {@link IOException}, the iterator is
+ * closed and a {@link NoSuchElementException} carrying the exception's string form is thrown;
+ * the line that had already been buffered is not returned in that case.</p>
+ */
+public final class FileLineIterator implements Iterator<String>, Closeable {
+
+ private final BufferedReader reader;
+ private String nextLine;
+
+ /**
+ * Opens the file as UTF-8 text and reads its first line, so that {@link #hasNext()} can be
+ * answered without further I/O.
+ *
+ * @param file text file whose lines are to be iterated over
+ * @throws FileNotFoundException if the file does not exist
+ * @throws IOException if the file cannot be read
+ */
+ public FileLineIterator(File file) throws IOException {
+ InputStream is = new FileInputStream(file);
+ Reader fileReader;
+ try {
+ fileReader = new InputStreamReader(is, "UTF8");
+ } catch (UnsupportedEncodingException uee) {
+ throw new AssertionError(uee);
+ }
+ reader = new BufferedReader(fileReader);
+ nextLine = reader.readLine();
+ }
+
+ public boolean hasNext() {
+ return nextLine != null;
+ }
+
+ public String next() {
+ if (nextLine == null) {
+ throw new NoSuchElementException();
+ }
+ String result = nextLine;
+ try {
+ nextLine = reader.readLine();
+ } catch (IOException ioe) {
+ // Tough situation: Iterator.next() cannot throw a checked exception. Best to consider us done:
+ close();
+ throw new NoSuchElementException(ioe.toString());
+ }
+ if (nextLine == null) {
+ close();
+ }
+ return result;
+ }
+
+ /**
+ * Not supported; this iterator only reads lines.
+ *
+ * @throws UnsupportedOperationException always
+ */
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+
+ public void close() {
+ nextLine = null;
+ IOUtils.quietClose(reader);
+ }
+
+}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java?rev=678186&r1=678185&r2=678186&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java Sat Jul 19 10:16:57 2008
@@ -18,8 +18,8 @@
package org.apache.mahout.cf.taste.impl.model.file;
import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.impl.common.IOUtils;
import org.apache.mahout.cf.taste.impl.common.FastMap;
+import org.apache.mahout.cf.taste.impl.common.FileLineIterable;
import org.apache.mahout.cf.taste.impl.model.GenericDataModel;
import org.apache.mahout.cf.taste.impl.model.GenericItem;
import org.apache.mahout.cf.taste.impl.model.GenericPreference;
@@ -31,10 +31,8 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
-import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
@@ -114,21 +112,11 @@
private void processFile(Map<String, List<Preference>> data) throws IOException {
log.info("Reading file info...");
- BufferedReader reader = null;
- try {
- reader = new BufferedReader(new FileReader(dataFile));
- boolean notDone = true;
- while (notDone) {
- String line = reader.readLine();
- if (line != null && line.length() > 0) {
- log.debug("Read line: {}", line);
- processLine(line, data);
- } else {
- notDone = false;
- }
+ for (String line : new FileLineIterable(dataFile)) {
+ if (line.length() > 0) {
+ log.debug("Read line: {}", line);
+ processLine(line, data);
}
- } finally {
- IOUtils.quietClose(reader);
}
}