You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2008/07/19 19:16:57 UTC

svn commit: r678186 - in /lucene/mahout/trunk/core/src/main: examples/org/apache/mahout/cf/taste/example/grouplens/ java/org/apache/mahout/cf/taste/impl/common/ java/org/apache/mahout/cf/taste/impl/model/file/

Author: srowen
Date: Sat Jul 19 10:16:57 2008
New Revision: 678186

URL: http://svn.apache.org/viewvc?rev=678186&view=rev
Log:
Added FileLineIterable/Iterator abstraction

Added:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterable.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java
Modified:
    lucene/mahout/trunk/core/src/main/examples/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java

Modified: lucene/mahout/trunk/core/src/main/examples/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/examples/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java?rev=678186&r1=678185&r2=678186&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/examples/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/examples/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java Sat Jul 19 10:16:57 2008
@@ -19,15 +19,13 @@
 
 import org.apache.mahout.cf.taste.impl.common.FastMap;
 import org.apache.mahout.cf.taste.impl.common.IOUtils;
+import org.apache.mahout.cf.taste.impl.common.FileLineIterable;
 import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
 import org.apache.mahout.cf.taste.model.Item;
 
-import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileInputStream;
-import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
-import java.io.FileReader;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.io.InputStream;
@@ -53,16 +51,11 @@
   public GroupLensDataModel(File ratingsFile, File moviesFile) throws IOException {
     super(convertGLFile(ratingsFile, true));
     File convertedMoviesFile = convertGLFile(moviesFile, false);
-    BufferedReader reader = new BufferedReader(new FileReader(convertedMoviesFile));
     movieMap = new FastMap<String, Movie>(5001);
-    try {
-      for (String line; (line = reader.readLine()) != null;) {
-        String[] tokens = line.split(",");
-        String id = tokens[0];
-        movieMap.put(id, new Movie(id, tokens[1], tokens[2]));
-      }
-    } finally {
-      IOUtils.quietClose(reader);
+    for (String line : new FileLineIterable(convertedMoviesFile)) {
+      String[] tokens = line.split(",");
+      String id = tokens[0];
+      movieMap.put(id, new Movie(id, tokens[1], tokens[2]));
     }
   }
 
@@ -80,12 +73,10 @@
     File resultFile = new File(new File(System.getProperty("java.io.tmpdir")),
                                      "taste." + (ratings ? "ratings" : "movies") + ".txt");
     if (!resultFile.exists()) {
-      BufferedReader reader = null;
       PrintWriter writer = null;
       try {
-        reader = new BufferedReader(new FileReader(originalFile), 32768);
         writer = new PrintWriter(new FileWriter(resultFile));
-        for (String line; (line = reader.readLine()) != null;) {
+        for (String line : new FileLineIterable(originalFile)) {
           String convertedLine;
           if (ratings) {
             // toss the last column of data, which is a timestamp we don't want
@@ -96,15 +87,11 @@
           writer.println(convertedLine);
         }
         writer.flush();
-      } catch (FileNotFoundException fnfe) {
-        resultFile.delete();
-        throw fnfe;
       } catch (IOException ioe) {
         resultFile.delete();
         throw ioe;
       } finally {
         IOUtils.quietClose(writer);
-        IOUtils.quietClose(reader);
       }
     }
     return resultFile;

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterable.java?rev=678186&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterable.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterable.java Sat Jul 19 10:16:57 2008
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.Iterator;
+import java.io.File;
+import java.io.IOException;
+
+/**
+ * {@link Iterable} over the lines of a text file; every call to {@link #iterator()} opens the file anew.
+ * The file is assumed to be UTF-8 encoded, with lines delimited as {@link java.io.BufferedReader} defines
+ * them. {@link #iterator()} throws {@link IllegalStateException} if the file cannot be opened or first read.
+ */
+public final class FileLineIterable implements Iterable<String> {
+
+  private final File file;
+
+  public FileLineIterable(File file) {
+    this.file = file;
+  }
+
+  public Iterator<String> iterator() {
+    try {
+      return new FileLineIterator(file);
+    } catch (IOException ioe) {
+      throw new IllegalStateException(ioe);
+    }
+  }
+
+}
\ No newline at end of file

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java?rev=678186&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java Sat Jul 19 10:16:57 2008
@@ -0,0 +1,93 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+import java.io.File;
+import java.io.InputStreamReader;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.InputStream;
+import java.io.UnsupportedEncodingException;
+import java.io.BufferedReader;
+import java.io.Reader;
+import java.io.IOException;
+import java.io.Closeable;
+
+/**
+ * Iterates over the lines of a text file, assumed UTF-8 encoded and delimited as {@link BufferedReader}
+ * defines lines. The reader is closed automatically once the last line has been returned or a read
+ * fails; call {@link #close()} to release it earlier if iteration is abandoned part-way through.
+ */
+public final class FileLineIterator implements Iterator<String>, Closeable {
+
+  private final BufferedReader reader;
+  private String nextLine;
+
+  /**
+   * @throws FileNotFoundException if the file cannot be opened for reading
+   * @throws IOException if reading the first line fails (the constructor reads one line ahead)
+   */
+  public FileLineIterator(File file) throws IOException {
+    InputStream is = new FileInputStream(file);
+    Reader fileReader;
+    try {
+      fileReader = new InputStreamReader(is, "UTF8");
+    } catch (UnsupportedEncodingException uee) {
+      throw new AssertionError(uee);
+    }
+    reader = new BufferedReader(fileReader);
+    nextLine = reader.readLine();
+  }
+
+  public boolean hasNext() {
+    return nextLine != null;
+  }
+
+  public String next() {
+    if (nextLine == null) {
+      throw new NoSuchElementException();
+    }
+    String result = nextLine;
+    try {
+      nextLine = reader.readLine();
+    } catch (IOException ioe) {
+      // Iterator.next() cannot throw IOException: close the reader and end the iteration instead.
+      close();
+      throw new NoSuchElementException(ioe.toString());
+    }
+    if (nextLine == null) {
+      close();
+    }
+    return result;
+  }
+
+  /**
+   * @throws UnsupportedOperationException always; removal is not supported by this iterator
+   */
+  public void remove() {
+    throw new UnsupportedOperationException();
+  }
+
+  public void close() {
+    nextLine = null;
+    IOUtils.quietClose(reader);
+  }
+
+}

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java?rev=678186&r1=678185&r2=678186&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java Sat Jul 19 10:16:57 2008
@@ -18,8 +18,8 @@
 package org.apache.mahout.cf.taste.impl.model.file;
 
 import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.impl.common.IOUtils;
 import org.apache.mahout.cf.taste.impl.common.FastMap;
+import org.apache.mahout.cf.taste.impl.common.FileLineIterable;
 import org.apache.mahout.cf.taste.impl.model.GenericDataModel;
 import org.apache.mahout.cf.taste.impl.model.GenericItem;
 import org.apache.mahout.cf.taste.impl.model.GenericPreference;
@@ -31,10 +31,8 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileNotFoundException;
-import java.io.FileReader;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
@@ -114,21 +112,11 @@
 
   private void processFile(Map<String, List<Preference>> data) throws IOException {
     log.info("Reading file info...");
-    BufferedReader reader = null;
-    try {
-      reader = new BufferedReader(new FileReader(dataFile));
-      boolean notDone = true;
-      while (notDone) {
-        String line = reader.readLine();
-        if (line != null && line.length() > 0) {
-          log.debug("Read line: {}", line);
-          processLine(line, data);
-        } else {
-          notDone = false;
-        }
+    for (String line : new FileLineIterable(dataFile)) {
+      if (line.length() > 0) {
+        log.debug("Read line: {}", line);
+        processLine(line, data);
       }
-    } finally {
-      IOUtils.quietClose(reader);
     }
   }