You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2008/09/02 18:20:31 UTC

svn commit: r691299 - in /lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common: FileLineIterable.java FileLineIterator.java

Author: srowen
Date: Tue Sep  2 09:20:30 2008
New Revision: 691299

URL: http://svn.apache.org/viewvc?rev=691299&view=rev
Log:
Enable support for more character encodings

Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterable.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterable.java?rev=691299&r1=691298&r2=691299&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterable.java Tue Sep  2 09:20:30 2008
@@ -20,23 +20,36 @@
 import java.io.File;
 import java.io.IOException;
 import java.util.Iterator;
+import java.nio.charset.Charset;
 
 /**
  * Iterable representing the lines of a text file. It can produce an {@link Iterator} over those lines.
- * This assumes the text file is UTF-8 encoded and that its lines are delimited in a manner
+ * This assumes the text file's lines are delimited in a manner
  * consistent with how {@link java.io.BufferedReader} defines lines.
  */
 public final class FileLineIterable implements Iterable<String> {
 
   private final File file;
+  private final Charset encoding;
 
+  /**
+   * Creates a {@link FileLineIterable} over a given file, assuming a UTF-8 encoding.
+   */
   public FileLineIterable(File file) {
+    this(file, Charset.forName("UTF-8"));
+  }
+
+  /**
+   * Creates a {@link FileLineIterable} over a given file, using the given encoding.
+   */
+  public FileLineIterable(File file, Charset encoding) {
     this.file = file;
+    this.encoding = encoding;
   }
 
   public Iterator<String> iterator() {
     try {
-      return new FileLineIterator(file);
+      return new FileLineIterator(file, encoding);
     } catch (IOException ioe) {
       throw new IllegalStateException(ioe);
     }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java?rev=691299&r1=691298&r2=691299&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java Tue Sep  2 09:20:30 2008
@@ -30,8 +30,8 @@
 import java.nio.charset.Charset;
 
 /**
- * Iterates over the lines of a text file. This assumes the text file is UTF-8 encoded
- * and that its lines are delimited in a manner consistent with how {@link BufferedReader}
+ * Iterates over the lines of a text file. This assumes the text file's lines
+ * are delimited in a manner consistent with how {@link BufferedReader}
  * defines lines.
  */
 public final class FileLineIterator implements Iterator<String>, Closeable {
@@ -40,12 +40,24 @@
   private String nextLine;
 
   /**
+   * Creates a {@link FileLineIterator} over a given file, assuming a UTF-8 encoding.
+   *
    * @throws FileNotFoundException if the file does not exist
    * @throws IOException if the file cannot be read
    */
   public FileLineIterator(File file) throws IOException {
+    this(file, Charset.forName("UTF-8"));
+  }
+
+  /**
+   * Creates a {@link FileLineIterator} over a given file, using the given encoding.
+   *
+   * @throws FileNotFoundException if the file does not exist
+   * @throws IOException if the file cannot be read
+   */
+  public FileLineIterator(File file, Charset encoding) throws IOException {
     InputStream is = new FileInputStream(file);
-    reader = new BufferedReader(new InputStreamReader(is, Charset.forName("UTF-8")));
+    reader = new BufferedReader(new InputStreamReader(is, encoding));
     nextLine = reader.readLine();
   }