You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2008/09/02 18:20:31 UTC
svn commit: r691299 - in
/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common:
FileLineIterable.java FileLineIterator.java
Author: srowen
Date: Tue Sep 2 09:20:30 2008
New Revision: 691299
URL: http://svn.apache.org/viewvc?rev=691299&view=rev
Log:
Enable support for more character encodings
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterable.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterable.java?rev=691299&r1=691298&r2=691299&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterable.java Tue Sep 2 09:20:30 2008
@@ -20,23 +20,36 @@
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
+import java.nio.charset.Charset;
/**
* Iterable representing the lines of a text file. It can produce an {@link Iterator} over those lines.
- * This assumes the text file is UTF-8 encoded and that its lines are delimited in a manner
+ * This assumes the text file's lines are delimited in a manner
* consistent with how {@link java.io.BufferedReader} defines lines.
*/
public final class FileLineIterable implements Iterable<String> {
private final File file;
+ private final Charset encoding;
+ /**
+ * Creates a {@link FileLineIterable} over a given file, assuming a UTF-8 encoding.
+ */
public FileLineIterable(File file) {
+ this(file, Charset.forName("UTF-8"));
+ }
+
+ /**
+ * Creates a {@link FileLineIterable} over a given file, using the given encoding.
+ */
+ public FileLineIterable(File file, Charset encoding) {
this.file = file;
+ this.encoding = encoding;
}
public Iterator<String> iterator() {
try {
- return new FileLineIterator(file);
+ return new FileLineIterator(file, encoding);
} catch (IOException ioe) {
throw new IllegalStateException(ioe);
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java?rev=691299&r1=691298&r2=691299&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java Tue Sep 2 09:20:30 2008
@@ -30,8 +30,8 @@
import java.nio.charset.Charset;
/**
- * Iterates over the lines of a text file. This assumes the text file is UTF-8 encoded
- * and that its lines are delimited in a manner consistent with how {@link BufferedReader}
+ * Iterates over the lines of a text file. This assumes the text file's lines
+ * are delimited in a manner consistent with how {@link BufferedReader}
* defines lines.
*/
public final class FileLineIterator implements Iterator<String>, Closeable {
@@ -40,12 +40,24 @@
private String nextLine;
/**
+ * Creates a {@link FileLineIterator} over a given file, assuming a UTF-8 encoding.
+ *
* @throws FileNotFoundException if the file does not exist
* @throws IOException if the file cannot be read
*/
public FileLineIterator(File file) throws IOException {
+ this(file, Charset.forName("UTF-8"));
+ }
+
+ /**
+ * Creates a {@link FileLineIterator} over a given file, using the given encoding.
+ *
+ * @throws FileNotFoundException if the file does not exist
+ * @throws IOException if the file cannot be read
+ */
+ public FileLineIterator(File file, Charset encoding) throws IOException {
InputStream is = new FileInputStream(file);
- reader = new BufferedReader(new InputStreamReader(is, Charset.forName("UTF-8")));
+ reader = new BufferedReader(new InputStreamReader(is, encoding));
nextLine = reader.readLine();
}