You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2006/08/25 21:31:41 UTC
svn commit: r436916 - in /lucene/hadoop/trunk: CHANGES.txt src/java/org/apache/hadoop/mapred/TextInputFormat.java
Author: cutting
Date: Fri Aug 25 12:31:41 2006
New Revision: 436916
URL: http://svn.apache.org/viewvc?rev=436916&view=rev
Log:
HADOOP-473. Fix TextInputFormat to correctly handle more EOL formats. Contributed by Dennis Kubes & James White.
Modified:
lucene/hadoop/trunk/CHANGES.txt
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextInputFormat.java
Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=436916&r1=436915&r2=436916&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Fri Aug 25 12:31:41 2006
@@ -69,6 +69,11 @@
scheduling priority for daemons. (Vetle Roeim via cutting)
+17. HADOOP-473. Fix TextInputFormat to correctly handle more EOL
+ formats. Things now work correctly with CR, LF or CRLF.
+ (Dennis Kubes & James White via cutting)
+
+
Release 0.5.0 - 2006-08-04
1. HADOOP-352. Fix shell scripts to use /bin/sh instead of
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextInputFormat.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextInputFormat.java?rev=436916&r1=436915&r2=436916&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextInputFormat.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextInputFormat.java Fri Aug 25 12:31:41 2006
@@ -44,7 +44,16 @@
in.seek(start-1);
while (in.getPos() < end) { // scan to the next newline in the file
char c = (char)in.read();
- if (c == '\r' || c == '\n') {
+ if (c == '\n')
+ break;
+
+ if (c == '\r') {
+ long curPos = in.getPos();
+ char nextC = (char)in.read();
+ if (nextC != '\n') {
+ in.seek(curPos);
+ }
+
break;
}
}
@@ -90,8 +99,18 @@
break;
char c = (char)b; // bug: this assumes eight-bit characters.
- if (c == '\r' || c == '\n')
+ if (c == '\n')
break;
+
+ if (c == '\r') {
+ long curPos = in.getPos();
+ char nextC = (char)in.read();
+ if (nextC != '\n') {
+ in.seek(curPos);
+ }
+
+ break;
+ }
buffer.append(c);
}