You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2006/08/25 21:31:41 UTC

svn commit: r436916 - in /lucene/hadoop/trunk: CHANGES.txt src/java/org/apache/hadoop/mapred/TextInputFormat.java

Author: cutting
Date: Fri Aug 25 12:31:41 2006
New Revision: 436916

URL: http://svn.apache.org/viewvc?rev=436916&view=rev
Log:
HADOOP-473.  Fix TextInputFormat to correctly handle more EOL formats.  Contributed by Dennis Kubes & James White.

Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextInputFormat.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=436916&r1=436915&r2=436916&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Fri Aug 25 12:31:41 2006
@@ -69,6 +69,11 @@
     scheduling priority for daemons.  (Vetle Roeim via cutting)
 
 
+17. HADOOP-473.  Fix TextInputFormat to correctly handle more EOL
+    formats.  Things now work correctly with CR, LF or CRLF.
+    (Dennis Kubes & James White via cutting)
+
+
 Release 0.5.0 - 2006-08-04
 
  1. HADOOP-352.  Fix shell scripts to use /bin/sh instead of

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextInputFormat.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextInputFormat.java?rev=436916&r1=436915&r2=436916&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextInputFormat.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextInputFormat.java Fri Aug 25 12:31:41 2006
@@ -44,7 +44,16 @@
       in.seek(start-1);
       while (in.getPos() < end) {    // scan to the next newline in the file
         char c = (char)in.read();
-        if (c == '\r' || c == '\n') {
+        if (c == '\n')
+          break;
+          
+        if (c == '\r') {       
+          long curPos = in.getPos();
+          char nextC = (char)in.read();
+          if (nextC != '\n') {
+            in.seek(curPos);
+          }
+
           break;
         }
       }
@@ -90,8 +99,18 @@
         break;
 
       char c = (char)b;              // bug: this assumes eight-bit characters.
-      if (c == '\r' || c == '\n')
+      if (c == '\n')
         break;
+        
+      if (c == '\r') {       
+        long curPos = in.getPos();
+        char nextC = (char)in.read();
+        if (nextC != '\n') {
+          in.seek(curPos);
+        }
+
+        break;
+      }
 
       buffer.append(c);
     }