You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by to...@apache.org on 2010/01/08 00:19:39 UTC

svn commit: r897051 - in /hadoop/hdfs/trunk: CHANGES.txt src/java/org/apache/hadoop/hdfs/DFSClient.java src/test/hdfs/org/apache/hadoop/hdfs/TestFSInputChecker.java

Author: tomwhite
Date: Thu Jan  7 23:19:38 2010
New Revision: 897051

URL: http://svn.apache.org/viewvc?rev=897051&view=rev
Log:
HDFS-755. Read multiple checksum chunks at once in DFSInputStream. Contributed by Todd Lipcon.

Modified:
    hadoop/hdfs/trunk/CHANGES.txt
    hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/DFSClient.java
    hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/TestFSInputChecker.java

Modified: hadoop/hdfs/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/CHANGES.txt?rev=897051&r1=897050&r2=897051&view=diff
==============================================================================
--- hadoop/hdfs/trunk/CHANGES.txt (original)
+++ hadoop/hdfs/trunk/CHANGES.txt Thu Jan  7 23:19:38 2010
@@ -30,6 +30,9 @@
 
     HDFS-685. Use the user-to-groups mapping service in the NameNode. (boryas, acmurthy)
 
+    HDFS-755. Read multiple checksum chunks at once in DFSInputStream.
+    (Todd Lipcon via tomwhite)
+
   OPTIMIZATIONS
 
   BUG FIXES

Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/DFSClient.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/DFSClient.java?rev=897051&r1=897050&r2=897051&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/DFSClient.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/DFSClient.java Thu Jan  7 23:19:38 2010
@@ -1503,26 +1503,60 @@
         }
       }
 
-      int chunkLen = Math.min(dataLeft, bytesPerChecksum);
-      
-      if ( chunkLen > 0 ) {
-        // len should be >= chunkLen
-        IOUtils.readFully(in, buf, offset, chunkLen);
-        checksumBytes.get(checksumBuf, 0, checksumSize);
+      // Sanity checks
+      assert len >= bytesPerChecksum;
+      assert checksum != null;
+      assert checksumSize == 0 || (checksumBuf.length % checksumSize == 0);
+
+
+      int checksumsToRead, bytesToRead;
+
+      if (checksumSize > 0) {
+
+        // How many chunks left in our stream - this is a ceiling
+        // since we may have a partial chunk at the end of the file
+        int chunksLeft = (dataLeft - 1) / bytesPerChecksum + 1;
+
+        // How many chunks we can fit in the data buffer
+        //  - note this is a floor since we always read full chunks
+        int chunksCanFit = Math.min(len / bytesPerChecksum,
+                                    checksumBuf.length / checksumSize);
+
+        // How many chunks should we read
+        checksumsToRead = Math.min(chunksLeft, chunksCanFit);
+        // How many bytes should we actually read
+        bytesToRead = Math.min(
+          checksumsToRead * bytesPerChecksum, // full chunks
+          dataLeft); // in case we have a partial chunk
+      } else {
+        // no checksum
+        bytesToRead = Math.min(dataLeft, len);
+        checksumsToRead = 0;
+      }
+
+      if ( bytesToRead > 0 ) {
+        // Assert we have enough space
+        assert bytesToRead <= len;
+        assert checksumBytes.remaining() >= checksumSize * checksumsToRead;
+        assert checksumBuf.length >= checksumSize * checksumsToRead;
+        IOUtils.readFully(in, buf, offset, bytesToRead);
+        checksumBytes.get(checksumBuf, 0, checksumSize * checksumsToRead);
       }
       
-      dataLeft -= chunkLen;
+      dataLeft -= bytesToRead;
+      assert dataLeft >= 0;
+
       lastChunkOffset = chunkOffset;
-      lastChunkLen = chunkLen;
+      lastChunkLen = bytesToRead;
       
-      if ((dataLeft == 0 && isLastPacket) || chunkLen == 0) {
+      if ((dataLeft == 0 && isLastPacket) || bytesToRead == 0) {
         gotEOS = true;
       }
-      if ( chunkLen == 0 ) {
+      if ( bytesToRead == 0 ) {
         return -1;
       }
-      
-      return chunkLen;
+
+      return bytesToRead;
     }
     
     private BlockReader( String file, long blockId, DataInputStream in, 

Modified: hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/TestFSInputChecker.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/TestFSInputChecker.java?rev=897051&r1=897050&r2=897051&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/TestFSInputChecker.java (original)
+++ hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/TestFSInputChecker.java Thu Jan  7 23:19:38 2010
@@ -212,19 +212,26 @@
   private void testChecker(FileSystem fileSys, boolean readCS)
   throws Exception {
     Path file = new Path("try.dat");
-    if( readCS ) {
-      writeFile(fileSys, file);
-    } else {
-      writeFile(fileSys, file);
+    writeFile(fileSys, file);
+
+    try {
+      if (!readCS) {
+        fileSys.setVerifyChecksum(false);
+      }
+
+      stm = fileSys.open(file);
+      checkReadAndGetPos();
+      checkSeek();
+      checkSkip();
+      //checkMark
+      assertFalse(stm.markSupported());
+      stm.close();
+    } finally {
+      if (!readCS) {
+        fileSys.setVerifyChecksum(true);
+      }
+      cleanupFile(fileSys, file);
     }
-    stm = fileSys.open(file);
-    checkReadAndGetPos();
-    checkSeek();
-    checkSkip();
-    //checkMark
-    assertFalse(stm.markSupported());
-    stm.close();
-    cleanupFile(fileSys, file);
   }
   
   private void testFileCorruption(LocalFileSystem fileSys) throws IOException {