You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by ji...@apache.org on 2012/02/16 19:58:19 UTC
svn commit: r1245118 - in
/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs: CHANGES.txt
src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java
src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java
Author: jitendra
Date: Thu Feb 16 18:58:19 2012
New Revision: 1245118
URL: http://svn.apache.org/viewvc?rev=1245118&view=rev
Log:
HDFS-2655. BlockReaderLocal#skip performs unnecessary IO. Contributed by Brandon Li.
Modified:
hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java
hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java
Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1245118&r1=1245117&r2=1245118&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Thu Feb 16 18:58:19 2012
@@ -132,6 +132,9 @@ Trunk (unreleased changes)
HDFS-2878. Fix TestBlockRecovery and move it back into main test directory.
(todd)
+ HDFS-2655. BlockReaderLocal#skip performs unnecessary IO. (Brandon Li
+ via jitendra)
+
OPTIMIZATIONS
HDFS-2477. Optimize computing the diff between a block report and the
namenode state. (Tomasz Nykiel via hairong)
Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java?rev=1245118&r1=1245117&r2=1245118&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java Thu Feb 16 18:58:19 2012
@@ -369,26 +369,68 @@ class BlockReaderLocal implements BlockR
if (LOG.isDebugEnabled()) {
LOG.debug("skip " + n);
}
+ if (n <= 0) {
+ return 0;
+ }
if (!verifyChecksum) {
return dataIn.skip(n);
}
- // Skip by reading the data so we stay in sync with checksums.
- // This could be implemented more efficiently in the future to
- // skip to the beginning of the appropriate checksum chunk
- // and then only read to the middle of that chunk.
+
+ // caller made sure newPosition is not beyond EOF.
+ int remaining = dataBuff.remaining();
+ int position = dataBuff.position();
+ int newPosition = position + (int)n;
+
+ // if the new offset is already read into dataBuff, just reposition
+ if (n <= remaining) {
+ assert offsetFromChunkBoundary == 0;
+ dataBuff.position(newPosition);
+ return n;
+ }
+
+ // for small gap, read through to keep the data/checksum in sync
+ if (n - remaining <= bytesPerChecksum) {
+ dataBuff.position(position + remaining);
+ if (skipBuf == null) {
+ skipBuf = new byte[bytesPerChecksum];
+ }
+ int ret = read(skipBuf, 0, (int)(n - remaining));
+ return ret;
+ }
+
+ // optimize for big gap: discard the current buffer, skip to
+ // the beginning of the appropriate checksum chunk and then
+ // read to the middle of that chunk to be in sync with checksums.
+ this.offsetFromChunkBoundary = newPosition % bytesPerChecksum;
+ long toskip = n - remaining - this.offsetFromChunkBoundary;
+
+ dataBuff.clear();
+ checksumBuff.clear();
+
+ long dataSkipped = dataIn.skip(toskip);
+ if (dataSkipped != toskip) {
+ throw new IOException("skip error in data input stream");
+ }
+ long checkSumOffset = (dataSkipped / bytesPerChecksum) * checksumSize;
+ if (checkSumOffset > 0) {
+ long skipped = checksumIn.skip(checkSumOffset);
+ if (skipped != checkSumOffset) {
+ throw new IOException("skip error in checksum input stream");
+ }
+ }
+
+ // read into the middle of the chunk
if (skipBuf == null) {
skipBuf = new byte[bytesPerChecksum];
}
- long nSkipped = 0;
- while ( nSkipped < n ) {
- int toSkip = (int)Math.min(n-nSkipped, skipBuf.length);
- int ret = read(skipBuf, 0, toSkip);
- if ( ret <= 0 ) {
- return nSkipped;
- }
- nSkipped += ret;
+ assert skipBuf.length == bytesPerChecksum;
+ assert this.offsetFromChunkBoundary < bytesPerChecksum;
+ int ret = read(skipBuf, 0, this.offsetFromChunkBoundary);
+ if (ret == -1) { // EOS
+ return toskip;
+ } else {
+ return (toskip + ret);
}
- return nSkipped;
}
@Override
Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java?rev=1245118&r1=1245117&r2=1245118&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java Thu Feb 16 18:58:19 2012
@@ -238,7 +238,53 @@ public class TestShortCircuitLocalRead {
cluster.shutdown();
}
}
+
+ @Test
+ public void testSkipWithVerifyChecksum() throws IOException {
+ int size = blockSize;
+ Configuration conf = new Configuration();
+ conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
+ conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, false);
+ conf.set(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY,
+ UserGroupInformation.getCurrentUser().getShortUserName());
+ if (simulatedStorage) {
+ conf.setBoolean(SimulatedFSDataset.CONFIG_PROPERTY_SIMULATED, true);
+ }
+ MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1)
+ .format(true).build();
+ FileSystem fs = cluster.getFileSystem();
+ try {
+ // check that / exists
+ Path path = new Path("/");
+ assertTrue("/ should be a directory", fs.getFileStatus(path)
+ .isDirectory() == true);
+
+ byte[] fileData = AppendTestUtil.randomBytes(seed, size*3);
+ // create a new file in home directory. Do not close it.
+ Path file1 = new Path("filelocal.dat");
+ FSDataOutputStream stm = createFile(fs, file1, 1);
+ // write to file
+ stm.write(fileData);
+ stm.close();
+
+ // now test the skip function
+ FSDataInputStream instm = fs.open(file1);
+ byte[] actual = new byte[fileData.length];
+ // read something from the block first, otherwise BlockReaderLocal.skip()
+ // will not be invoked
+ int nread = instm.read(actual, 0, 3);
+ long skipped = 2*size+3;
+ instm.seek(skipped);
+ nread = instm.read(actual, (int)(skipped + nread), 3);
+ instm.close();
+
+ } finally {
+ fs.close();
+ cluster.shutdown();
+ }
+ }
+
/**
* Test to run benchmarks between shortcircuit read vs regular read with
* specified number of threads simultaneously reading.