Posted to commits@hbase.apache.org by ap...@apache.org on 2016/07/11 19:44:21 UTC
hbase git commit: HBASE-11625 - Verifies data before building HFileBlock. - Adds HFileBlock.Header class which contains information about location of fields. Testing: Adds CorruptedFSReaderImpl to TestChecksum.
Repository: hbase
Updated Branches:
refs/heads/branch-1.1 25c7dee21 -> 79b77e354
HBASE-11625 - Verifies data before building HFileBlock. - Adds HFileBlock.Header class which contains information about location of fields. Testing: Adds CorruptedFSReaderImpl to TestChecksum.
Change-Id: I107e9636b28abb6b15ec329e885f1e31b1b1b988
Signed-off-by: stack <st...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/79b77e35
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/79b77e35
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/79b77e35
Branch: refs/heads/branch-1.1
Commit: 79b77e3542a93d362c5565c98ce8d8e1f0044337
Parents: 25c7dee
Author: Apekshit <ap...@gmail.com>
Authored: Thu May 5 17:05:17 2016 -0700
Committer: Apekshit Sharma <ap...@apache.org>
Committed: Mon Jul 11 11:39:48 2016 -0700
----------------------------------------------------------------------
.../hadoop/hbase/io/hfile/ChecksumUtil.java | 80 ++++----
.../hadoop/hbase/io/hfile/HFileBlock.java | 187 ++++++++++---------
.../hadoop/hbase/io/hfile/TestChecksum.java | 60 ++++--
3 files changed, 190 insertions(+), 137 deletions(-)
----------------------------------------------------------------------
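The heart of the change, before diving into the hunks: header fields are now read at fixed offsets straight out of the raw on-disk buffer, so sizes can be validated before an HFileBlock object is ever constructed. A minimal sketch of that order of operations, with illustrative names (only the Header offset and the exception wording mirror the patch):

    import java.io.IOException;
    import java.nio.ByteBuffer;

    // Sketch only: validate the raw bytes first, build the block object second.
    final class VerifyBeforeBuildSketch {
      // Matches HFileBlock.Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX below.
      static final int ON_DISK_SIZE_WITHOUT_HEADER_INDEX = 8;

      static ByteBuffer verify(byte[] onDiskBlock, int expectedOnDiskSizeWithoutHeader)
          throws IOException {
        ByteBuffer buf = ByteBuffer.wrap(onDiskBlock);
        // Absolute get: reads the field without moving the buffer's position.
        int actual = buf.getInt(ON_DISK_SIZE_WITHOUT_HEADER_INDEX);
        if (actual != expectedOnDiskSizeWithoutHeader) {
          throw new IOException("On-disk size without header provided is "
              + expectedOnDiskSizeWithoutHeader + ", but block header contains " + actual);
        }
        return buf; // only now would the HFileBlock constructor run
      }
    }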
http://git-wip-us.apache.org/repos/asf/hbase/blob/79b77e35/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
index 0e03a42..ecf5f00 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
@@ -22,11 +22,13 @@ import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.zip.Checksum;
+import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
+import org.apache.hadoop.util.DataChecksum;
/**
* Utility methods to compute and validate checksums.
@@ -90,41 +92,37 @@ public class ChecksumUtil {
}
/**
- * Validates that the data in the specified HFileBlock matches the
- * checksum. Generates the checksum for the data and
- * then validate that it matches the value stored in the header.
- * If there is a checksum mismatch, then return false. Otherwise
- * return true.
- * The header is extracted from the specified HFileBlock while the
- * data-to-be-verified is extracted from 'data'.
+ * Validates that the data in the specified HFileBlock matches the checksum. Generates the
+ * checksums for the data and then validates that they match those stored at the end of the data.
+ * @param buffer Contains the data in the following order: HFileBlock header, data, checksums.
+ * @param path Path of the HFile to which the {@code buffer} belongs. Only used for logging.
+ * @param offset offset of the data being validated. Only used for logging.
+ * @param hdrSize Size of the block header in {@code buffer}.
+ * @return True if the checksums match, else false.
*/
- static boolean validateBlockChecksum(Path path, HFileBlock block,
- byte[] data, int hdrSize) throws IOException {
-
- // If this is an older version of the block that does not have
- // checksums, then return false indicating that checksum verification
- // did not succeed. Actually, this methiod should never be called
- // when the minorVersion is 0, thus this is a defensive check for a
- // cannot-happen case. Since this is a cannot-happen case, it is
- // better to return false to indicate a checksum validation failure.
- if (!block.getHFileContext().isUseHBaseChecksum()) {
- return false;
- }
-
- // Get a checksum object based on the type of checksum that is
- // set in the HFileBlock header. A ChecksumType.NULL indicates that
- // the caller is not interested in validating checksums, so we
- // always return true.
- ChecksumType cktype = ChecksumType.codeToType(block.getChecksumType());
+ static boolean validateChecksum(ByteBuffer buffer, Path path, long offset, int hdrSize)
+ throws IOException {
+ // A ChecksumType.NULL indicates that the caller is not interested in validating checksums,
+ // so we always return true.
+ ChecksumType cktype =
+ ChecksumType.codeToType(buffer.get(HFileBlock.Header.CHECKSUM_TYPE_INDEX));
if (cktype == ChecksumType.NULL) {
return true; // No checksum validation needed for this block.
}
- Checksum checksumObject = cktype.getChecksumObject();
- checksumObject.reset();
-
// read in the stored value of the checksum size from the header.
- int bytesPerChecksum = block.getBytesPerChecksum();
-
+ int bytesPerChecksum = buffer.getInt(HFileBlock.Header.BYTES_PER_CHECKSUM_INDEX);
+ int onDiskDataSizeWithHeader =
+ buffer.getInt(HFileBlock.Header.ON_DISK_DATA_SIZE_WITH_HEADER_INDEX);
+
+ if (HFile.LOG.isTraceEnabled()) {
+ HFile.LOG.info("dataLength=" + buffer.capacity()
+ + ", sizeWithHeader=" + onDiskDataSizeWithHeader
+ + ", checksumType=" + cktype.getName()
+ + ", file=" + path.toString()
+ + ", offset=" + offset
+ + ", headerSize=" + hdrSize
+ + ", bytesPerChecksum=" + bytesPerChecksum);
+ }
// bytesPerChecksum is always larger than the size of the header
if (bytesPerChecksum < hdrSize) {
String msg = "Unsupported value of bytesPerChecksum. " +
@@ -133,19 +131,23 @@ public class ChecksumUtil {
HFile.LOG.warn(msg);
return false; // cannot happen case, unable to verify checksum
}
- // Extract the header and compute checksum for the header.
- ByteBuffer hdr = block.getBufferWithHeader();
- if (hdr.hasArray()) {
- checksumObject.update(hdr.array(), hdr.arrayOffset(), hdrSize);
+ byte[] data;
+ if (buffer.hasArray()) {
+ data = buffer.array();
} else {
- checksumObject.update(ByteBufferUtils.toBytes(hdr, 0, hdrSize), 0, hdrSize);
+ data = ByteBufferUtils.toBytes(buffer, 0);
}
+ Checksum checksumObject = cktype.getChecksumObject();
+ checksumObject.reset();
+ // Extract the header and compute checksum for the header.
+ checksumObject.update(data, 0, hdrSize);
+
int off = hdrSize;
int consumed = hdrSize;
- int bytesLeft = block.getOnDiskDataSizeWithHeader() - off;
- int cksumOffset = block.getOnDiskDataSizeWithHeader();
-
+ int cksumOffset = onDiskDataSizeWithHeader;
+ int bytesLeft = cksumOffset - off;
+
// validate each chunk
while (bytesLeft > 0) {
int thisChunkSize = bytesPerChecksum - consumed;
@@ -161,7 +163,7 @@ public class ChecksumUtil {
checksumObject.getValue() +
", total data size " + data.length +
" Checksum data range offset " + off + " len " + count +
- HFileBlock.toStringHeader(block.getBufferReadOnly());
+ HFileBlock.toStringHeader(buffer);
HFile.LOG.warn(msg);
if (generateExceptions) {
throw new IOException(msg); // this is only for unit tests
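For readers unfamiliar with the chunked layout the loop above walks: the checksum region at the end of the block stores one 4-byte value per bytesPerChecksum-sized chunk, and the first chunk covers the header as well as data. A self-contained sketch of the same validation, with java.util.zip.CRC32 standing in for the configured ChecksumType (an assumption for brevity; the real code obtains the Checksum object from ChecksumType):

    import java.nio.ByteBuffer;
    import java.util.zip.CRC32;
    import java.util.zip.Checksum;

    // Sketch: 'data' holds header + block data in [0, onDiskDataSizeWithHeader),
    // followed by one stored 4-byte checksum per chunk.
    final class ChunkedChecksumSketch {
      static boolean validate(byte[] data, int onDiskDataSizeWithHeader, int bytesPerChecksum) {
        Checksum checksumObject = new CRC32();
        ByteBuffer stored = ByteBuffer.wrap(data, onDiskDataSizeWithHeader,
            data.length - onDiskDataSizeWithHeader);
        int off = 0;
        int bytesLeft = onDiskDataSizeWithHeader;
        while (bytesLeft > 0) {
          int count = Math.min(bytesPerChecksum, bytesLeft); // first chunk includes the header
          checksumObject.reset();
          checksumObject.update(data, off, count);
          if ((int) checksumObject.getValue() != stored.getInt()) {
            return false; // mismatch: the reader falls back to HDFS checksum verification
          }
          off += count;
          bytesLeft -= count;
        }
        return true;
      }
    }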
http://git-wip-us.apache.org/repos/asf/hbase/blob/79b77e35/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
index 0a95888..08bc87e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
@@ -156,6 +156,26 @@ public class HFileBlock implements Cacheable {
.registerDeserializer(blockDeserializer);
}
+ // TODO: encapsulate Header related logic in this inner class.
+ static class Header {
+ // Format of header is:
+ // 8 bytes - block magic
+ // 4 bytes int - onDiskSizeWithoutHeader
+ // 4 bytes int - uncompressedSizeWithoutHeader
+ // 8 bytes long - prevBlockOffset
+ // The following 3 are only present if header contains checksum information
+ // 1 byte - checksum type
+ // 4 byte int - bytes per checksum
+ // 4 byte int - onDiskDataSizeWithHeader
+ static final int BLOCK_MAGIC_INDEX = 0;
+ static final int ON_DISK_SIZE_WITHOUT_HEADER_INDEX = 8;
+ static final int UNCOMPRESSED_SIZE_WITHOUT_HEADER_INDEX = 12;
+ static final int PREV_BLOCK_OFFSET_INDEX = 16;
+ static final int CHECKSUM_TYPE_INDEX = 24;
+ static final int BYTES_PER_CHECKSUM_INDEX = 25;
+ static final int ON_DISK_DATA_SIZE_WITH_HEADER_INDEX = 29;
+ }
+
/** Type of block. Header field 0. */
private BlockType blockType;
@@ -251,15 +271,15 @@ public class HFileBlock implements Cacheable {
HFileBlock(ByteBuffer b, boolean usesHBaseChecksum) throws IOException {
b.rewind();
blockType = BlockType.read(b);
- onDiskSizeWithoutHeader = b.getInt();
- uncompressedSizeWithoutHeader = b.getInt();
- prevBlockOffset = b.getLong();
+ onDiskSizeWithoutHeader = b.getInt(Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX);
+ uncompressedSizeWithoutHeader = b.getInt(Header.UNCOMPRESSED_SIZE_WITHOUT_HEADER_INDEX);
+ prevBlockOffset = b.getLong(Header.PREV_BLOCK_OFFSET_INDEX);
HFileContextBuilder contextBuilder = new HFileContextBuilder();
contextBuilder.withHBaseCheckSum(usesHBaseChecksum);
if (usesHBaseChecksum) {
- contextBuilder.withChecksumType(ChecksumType.codeToType(b.get()));
- contextBuilder.withBytesPerCheckSum(b.getInt());
- this.onDiskDataSizeWithHeader = b.getInt();
+ contextBuilder.withChecksumType(ChecksumType.codeToType(b.get(Header.CHECKSUM_TYPE_INDEX)));
+ contextBuilder.withBytesPerCheckSum(b.getInt(Header.BYTES_PER_CHECKSUM_INDEX));
+ this.onDiskDataSizeWithHeader = b.getInt(Header.ON_DISK_DATA_SIZE_WITH_HEADER_INDEX);
} else {
contextBuilder.withChecksumType(ChecksumType.NULL);
contextBuilder.withBytesPerCheckSum(0);
@@ -484,23 +504,21 @@ public class HFileBlock implements Cacheable {
/**
* Called after reading a block with provided onDiskSizeWithHeader.
*/
- private void validateOnDiskSizeWithoutHeader(int expectedOnDiskSizeWithoutHeader)
- throws IOException {
- if (onDiskSizeWithoutHeader != expectedOnDiskSizeWithoutHeader) {
+ private static void validateOnDiskSizeWithoutHeader(int expectedOnDiskSizeWithoutHeader,
+ int actualOnDiskSizeWithoutHeader, ByteBuffer buf, long offset) throws IOException {
+ if (actualOnDiskSizeWithoutHeader != expectedOnDiskSizeWithoutHeader) {
+ // Make the read-only copy here rather than at the call site, so the duplicate is
+ // created only in the failure case instead of on every call.
+ ByteBuffer bufReadOnly = buf.asReadOnlyBuffer();
String dataBegin = null;
- if (buf.hasArray()) {
- dataBegin = Bytes.toStringBinary(buf.array(), buf.arrayOffset(), Math.min(32, buf.limit()));
- } else {
- ByteBuffer bufDup = getBufferReadOnly();
- byte[] dataBeginBytes = new byte[Math.min(32, bufDup.limit() - bufDup.position())];
- bufDup.get(dataBeginBytes);
- dataBegin = Bytes.toStringBinary(dataBeginBytes);
- }
+ byte[] dataBeginBytes = new byte[Math.min(32, bufReadOnly.limit() - bufReadOnly.position())];
+ bufReadOnly.get(dataBeginBytes);
+ dataBegin = Bytes.toStringBinary(dataBeginBytes);
String blockInfoMsg =
"Block offset: " + offset + ", data starts with: " + dataBegin;
throw new IOException("On-disk size without header provided is "
+ expectedOnDiskSizeWithoutHeader + ", but block "
- + "header contains " + onDiskSizeWithoutHeader + ". " +
+ + "header contains " + actualOnDiskSizeWithoutHeader + ". " +
blockInfoMsg);
}
}
@@ -595,13 +613,23 @@ public class HFileBlock implements Cacheable {
}
/** An additional sanity-check in case no compression or encryption is being used. */
- public void assumeUncompressed() throws IOException {
- if (onDiskSizeWithoutHeader != uncompressedSizeWithoutHeader +
- totalChecksumBytes()) {
+ public static void verifyUncompressed(ByteBuffer buf, boolean useHBaseChecksum)
+ throws IOException {
+ int onDiskSizeWithoutHeader = buf.getInt(Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX);
+ int uncompressedSizeWithoutHeader = buf.getInt(Header.UNCOMPRESSED_SIZE_WITHOUT_HEADER_INDEX);
+ int onDiskDataSizeWithHeader;
+ int checksumBytes = 0;
+ if (useHBaseChecksum) {
+ onDiskDataSizeWithHeader = buf.getInt(Header.ON_DISK_DATA_SIZE_WITH_HEADER_INDEX);
+ checksumBytes = (int) ChecksumUtil.numBytes(onDiskDataSizeWithHeader,
+ buf.getInt(Header.BYTES_PER_CHECKSUM_INDEX));
+ }
+
+ if (onDiskSizeWithoutHeader != uncompressedSizeWithoutHeader + checksumBytes) {
throw new IOException("Using no compression but "
+ "onDiskSizeWithoutHeader=" + onDiskSizeWithoutHeader + ", "
+ "uncompressedSizeWithoutHeader=" + uncompressedSizeWithoutHeader
- + ", numChecksumbytes=" + totalChecksumBytes());
+ + ", numChecksumbytes=" + checksumBytes);
}
}
@@ -1378,10 +1406,8 @@ public class HFileBlock implements Cacheable {
* -1 if it could not be determined
* @throws IOException
*/
- protected int readAtOffset(FSDataInputStream istream,
- byte[] dest, int destOffset, int size,
- boolean peekIntoNextBlock, long fileOffset, boolean pread)
- throws IOException {
+ protected int readAtOffset(FSDataInputStream istream, byte[] dest, int destOffset, int size,
+ boolean peekIntoNextBlock, long fileOffset, boolean pread) throws IOException {
if (peekIntoNextBlock &&
destOffset + size + hdrSize > dest.length) {
// We are asked to read the next block's header as well, but there is
@@ -1501,10 +1527,8 @@ public class HFileBlock implements Cacheable {
boolean doVerificationThruHBaseChecksum = streamWrapper.shouldUseHBaseChecksum();
FSDataInputStream is = streamWrapper.getStream(doVerificationThruHBaseChecksum);
- HFileBlock blk = readBlockDataInternal(is, offset,
- onDiskSizeWithHeaderL,
- uncompressedSize, pread,
- doVerificationThruHBaseChecksum);
+ HFileBlock blk = readBlockDataInternal(is, offset, (int) onDiskSizeWithHeaderL,
+ uncompressedSize, pread, doVerificationThruHBaseChecksum);
if (blk == null) {
HFile.LOG.warn("HBase checksum verification failed for file " +
path + " at offset " +
@@ -1530,9 +1554,8 @@ public class HFileBlock implements Cacheable {
// a few more than precisely this number.
is = this.streamWrapper.fallbackToFsChecksum(CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD);
doVerificationThruHBaseChecksum = false;
- blk = readBlockDataInternal(is, offset, onDiskSizeWithHeaderL,
- uncompressedSize, pread,
- doVerificationThruHBaseChecksum);
+ blk = readBlockDataInternal(is, offset, (int) onDiskSizeWithHeaderL, uncompressedSize,
+ pread, doVerificationThruHBaseChecksum);
if (blk != null) {
HFile.LOG.warn("HDFS checksum verification suceeded for file " +
path + " at offset " +
@@ -1562,7 +1585,7 @@ public class HFileBlock implements Cacheable {
* Reads a version 2 block.
*
* @param offset the offset in the stream to read at
- * @param onDiskSizeWithHeaderL the on-disk size of the block, including
+ * @param onDiskSizeWithHeader the on-disk size of the block, including
* the header, or -1 if unknown
* @param uncompressedSize the uncompressed size of the block. Always
* expected to be -1. This parameter is only used in version 1.
@@ -1571,13 +1594,13 @@ public class HFileBlock implements Cacheable {
* If HBase checksum is switched off, then use HDFS checksum.
* @return the HFileBlock or null if there is a HBase checksum mismatch
*/
- private HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
- long onDiskSizeWithHeaderL, int uncompressedSize, boolean pread,
+ protected HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
+ int onDiskSizeWithHeader, int uncompressedSize, boolean pread,
boolean verifyChecksum)
throws IOException {
if (offset < 0) {
throw new IOException("Invalid offset=" + offset + " trying to read "
- + "block (onDiskSize=" + onDiskSizeWithHeaderL
+ + "block (onDiskSize=" + onDiskSizeWithHeader
+ ", uncompressedSize=" + uncompressedSize + ")");
}
@@ -1586,15 +1609,14 @@ public class HFileBlock implements Cacheable {
"the uncompressed size parameter");
}
- if ((onDiskSizeWithHeaderL < hdrSize && onDiskSizeWithHeaderL != -1)
- || onDiskSizeWithHeaderL >= Integer.MAX_VALUE) {
- throw new IOException("Invalid onDisksize=" + onDiskSizeWithHeaderL
+ if ((onDiskSizeWithHeader < hdrSize && onDiskSizeWithHeader != -1)
+ || onDiskSizeWithHeader >= Integer.MAX_VALUE) {
+ throw new IOException("Invalid onDisksize=" + onDiskSizeWithHeader
+ ": expected to be at least " + hdrSize
+ " and at most " + Integer.MAX_VALUE + ", or -1 (offset="
+ offset + ", uncompressedSize=" + uncompressedSize + ")");
}
- int onDiskSizeWithHeader = (int) onDiskSizeWithHeaderL;
// See if we can avoid reading the header. This is desirable, because
// we will not incur a backward seek operation if we have already
// read this block's header as part of the previous read's look-ahead.
@@ -1609,7 +1631,6 @@ public class HFileBlock implements Cacheable {
int nextBlockOnDiskSize = 0;
byte[] onDiskBlock = null;
- HFileBlock b = null;
if (onDiskSizeWithHeader > 0) {
// We know the total on-disk size. Read the entire block into memory,
// then parse the header. This code path is used when
@@ -1635,28 +1656,12 @@ public class HFileBlock implements Cacheable {
} else {
headerBuf = ByteBuffer.wrap(onDiskBlock, 0, hdrSize);
}
- // We know the total on-disk size but not the uncompressed size. Parse the header.
- try {
- // TODO: FIX!!! Expensive parse just to get a length
- b = new HFileBlock(headerBuf, fileContext.isUseHBaseChecksum());
- } catch (IOException ex) {
- // Seen in load testing. Provide comprehensive debug info.
- throw new IOException("Failed to read compressed block at "
- + offset
- + ", onDiskSizeWithoutHeader="
- + onDiskSizeWithHeader
- + ", preReadHeaderSize="
- + hdrSize
- + ", header.length="
- + prefetchedHeader.header.length
- + ", header bytes: "
- + Bytes.toStringBinary(prefetchedHeader.header, 0,
- hdrSize), ex);
- }
// if the caller specifies a onDiskSizeWithHeader, validate it.
- int onDiskSizeWithoutHeader = onDiskSizeWithHeader - hdrSize;
- assert onDiskSizeWithoutHeader >= 0;
- b.validateOnDiskSizeWithoutHeader(onDiskSizeWithoutHeader);
+ int expectedOnDiskSizeWithoutHeader = onDiskSizeWithHeader - hdrSize;
+ int actualOnDiskSizeWithoutHeader =
+ headerBuf.getInt(Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX);
+ validateOnDiskSizeWithoutHeader(expectedOnDiskSizeWithoutHeader,
+ actualOnDiskSizeWithoutHeader, headerBuf, offset);
} else {
// Check headerBuf to see if we have read this block's header as part of
// reading the previous block. This is an optimization of peeking into
@@ -1677,22 +1682,21 @@ public class HFileBlock implements Cacheable {
readAtOffset(is, headerBuf.array(), headerBuf.arrayOffset(),
hdrSize, false, offset, pread);
}
- // TODO: FIX!!! Expensive parse just to get a length
- b = new HFileBlock(headerBuf, fileContext.isUseHBaseChecksum());
- onDiskBlock = new byte[b.getOnDiskSizeWithHeader() + hdrSize];
- // headerBuf is HBB
+ int onDiskSizeWithoutHeader = headerBuf.getInt(Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX);
+ onDiskSizeWithHeader = onDiskSizeWithoutHeader + hdrSize;
+ onDiskBlock = new byte[onDiskSizeWithHeader + hdrSize];
System.arraycopy(headerBuf.array(), headerBuf.arrayOffset(), onDiskBlock, 0, hdrSize);
nextBlockOnDiskSize =
- readAtOffset(is, onDiskBlock, hdrSize, b.getOnDiskSizeWithHeader()
- - hdrSize, true, offset + hdrSize, pread);
- onDiskSizeWithHeader = b.onDiskSizeWithoutHeader + hdrSize;
+ readAtOffset(is, onDiskBlock, hdrSize, onDiskSizeWithHeader - hdrSize, true,
+ offset + hdrSize, pread);
}
+ ByteBuffer onDiskBlockByteBuffer = ByteBuffer.wrap(onDiskBlock, 0, onDiskSizeWithHeader);
if (!fileContext.isCompressedOrEncrypted()) {
- b.assumeUncompressed();
+ verifyUncompressed(headerBuf, fileContext.isUseHBaseChecksum());
}
- if (verifyChecksum && !validateBlockChecksum(b, onDiskBlock, hdrSize)) {
+ if (verifyChecksum && !validateChecksum(offset, onDiskBlockByteBuffer, hdrSize)) {
return null; // checksum mismatch
}
@@ -1700,8 +1704,7 @@ public class HFileBlock implements Cacheable {
// If nextBlockOnDiskSizeWithHeader is not zero, the onDiskBlock already
// contains the header of next block, so no need to set next
// block's header in it.
- b = new HFileBlock(ByteBuffer.wrap(onDiskBlock, 0, onDiskSizeWithHeader),
- this.fileContext.isUseHBaseChecksum());
+ HFileBlock b = new HFileBlock(onDiskBlockByteBuffer, this.fileContext.isUseHBaseChecksum());
b.nextBlockOnDiskSizeWithHeader = nextBlockOnDiskSize;
@@ -1736,14 +1739,21 @@ public class HFileBlock implements Cacheable {
}
/**
- * Generates the checksum for the header as well as the data and
- * then validates that it matches the value stored in the header.
- * If there is a checksum mismatch, then return false. Otherwise
- * return true.
+ * Generates the checksum for the header as well as the data and then validates it.
+ * If the block doesn't use checksums, returns false.
+ * @return True if checksum matches, else false.
*/
- protected boolean validateBlockChecksum(HFileBlock block, byte[] data, int hdrSize)
+ protected boolean validateChecksum(long offset, ByteBuffer data, int hdrSize)
throws IOException {
- return ChecksumUtil.validateBlockChecksum(path, block, data, hdrSize);
+ // If this is an older version of the block that does not have checksums, then return false
+ // indicating that checksum verification did not succeed. Actually, this method should never
+ // be called when the minorVersion is 0, thus this is a defensive check for a cannot-happen
+ // case. Since this is a cannot-happen case, it is better to return false to indicate a
+ // checksum validation failure.
+ if (!fileContext.isUseHBaseChecksum()) {
+ return false;
+ }
+ return ChecksumUtil.validateChecksum(data, path, offset, hdrSize);
}
@Override
@@ -1846,19 +1856,23 @@ public class HFileBlock implements Cacheable {
}
/**
- * Calcuate the number of bytes required to store all the checksums
- * for this block. Each checksum value is a 4 byte integer.
+ * Calculate the number of bytes required to store all the checksums for this block. Each
+ * checksum value is a 4 byte integer ({@link HFileBlock#CHECKSUM_SIZE}).
*/
int totalChecksumBytes() {
+ return HFileBlock.totalChecksumBytes(this.fileContext, onDiskDataSizeWithHeader);
+ }
+
+ private static int totalChecksumBytes(HFileContext fileContext, int onDiskDataSizeWithHeader) {
// If the hfile block has minorVersion 0, then there are no checksum
// data to validate. Similarly, a zero value in this.bytesPerChecksum
// indicates that cached blocks do not have checksum data because
// checksums were already validated when the block was read from disk.
- if (!fileContext.isUseHBaseChecksum() || this.fileContext.getBytesPerChecksum() == 0) {
+ if (!fileContext.isUseHBaseChecksum() || fileContext.getBytesPerChecksum() == 0) {
return 0;
}
return (int) ChecksumUtil.numBytes(onDiskDataSizeWithHeader,
- this.fileContext.getBytesPerChecksum());
+ fileContext.getBytesPerChecksum());
}
/**
@@ -1908,10 +1922,9 @@ public class HFileBlock implements Cacheable {
* This is mostly helpful for debugging. This assumes that the block
* has minor version > 0.
*/
- static String toStringHeader(ByteBuffer buf) throws IOException {
+ public static String toStringHeader(ByteBuffer buf) throws IOException {
byte[] magicBuf = new byte[Math.min(buf.limit() - buf.position(), BlockType.MAGIC_LENGTH)];
buf.get(magicBuf);
- BlockType bt = BlockType.parse(magicBuf, 0, BlockType.MAGIC_LENGTH);
int compressedBlockSizeNoHeader = buf.getInt();
int uncompressedBlockSizeNoHeader = buf.getInt();
long prevBlockOffset = buf.getLong();
@@ -1919,7 +1932,7 @@ public class HFileBlock implements Cacheable {
long bytesPerChecksum = buf.getInt();
long onDiskDataSizeWithHeader = buf.getInt();
return " Header dump: magic: " + Bytes.toString(magicBuf) +
- " blockType " + bt +
+ " blockType " + magicBuf +
" compressedBlockSizeNoHeader " +
compressedBlockSizeNoHeader +
" uncompressedBlockSizeNoHeader " +
http://git-wip-us.apache.org/repos/asf/hbase/blob/79b77e35/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
index 011ddbf..8596bf5 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
@@ -65,6 +65,7 @@ public class TestChecksum {
public void setUp() throws Exception {
fs = HFileSystem.get(TEST_UTIL.getConfiguration());
hfs = (HFileSystem)fs;
+ ChecksumUtil.generateExceptionForChecksumFailureForTest(false);
}
/**
@@ -113,7 +114,7 @@ public class TestChecksum {
.withIncludesTags(useTags)
.withHBaseCheckSum(true)
.build();
- HFileBlock.FSReader hbr = new FSReaderImplTest(is, totalSize, fs, path, meta);
+ HFileBlock.FSReader hbr = new CorruptedFSReaderImpl(is, totalSize, fs, path, meta);
HFileBlock b = hbr.readBlockData(0, -1, -1, pread);
b.sanityCheck();
assertEquals(4936, b.getUncompressedSizeWithoutHeader());
@@ -155,7 +156,7 @@ public class TestChecksum {
HFileSystem newfs = new HFileSystem(TEST_UTIL.getConfiguration(), false);
assertEquals(false, newfs.useHBaseChecksum());
is = new FSDataInputStreamWrapper(newfs, path);
- hbr = new FSReaderImplTest(is, totalSize, newfs, path, meta);
+ hbr = new CorruptedFSReaderImpl(is, totalSize, newfs, path, meta);
b = hbr.readBlockData(0, -1, -1, pread);
is.close();
b.sanityCheck();
@@ -176,7 +177,7 @@ public class TestChecksum {
}
}
- /**
+ /**
* Test different values of bytesPerChecksum
*/
@Test
@@ -274,20 +275,57 @@ public class TestChecksum {
}
/**
- * A class that introduces hbase-checksum failures while
- * reading data from hfiles. This should trigger the hdfs level
- * checksum validations.
+ * This class tests checksum behavior when data is corrupted. It mimics the following
+ * behavior:
+ * - When fs checksum is disabled, hbase may get corrupted data from hdfs. If verifyChecksum
+ * is true, it means hbase checksum is on and fs checksum is off, so we corrupt the data.
+ * - When fs checksum is enabled, hdfs will get a different copy from another node, and will
+ * always return correct data. So we don't corrupt the data when verifyChecksum for hbase is
+ * off.
*/
- static private class FSReaderImplTest extends HFileBlock.FSReaderImpl {
- public FSReaderImplTest(FSDataInputStreamWrapper istream, long fileSize, FileSystem fs,
+ private static class CorruptedFSReaderImpl extends HFileBlock.FSReaderImpl {
+ /**
+ * If set to true, corrupts data read via readAtOffset(...).
+ */
+ boolean corruptDataStream = false;
+
+ public CorruptedFSReaderImpl(FSDataInputStreamWrapper istream, long fileSize, FileSystem fs,
Path path, HFileContext meta) throws IOException {
super(istream, fileSize, (HFileSystem) fs, path, meta);
}
@Override
- protected boolean validateBlockChecksum(HFileBlock block,
- byte[] data, int hdrSize) throws IOException {
- return false; // checksum validation failure
+ protected HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
+ int onDiskSizeWithHeader, int uncompressedSize, boolean pread, boolean verifyChecksum)
+ throws IOException {
+ if (verifyChecksum) {
+ corruptDataStream = true;
+ }
+ HFileBlock b = super.readBlockDataInternal(is, offset, onDiskSizeWithHeader,
+ uncompressedSize, pread, verifyChecksum);
+ corruptDataStream = false;
+ return b;
+ }
+
+ @Override
+ protected int readAtOffset(FSDataInputStream istream, byte [] dest, int destOffset, int size,
+ boolean peekIntoNextBlock, long fileOffset, boolean pread) throws IOException {
+ int returnValue = super.readAtOffset(istream, dest, destOffset, size, peekIntoNextBlock,
+ fileOffset, pread);
+ if (!corruptDataStream) {
+ return returnValue;
+ }
+ // Corrupt the 3rd byte of the next block's header magic.
+ if (peekIntoNextBlock) {
+ dest[destOffset + size + 3] = 0b00000000;
+ }
+ // We might be reading this block's header too; corrupt it.
+ dest[destOffset + 1] = 0b00000000;
+ // Corrupt non-header data.
+ if (size > hdrSize) {
+ dest[destOffset + hdrSize + 1] = 0b00000000;
+ }
+ return returnValue;
}
}
}
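The corruption pattern in CorruptedFSReaderImpl works because flipping a single byte inside a checksummed chunk changes that chunk's computed checksum. A standalone illustration, again assuming CRC32 rather than any particular HBase ChecksumType:

    import java.util.zip.CRC32;

    // One corrupted byte is enough to make a chunk's checksum mismatch, which is
    // what drives the expected validation failures (and HDFS fallback) in the test.
    final class CorruptionDemo {
      public static void main(String[] args) {
        byte[] chunk = "DATABLK*some block payload".getBytes();
        CRC32 crc = new CRC32();
        crc.update(chunk, 0, chunk.length);
        long before = crc.getValue();

        chunk[3] = 0;   // corrupt the 3rd byte past the start, as the test reader does
        crc.reset();
        crc.update(chunk, 0, chunk.length);
        System.out.println("match=" + (before == crc.getValue()));  // prints match=false
      }
    }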