Posted to commits@hbase.apache.org by an...@apache.org on 2014/09/18 15:03:20 UTC
[2/2] git commit: HBASE-11874 Support Cell to be passed to StoreFile.Writer rather than KeyValue.
HBASE-11874 Support Cell to be passed to StoreFile.Writer rather than KeyValue.
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/68131674
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/68131674
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/68131674
Branch: refs/heads/master
Commit: 68131674235373d07c1683bb5937c7a81f6ffeac
Parents: fdbb5e9
Author: anoopsjohn <an...@gmail.com>
Authored: Thu Sep 18 18:32:45 2014 +0530
Committer: anoopsjohn <an...@gmail.com>
Committed: Thu Sep 18 18:32:45 2014 +0530
----------------------------------------------------------------------
.../java/org/apache/hadoop/hbase/CellUtil.java | 124 +++++++++++++++++++
.../org/apache/hadoop/hbase/KeyValueUtil.java | 19 ++-
.../io/encoding/BufferedDataBlockEncoder.java | 18 +--
.../io/encoding/CopyKeyDataBlockEncoder.java | 21 ++--
.../hbase/io/encoding/DataBlockEncoder.java | 5 +-
.../hbase/io/encoding/DiffKeyDeltaEncoder.java | 67 +++++-----
.../hadoop/hbase/io/encoding/EncodingState.java | 6 +-
.../hbase/io/encoding/FastDiffDeltaEncoder.java | 78 ++++++------
.../io/encoding/PrefixKeyDeltaEncoder.java | 71 +++++++++--
.../org/apache/hadoop/hbase/TestCellUtil.java | 45 +++++++
.../hbase/codec/prefixtree/PrefixTreeCodec.java | 10 +-
.../hbase/io/hfile/AbstractHFileWriter.java | 35 +++---
.../org/apache/hadoop/hbase/io/hfile/HFile.java | 9 +-
.../hadoop/hbase/io/hfile/HFileBlock.java | 9 +-
.../hbase/io/hfile/HFileDataBlockEncoder.java | 6 +-
.../io/hfile/HFileDataBlockEncoderImpl.java | 5 +-
.../hadoop/hbase/io/hfile/HFileWriterV2.java | 31 +++--
.../hadoop/hbase/io/hfile/HFileWriterV3.java | 11 +-
.../hbase/io/hfile/NoOpDataBlockEncoder.java | 21 ++--
.../hadoop/hbase/regionserver/StoreFile.java | 61 ++++-----
.../regionserver/StripeMultiFileWriter.java | 88 ++++++-------
.../hbase/regionserver/TimeRangeTracker.java | 11 +-
.../regionserver/compactions/Compactor.java | 5 +-
.../hbase/util/CompoundBloomFilterWriter.java | 2 +-
.../hadoop/hbase/io/hfile/KeySampler.java | 16 ++-
.../apache/hadoop/hbase/io/hfile/TestHFile.java | 3 +-
.../io/hfile/TestHFileBlockCompatibility.java | 6 +-
.../hbase/io/hfile/TestHFileEncryption.java | 3 +-
.../TestHFileInlineToRootChunkConversion.java | 19 ++-
.../hadoop/hbase/io/hfile/TestHFileSeek.java | 11 +-
30 files changed, 540 insertions(+), 276 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/68131674/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java
----------------------------------------------------------------------
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java
index 1ae2108..b696f41 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hbase;
+import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Iterator;
@@ -29,6 +30,7 @@ import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hbase.KeyValue.Type;
import org.apache.hadoop.hbase.io.HeapSize;
+import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
@@ -560,4 +562,126 @@ public final class CellUtil {
return cell.getRowLength() + cell.getFamilyLength() + cell.getQualifierLength()
+ cell.getValueLength() + cell.getTagsLength() + KeyValue.TIMESTAMP_TYPE_SIZE;
}
+
+ /**
+ * Writes the Cell's key part as it would have been serialized in a KeyValue. The format is
+ * <2 bytes rk len><rk><1 byte cf len><cf><qualifier><8 bytes timestamp><1 byte type>
+ * @param cell
+ * @param out
+ * @throws IOException
+ */
+ public static void writeFlatKey(Cell cell, DataOutputStream out) throws IOException {
+ short rowLen = cell.getRowLength();
+ out.writeShort(rowLen);
+ out.write(cell.getRowArray(), cell.getRowOffset(), rowLen);
+ byte fLen = cell.getFamilyLength();
+ out.writeByte(fLen);
+ out.write(cell.getFamilyArray(), cell.getFamilyOffset(), fLen);
+ out.write(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength());
+ out.writeLong(cell.getTimestamp());
+ out.writeByte(cell.getTypeByte());
+ }
+
+ /**
+ * Write the row key, excluding the common prefix part.
+ * @param cell
+ * @param rLen
+ * @param commonPrefix
+ * @param out
+ * @throws IOException
+ */
+ public static void writeRowKeyExcludingCommon(Cell cell, short rLen, int commonPrefix,
+ DataOutputStream out) throws IOException {
+ if (commonPrefix == 0) {
+ out.writeShort(rLen);
+ } else if (commonPrefix == 1) {
+ out.writeByte((byte) rLen);
+ commonPrefix--;
+ } else {
+ commonPrefix -= KeyValue.ROW_LENGTH_SIZE;
+ }
+ if (rLen > commonPrefix) {
+ out.write(cell.getRowArray(), cell.getRowOffset() + commonPrefix, rLen - commonPrefix);
+ }
+ }
+
+ /**
+ * Find the length of the common prefix in the keys of the given cells, considering each key as
+ * the byte[] it would be if serialized as a {@link KeyValue}. The key format is <2 bytes rk
+ * len><rk><1 byte cf len><cf><qualifier><8 bytes timestamp><1 byte type>
+ * @param c1
+ * the first cell
+ * @param c2
+ * the second cell
+ * @param bypassFamilyCheck
+ * when true, assume the family bytes are the same in both cells. Pass true when dealing
+ * with Cells in the same CF so as to avoid some checks
+ * @param withTsType
+ * when true, also compare the timestamp and type bytes.
+ * @return length of common prefix
+ */
+ public static int findCommonPrefixInFlatKey(Cell c1, Cell c2, boolean bypassFamilyCheck,
+ boolean withTsType) {
+ // Compare the 2 bytes in RK length part
+ short rLen1 = c1.getRowLength();
+ short rLen2 = c2.getRowLength();
+ int commonPrefix = KeyValue.ROW_LENGTH_SIZE;
+ if (rLen1 != rLen2) {
+ // early out when the RK length itself is not matching
+ return ByteBufferUtils.findCommonPrefix(Bytes.toBytes(rLen1), 0, KeyValue.ROW_LENGTH_SIZE,
+ Bytes.toBytes(rLen2), 0, KeyValue.ROW_LENGTH_SIZE);
+ }
+ // Compare the RKs
+ int rkCommonPrefix = ByteBufferUtils.findCommonPrefix(c1.getRowArray(), c1.getRowOffset(),
+ rLen1, c2.getRowArray(), c2.getRowOffset(), rLen2);
+ commonPrefix += rkCommonPrefix;
+ if (rkCommonPrefix != rLen1) {
+ // Early out when RK is not fully matching.
+ return commonPrefix;
+ }
+ // Compare 1 byte CF length part
+ byte fLen1 = c1.getFamilyLength();
+ if (bypassFamilyCheck) {
+ // This flag will be true when the caller is sure that the family is the same for both cells.
+ // Just increment commonPrefix by the family part
+ commonPrefix += KeyValue.FAMILY_LENGTH_SIZE + fLen1;
+ } else {
+ byte fLen2 = c2.getFamilyLength();
+ if (fLen1 != fLen2) {
+ // early out when the CF length itself is not matching
+ return commonPrefix;
+ }
+ // CF lengths are the same, so there is one more common byte in the key part
+ commonPrefix += KeyValue.FAMILY_LENGTH_SIZE;
+ // Compare the CF names
+ int fCommonPrefix = ByteBufferUtils.findCommonPrefix(c1.getFamilyArray(),
+ c1.getFamilyOffset(), fLen1, c2.getFamilyArray(), c2.getFamilyOffset(), fLen2);
+ commonPrefix += fCommonPrefix;
+ if (fCommonPrefix != fLen1) {
+ return commonPrefix;
+ }
+ }
+ // Compare the Qualifiers
+ int qLen1 = c1.getQualifierLength();
+ int qLen2 = c2.getQualifierLength();
+ int qCommon = ByteBufferUtils.findCommonPrefix(c1.getQualifierArray(), c1.getQualifierOffset(),
+ qLen1, c2.getQualifierArray(), c2.getQualifierOffset(), qLen2);
+ commonPrefix += qCommon;
+ if (!withTsType || Math.max(qLen1, qLen2) != qCommon) {
+ return commonPrefix;
+ }
+ // Compare the timestamp parts
+ int tsCommonPrefix = ByteBufferUtils.findCommonPrefix(Bytes.toBytes(c1.getTimestamp()), 0,
+ KeyValue.TIMESTAMP_SIZE, Bytes.toBytes(c2.getTimestamp()), 0, KeyValue.TIMESTAMP_SIZE);
+ commonPrefix += tsCommonPrefix;
+ if (tsCommonPrefix != KeyValue.TIMESTAMP_SIZE) {
+ return commonPrefix;
+ }
+ // Compare the type
+ if (c1.getTypeByte() == c2.getTypeByte()) {
+ commonPrefix += KeyValue.TYPE_SIZE;
+ }
+ return commonPrefix;
+ }
}
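
To make the new flat-key helpers concrete, here is a minimal sketch of using writeFlatKey() and findCommonPrefixInFlatKey() together (the row/family/qualifier values are invented for illustration; only the hbase-common classes touched by this commit are assumed):

    import java.io.ByteArrayOutputStream;
    import java.io.DataOutputStream;
    import org.apache.hadoop.hbase.CellUtil;
    import org.apache.hadoop.hbase.KeyValue;
    import org.apache.hadoop.hbase.KeyValueUtil;
    import org.apache.hadoop.hbase.util.Bytes;

    public class FlatKeyExample {
      public static void main(String[] args) throws Exception {
        KeyValue kv1 = new KeyValue(Bytes.toBytes("r1"), Bytes.toBytes("f"),
            Bytes.toBytes("q1"), Bytes.toBytes("v"));
        KeyValue kv2 = new KeyValue(Bytes.toBytes("r1"), Bytes.toBytes("f"),
            Bytes.toBytes("q2"), Bytes.toBytes("v"));
        // Serialize kv1's key in the flat KeyValue key layout:
        // <2 bytes rk len><rk><1 byte cf len><cf><qualifier><8 bytes ts><1 byte type>
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        CellUtil.writeFlatKey(kv1, new DataOutputStream(bos));
        System.out.println(bos.size() == KeyValueUtil.keyLength(kv1)); // true
        // Common prefix: 2 (rk len) + 2 ("r1") + 1 (cf len) + 1 ("f") + 1 ("q") = 7
        System.out.println(CellUtil.findCommonPrefixInFlatKey(kv1, kv2, true, true)); // 7
      }
    }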
http://git-wip-us.apache.org/repos/asf/hbase/blob/68131674/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueUtil.java
----------------------------------------------------------------------
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueUtil.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueUtil.java
index 3e47a6f..230d169 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueUtil.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueUtil.java
@@ -44,6 +44,11 @@ public class KeyValueUtil {
/**************** length *********************/
+ /**
+ * Returns the number of bytes this cell would take up if serialized as in {@link KeyValue}
+ * @param cell
+ * @return the length
+ */
public static int length(final Cell cell) {
return length(cell.getRowLength(), cell.getFamilyLength(), cell.getQualifierLength(),
cell.getValueLength(), cell.getTagsLength(), true);
@@ -56,7 +61,13 @@ public class KeyValueUtil {
return (int) (KeyValue.getKeyValueDataStructureSize(rlen, flen, qlen, vlen));
}
- protected static int keyLength(final Cell cell) {
+ /**
+ * Returns the number of bytes this cell's key part would take up if serialized as in
+ * {@link KeyValue}. The key includes the rowkey, family, qualifier, timestamp and type.
+ * @param cell
+ * @return the key length
+ */
+ public static int keyLength(final Cell cell) {
return keyLength(cell.getRowLength(), cell.getFamilyLength(), cell.getQualifierLength());
}
@@ -93,7 +104,7 @@ public class KeyValueUtil {
public static ByteBuffer copyKeyToNewByteBuffer(final Cell cell) {
byte[] bytes = new byte[keyLength(cell)];
- appendKeyToByteArrayWithoutValue(cell, bytes, 0);
+ appendKeyTo(cell, bytes, 0);
ByteBuffer buffer = ByteBuffer.wrap(bytes);
buffer.position(buffer.limit());//make it look as if each field were appended
return buffer;
@@ -106,7 +117,7 @@ public class KeyValueUtil {
return backingBytes;
}
- protected static int appendKeyToByteArrayWithoutValue(final Cell cell, final byte[] output,
+ public static int appendKeyTo(final Cell cell, final byte[] output,
final int offset) {
int nextOffset = offset;
nextOffset = Bytes.putShort(output, nextOffset, cell.getRowLength());
@@ -126,7 +137,7 @@ public class KeyValueUtil {
int pos = offset;
pos = Bytes.putInt(output, pos, keyLength(cell));
pos = Bytes.putInt(output, pos, cell.getValueLength());
- pos = appendKeyToByteArrayWithoutValue(cell, output, pos);
+ pos = appendKeyTo(cell, output, pos);
pos = CellUtil.copyValueTo(cell, output, pos);
if ((cell.getTagsLength() > 0)) {
pos = Bytes.putAsShort(output, pos, cell.getTagsLength());
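
The keyLength()/appendKeyTo() pair made public here is what the hfile writer below uses to copy a Cell's key without reaching into getBuffer(). A small usage sketch (values invented):

    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.KeyValue;
    import org.apache.hadoop.hbase.KeyValueUtil;
    import org.apache.hadoop.hbase.util.Bytes;

    public class KeyCopyExample {
      // Copy just the key part of any Cell into a fresh byte[], KeyValue-style.
      static byte[] copyKey(Cell cell) {
        byte[] key = new byte[KeyValueUtil.keyLength(cell)];
        KeyValueUtil.appendKeyTo(cell, key, 0);
        return key;
      }

      public static void main(String[] args) {
        KeyValue kv = new KeyValue(Bytes.toBytes("row"), Bytes.toBytes("f"),
            Bytes.toBytes("q"), Bytes.toBytes("v"));
        System.out.println(copyKey(kv).length == kv.getKeyLength()); // true
      }
    }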
http://git-wip-us.apache.org/repos/asf/hbase/blob/68131674/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java
----------------------------------------------------------------------
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java
index 4772358..6a821d3 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java
@@ -835,17 +835,17 @@ abstract class BufferedDataBlockEncoder implements DataBlockEncoder {
}
/**
- * @param kv
+ * @param cell
* @param out
* @param encodingCtx
* @return unencoded size added
* @throws IOException
*/
- protected final int afterEncodingKeyValue(KeyValue kv, DataOutputStream out,
+ protected final int afterEncodingKeyValue(Cell cell, DataOutputStream out,
HFileBlockDefaultEncodingContext encodingCtx) throws IOException {
int size = 0;
if (encodingCtx.getHFileContext().isIncludesTags()) {
- int tagsLength = kv.getTagsLength();
+ int tagsLength = cell.getTagsLength();
ByteBufferUtils.putCompressedInt(out, tagsLength);
// There are some tags to be written
if (tagsLength > 0) {
@@ -854,16 +854,16 @@ abstract class BufferedDataBlockEncoder implements DataBlockEncoder {
// the tags using Dictionary compression in such a case
if (tagCompressionContext != null) {
tagCompressionContext
- .compressTags(out, kv.getTagsArray(), kv.getTagsOffset(), tagsLength);
+ .compressTags(out, cell.getTagsArray(), cell.getTagsOffset(), tagsLength);
} else {
- out.write(kv.getTagsArray(), kv.getTagsOffset(), tagsLength);
+ out.write(cell.getTagsArray(), cell.getTagsOffset(), tagsLength);
}
}
size += tagsLength + KeyValue.TAGS_LENGTH_SIZE;
}
if (encodingCtx.getHFileContext().isIncludesMvcc()) {
// Copy memstore timestamp from the byte buffer to the output stream.
- long memstoreTS = kv.getMvccVersion();
+ long memstoreTS = cell.getSequenceId();
WritableUtils.writeVLong(out, memstoreTS);
// TODO use a writeVLong which returns the #bytes written so that 2 time parsing can be
// avoided.
@@ -973,16 +973,16 @@ abstract class BufferedDataBlockEncoder implements DataBlockEncoder {
}
@Override
- public int encode(KeyValue kv, HFileBlockEncodingContext encodingCtx, DataOutputStream out)
+ public int encode(Cell cell, HFileBlockEncodingContext encodingCtx, DataOutputStream out)
throws IOException {
BufferedDataBlockEncodingState state = (BufferedDataBlockEncodingState) encodingCtx
.getEncodingState();
- int encodedKvSize = internalEncode(kv, (HFileBlockDefaultEncodingContext) encodingCtx, out);
+ int encodedKvSize = internalEncode(cell, (HFileBlockDefaultEncodingContext) encodingCtx, out);
state.unencodedDataSizeWritten += encodedKvSize;
return encodedKvSize;
}
- public abstract int internalEncode(KeyValue kv, HFileBlockDefaultEncodingContext encodingCtx,
+ public abstract int internalEncode(Cell cell, HFileBlockDefaultEncodingContext encodingCtx,
DataOutputStream out) throws IOException;
@Override
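
For context on afterEncodingKeyValue(): every buffered encoder appends the same per-cell trailer after the encoded key/value, and this change only swaps the KeyValue accessors for Cell ones. A standalone sketch of the trailer layout the method writes (a rewrite for illustration, not the class itself):

    import java.io.DataOutputStream;
    import java.io.IOException;
    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.KeyValue;
    import org.apache.hadoop.hbase.util.ByteBufferUtils;
    import org.apache.hadoop.io.WritableUtils;

    public class CellTrailer {
      // Trailer: [compressed int: tags length][tag bytes][vlong: sequence id]
      static int write(Cell cell, DataOutputStream out, boolean tags, boolean mvcc)
          throws IOException {
        int size = 0;
        if (tags) {
          int tagsLength = cell.getTagsLength();
          ByteBufferUtils.putCompressedInt(out, tagsLength);
          if (tagsLength > 0) {
            out.write(cell.getTagsArray(), cell.getTagsOffset(), tagsLength);
          }
          size += tagsLength + KeyValue.TAGS_LENGTH_SIZE;
        }
        if (mvcc) {
          WritableUtils.writeVLong(out, cell.getSequenceId());
          size += WritableUtils.getVIntSize(cell.getSequenceId());
        }
        return size;
      }
    }

(The real method additionally routes tags through the TagCompressionContext when one is configured.)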
http://git-wip-us.apache.org/repos/asf/hbase/blob/68131674/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/CopyKeyDataBlockEncoder.java
----------------------------------------------------------------------
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/CopyKeyDataBlockEncoder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/CopyKeyDataBlockEncoder.java
index f2bcae4..ad60d6d 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/CopyKeyDataBlockEncoder.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/CopyKeyDataBlockEncoder.java
@@ -22,7 +22,10 @@ import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.Bytes;
@@ -36,28 +39,28 @@ import org.apache.hadoop.io.WritableUtils;
public class CopyKeyDataBlockEncoder extends BufferedDataBlockEncoder {
@Override
- public int internalEncode(KeyValue kv, HFileBlockDefaultEncodingContext encodingContext,
+ public int internalEncode(Cell cell, HFileBlockDefaultEncodingContext encodingContext,
DataOutputStream out) throws IOException {
- int klength = kv.getKeyLength();
- int vlength = kv.getValueLength();
+ int klength = KeyValueUtil.keyLength(cell);
+ int vlength = cell.getValueLength();
out.writeInt(klength);
out.writeInt(vlength);
- out.write(kv.getBuffer(), kv.getKeyOffset(), klength);
- out.write(kv.getValueArray(), kv.getValueOffset(), vlength);
+ CellUtil.writeFlatKey(cell, out);
+ out.write(cell.getValueArray(), cell.getValueOffset(), vlength);
int size = klength + vlength + KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE;
// Write the additional tag into the stream
if (encodingContext.getHFileContext().isIncludesTags()) {
- int tagsLength = kv.getTagsLength();
+ int tagsLength = cell.getTagsLength();
out.writeShort(tagsLength);
if (tagsLength > 0) {
- out.write(kv.getTagsArray(), kv.getTagsOffset(), tagsLength);
+ out.write(cell.getTagsArray(), cell.getTagsOffset(), tagsLength);
}
size += tagsLength + KeyValue.TAGS_LENGTH_SIZE;
}
if (encodingContext.getHFileContext().isIncludesMvcc()) {
- WritableUtils.writeVLong(out, kv.getMvccVersion());
- size += WritableUtils.getVIntSize(kv.getMvccVersion());
+ WritableUtils.writeVLong(out, cell.getSequenceId());
+ size += WritableUtils.getVIntSize(cell.getSequenceId());
}
return size;
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/68131674/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoder.java
----------------------------------------------------------------------
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoder.java
index 99f6a7f..e73b1e4 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoder.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoder.java
@@ -23,7 +23,6 @@ import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
@@ -53,13 +52,13 @@ public interface DataBlockEncoder {
/**
* Encodes a KeyValue.
- * @param kv
+ * @param cell
* @param encodingCtx
* @param out
* @return unencoded kv size written
* @throws IOException
*/
- int encode(KeyValue kv, HFileBlockEncodingContext encodingCtx, DataOutputStream out)
+ int encode(Cell cell, HFileBlockEncodingContext encodingCtx, DataOutputStream out)
throws IOException;
/**
http://git-wip-us.apache.org/repos/asf/hbase/blob/68131674/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DiffKeyDeltaEncoder.java
----------------------------------------------------------------------
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DiffKeyDeltaEncoder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DiffKeyDeltaEncoder.java
index fc4c314..4def2b3 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DiffKeyDeltaEncoder.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DiffKeyDeltaEncoder.java
@@ -22,7 +22,10 @@ import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.Bytes;
@@ -192,59 +195,55 @@ public class DiffKeyDeltaEncoder extends BufferedDataBlockEncoder {
}
@Override
- public int internalEncode(KeyValue kv, HFileBlockDefaultEncodingContext encodingContext,
+ public int internalEncode(Cell cell, HFileBlockDefaultEncodingContext encodingContext,
DataOutputStream out) throws IOException {
EncodingState state = encodingContext.getEncodingState();
- int size = compressSingleKeyValue(out, kv, state.prevKv);
- size += afterEncodingKeyValue(kv, out, encodingContext);
- state.prevKv = kv;
+ int size = compressSingleKeyValue(out, cell, state.prevCell);
+ size += afterEncodingKeyValue(cell, out, encodingContext);
+ state.prevCell = cell;
return size;
}
- private int compressSingleKeyValue(DataOutputStream out, KeyValue kv, KeyValue prevKv)
+ private int compressSingleKeyValue(DataOutputStream out, Cell cell, Cell prevCell)
throws IOException {
byte flag = 0;
- int kLength = kv.getKeyLength();
- int vLength = kv.getValueLength();
+ int kLength = KeyValueUtil.keyLength(cell);
+ int vLength = cell.getValueLength();
long timestamp;
long diffTimestamp = 0;
int diffTimestampFitsInBytes = 0;
int timestampFitsInBytes;
- int commonPrefix;
- byte[] curKvBuf = kv.getBuffer();
+ int commonPrefix = 0;
- if (prevKv == null) {
- timestamp = kv.getTimestamp();
+ if (prevCell == null) {
+ timestamp = cell.getTimestamp();
if (timestamp < 0) {
flag |= FLAG_TIMESTAMP_SIGN;
timestamp = -timestamp;
}
timestampFitsInBytes = ByteBufferUtils.longFitsIn(timestamp);
flag |= (timestampFitsInBytes - 1) << SHIFT_TIMESTAMP_LENGTH;
- commonPrefix = 0;
// put column family
- byte familyLength = kv.getFamilyLength();
+ byte familyLength = cell.getFamilyLength();
out.write(familyLength);
- out.write(kv.getFamilyArray(), kv.getFamilyOffset(), familyLength);
+ out.write(cell.getFamilyArray(), cell.getFamilyOffset(), familyLength);
} else {
// Finding common prefix
- int preKeyLength = prevKv.getKeyLength();
- commonPrefix = ByteBufferUtils.findCommonPrefix(curKvBuf, kv.getKeyOffset(), kLength
- - KeyValue.TIMESTAMP_TYPE_SIZE, prevKv.getBuffer(), prevKv.getKeyOffset(), preKeyLength
- - KeyValue.TIMESTAMP_TYPE_SIZE);
+ int preKeyLength = KeyValueUtil.keyLength(prevCell);
+ commonPrefix = CellUtil.findCommonPrefixInFlatKey(cell, prevCell, true, false);
if (kLength == preKeyLength) {
flag |= FLAG_SAME_KEY_LENGTH;
}
- if (vLength == prevKv.getValueLength()) {
+ if (vLength == prevCell.getValueLength()) {
flag |= FLAG_SAME_VALUE_LENGTH;
}
- if (kv.getTypeByte() == prevKv.getTypeByte()) {
+ if (cell.getTypeByte() == prevCell.getTypeByte()) {
flag |= FLAG_SAME_TYPE;
}
// don't compress timestamp and type using the common prefix; the timestamp is diff encoded below
- timestamp = kv.getTimestamp();
- diffTimestamp = prevKv.getTimestamp() - timestamp;
+ timestamp = cell.getTimestamp();
+ diffTimestamp = prevCell.getTimestamp() - timestamp;
boolean negativeTimestamp = timestamp < 0;
if (negativeTimestamp) {
timestamp = -timestamp;
@@ -276,13 +275,21 @@ public class DiffKeyDeltaEncoder extends BufferedDataBlockEncoder {
ByteBufferUtils.putCompressedInt(out, vLength);
}
ByteBufferUtils.putCompressedInt(out, commonPrefix);
- if (prevKv == null || commonPrefix < kv.getRowLength() + KeyValue.ROW_LENGTH_SIZE) {
- int restRowLength = kv.getRowLength() + KeyValue.ROW_LENGTH_SIZE - commonPrefix;
- out.write(curKvBuf, kv.getKeyOffset() + commonPrefix, restRowLength);
- out.write(curKvBuf, kv.getQualifierOffset(), kv.getQualifierLength());
+ short rLen = cell.getRowLength();
+ if (commonPrefix < rLen + KeyValue.ROW_LENGTH_SIZE) {
+ // Previous and current rows are different. Copy the differing part of
+ // the row, skip the column family, and copy the qualifier.
+ CellUtil.writeRowKeyExcludingCommon(cell, rLen, commonPrefix, out);
+ out.write(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength());
} else {
- out.write(curKvBuf, kv.getKeyOffset() + commonPrefix, kLength - commonPrefix
- - KeyValue.TIMESTAMP_TYPE_SIZE);
+ // The common part includes the whole row. As the column family is the
+ // same across the whole file, it will automatically be included in the
+ // common prefix, so we need not special-case it here.
+ // What we write here is the non-common part of the qualifier
+ int commonQualPrefix = commonPrefix - (rLen + KeyValue.ROW_LENGTH_SIZE)
+ - (cell.getFamilyLength() + KeyValue.FAMILY_LENGTH_SIZE);
+ out.write(cell.getQualifierArray(), cell.getQualifierOffset() + commonQualPrefix,
+ cell.getQualifierLength() - commonQualPrefix);
}
if ((flag & FLAG_TIMESTAMP_IS_DIFF) == 0) {
ByteBufferUtils.putLong(out, timestamp, timestampFitsInBytes);
@@ -291,9 +298,9 @@ public class DiffKeyDeltaEncoder extends BufferedDataBlockEncoder {
}
if ((flag & FLAG_SAME_TYPE) == 0) {
- out.write(kv.getTypeByte());
+ out.write(cell.getTypeByte());
}
- out.write(kv.getValueArray(), kv.getValueOffset(), vLength);
+ out.write(cell.getValueArray(), cell.getValueOffset(), vLength);
return kLength + vLength + KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE;
}
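
The commonQualPrefix arithmetic above is easier to follow with concrete numbers. A worked example (values invented): row "r1" (rLen = 2), family "f" (fLen = 1), previous qualifier "q12", current qualifier "q13". findCommonPrefixInFlatKey(cell, prevCell, true, false) then returns 2 (rk len) + 2 (rk) + 1 (cf len) + 1 (cf) + 2 ("q1") = 8. Since 8 >= rLen + ROW_LENGTH_SIZE = 4, the whole row is common, and

    commonQualPrefix = 8 - (2 + 2) - (1 + 1) = 2

so only the one remaining qualifier byte ("3") is written to the stream.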
http://git-wip-us.apache.org/repos/asf/hbase/blob/68131674/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/EncodingState.java
----------------------------------------------------------------------
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/EncodingState.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/EncodingState.java
index b16f099..58a1184 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/EncodingState.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/EncodingState.java
@@ -19,7 +19,7 @@
package org.apache.hadoop.hbase.io.encoding;
import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.Cell;
/**
* Keeps track of the encoding state.
@@ -28,7 +28,7 @@ import org.apache.hadoop.hbase.KeyValue;
public class EncodingState {
/**
- * The previous KeyValue the encoder encoded.
+ * The previous Cell the encoder encoded.
*/
- protected KeyValue prevKv = null;
+ protected Cell prevCell = null;
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/68131674/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/FastDiffDeltaEncoder.java
----------------------------------------------------------------------
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/FastDiffDeltaEncoder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/FastDiffDeltaEncoder.java
index c133308..0b4d10f 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/FastDiffDeltaEncoder.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/FastDiffDeltaEncoder.java
@@ -22,7 +22,10 @@ import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.Bytes;
@@ -101,11 +104,10 @@ public class FastDiffDeltaEncoder extends BufferedDataBlockEncoder {
}
- private int findCommonTimestampPrefix(byte[] curKvBuf, int curKvTsOff, byte[] preKvBuf,
- int preKvTsOff) {
+ private int findCommonTimestampPrefix(byte[] curTsBuf, byte[] prevTsBuf) {
int commonPrefix = 0;
while (commonPrefix < (KeyValue.TIMESTAMP_SIZE - 1)
- && curKvBuf[curKvTsOff + commonPrefix] == preKvBuf[preKvTsOff + commonPrefix]) {
+ && curTsBuf[commonPrefix] == prevTsBuf[commonPrefix]) {
commonPrefix++;
}
return commonPrefix; // has to be at most 7 bytes
@@ -237,59 +239,57 @@ public class FastDiffDeltaEncoder extends BufferedDataBlockEncoder {
}
@Override
- public int internalEncode(KeyValue kv, HFileBlockDefaultEncodingContext encodingContext,
+ public int internalEncode(Cell cell, HFileBlockDefaultEncodingContext encodingContext,
DataOutputStream out) throws IOException {
EncodingState state = encodingContext.getEncodingState();
- int size = compressSingleKeyValue(out, kv, state.prevKv);
- size += afterEncodingKeyValue(kv, out, encodingContext);
- state.prevKv = kv;
+ int size = compressSingleKeyValue(out, cell, state.prevCell);
+ size += afterEncodingKeyValue(cell, out, encodingContext);
+ state.prevCell = cell;
return size;
}
- private int compressSingleKeyValue(DataOutputStream out, KeyValue kv, KeyValue prevKv)
+ private int compressSingleKeyValue(DataOutputStream out, Cell cell, Cell prevCell)
throws IOException {
byte flag = 0;
- int kLength = kv.getKeyLength();
- int vLength = kv.getValueLength();
- byte[] curKvBuf = kv.getBuffer();
+ int kLength = KeyValueUtil.keyLength(cell);
+ int vLength = cell.getValueLength();
- if (prevKv == null) {
+ if (prevCell == null) {
// copy the whole key; with no previous cell there is no common prefix
out.write(flag);
ByteBufferUtils.putCompressedInt(out, kLength);
ByteBufferUtils.putCompressedInt(out, vLength);
ByteBufferUtils.putCompressedInt(out, 0);
- out.write(curKvBuf, kv.getKeyOffset(), kLength + vLength);
+ CellUtil.writeFlatKey(cell, out);
+ // Write the value part
+ out.write(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
} else {
- byte[] preKvBuf = prevKv.getBuffer();
- int preKeyLength = prevKv.getKeyLength();
- int preValLength = prevKv.getValueLength();
+ int preKeyLength = KeyValueUtil.keyLength(prevCell);
+ int preValLength = prevCell.getValueLength();
// find a common prefix and skip it
- int commonPrefix = ByteBufferUtils.findCommonPrefix(curKvBuf, kv.getKeyOffset(), kLength
- - KeyValue.TIMESTAMP_TYPE_SIZE, preKvBuf, prevKv.getKeyOffset(), preKeyLength
- - KeyValue.TIMESTAMP_TYPE_SIZE);
+ int commonPrefix = CellUtil.findCommonPrefixInFlatKey(cell, prevCell, true, false);
- if (kLength == prevKv.getKeyLength()) {
+ if (kLength == preKeyLength) {
flag |= FLAG_SAME_KEY_LENGTH;
}
- if (vLength == prevKv.getValueLength()) {
+ if (vLength == prevCell.getValueLength()) {
flag |= FLAG_SAME_VALUE_LENGTH;
}
- if (kv.getTypeByte() == prevKv.getTypeByte()) {
+ if (cell.getTypeByte() == prevCell.getTypeByte()) {
flag |= FLAG_SAME_TYPE;
}
- int commonTimestampPrefix = findCommonTimestampPrefix(curKvBuf, kv.getKeyOffset() + kLength
- - KeyValue.TIMESTAMP_TYPE_SIZE, preKvBuf, prevKv.getKeyOffset() + preKeyLength
- - KeyValue.TIMESTAMP_TYPE_SIZE);
+ byte[] curTsBuf = Bytes.toBytes(cell.getTimestamp());
+ int commonTimestampPrefix = findCommonTimestampPrefix(curTsBuf,
+ Bytes.toBytes(prevCell.getTimestamp()));
flag |= commonTimestampPrefix << SHIFT_TIMESTAMP_LENGTH;
// Check if current and previous values are the same. Compare value
// length first as an optimization.
if (vLength == preValLength
- && Bytes.equals(kv.getValueArray(), kv.getValueOffset(), vLength,
- prevKv.getValueArray(), prevKv.getValueOffset(), preValLength)) {
+ && Bytes.equals(cell.getValueArray(), cell.getValueOffset(), vLength,
+ prevCell.getValueArray(), prevCell.getValueOffset(), preValLength)) {
flag |= FLAG_SAME_VALUE;
}
@@ -301,31 +301,33 @@ public class FastDiffDeltaEncoder extends BufferedDataBlockEncoder {
ByteBufferUtils.putCompressedInt(out, vLength);
}
ByteBufferUtils.putCompressedInt(out, commonPrefix);
-
- if (commonPrefix < kv.getRowLength() + KeyValue.ROW_LENGTH_SIZE) {
+ short rLen = cell.getRowLength();
+ if (commonPrefix < rLen + KeyValue.ROW_LENGTH_SIZE) {
// Previous and current rows are different. Copy the differing part of
// the row, skip the column family, and copy the qualifier.
- out.write(curKvBuf, kv.getKeyOffset() + commonPrefix, kv.getRowLength()
- + KeyValue.ROW_LENGTH_SIZE - commonPrefix);
- out.write(curKvBuf, kv.getQualifierOffset(), kv.getQualifierLength());
+ CellUtil.writeRowKeyExcludingCommon(cell, rLen, commonPrefix, out);
+ out.write(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength());
} else {
// The common part includes the whole row. As the column family is the
// same across the whole file, it will automatically be included in the
// common prefix, so we need not special-case it here.
- int restKeyLength = kLength - commonPrefix - KeyValue.TIMESTAMP_TYPE_SIZE;
- out.write(curKvBuf, kv.getKeyOffset() + commonPrefix, restKeyLength);
+ // What we write here is the non-common part of the qualifier
+ int commonQualPrefix = commonPrefix - (rLen + KeyValue.ROW_LENGTH_SIZE)
+ - (cell.getFamilyLength() + KeyValue.FAMILY_LENGTH_SIZE);
+ out.write(cell.getQualifierArray(), cell.getQualifierOffset() + commonQualPrefix,
+ cell.getQualifierLength() - commonQualPrefix);
}
- out.write(curKvBuf, kv.getKeyOffset() + kLength - KeyValue.TIMESTAMP_TYPE_SIZE
- + commonTimestampPrefix, KeyValue.TIMESTAMP_SIZE - commonTimestampPrefix);
+ // Write the non-common timestamp part
+ out.write(curTsBuf, commonTimestampPrefix, KeyValue.TIMESTAMP_SIZE - commonTimestampPrefix);
// Write the type if it is not the same as before.
if ((flag & FLAG_SAME_TYPE) == 0) {
- out.write(kv.getTypeByte());
+ out.write(cell.getTypeByte());
}
// Write the value if it is not the same as before.
if ((flag & FLAG_SAME_VALUE) == 0) {
- out.write(kv.getValueArray(), kv.getValueOffset(), vLength);
+ out.write(cell.getValueArray(), cell.getValueOffset(), vLength);
}
}
return kLength + vLength + KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE;
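
The timestamp diffing above now works on the two 8-byte big-endian serializations produced by Bytes.toBytes(long). For intuition, a tiny sketch (invented values) mirroring findCommonTimestampPrefix():

    import org.apache.hadoop.hbase.util.Bytes;

    public class TsPrefixExample {
      public static void main(String[] args) {
        byte[] t1 = Bytes.toBytes(1234L); // 00 00 00 00 00 00 04 d2
        byte[] t2 = Bytes.toBytes(1235L); // 00 00 00 00 00 00 04 d3
        int common = 0;
        // Capped at 7 so at least one timestamp byte is always written out.
        while (common < 7 && t1[common] == t2[common]) {
          common++;
        }
        System.out.println(common); // 7
      }
    }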
http://git-wip-us.apache.org/repos/asf/hbase/blob/68131674/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixKeyDeltaEncoder.java
----------------------------------------------------------------------
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixKeyDeltaEncoder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixKeyDeltaEncoder.java
index a6db8ee..3766a85 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixKeyDeltaEncoder.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixKeyDeltaEncoder.java
@@ -22,8 +22,11 @@ import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
+import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.Bytes;
@@ -45,34 +48,78 @@ import org.apache.hadoop.hbase.util.Bytes;
public class PrefixKeyDeltaEncoder extends BufferedDataBlockEncoder {
@Override
- public int internalEncode(KeyValue kv, HFileBlockDefaultEncodingContext encodingContext,
+ public int internalEncode(Cell cell, HFileBlockDefaultEncodingContext encodingContext,
DataOutputStream out) throws IOException {
- byte[] kvBuf = kv.getBuffer();
- int klength = kv.getKeyLength();
- int vlength = kv.getValueLength();
+ int klength = KeyValueUtil.keyLength(cell);
+ int vlength = cell.getValueLength();
EncodingState state = encodingContext.getEncodingState();
- if (state.prevKv == null) {
+ if (state.prevCell == null) {
// copy the whole key; with no previous cell there is no common prefix
ByteBufferUtils.putCompressedInt(out, klength);
ByteBufferUtils.putCompressedInt(out, vlength);
ByteBufferUtils.putCompressedInt(out, 0);
- out.write(kvBuf, kv.getKeyOffset(), klength + vlength);
+ CellUtil.writeFlatKey(cell, out);
} else {
// find a common prefix and skip it
- int common = ByteBufferUtils.findCommonPrefix(state.prevKv.getBuffer(),
- state.prevKv.getKeyOffset(), state.prevKv.getKeyLength(), kvBuf, kv.getKeyOffset(),
- kv.getKeyLength());
+ int common = CellUtil.findCommonPrefixInFlatKey(cell, state.prevCell, true, true);
ByteBufferUtils.putCompressedInt(out, klength - common);
ByteBufferUtils.putCompressedInt(out, vlength);
ByteBufferUtils.putCompressedInt(out, common);
- out.write(kvBuf, kv.getKeyOffset() + common, klength - common + vlength);
+ writeKeyExcludingCommon(cell, common, out);
}
+ // Write the value part
+ out.write(cell.getValueArray(), cell.getValueOffset(), vlength);
int size = klength + vlength + KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE;
- size += afterEncodingKeyValue(kv, out, encodingContext);
- state.prevKv = kv;
+ size += afterEncodingKeyValue(cell, out, encodingContext);
+ state.prevCell = cell;
return size;
}
+ private void writeKeyExcludingCommon(Cell cell, int commonPrefix, DataOutputStream out)
+ throws IOException {
+ short rLen = cell.getRowLength();
+ if (commonPrefix < rLen + KeyValue.ROW_LENGTH_SIZE) {
+ // Previous and current rows are different. Need to write the differing part followed by
+ // cf, q, ts and type
+ CellUtil.writeRowKeyExcludingCommon(cell, rLen, commonPrefix, out);
+ byte fLen = cell.getFamilyLength();
+ out.writeByte(fLen);
+ out.write(cell.getFamilyArray(), cell.getFamilyOffset(), fLen);
+ out.write(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength());
+ out.writeLong(cell.getTimestamp());
+ out.writeByte(cell.getTypeByte());
+ } else {
+ // The full row key part is common. The CF part will be common for sure as we deal with
+ // Cells in the same family. Just need to write the differing part in q, ts and type
+ commonPrefix = commonPrefix - (rLen + KeyValue.ROW_LENGTH_SIZE)
+ - (cell.getFamilyLength() + KeyValue.FAMILY_LENGTH_SIZE);
+ int qLen = cell.getQualifierLength();
+ int commonQualPrefix = Math.min(commonPrefix, qLen);
+ int qualPartLenToWrite = qLen - commonQualPrefix;
+ if (qualPartLenToWrite > 0) {
+ out.write(cell.getQualifierArray(), cell.getQualifierOffset() + commonQualPrefix,
+ qualPartLenToWrite);
+ }
+ commonPrefix -= commonQualPrefix;
+ // Common part in TS also?
+ if (commonPrefix > 0) {
+ int commonTimestampPrefix = Math.min(commonPrefix, KeyValue.TIMESTAMP_SIZE);
+ if (commonTimestampPrefix < KeyValue.TIMESTAMP_SIZE) {
+ byte[] curTsBuf = Bytes.toBytes(cell.getTimestamp());
+ out.write(curTsBuf, commonTimestampPrefix, KeyValue.TIMESTAMP_SIZE
+ - commonTimestampPrefix);
+ }
+ commonPrefix -= commonTimestampPrefix;
+ if (commonPrefix == 0) {
+ out.writeByte(cell.getTypeByte());
+ }
+ } else {
+ out.writeLong(cell.getTimestamp());
+ out.writeByte(cell.getTypeByte());
+ }
+ }
+ }
+
@Override
protected ByteBuffer internalDecodeKeyValues(DataInputStream source, int allocateHeaderLength,
int skipLastBytes, HFileBlockDefaultDecodingContext decodingCtx) throws IOException {
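
For reference while reading internalEncode() and writeKeyExcludingCommon() above, each entry this encoder emits is laid out roughly as follows (per the code, not an official spec):

    vint(keyLength - common)  vint(valueLength)  vint(common)
    key suffix (keyLength - common bytes: row/qualifier/ts/type parts as applicable)
    value (valueLength bytes)
    optional tags + mvcc trailer, as written by afterEncodingKeyValue()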
http://git-wip-us.apache.org/repos/asf/hbase/blob/68131674/hbase-common/src/test/java/org/apache/hadoop/hbase/TestCellUtil.java
----------------------------------------------------------------------
diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/TestCellUtil.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/TestCellUtil.java
index 133fa03..182c4db 100644
--- a/hbase-common/src/test/java/org/apache/hadoop/hbase/TestCellUtil.java
+++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/TestCellUtil.java
@@ -24,6 +24,7 @@ import java.util.List;
import java.util.NavigableMap;
import java.util.TreeMap;
+import org.apache.hadoop.hbase.KeyValue.Type;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
@@ -325,4 +326,48 @@ public class TestCellUtil {
Assert.assertFalse(CellUtil.overlappingKeys(empty, b, b, c));
Assert.assertFalse(CellUtil.overlappingKeys(empty, a, b, c));
}
+
+ @Test
+ public void testFindCommonPrefixInFlatKey() {
+ // The whole key matching case
+ KeyValue kv1 = new KeyValue("r1".getBytes(), "f1".getBytes(), "q1".getBytes(), null);
+ Assert.assertEquals(kv1.getKeyLength(),
+ CellUtil.findCommonPrefixInFlatKey(kv1, kv1, true, true));
+ Assert.assertEquals(kv1.getKeyLength(),
+ CellUtil.findCommonPrefixInFlatKey(kv1, kv1, false, true));
+ Assert.assertEquals(kv1.getKeyLength() - KeyValue.TIMESTAMP_TYPE_SIZE,
+ CellUtil.findCommonPrefixInFlatKey(kv1, kv1, true, false));
+ // The rk lengths themselves differ
+ KeyValue kv2 = new KeyValue("r12".getBytes(), "f1".getBytes(), "q1".getBytes(), null);
+ Assert.assertEquals(1, CellUtil.findCommonPrefixInFlatKey(kv1, kv2, true, true));
+ // part of rk is same
+ KeyValue kv3 = new KeyValue("r14".getBytes(), "f1".getBytes(), "q1".getBytes(), null);
+ Assert.assertEquals(KeyValue.ROW_LENGTH_SIZE + "r1".getBytes().length,
+ CellUtil.findCommonPrefixInFlatKey(kv2, kv3, true, true));
+ // entire rk is same but different cf name
+ KeyValue kv4 = new KeyValue("r14".getBytes(), "f2".getBytes(), "q1".getBytes(), null);
+ Assert.assertEquals(KeyValue.ROW_LENGTH_SIZE + kv3.getRowLength() + KeyValue.FAMILY_LENGTH_SIZE
+ + "f".getBytes().length, CellUtil.findCommonPrefixInFlatKey(kv3, kv4, false, true));
+ // rk and family are the same, and part of the qualifier matches
+ KeyValue kv5 = new KeyValue("r14".getBytes(), "f2".getBytes(), "q123".getBytes(), null);
+ Assert.assertEquals(KeyValue.ROW_LENGTH_SIZE + kv3.getRowLength() + KeyValue.FAMILY_LENGTH_SIZE
+ + kv4.getFamilyLength() + kv4.getQualifierLength(),
+ CellUtil.findCommonPrefixInFlatKey(kv4, kv5, true, true));
+ // rk, cf and q are same. ts differs
+ KeyValue kv6 = new KeyValue("rk".getBytes(), 1234L);
+ KeyValue kv7 = new KeyValue("rk".getBytes(), 1235L);
+ // only the last of the 8 ts bytes differs
+ Assert.assertEquals(KeyValue.ROW_LENGTH_SIZE + kv6.getRowLength() + KeyValue.FAMILY_LENGTH_SIZE
+ + kv6.getFamilyLength() + kv6.getQualifierLength() + 7,
+ CellUtil.findCommonPrefixInFlatKey(kv6, kv7, true, true));
+ // rk, cf, q and ts are same. Only type differs
+ KeyValue kv8 = new KeyValue("rk".getBytes(), 1234L, Type.Delete);
+ Assert.assertEquals(KeyValue.ROW_LENGTH_SIZE + kv6.getRowLength() + KeyValue.FAMILY_LENGTH_SIZE
+ + kv6.getFamilyLength() + kv6.getQualifierLength() + KeyValue.TIMESTAMP_SIZE,
+ CellUtil.findCommonPrefixInFlatKey(kv6, kv8, true, true));
+ // Without the ts and type check
+ Assert.assertEquals(KeyValue.ROW_LENGTH_SIZE + kv6.getRowLength() + KeyValue.FAMILY_LENGTH_SIZE
+ + kv6.getFamilyLength() + kv6.getQualifierLength(),
+ CellUtil.findCommonPrefixInFlatKey(kv6, kv8, true, false));
+ }
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/68131674/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeCodec.java
----------------------------------------------------------------------
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeCodec.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeCodec.java
index 2a0c459..b24ecb8 100644
--- a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeCodec.java
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeCodec.java
@@ -24,6 +24,8 @@ import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.KeyValue.MetaComparator;
@@ -161,14 +163,14 @@ public class PrefixTreeCodec implements DataBlockEncoder{
}
@Override
- public int encode(KeyValue kv, HFileBlockEncodingContext encodingCtx, DataOutputStream out)
+ public int encode(Cell cell, HFileBlockEncodingContext encodingCtx, DataOutputStream out)
throws IOException {
PrefixTreeEncodingState state = (PrefixTreeEncodingState) encodingCtx.getEncodingState();
PrefixTreeEncoder builder = state.builder;
- builder.write(kv);
- int size = kv.getLength();
+ builder.write(cell);
+ int size = KeyValueUtil.length(cell);
if (encodingCtx.getHFileContext().isIncludesMvcc()) {
- size += WritableUtils.getVIntSize(kv.getMvccVersion());
+ size += WritableUtils.getVIntSize(cell.getSequenceId());
}
return size;
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/68131674/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileWriter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileWriter.java
index 25e53cd..0733f2e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileWriter.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileWriter.java
@@ -22,7 +22,6 @@ import java.io.DataOutputStream;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.classification.InterfaceAudience;
@@ -31,9 +30,11 @@ import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
+import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
@@ -47,10 +48,9 @@ import org.apache.hadoop.io.Writable;
@InterfaceAudience.Private
public abstract class AbstractHFileWriter implements HFile.Writer {
- /** Key previously appended. Becomes the last key in the file. */
- protected byte[] lastKeyBuffer = null;
+ /** The Cell previously appended. Becomes the last cell in the file. */
+ protected Cell lastCell = null;
- protected int lastKeyOffset = -1;
protected int lastKeyLength = -1;
/** FileSystem stream to write into. */
@@ -131,11 +131,12 @@ public abstract class AbstractHFileWriter implements HFile.Writer {
* Add last bits of metadata to file info before it is written out.
*/
protected void finishFileInfo() throws IOException {
- if (lastKeyBuffer != null) {
+ if (lastCell != null) {
// Make a copy. The copy is stuffed into our fileinfo map. Needs a clean
// byte buffer. Won't take a tuple.
- fileInfo.append(FileInfo.LASTKEY, Arrays.copyOfRange(lastKeyBuffer,
- lastKeyOffset, lastKeyOffset + lastKeyLength), false);
+ byte[] lastKey = new byte[lastKeyLength];
+ KeyValueUtil.appendKeyTo(lastCell, lastKey, 0);
+ fileInfo.append(FileInfo.LASTKEY, lastKey, false);
}
// Average key length.
@@ -181,30 +182,24 @@ public abstract class AbstractHFileWriter implements HFile.Writer {
}
/**
- * Checks that the given key does not violate the key order.
+ * Checks that the given Cell's key does not violate the key order.
*
- * @param key Key to check.
+ * @param cell Cell whose key to check.
* @return true if the key is duplicate
* @throws IOException if the key or the key order is wrong
*/
- protected boolean checkKey(final byte[] key, final int offset,
- final int length) throws IOException {
+ protected boolean checkKey(final Cell cell) throws IOException {
boolean isDuplicateKey = false;
- if (key == null || length <= 0) {
+ if (cell == null) {
throw new IOException("Key cannot be null or empty");
}
- if (lastKeyBuffer != null) {
- int keyComp = comparator.compareFlatKey(lastKeyBuffer, lastKeyOffset,
- lastKeyLength, key, offset, length);
+ if (lastCell != null) {
+ int keyComp = comparator.compareOnlyKeyPortion(lastCell, cell);
if (keyComp > 0) {
throw new IOException("Added a key not lexically larger than"
- + " previous key="
- + Bytes.toStringBinary(key, offset, length)
- + ", lastkey="
- + Bytes.toStringBinary(lastKeyBuffer, lastKeyOffset,
- lastKeyLength));
+ + " previous. Current cell = " + cell + ", lastCell = " + lastCell);
} else if (keyComp == 0) {
isDuplicateKey = true;
}
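
The reworked checkKey() enforces append order with KVComparator.compareOnlyKeyPortion(), the same call shown above. A minimal sketch of that contract (invented cells):

    import org.apache.hadoop.hbase.KeyValue;
    import org.apache.hadoop.hbase.util.Bytes;

    public class KeyOrderExample {
      public static void main(String[] args) {
        KeyValue a = new KeyValue(Bytes.toBytes("a"), Bytes.toBytes("f"),
            Bytes.toBytes("q"), Bytes.toBytes("v"));
        KeyValue b = new KeyValue(Bytes.toBytes("b"), Bytes.toBytes("f"),
            Bytes.toBytes("q"), Bytes.toBytes("v"));
        KeyValue.KVComparator c = KeyValue.COMPARATOR;
        // < 0: in order, append proceeds; == 0: duplicate key (checkKey() returns true);
        // > 0: order violation, checkKey() throws IOException.
        System.out.println(c.compareOnlyKeyPortion(a, b) < 0);  // true
        System.out.println(c.compareOnlyKeyPortion(b, b) == 0); // true
      }
    }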
http://git-wip-us.apache.org/repos/asf/hbase/blob/68131674/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
index 5c57599..c6733b9 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
@@ -35,12 +35,8 @@ import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
-import java.util.concurrent.ArrayBlockingQueue;
-import java.util.concurrent.BlockingQueue;
-import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
-import org.apache.hadoop.hbase.util.ByteStringer;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
@@ -51,6 +47,7 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
@@ -63,13 +60,13 @@ import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.BytesBytesPair;
import org.apache.hadoop.hbase.protobuf.generated.HFileProtos;
import org.apache.hadoop.hbase.util.BloomFilterWriter;
+import org.apache.hadoop.hbase.util.ByteStringer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.io.Writable;
import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
/**
* File format for hbase.
@@ -203,7 +200,7 @@ public class HFile {
/** Add an element to the file info map. */
void appendFileInfo(byte[] key, byte[] value) throws IOException;
- void append(KeyValue kv) throws IOException;
+ void append(Cell cell) throws IOException;
void append(byte[] key, byte[] value) throws IOException;
http://git-wip-us.apache.org/repos/asf/hbase/blob/68131674/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
index 75a87a9..a29103e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
@@ -32,6 +32,7 @@ import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.fs.HFileSystem;
@@ -818,13 +819,13 @@ public class HFileBlock implements Cacheable {
}
/**
- * Writes the kv to this block
- * @param kv
+ * Writes the Cell to this block
+ * @param cell
* @throws IOException
*/
- public void write(KeyValue kv) throws IOException{
+ public void write(Cell cell) throws IOException{
expectState(State.WRITING);
- this.unencodedDataSizeWritten += this.dataBlockEncoder.encode(kv, dataBlockEncodingCtx,
+ this.unencodedDataSizeWritten += this.dataBlockEncoder.encode(cell, dataBlockEncodingCtx,
this.userDataStream);
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/68131674/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoder.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoder.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoder.java
index 7049e4c..2d25449 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoder.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoder.java
@@ -20,7 +20,7 @@ import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
@@ -49,13 +49,13 @@ public interface HFileDataBlockEncoder {
/**
* Encodes a KeyValue.
- * @param kv
+ * @param cell
* @param encodingCtx
* @param out
* @return unencoded kv size
* @throws IOException
*/
- int encode(KeyValue kv, HFileBlockEncodingContext encodingCtx, DataOutputStream out)
+ int encode(Cell cell, HFileBlockEncodingContext encodingCtx, DataOutputStream out)
throws IOException;
/**
http://git-wip-us.apache.org/repos/asf/hbase/blob/68131674/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoderImpl.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoderImpl.java
index edf4cc6..afc8b71 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoderImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoderImpl.java
@@ -20,6 +20,7 @@ import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
@@ -92,9 +93,9 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
}
@Override
- public int encode(KeyValue kv, HFileBlockEncodingContext encodingCtx, DataOutputStream out)
+ public int encode(Cell cell, HFileBlockEncodingContext encodingCtx, DataOutputStream out)
throws IOException {
- return this.encoding.getEncoder().encode(kv, encodingCtx, out);
+ return this.encoding.getEncoder().encode(cell, encodingCtx, out);
}
@Override
http://git-wip-us.apache.org/repos/asf/hbase/blob/68131674/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java
index 1d71f3e..ee0e303 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java
@@ -32,8 +32,10 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
+import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.io.hfile.HFile.Writer;
import org.apache.hadoop.hbase.io.hfile.HFileBlock.BlockWritable;
import org.apache.hadoop.hbase.util.BloomFilterWriter;
@@ -207,7 +209,7 @@ public class HFileWriterV2 extends AbstractHFileWriter {
firstKeyInBlock = null;
if (lastKeyLength > 0) {
lastKeyOfPreviousBlock = new byte[lastKeyLength];
- System.arraycopy(lastKeyBuffer, lastKeyOffset, lastKeyOfPreviousBlock, 0, lastKeyLength);
+ KeyValueUtil.appendKeyTo(lastCell, lastKeyOfPreviousBlock, 0);
}
}
@@ -242,19 +244,17 @@ public class HFileWriterV2 extends AbstractHFileWriter {
* Add key/value to file. Keys must be added in an order that agrees with the
* Comparator passed on construction.
*
- * @param kv
- * KeyValue to add. Cannot be empty nor null.
+ * @param cell
+ * Cell to add. Cannot be empty nor null.
* @throws IOException
*/
@Override
- public void append(final KeyValue kv) throws IOException {
- byte[] key = kv.getBuffer();
- int koffset = kv.getKeyOffset();
- int klength = kv.getKeyLength();
- byte[] value = kv.getValueArray();
- int voffset = kv.getValueOffset();
- int vlength = kv.getValueLength();
- boolean dupKey = checkKey(key, koffset, klength);
+ public void append(final Cell cell) throws IOException {
+ int klength = KeyValueUtil.keyLength(cell);
+ byte[] value = cell.getValueArray();
+ int voffset = cell.getValueOffset();
+ int vlength = cell.getValueLength();
+ boolean dupKey = checkKey(cell);
checkValue(value, voffset, vlength);
if (!dupKey) {
checkBlockBoundary();
@@ -263,7 +263,7 @@ public class HFileWriterV2 extends AbstractHFileWriter {
if (!fsBlockWriter.isWriting())
newBlock();
- fsBlockWriter.write(kv);
+ fsBlockWriter.write(cell);
totalKeyLength += klength;
totalValueLength += vlength;
@@ -272,14 +272,13 @@ public class HFileWriterV2 extends AbstractHFileWriter {
if (firstKeyInBlock == null) {
// Copy the key.
firstKeyInBlock = new byte[klength];
- System.arraycopy(key, koffset, firstKeyInBlock, 0, klength);
+ KeyValueUtil.appendKeyTo(cell, firstKeyInBlock, 0);
}
- lastKeyBuffer = key;
- lastKeyOffset = koffset;
+ lastCell = cell;
lastKeyLength = klength;
entryCount++;
- this.maxMemstoreTS = Math.max(this.maxMemstoreTS, kv.getMvccVersion());
+ this.maxMemstoreTS = Math.max(this.maxMemstoreTS, cell.getSequenceId());
}
/**
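For illustration, a minimal sketch of the new Cell-based contract (writer construction elided; the row/column names are hypothetical and not part of this commit). KeyValue implements Cell, so existing callers compile unchanged:

Cell c1 = new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("f"),
    Bytes.toBytes("q"), Bytes.toBytes("v1"));
Cell c2 = new KeyValue(Bytes.toBytes("row2"), Bytes.toBytes("f"),
    Bytes.toBytes("q"), Bytes.toBytes("v2"));
writer.append(c1); // cells must arrive in the comparator order given at construction
writer.append(c2); // checkKey() flags out-of-order keys; duplicates skip the block-boundary check
writer.close();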
http://git-wip-us.apache.org/repos/asf/hbase/blob/68131674/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV3.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV3.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV3.java
index b7885f3..a0a2372 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV3.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV3.java
@@ -26,6 +26,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
@@ -77,15 +78,15 @@ public class HFileWriterV3 extends HFileWriterV2 {
* Add key/value to file. Keys must be added in an order that agrees with the
* Comparator passed on construction.
*
- * @param kv
- * KeyValue to add. Cannot be empty nor null.
+ * @param cell
+ * Cell to add. Cannot be empty or null.
* @throws IOException
*/
@Override
- public void append(final KeyValue kv) throws IOException {
+ public void append(final Cell cell) throws IOException {
// Currently get the complete arrays
- super.append(kv);
- int tagsLength = kv.getTagsLength();
+ super.append(cell);
+ int tagsLength = cell.getTagsLength();
if (tagsLength > this.maxTagsLength) {
this.maxTagsLength = tagsLength;
}
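A sketch of appending a tagged cell through the V3 writer, assuming the KeyValue constructor that accepts a tag list (the tag type and values here are hypothetical):

List<Tag> tags = new ArrayList<Tag>();
tags.add(new Tag((byte) 1, Bytes.toBytes("acl-data"))); // hypothetical tag type/payload
Cell tagged = new KeyValue(Bytes.toBytes("r1"), Bytes.toBytes("f"),
    Bytes.toBytes("q"), HConstants.LATEST_TIMESTAMP, Bytes.toBytes("v"), tags);
writer.append(tagged); // super.append() writes the cell; V3 then tracks the widest tags block seen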
http://git-wip-us.apache.org/repos/asf/hbase/blob/68131674/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/NoOpDataBlockEncoder.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/NoOpDataBlockEncoder.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/NoOpDataBlockEncoder.java
index b544798..5a17ef2 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/NoOpDataBlockEncoder.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/NoOpDataBlockEncoder.java
@@ -20,7 +20,10 @@ import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext;
@@ -42,28 +45,28 @@ public class NoOpDataBlockEncoder implements HFileDataBlockEncoder {
}
@Override
- public int encode(KeyValue kv, HFileBlockEncodingContext encodingCtx, DataOutputStream out)
+ public int encode(Cell cell, HFileBlockEncodingContext encodingCtx, DataOutputStream out)
throws IOException {
- int klength = kv.getKeyLength();
- int vlength = kv.getValueLength();
+ int klength = KeyValueUtil.keyLength(cell);
+ int vlength = cell.getValueLength();
out.writeInt(klength);
out.writeInt(vlength);
- out.write(kv.getBuffer(), kv.getKeyOffset(), klength);
- out.write(kv.getValueArray(), kv.getValueOffset(), vlength);
+ CellUtil.writeFlatKey(cell, out);
+ out.write(cell.getValueArray(), cell.getValueOffset(), vlength);
int encodedKvSize = klength + vlength + KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE;
// Write the additional tag into the stream
if (encodingCtx.getHFileContext().isIncludesTags()) {
- int tagsLength = kv.getTagsLength();
+ int tagsLength = cell.getTagsLength();
out.writeShort(tagsLength);
if (tagsLength > 0) {
- out.write(kv.getTagsArray(), kv.getTagsOffset(), tagsLength);
+ out.write(cell.getTagsArray(), cell.getTagsOffset(), tagsLength);
}
encodedKvSize += tagsLength + KeyValue.TAGS_LENGTH_SIZE;
}
if (encodingCtx.getHFileContext().isIncludesMvcc()) {
- WritableUtils.writeVLong(out, kv.getMvccVersion());
- encodedKvSize += WritableUtils.getVIntSize(kv.getMvccVersion());
+ WritableUtils.writeVLong(out, cell.getSequenceId());
+ encodedKvSize += WritableUtils.getVIntSize(cell.getSequenceId());
}
return encodedKvSize;
}
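For reference, the unencoded record layout written above is: a 4-byte key length, a 4-byte value length, the flat key, the value, then optionally a 2-byte tags length plus the tags, and finally the mvcc/sequence id as a vlong. A sketch of the same size arithmetic (the two boolean flags stand in for the HFileContext queries):

int size = KeyValueUtil.keyLength(cell) + cell.getValueLength()
    + KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE;                  // the two 4-byte length prefixes
if (includesTags) {                                           // isIncludesTags()
  size += cell.getTagsLength() + KeyValue.TAGS_LENGTH_SIZE;   // 2-byte tags length prefix
}
if (includesMvcc) {                                           // isIncludesMvcc()
  size += WritableUtils.getVIntSize(cell.getSequenceId());    // variable-length long
}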
http://git-wip-us.apache.org/repos/asf/hbase/blob/68131674/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
index 26e007a..769d1d2 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
@@ -37,6 +37,7 @@ import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
@@ -698,9 +699,9 @@ public class StoreFile {
private byte[] lastBloomKey;
private int lastBloomKeyOffset, lastBloomKeyLen;
private KVComparator kvComparator;
- private KeyValue lastKv = null;
+ private Cell lastCell = null;
private long earliestPutTs = HConstants.LATEST_TIMESTAMP;
- private KeyValue lastDeleteFamilyKV = null;
+ private Cell lastDeleteFamilyCell = null;
private long deleteFamilyCnt = 0;
/** Bytes per Checksum */
@@ -810,28 +811,28 @@ public class StoreFile {
*
* If the timeRangeTracker is not set,
* update TimeRangeTracker to include the timestamp of this key
- * @param kv
+ * @param cell
*/
- public void trackTimestamps(final KeyValue kv) {
- if (KeyValue.Type.Put.getCode() == kv.getTypeByte()) {
- earliestPutTs = Math.min(earliestPutTs, kv.getTimestamp());
+ public void trackTimestamps(final Cell cell) {
+ if (KeyValue.Type.Put.getCode() == cell.getTypeByte()) {
+ earliestPutTs = Math.min(earliestPutTs, cell.getTimestamp());
}
if (!isTimeRangeTrackerSet) {
- timeRangeTracker.includeTimestamp(kv);
+ timeRangeTracker.includeTimestamp(cell);
}
}
- private void appendGeneralBloomfilter(final KeyValue kv) throws IOException {
+ private void appendGeneralBloomfilter(final Cell cell) throws IOException {
if (this.generalBloomFilterWriter != null) {
// only add to the bloom filter on a new, unique key
boolean newKey = true;
- if (this.lastKv != null) {
+ if (this.lastCell != null) {
switch(bloomType) {
case ROW:
- newKey = ! kvComparator.matchingRows(kv, lastKv);
+ newKey = ! kvComparator.matchingRows(cell, lastCell);
break;
case ROWCOL:
- newKey = ! kvComparator.matchingRowColumn(kv, lastKv);
+ newKey = ! kvComparator.matchingRowColumn(cell, lastCell);
break;
case NONE:
newKey = false;
@@ -855,17 +856,17 @@ public class StoreFile {
switch (bloomType) {
case ROW:
- bloomKey = kv.getRowArray();
- bloomKeyOffset = kv.getRowOffset();
- bloomKeyLen = kv.getRowLength();
+ bloomKey = cell.getRowArray();
+ bloomKeyOffset = cell.getRowOffset();
+ bloomKeyLen = cell.getRowLength();
break;
case ROWCOL:
// merge(row, qualifier)
// TODO: could save one buffer copy in case of compound Bloom
// filters when this involves creating a KeyValue
- bloomKey = generalBloomFilterWriter.createBloomKey(kv.getRowArray(),
- kv.getRowOffset(), kv.getRowLength(), kv.getQualifierArray(),
- kv.getQualifierOffset(), kv.getQualifierLength());
+ bloomKey = generalBloomFilterWriter.createBloomKey(cell.getRowArray(),
+ cell.getRowOffset(), cell.getRowLength(), cell.getQualifierArray(),
+ cell.getQualifierOffset(), cell.getQualifierLength());
bloomKeyOffset = 0;
bloomKeyLen = bloomKey.length;
break;
@@ -887,14 +888,14 @@ public class StoreFile {
lastBloomKey = bloomKey;
lastBloomKeyOffset = bloomKeyOffset;
lastBloomKeyLen = bloomKeyLen;
- this.lastKv = kv;
+ this.lastCell = cell;
}
}
}
- private void appendDeleteFamilyBloomFilter(final KeyValue kv)
+ private void appendDeleteFamilyBloomFilter(final Cell cell)
throws IOException {
- if (!CellUtil.isDeleteFamily(kv) && !CellUtil.isDeleteFamilyVersion(kv)) {
+ if (!CellUtil.isDeleteFamily(cell) && !CellUtil.isDeleteFamilyVersion(cell)) {
return;
}
@@ -902,22 +903,22 @@ public class StoreFile {
deleteFamilyCnt++;
if (null != this.deleteFamilyBloomFilterWriter) {
boolean newKey = true;
- if (lastDeleteFamilyKV != null) {
- newKey = !kvComparator.matchingRows(kv, lastDeleteFamilyKV);
+ if (lastDeleteFamilyCell != null) {
+ newKey = !kvComparator.matchingRows(cell, lastDeleteFamilyCell);
}
if (newKey) {
- this.deleteFamilyBloomFilterWriter.add(kv.getRowArray(),
- kv.getRowOffset(), kv.getRowLength());
- this.lastDeleteFamilyKV = kv;
+ this.deleteFamilyBloomFilterWriter.add(cell.getRowArray(),
+ cell.getRowOffset(), cell.getRowLength());
+ this.lastDeleteFamilyCell = cell;
}
}
}
- public void append(final KeyValue kv) throws IOException {
- appendGeneralBloomfilter(kv);
- appendDeleteFamilyBloomFilter(kv);
- writer.append(kv);
- trackTimestamps(kv);
+ public void append(final Cell cell) throws IOException {
+ appendGeneralBloomfilter(cell);
+ appendDeleteFamilyBloomFilter(cell);
+ writer.append(cell);
+ trackTimestamps(cell);
}
public Path getPath() {
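To recap the append path above: each cell first feeds the general Bloom filter, then the delete-family Bloom filter, then the wrapped HFile writer, and finally the time-range tracker. The Bloom dedup decision, restated with comments (same logic as the switch above, assuming the surrounding fields):

boolean newKey = true;
if (lastCell != null) {
  switch (bloomType) {
    case ROW:    newKey = !kvComparator.matchingRows(cell, lastCell); break;      // one entry per row
    case ROWCOL: newKey = !kvComparator.matchingRowColumn(cell, lastCell); break; // one per (row, qualifier)
    case NONE:   newKey = false; break;                                           // Bloom filter disabled
  }
}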
http://git-wip-us.apache.org/repos/asf/hbase/blob/68131674/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StripeMultiFileWriter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StripeMultiFileWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StripeMultiFileWriter.java
index c82497e..39e669e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StripeMultiFileWriter.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StripeMultiFileWriter.java
@@ -26,7 +26,7 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.regionserver.compactions.Compactor;
import org.apache.hadoop.hbase.util.Bytes;
@@ -159,8 +159,8 @@ public abstract class StripeMultiFileWriter implements Compactor.CellSink {
private StoreFile.Writer currentWriter;
private byte[] currentWriterEndKey;
- private KeyValue lastKv;
- private long kvsInCurrentWriter = 0;
+ private Cell lastCell;
+ private long cellsInCurrentWriter = 0;
private int majorRangeFromIndex = -1, majorRangeToIndex = -1;
private boolean hasAnyWriter = false;
@@ -193,21 +193,21 @@ public abstract class StripeMultiFileWriter implements Compactor.CellSink {
}
@Override
- public void append(KeyValue kv) throws IOException {
+ public void append(Cell cell) throws IOException {
if (currentWriter == null && existingWriters.isEmpty()) {
// First append ever, do a sanity check.
sanityCheckLeft(this.boundaries.get(0),
- kv.getRowArray(), kv.getRowOffset(), kv.getRowLength());
+ cell.getRowArray(), cell.getRowOffset(), cell.getRowLength());
}
- prepareWriterFor(kv);
- currentWriter.append(kv);
- lastKv = kv; // for the sanity check
- ++kvsInCurrentWriter;
+ prepareWriterFor(cell);
+ currentWriter.append(cell);
+ lastCell = cell; // for the sanity check
+ ++cellsInCurrentWriter;
}
- private boolean isKvAfterCurrentWriter(KeyValue kv) {
+ private boolean isCellAfterCurrentWriter(Cell cell) {
return ((currentWriterEndKey != StripeStoreFileManager.OPEN_KEY) &&
- (comparator.compareRows(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
+ (comparator.compareRows(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(),
currentWriterEndKey, 0, currentWriterEndKey.length) >= 0));
}
@@ -217,18 +217,18 @@ public abstract class StripeMultiFileWriter implements Compactor.CellSink {
while (existingWriters.size() < boundaries.size() - 1) {
createEmptyWriter();
}
- if (lastKv != null) {
+ if (lastCell != null) {
sanityCheckRight(boundaries.get(boundaries.size() - 1),
- lastKv.getRowArray(), lastKv.getRowOffset(), lastKv.getRowLength());
+ lastCell.getRowArray(), lastCell.getRowOffset(), lastCell.getRowLength());
}
}
- private void prepareWriterFor(KeyValue kv) throws IOException {
- if (currentWriter != null && !isKvAfterCurrentWriter(kv)) return; // Use same writer.
+ private void prepareWriterFor(Cell cell) throws IOException {
+ if (currentWriter != null && !isCellAfterCurrentWriter(cell)) return; // Use same writer.
stopUsingCurrentWriter();
// See if KV will be past the writer we are about to create; need to add another one.
- while (isKvAfterCurrentWriter(kv)) {
+ while (isCellAfterCurrentWriter(cell)) {
checkCanCreateWriter();
createEmptyWriter();
}
@@ -273,9 +273,9 @@ public abstract class StripeMultiFileWriter implements Compactor.CellSink {
if (currentWriter != null) {
if (LOG.isDebugEnabled()) {
LOG.debug("Stopping to use a writer after [" + Bytes.toString(currentWriterEndKey)
- + "] row; wrote out " + kvsInCurrentWriter + " kvs");
+ + "] row; wrote out " + cellsInCurrentWriter + " kvs");
}
- kvsInCurrentWriter = 0;
+ cellsInCurrentWriter = 0;
}
currentWriter = null;
currentWriterEndKey = (existingWriters.size() + 1 == boundaries.size())
@@ -291,16 +291,16 @@ public abstract class StripeMultiFileWriter implements Compactor.CellSink {
*/
public static class SizeMultiWriter extends StripeMultiFileWriter {
private int targetCount;
- private long targetKvs;
+ private long targetCells;
private byte[] left;
private byte[] right;
- private KeyValue lastKv;
+ private Cell lastCell;
private StoreFile.Writer currentWriter;
protected byte[] lastRowInCurrentWriter = null;
- private long kvsInCurrentWriter = 0;
- private long kvsSeen = 0;
- private long kvsSeenInPrevious = 0;
+ private long cellsInCurrentWriter = 0;
+ private long cellsSeen = 0;
+ private long cellsSeenInPrevious = 0;
/**
* @param targetCount The maximum count of writers that can be created.
@@ -311,7 +311,7 @@ public abstract class StripeMultiFileWriter implements Compactor.CellSink {
public SizeMultiWriter(int targetCount, long targetKvs, byte[] left, byte[] right) {
super();
this.targetCount = targetCount;
- this.targetKvs = targetKvs;
+ this.targetCells = targetKvs;
this.left = left;
this.right = right;
int preallocate = Math.min(this.targetCount, 64);
@@ -320,28 +320,28 @@ public abstract class StripeMultiFileWriter implements Compactor.CellSink {
}
@Override
- public void append(KeyValue kv) throws IOException {
+ public void append(Cell cell) throws IOException {
// If we are waiting for opportunity to close and we started writing different row,
// discard the writer and stop waiting.
boolean doCreateWriter = false;
if (currentWriter == null) {
// First append ever, do a sanity check.
- sanityCheckLeft(left, kv.getRowArray(), kv.getRowOffset(), kv.getRowLength());
+ sanityCheckLeft(left, cell.getRowArray(), cell.getRowOffset(), cell.getRowLength());
doCreateWriter = true;
} else if (lastRowInCurrentWriter != null
- && !comparator.matchingRows(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
+ && !comparator.matchingRows(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(),
lastRowInCurrentWriter, 0, lastRowInCurrentWriter.length)) {
if (LOG.isDebugEnabled()) {
LOG.debug("Stopping to use a writer after [" + Bytes.toString(lastRowInCurrentWriter)
- + "] row; wrote out " + kvsInCurrentWriter + " kvs");
+ + "] row; wrote out " + cellsInCurrentWriter + " kvs");
}
lastRowInCurrentWriter = null;
- kvsInCurrentWriter = 0;
- kvsSeenInPrevious += kvsSeen;
+ cellsInCurrentWriter = 0;
+ cellsSeenInPrevious += cellsSeen;
doCreateWriter = true;
}
if (doCreateWriter) {
- byte[] boundary = existingWriters.isEmpty() ? left : kv.getRow(); // make a copy
+ byte[] boundary = existingWriters.isEmpty() ? left : cell.getRow(); // make a copy
if (LOG.isDebugEnabled()) {
LOG.debug("Creating new writer starting at [" + Bytes.toString(boundary) + "]");
}
@@ -350,25 +350,25 @@ public abstract class StripeMultiFileWriter implements Compactor.CellSink {
existingWriters.add(currentWriter);
}
- currentWriter.append(kv);
- lastKv = kv; // for the sanity check
- ++kvsInCurrentWriter;
- kvsSeen = kvsInCurrentWriter;
+ currentWriter.append(cell);
+ lastCell = cell; // for the sanity check
+ ++cellsInCurrentWriter;
+ cellsSeen = cellsInCurrentWriter;
if (this.sourceScanner != null) {
- kvsSeen = Math.max(kvsSeen,
- this.sourceScanner.getEstimatedNumberOfKvsScanned() - kvsSeenInPrevious);
+ cellsSeen = Math.max(cellsSeen,
+ this.sourceScanner.getEstimatedNumberOfKvsScanned() - cellsSeenInPrevious);
}
// If we are not already waiting for opportunity to close, start waiting if we can
// create any more writers and if the current one is too big.
if (lastRowInCurrentWriter == null
&& existingWriters.size() < targetCount
- && kvsSeen >= targetKvs) {
- lastRowInCurrentWriter = kv.getRow(); // make a copy
+ && cellsSeen >= targetCells) {
+ lastRowInCurrentWriter = cell.getRow(); // make a copy
if (LOG.isDebugEnabled()) {
LOG.debug("Preparing to start a new writer after [" + Bytes.toString(
- lastRowInCurrentWriter) + "] row; observed " + kvsSeen + " kvs and wrote out "
- + kvsInCurrentWriter + " kvs");
+ lastRowInCurrentWriter) + "] row; observed " + cellsSeen + " kvs and wrote out "
+ + cellsInCurrentWriter + " kvs");
}
}
}
@@ -376,13 +376,13 @@ public abstract class StripeMultiFileWriter implements Compactor.CellSink {
@Override
protected void commitWritersInternal() throws IOException {
if (LOG.isDebugEnabled()) {
- LOG.debug("Stopping with " + kvsInCurrentWriter + " kvs in last writer" +
+ LOG.debug("Stopping with " + cellsInCurrentWriter + " kvs in last writer" +
((this.sourceScanner == null) ? "" : ("; observed estimated "
+ this.sourceScanner.getEstimatedNumberOfKvsScanned() + " KVs total")));
}
- if (lastKv != null) {
+ if (lastCell != null) {
sanityCheckRight(
- right, lastKv.getRowArray(), lastKv.getRowOffset(), lastKv.getRowLength());
+ right, lastCell.getRowArray(), lastCell.getRowOffset(), lastCell.getRowLength());
}
// When expired stripes were going to be merged into one, and if no writer was created during
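For context, a sketch of constructing the size-based variant shown above, with hypothetical sizing values: at most four output files, rolling after roughly one million cells, over an unbounded key range:

StripeMultiFileWriter.SizeMultiWriter w = new StripeMultiFileWriter.SizeMultiWriter(
    4,                                 // targetCount: cap on writers created
    1000000L,                          // targetKvs: cells per file before rolling
    StripeStoreFileManager.OPEN_KEY,   // left boundary: unbounded
    StripeStoreFileManager.OPEN_KEY);  // right boundary: unbounded

Note that the writer only actually rolls at a row boundary (lastRowInCurrentWriter above), so a single row never straddles two stripe files.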
http://git-wip-us.apache.org/repos/asf/hbase/blob/68131674/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TimeRangeTracker.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TimeRangeTracker.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TimeRangeTracker.java
index 1546c55..8f5585c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TimeRangeTracker.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TimeRangeTracker.java
@@ -23,6 +23,7 @@ import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.Type;
@@ -66,14 +67,14 @@ public class TimeRangeTracker implements Writable {
}
/**
- * Update the current TimestampRange to include the timestamp from KeyValue
+ * Update the current TimestampRange to include the timestamp from the Cell.
* If the Key is of type DeleteColumn or DeleteFamily, it includes the
* entire time range from 0 to timestamp of the key.
- * @param kv the KeyValue to include
+ * @param cell the Cell to include
*/
- public void includeTimestamp(final KeyValue kv) {
- includeTimestamp(kv.getTimestamp());
- if (CellUtil.isDeleteColumnOrFamily(kv)) {
+ public void includeTimestamp(final Cell cell) {
+ includeTimestamp(cell.getTimestamp());
+ if (CellUtil.isDeleteColumnOrFamily(cell)) {
includeTimestamp(0);
}
}
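A sketch of why a delete widens the range to start at zero (timestamps hypothetical): a DeleteFamily at t=100 masks every older version, so a scan restricted to, say, [0, 99] must still read this file:

Cell put = new KeyValue(Bytes.toBytes("r"), Bytes.toBytes("f"),
    Bytes.toBytes("q"), 50L, KeyValue.Type.Put);
Cell deleteFam = new KeyValue(Bytes.toBytes("r"), Bytes.toBytes("f"),
    null, 100L, KeyValue.Type.DeleteFamily);
TimeRangeTracker tracker = new TimeRangeTracker();
tracker.includeTimestamp(put);        // range is now [50, 50]
tracker.includeTimestamp(deleteFam);  // delete -> extra includeTimestamp(0): range [0, 100]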
http://git-wip-us.apache.org/repos/asf/hbase/blob/68131674/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/Compactor.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/Compactor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/Compactor.java
index 7eab275..6e06e5b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/Compactor.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/Compactor.java
@@ -75,11 +75,8 @@ public abstract class Compactor {
HConstants.MIN_KEEP_SEQID_PERIOD), HConstants.MIN_KEEP_SEQID_PERIOD);
}
- /**
- * TODO: Replace this with CellOutputStream when StoreFile.Writer uses cells.
- */
public interface CellSink {
- void append(KeyValue kv) throws IOException;
+ void append(Cell cell) throws IOException;
}
public CompactionProgress getProgress() {
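With this signature change any Cell consumer can serve as a compaction sink; StoreFile.Writer and StripeMultiFileWriter (both updated in this commit) are the in-tree implementations. A minimal hypothetical one:

Compactor.CellSink countingSink = new Compactor.CellSink() {
  private long cells;
  @Override
  public void append(Cell cell) throws IOException {
    cells++; // a real sink would serialize the cell, e.g. into an HFile
  }
};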
http://git-wip-us.apache.org/repos/asf/hbase/blob/68131674/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilterWriter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilterWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilterWriter.java
index f6709bd..e1c09bf 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilterWriter.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilterWriter.java
@@ -155,7 +155,7 @@ public class CompoundBloomFilterWriter extends CompoundBloomFilterBase
* as defined by the comparator this compound Bloom filter is configured
* with. For efficiency, key monotonicity is not checked here. See
* {@link org.apache.hadoop.hbase.regionserver.StoreFile.Writer#append(
- * org.apache.hadoop.hbase.KeyValue)} for the details of deduplication.
+ * org.apache.hadoop.hbase.Cell)} for the details of deduplication.
*/
@Override
public void add(byte[] bloomKey, int keyOffset, int keyLength) {
http://git-wip-us.apache.org/repos/asf/hbase/blob/68131674/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/KeySampler.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/KeySampler.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/KeySampler.java
index 2489029..a4c1a9b 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/KeySampler.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/KeySampler.java
@@ -18,8 +18,8 @@ package org.apache.hadoop.hbase.io.hfile;
import java.util.Random;
-import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.hbase.io.hfile.RandomDistribution.DiscreteRNG;
+import org.apache.hadoop.io.BytesWritable;
/*
* <p>
@@ -37,8 +37,12 @@ class KeySampler {
public KeySampler(Random random, byte [] first, byte [] last,
DiscreteRNG keyLenRNG) {
this.random = random;
- min = keyPrefixToInt(first);
- max = keyPrefixToInt(last);
+ int firstLen = keyPrefixToInt(first);
+ int lastLen = keyPrefixToInt(last);
+ min = Math.min(firstLen, lastLen);
+ max = Math.max(firstLen, lastLen);
+ System.out.println(min);
+ System.out.println(max);
this.keyLenRNG = keyLenRNG;
}
@@ -52,7 +56,11 @@ class KeySampler {
public void next(BytesWritable key) {
key.setSize(Math.max(MIN_KEY_LEN, keyLenRNG.nextInt()));
random.nextBytes(key.get());
- int n = random.nextInt(max - min) + min;
+ int rnd = 0;
+ if (max != min) {
+ rnd = random.nextInt(max - min);
+ }
+ int n = rnd + min;
byte[] b = key.get();
b[0] = (byte) (n >> 24);
b[1] = (byte) (n >> 16);
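The changes above fix a latent bug: java.util.Random.nextInt(bound) requires bound > 0, so the old code threw IllegalArgumentException whenever both key prefixes mapped to the same integer (and it also assumed first <= last). A sketch of the failure and the guard:

Random random = new Random();
int min = 42, max = 42;                // both boundary keys share a 4-byte prefix
// random.nextInt(max - min);          // old path: nextInt(0) throws IllegalArgumentException
int rnd = (max == min) ? 0 : random.nextInt(max - min);
int n = rnd + min;                     // degenerate range now simply yields min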
http://git-wip-us.apache.org/repos/asf/hbase/blob/68131674/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java
index 4b14d14..4523b2a 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java
@@ -370,7 +370,8 @@ public class TestHFile extends HBaseTestCase {
.withOutputStream(fout)
.withFileContext(meta)
.create();
- writer.append("foo".getBytes(), "value".getBytes());
+ KeyValue kv = new KeyValue("foo".getBytes(), "f1".getBytes(), null, "value".getBytes());
+ writer.append(kv);
writer.close();
fout.close();
Reader reader = HFile.createReader(fs, mFile, cacheConf, conf);