Posted to commits@hbase.apache.org by ra...@apache.org on 2015/06/09 08:33:47 UTC
[2/2] hbase git commit: HBASE-13451 - Convert the HFileBlockIndex blockKeys to Cells so that they are easy to use with the CellComparators (Ram)
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/487e4aa7
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/487e4aa7
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/487e4aa7
Branch: refs/heads/master
Commit: 487e4aa74fcc6ef4201f6ffdcfd1a7169c754562
Parents: c62b396
Author: ramkrishna <ra...@gmail.com>
Authored: Tue Jun 9 12:03:01 2015 +0530
Committer: ramkrishna <ra...@gmail.com>
Committed: Tue Jun 9 12:03:01 2015 +0530
----------------------------------------------------------------------
.../java/org/apache/hadoop/hbase/CellUtil.java | 6 +-
.../org/apache/hadoop/hbase/util/Bytes.java | 46 +-
.../hadoop/hbase/io/HalfStoreFileReader.java | 10 +-
.../hbase/io/hfile/CompoundBloomFilter.java | 201 +++++++
.../hbase/io/hfile/CompoundBloomFilterBase.java | 71 +++
.../io/hfile/CompoundBloomFilterWriter.java | 277 ++++++++++
.../org/apache/hadoop/hbase/io/hfile/HFile.java | 4 +-
.../hadoop/hbase/io/hfile/HFileBlockIndex.java | 523 ++++++++++++-------
.../hbase/io/hfile/HFilePrettyPrinter.java | 2 +-
.../hadoop/hbase/io/hfile/HFileReaderImpl.java | 26 +-
.../hbase/regionserver/HRegionFileSystem.java | 5 +-
.../hadoop/hbase/regionserver/HStore.java | 9 +-
.../hadoop/hbase/regionserver/StoreFile.java | 18 +-
.../hadoop/hbase/util/BloomFilterChunk.java | 2 +-
.../hadoop/hbase/util/BloomFilterFactory.java | 3 +
.../hadoop/hbase/util/CompoundBloomFilter.java | 197 -------
.../hbase/util/CompoundBloomFilterBase.java | 70 ---
.../hbase/util/CompoundBloomFilterWriter.java | 276 ----------
.../org/apache/hadoop/hbase/util/HBaseFsck.java | 8 +-
.../hbase/io/TestHalfStoreFileReader.java | 12 +-
.../hbase/io/hfile/TestHFileBlockIndex.java | 13 +-
.../hadoop/hbase/io/hfile/TestHFileSeek.java | 2 +-
.../hbase/io/hfile/TestHFileWriterV2.java | 8 +-
.../hbase/io/hfile/TestHFileWriterV3.java | 8 +-
.../regionserver/TestCompoundBloomFilter.java | 5 +-
.../hbase/regionserver/TestStoreFile.java | 10 +-
26 files changed, 969 insertions(+), 843 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java
----------------------------------------------------------------------
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java
index 7ddcfe6..f276449 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java
@@ -770,8 +770,10 @@ public final class CellUtil {
sb.append(KeyValue.humanReadableTimestamp(cell.getTimestamp()));
sb.append('/');
sb.append(Type.codeToType(cell.getTypeByte()));
- sb.append("/vlen=");
- sb.append(cell.getValueLength());
+ if (!(cell instanceof KeyValue.KeyOnlyKeyValue)) {
+ sb.append("/vlen=");
+ sb.append(cell.getValueLength());
+ }
sb.append("/seqid=");
sb.append(cell.getSequenceId());
return sb.toString();
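[Editor's note] This guard matters because a KeyValue.KeyOnlyKeyValue wraps only the serialized key portion of a cell, so it has no value whose length could be printed. A minimal sketch of the resulting behavior, using the method this hunk belongs to (CellUtil.getCellKeyAsString); the demo class itself is hypothetical:

    import org.apache.hadoop.hbase.CellUtil;
    import org.apache.hadoop.hbase.KeyValue;
    import org.apache.hadoop.hbase.util.Bytes;

    public class KeyOnlyToStringDemo {
      public static void main(String[] args) {
        KeyValue kv = new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("cf"),
            Bytes.toBytes("q"), Bytes.toBytes("value"));
        // A key-only view holds just the serialized key bytes; there is no value.
        KeyValue.KeyOnlyKeyValue keyOnly = new KeyValue.KeyOnlyKeyValue(kv.getKey());
        System.out.println(CellUtil.getCellKeyAsString(kv));      // includes /vlen=5
        System.out.println(CellUtil.getCellKeyAsString(keyOnly)); // no /vlen component
      }
    }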
http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
----------------------------------------------------------------------
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
index 7d678fd..9ac6912 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
@@ -2075,24 +2075,6 @@ public class Bytes implements Comparable<Bytes> {
}
/**
- * Binary search for keys in indexes using Bytes.BYTES_RAWCOMPARATOR
- *
- * @param arr array of byte arrays to search for
- * @param key the key you want to find
- * @param offset the offset in the key you want to find
- * @param length the length of the key
- * @return zero-based index of the key, if the key is present in the array.
- * Otherwise, a value -(i + 1) such that the key is between arr[i -
- * 1] and arr[i] non-inclusively, where i is in [0, i], if we define
- * arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above
- * means that this function can return 2N + 1 different values
- * ranging from -(N + 1) to N - 1.
- */
- public static int binarySearch(byte[][] arr, byte[] key, int offset, int length) {
- return binarySearch(arr, key, offset, length, (CellComparator) null);
- }
-
- /**
* Binary search for keys in indexes.
*
* @param arr array of byte arrays to search for
@@ -2111,17 +2093,16 @@ public class Bytes implements Comparable<Bytes> {
@Deprecated
public static int binarySearch(byte [][]arr, byte []key, int offset,
int length, RawComparator<?> comparator) {
- return binarySearch(arr, key, offset, length, (CellComparator)null);
+ return binarySearch(arr, key, offset, length);
}
/**
- * Binary search for keys in indexes.
+ * Binary search for keys in indexes using Bytes.BYTES_RAWCOMPARATOR.
*
* @param arr array of byte arrays to search for
* @param key the key you want to find
* @param offset the offset in the key you want to find
* @param length the length of the key
- * @param comparator a comparator to compare.
* @return zero-based index of the key, if the key is present in the array.
* Otherwise, a value -(i + 1) such that the key is between arr[i -
* 1] and arr[i] non-inclusively, where i is in [0, N], if we define
@@ -2129,23 +2110,18 @@ public class Bytes implements Comparable<Bytes> {
* means that this function can return 2N + 1 different values
* ranging from -(N + 1) to N - 1.
*/
- public static int binarySearch(byte [][]arr, byte []key, int offset,
- int length, CellComparator comparator) {
+ public static int binarySearch(byte[][] arr, byte[] key, int offset, int length) {
int low = 0;
int high = arr.length - 1;
KeyValue.KeyOnlyKeyValue r = new KeyValue.KeyOnlyKeyValue();
r.setKey(key, offset, length);
while (low <= high) {
- int mid = (low+high) >>> 1;
+ int mid = (low + high) >>> 1;
// we have to compare in this order, because the comparator order
// has special logic when the 'left side' is a special key.
- int cmp = 0;
- if (comparator != null) {
- cmp = comparator.compare(r, arr[mid], 0, arr[mid].length);
- } else {
- cmp = Bytes.BYTES_RAWCOMPARATOR.compare(key, offset, length, arr[mid], 0, arr[mid].length);
- }
+ int cmp = Bytes.BYTES_RAWCOMPARATOR
+ .compare(key, offset, length, arr[mid], 0, arr[mid].length);
// key lives above the midpoint
if (cmp > 0)
low = mid + 1;
@@ -2156,7 +2132,7 @@ public class Bytes implements Comparable<Bytes> {
else
return mid;
}
- return - (low+1);
+ return -(low + 1);
}
/**
@@ -2172,7 +2148,7 @@ public class Bytes implements Comparable<Bytes> {
* means that this function can return 2N + 1 different values
* ranging from -(N + 1) to N - 1.
* @return the index of the block
- * @deprecated Use {@link Bytes#binarySearch(byte[][], Cell, Comparator)}
+ * @deprecated Use {@link Bytes#binarySearch(Cell[], Cell, CellComparator)}
*/
@Deprecated
public static int binarySearch(byte[][] arr, Cell key, RawComparator<Cell> comparator) {
@@ -2212,16 +2188,14 @@ public class Bytes implements Comparable<Bytes> {
* ranging from -(N + 1) to N - 1.
* @return the index of the block
*/
- public static int binarySearch(byte[][] arr, Cell key, Comparator<Cell> comparator) {
+ public static int binarySearch(Cell[] arr, Cell key, CellComparator comparator) {
int low = 0;
int high = arr.length - 1;
- KeyValue.KeyOnlyKeyValue r = new KeyValue.KeyOnlyKeyValue();
while (low <= high) {
int mid = (low+high) >>> 1;
// we have to compare in this order, because the comparator order
// has special logic when the 'left side' is a special key.
- r.setKey(arr[mid], 0, arr[mid].length);
- int cmp = comparator.compare(key, r);
+ int cmp = comparator.compare(key, arr[mid]);
// key lives above the midpoint
if (cmp > 0)
low = mid + 1;
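[Editor's note] Both surviving binarySearch overloads keep the same return-value contract: a non-negative index on an exact hit, otherwise -(i + 1) where i is the insertion point. A small sketch (the wrapper class is hypothetical) of decoding that value into a "containing block" index, exactly as rootBlockContainingKey does later in this commit:

    import org.apache.hadoop.hbase.util.Bytes;

    public class BinarySearchDemo {
      public static void main(String[] args) {
        byte[][] blockKeys = { Bytes.toBytes("b"), Bytes.toBytes("d"), Bytes.toBytes("f") };
        byte[] key = Bytes.toBytes("e");
        int pos = Bytes.binarySearch(blockKeys, key, 0, key.length);
        // pos >= 0: exact match at pos. pos < 0: pos == -(i + 1) for
        // insertion point i, so the block containing the key is i - 1.
        int block = (pos >= 0) ? pos : (-pos - 1) - 1;
        System.out.println(block); // 1, since "d" <= "e" < "f"
      }
    }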
http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java
index 78c6734..7a4a333 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java
@@ -59,7 +59,7 @@ public class HalfStoreFileReader extends StoreFile.Reader {
protected final Cell splitCell;
- private byte[] firstKey = null;
+ private Cell firstKey = null;
private boolean firstKeySeeked = false;
@@ -262,7 +262,7 @@ public class HalfStoreFileReader extends StoreFile.Reader {
@Override
public boolean seekBefore(Cell key) throws IOException {
if (top) {
- Cell fk = new KeyValue.KeyOnlyKeyValue(getFirstKey(), 0, getFirstKey().length);
+ Cell fk = getFirstKey();
if (getComparator().compareKeyIgnoresMvcc(key, fk) <= 0) {
return false;
}
@@ -319,18 +319,18 @@ public class HalfStoreFileReader extends StoreFile.Reader {
}
@Override
- public byte[] midkey() throws IOException {
+ public Cell midkey() throws IOException {
// Returns null to indicate file is not splittable.
return null;
}
@Override
- public byte[] getFirstKey() {
+ public Cell getFirstKey() {
if (!firstKeySeeked) {
HFileScanner scanner = getScanner(true, true, false);
try {
if (scanner.seekTo()) {
- this.firstKey = Bytes.toBytes(scanner.getKey());
+ this.firstKey = new KeyValue.KeyOnlyKeyValue(Bytes.toBytes(scanner.getKey()));
}
firstKeySeeked = true;
} catch (IOException e) {
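[Editor's note] The pattern above — copy the scanner's serialized key once and wrap it as a key-only Cell — is what lets the rest of the reader compare against firstKey without re-wrapping on every call. A minimal sketch of the wrapping step, assuming a ByteBuffer such as HFileScanner.getKey() returns:

    import java.nio.ByteBuffer;

    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.KeyValue;
    import org.apache.hadoop.hbase.util.Bytes;

    public class KeyWrapSketch {
      static Cell wrapSerializedKey(ByteBuffer serializedKey) {
        byte[] keyBytes = Bytes.toBytes(serializedKey); // copies the buffer contents
        // A key-only Cell over the serialized key; no value bytes are attached.
        return new KeyValue.KeyOnlyKeyValue(keyBytes, 0, keyBytes.length);
      }
    }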
http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CompoundBloomFilter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CompoundBloomFilter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CompoundBloomFilter.java
new file mode 100644
index 0000000..11436ce
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CompoundBloomFilter.java
@@ -0,0 +1,201 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.io.DataInput;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.util.BloomFilter;
+import org.apache.hadoop.hbase.util.BloomFilterUtil;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Hash;
+
+/**
+ * A Bloom filter implementation built on top of
+ * {@link org.apache.hadoop.hbase.util.BloomFilterChunk}, encapsulating
+ * a set of fixed-size Bloom filters written out at the time of
+ * {@link org.apache.hadoop.hbase.io.hfile.HFile} generation into the data
+ * block stream, and loaded on demand at query time. This class only provides
+ * reading capabilities.
+ */
+@InterfaceAudience.Private
+public class CompoundBloomFilter extends CompoundBloomFilterBase
+ implements BloomFilter {
+
+ /** Used to load chunks on demand */
+ private HFile.Reader reader;
+
+ private HFileBlockIndex.BlockIndexReader index;
+
+ private int hashCount;
+ private Hash hash;
+
+ private long[] numQueriesPerChunk;
+ private long[] numPositivesPerChunk;
+
+ /**
+ * De-serialization for compound Bloom filter metadata. Must be consistent
+ * with what {@link CompoundBloomFilterWriter} does.
+ *
+ * @param meta serialized Bloom filter metadata without any magic blocks
+ * @throws IOException
+ */
+ public CompoundBloomFilter(DataInput meta, HFile.Reader reader)
+ throws IOException {
+ this.reader = reader;
+
+ totalByteSize = meta.readLong();
+ hashCount = meta.readInt();
+ hashType = meta.readInt();
+ totalKeyCount = meta.readLong();
+ totalMaxKeys = meta.readLong();
+ numChunks = meta.readInt();
+ byte[] comparatorClassName = Bytes.readByteArray(meta);
+ // The writer will have written a zero-length vint for the comparator class
+ // name when Bytes.BYTES_RAWCOMPARATOR was used. In that case do not
+ // initialize the comparator; leave it null
+ if (comparatorClassName.length != 0) {
+ comparator = FixedFileTrailer.createComparator(Bytes.toString(comparatorClassName));
+ }
+
+ hash = Hash.getInstance(hashType);
+ if (hash == null) {
+ throw new IllegalArgumentException("Invalid hash type: " + hashType);
+ }
+ // The comparator is null for ROW blooms, where plain byte[] keys suffice
+ if (comparator == null) {
+ index = new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1);
+ } else {
+ index = new HFileBlockIndex.CellBasedKeyBlockIndexReader(comparator, 1);
+ }
+ index.readRootIndex(meta, numChunks);
+ }
+
+ @Override
+ public boolean contains(byte[] key, int keyOffset, int keyLength, ByteBuffer bloom) {
+ // Find the chunk that could contain this key, then delegate to
+ // checkContains(), which loads the chunk and updates the per-chunk test stats.
+
+ int block = index.rootBlockContainingKey(key, keyOffset,
+ keyLength);
+ return checkContains(key, keyOffset, keyLength, block);
+ }
+
+ private boolean checkContains(byte[] key, int keyOffset, int keyLength, int block) {
+ boolean result;
+ if (block < 0) {
+ result = false; // This key is not in the file.
+ } else {
+ HFileBlock bloomBlock;
+ try {
+ // We cache the block and use a positional read.
+ bloomBlock = reader.readBlock(index.getRootBlockOffset(block),
+ index.getRootBlockDataSize(block), true, true, false, true,
+ BlockType.BLOOM_CHUNK, null);
+ } catch (IOException ex) {
+ // The Bloom filter is broken, turn it off.
+ throw new IllegalArgumentException(
+ "Failed to load Bloom block for key "
+ + Bytes.toStringBinary(key, keyOffset, keyLength), ex);
+ }
+
+ ByteBuffer bloomBuf = bloomBlock.getBufferReadOnly();
+ result = BloomFilterUtil.contains(key, keyOffset, keyLength,
+ bloomBuf, bloomBlock.headerSize(),
+ bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount);
+ }
+
+ if (numQueriesPerChunk != null && block >= 0) {
+ // Update statistics. Only used in unit tests.
+ ++numQueriesPerChunk[block];
+ if (result)
+ ++numPositivesPerChunk[block];
+ }
+
+ return result;
+ }
+
+ @Override
+ public boolean contains(Cell keyCell, ByteBuffer bloom) {
+ // Find the chunk for this key, then delegate to checkContains(), which
+ // loads the chunk and updates the per-chunk test stats.
+ int block = index.rootBlockContainingKey(keyCell);
+ // TODO : The cell is a concrete KeyValue for now.
+ // When off-heap cells come in we can add an else branch that works
+ // on the off-heap bytes directly
+ KeyValue kvKey = (KeyValue) keyCell;
+ return checkContains(kvKey.getBuffer(), kvKey.getKeyOffset(), kvKey.getKeyLength(), block);
+ }
+
+ public boolean supportsAutoLoading() {
+ return true;
+ }
+
+ public int getNumChunks() {
+ return numChunks;
+ }
+
+ public void enableTestingStats() {
+ numQueriesPerChunk = new long[numChunks];
+ numPositivesPerChunk = new long[numChunks];
+ }
+
+ public String formatTestingStats() {
+ StringBuilder sb = new StringBuilder();
+ for (int i = 0; i < numChunks; ++i) {
+ sb.append("chunk #");
+ sb.append(i);
+ sb.append(": queries=");
+ sb.append(numQueriesPerChunk[i]);
+ sb.append(", positives=");
+ sb.append(numPositivesPerChunk[i]);
+ sb.append(", positiveRatio=");
+ sb.append(numPositivesPerChunk[i] * 1.0 / numQueriesPerChunk[i]);
+ sb.append(";\n");
+ }
+ return sb.toString();
+ }
+
+ public long getNumQueriesForTesting(int chunk) {
+ return numQueriesPerChunk[chunk];
+ }
+
+ public long getNumPositivesForTesting(int chunk) {
+ return numPositivesPerChunk[chunk];
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append(BloomFilterUtil.formatStats(this));
+ sb.append(BloomFilterUtil.STATS_RECORD_SEP +
+ "Number of chunks: " + numChunks);
+ sb.append(BloomFilterUtil.STATS_RECORD_SEP +
+ ((comparator != null) ? "Comparator: "
+ + comparator.getClass().getSimpleName() : "Comparator: "
+ + Bytes.BYTES_RAWCOMPARATOR.getClass().getSimpleName()));
+ return sb.toString();
+ }
+
+}
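[Editor's note] The query path is: rootBlockContainingKey() picks the chunk via the single-level block index, readBlock() loads that chunk (from cache when possible), and BloomFilterUtil.contains() tests membership within it. A minimal caller sketch for a ROW bloom, assuming a filter already deserialized from the HFile's bloom meta; the null ByteBuffer argument follows the convention for self-loading filters (supportsAutoLoading() returns true):

    import org.apache.hadoop.hbase.util.BloomFilter;

    public class BloomLookupSketch {
      // May return a false positive, never a false negative.
      static boolean mightContainRow(BloomFilter bloom, byte[] row) {
        return bloom.contains(row, 0, row.length, null);
      }
    }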
http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CompoundBloomFilterBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CompoundBloomFilterBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CompoundBloomFilterBase.java
new file mode 100644
index 0000000..c42ee20
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CompoundBloomFilterBase.java
@@ -0,0 +1,71 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.io.hfile;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.util.BloomFilterBase;
+
+import org.apache.hadoop.hbase.CellComparator;
+
+@InterfaceAudience.Private
+public class CompoundBloomFilterBase implements BloomFilterBase {
+
+ /**
+ * At read time, the total number of chunks. At write time, the number of
+ * chunks created so far. The first chunk has an ID of 0, and the current
+ * chunk has the ID of numChunks - 1.
+ */
+ protected int numChunks;
+
+ /**
+ * The Bloom filter version. There used to be a DynamicByteBloomFilter which
+ * had version 2.
+ */
+ public static final int VERSION = 3;
+
+ /** Target error rate for configuring the filter and for information */
+ protected float errorRate;
+
+ /** The total number of keys in all chunks */
+ protected long totalKeyCount;
+ protected long totalByteSize;
+ protected long totalMaxKeys;
+
+ /** Hash function type to use, as defined in {@link org.apache.hadoop.hbase.util.Hash} */
+ protected int hashType;
+ /** Comparator used to compare Bloom filter keys */
+ protected CellComparator comparator;
+
+ @Override
+ public long getMaxKeys() {
+ return totalMaxKeys;
+ }
+
+ @Override
+ public long getKeyCount() {
+ return totalKeyCount;
+ }
+
+ @Override
+ public long getByteSize() {
+ return totalByteSize;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CompoundBloomFilterWriter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CompoundBloomFilterWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CompoundBloomFilterWriter.java
new file mode 100644
index 0000000..cd60562
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CompoundBloomFilterWriter.java
@@ -0,0 +1,277 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.LinkedList;
+import java.util.Queue;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.CellComparator;
+import org.apache.hadoop.hbase.util.BloomFilterChunk;
+import org.apache.hadoop.hbase.util.BloomFilterUtil;
+import org.apache.hadoop.hbase.util.BloomFilterWriter;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * Adds methods required for writing a compound Bloom filter to the data
+ * section of an {@link org.apache.hadoop.hbase.io.hfile.HFile} to the
+ * {@link CompoundBloomFilter} class.
+ */
+@InterfaceAudience.Private
+public class CompoundBloomFilterWriter extends CompoundBloomFilterBase
+ implements BloomFilterWriter, InlineBlockWriter {
+
+ private static final Log LOG =
+ LogFactory.getLog(CompoundBloomFilterWriter.class);
+
+ /** The current chunk being written to */
+ private BloomFilterChunk chunk;
+
+ /** Previous chunk, so that we can create another similar chunk */
+ private BloomFilterChunk prevChunk;
+
+ /** Maximum fold factor */
+ private int maxFold;
+
+ /** The size of individual Bloom filter chunks to create */
+ private int chunkByteSize;
+
+ /** A Bloom filter chunk enqueued for writing */
+ private static class ReadyChunk {
+ int chunkId;
+ byte[] firstKey;
+ BloomFilterChunk chunk;
+ }
+
+ private Queue<ReadyChunk> readyChunks = new LinkedList<ReadyChunk>();
+
+ /** The first key in the current Bloom filter chunk. */
+ private byte[] firstKeyInChunk = null;
+
+ private HFileBlockIndex.BlockIndexWriter bloomBlockIndexWriter =
+ new HFileBlockIndex.BlockIndexWriter();
+
+ /** Whether to cache-on-write compound Bloom filter chunks */
+ private boolean cacheOnWrite;
+
+ /**
+ * @param chunkByteSizeHint
+ * each chunk's size in bytes. The real chunk size might be different
+ * as required by the fold factor.
+ * @param errorRate
+ * target false positive rate
+ * @param hashType
+ * hash function type to use
+ * @param maxFold
+ * maximum degree of folding allowed
+ */
+ public CompoundBloomFilterWriter(int chunkByteSizeHint, float errorRate,
+ int hashType, int maxFold, boolean cacheOnWrite,
+ CellComparator comparator) {
+ chunkByteSize = BloomFilterUtil.computeFoldableByteSize(
+ chunkByteSizeHint * 8L, maxFold);
+
+ this.errorRate = errorRate;
+ this.hashType = hashType;
+ this.maxFold = maxFold;
+ this.cacheOnWrite = cacheOnWrite;
+ this.comparator = comparator;
+ }
+
+ @Override
+ public boolean shouldWriteBlock(boolean closing) {
+ enqueueReadyChunk(closing);
+ return !readyChunks.isEmpty();
+ }
+
+ /**
+ * Enqueue the current chunk if it is ready to be written out.
+ *
+ * @param closing true if we are closing the file, so we do not expect new
+ * keys to show up
+ */
+ private void enqueueReadyChunk(boolean closing) {
+ if (chunk == null ||
+ (chunk.getKeyCount() < chunk.getMaxKeys() && !closing)) {
+ return;
+ }
+
+ if (firstKeyInChunk == null) {
+ throw new NullPointerException("Trying to enqueue a chunk, " +
+ "but first key is null: closing=" + closing + ", keyCount=" +
+ chunk.getKeyCount() + ", maxKeys=" + chunk.getMaxKeys());
+ }
+
+ ReadyChunk readyChunk = new ReadyChunk();
+ readyChunk.chunkId = numChunks - 1;
+ readyChunk.chunk = chunk;
+ readyChunk.firstKey = firstKeyInChunk;
+ readyChunks.add(readyChunk);
+
+ long prevMaxKeys = chunk.getMaxKeys();
+ long prevByteSize = chunk.getByteSize();
+
+ chunk.compactBloom();
+
+ if (LOG.isTraceEnabled() && prevByteSize != chunk.getByteSize()) {
+ LOG.trace("Compacted Bloom chunk #" + readyChunk.chunkId + " from ["
+ + prevMaxKeys + " max keys, " + prevByteSize + " bytes] to ["
+ + chunk.getMaxKeys() + " max keys, " + chunk.getByteSize()
+ + " bytes]");
+ }
+
+ totalMaxKeys += chunk.getMaxKeys();
+ totalByteSize += chunk.getByteSize();
+
+ firstKeyInChunk = null;
+ prevChunk = chunk;
+ chunk = null;
+ }
+
+ /**
+ * Adds a Bloom filter key. This key must be greater than the previous key,
+ * as defined by the comparator this compound Bloom filter is configured
+ * with. For efficiency, key monotonicity is not checked here. See
+ * {@link org.apache.hadoop.hbase.regionserver.StoreFile.Writer#append(
+ * org.apache.hadoop.hbase.Cell)} for the details of deduplication.
+ */
+ @Override
+ public void add(byte[] bloomKey, int keyOffset, int keyLength) {
+ if (bloomKey == null)
+ throw new NullPointerException();
+
+ enqueueReadyChunk(false);
+
+ if (chunk == null) {
+ if (firstKeyInChunk != null) {
+ throw new IllegalStateException("First key in chunk already set: "
+ + Bytes.toStringBinary(firstKeyInChunk));
+ }
+ firstKeyInChunk = Arrays.copyOfRange(bloomKey, keyOffset, keyOffset
+ + keyLength);
+
+ if (prevChunk == null) {
+ // First chunk
+ chunk = BloomFilterUtil.createBySize(chunkByteSize, errorRate,
+ hashType, maxFold);
+ } else {
+ // Use the same parameters as the last chunk, but a new array and
+ // a zero key count.
+ chunk = prevChunk.createAnother();
+ }
+
+ if (chunk.getKeyCount() != 0) {
+ throw new IllegalStateException("keyCount=" + chunk.getKeyCount()
+ + " > 0");
+ }
+
+ chunk.allocBloom();
+ ++numChunks;
+ }
+
+ chunk.add(bloomKey, keyOffset, keyLength);
+ ++totalKeyCount;
+ }
+
+ @Override
+ public void writeInlineBlock(DataOutput out) throws IOException {
+ // We don't remove the chunk from the queue here, because we might need it
+ // again for cache-on-write.
+ ReadyChunk readyChunk = readyChunks.peek();
+
+ BloomFilterChunk readyChunkBloom = readyChunk.chunk;
+ readyChunkBloom.writeBloom(out);
+ }
+
+ @Override
+ public void blockWritten(long offset, int onDiskSize, int uncompressedSize) {
+ ReadyChunk readyChunk = readyChunks.remove();
+ bloomBlockIndexWriter.addEntry(readyChunk.firstKey, offset, onDiskSize);
+ }
+
+ @Override
+ public BlockType getInlineBlockType() {
+ return BlockType.BLOOM_CHUNK;
+ }
+
+ private class MetaWriter implements Writable {
+ protected MetaWriter() {}
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ throw new IOException("Can't read with this class.");
+ }
+
+ /**
+ * This is modeled after {@link BloomFilterChunk.MetaWriter} for simplicity,
+ * although the two metadata formats do not have to be consistent. This
+ * does have to be consistent with how {@link
+ * CompoundBloomFilter#CompoundBloomFilter(DataInput,
+ * org.apache.hadoop.hbase.io.hfile.HFile.Reader)} reads fields.
+ */
+ @Override
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(VERSION);
+
+ out.writeLong(getByteSize());
+ out.writeInt(prevChunk.getHashCount());
+ out.writeInt(prevChunk.getHashType());
+ out.writeLong(getKeyCount());
+ out.writeLong(getMaxKeys());
+
+ // Fields that don't have equivalents in ByteBloomFilter.
+ out.writeInt(numChunks);
+ if (comparator != null) {
+ Bytes.writeByteArray(out, Bytes.toBytes(comparator.getClass().getName()));
+ } else {
+ // Internally writes a 0 vint if the byte[] is null
+ Bytes.writeByteArray(out, null);
+ }
+
+ // Write a single-level index without compression or block header.
+ bloomBlockIndexWriter.writeSingleLevelIndex(out, "Bloom filter");
+ }
+ }
+
+ @Override
+ public void compactBloom() {
+ }
+
+ @Override
+ public Writable getMetaWriter() {
+ return new MetaWriter();
+ }
+
+ @Override
+ public Writable getDataWriter() {
+ return null;
+ }
+
+ @Override
+ public boolean getCacheOnWrite() {
+ return cacheOnWrite;
+ }
+}
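[Editor's note] The writer participates in the HFile's inline-block protocol: the block writer polls shouldWriteBlock(), streams one ready chunk with writeInlineBlock(), then reports the final offset and size back through blockWritten(), which is when the chunk's first key is added to the Bloom block index. A hypothetical driver loop illustrating that contract (the stream handling is simplified; the real HFile writer goes through HFileBlock.Writer):

    import java.io.DataOutputStream;
    import java.io.IOException;

    import org.apache.hadoop.hbase.io.hfile.InlineBlockWriter;

    public class InlineBlockFlushSketch {
      static void flushReadyChunks(InlineBlockWriter w, DataOutputStream out,
          boolean closing) throws IOException {
        while (w.shouldWriteBlock(closing)) {
          int offset = out.size();          // where this inline block starts
          w.writeInlineBlock(out);          // serialize one ready Bloom chunk
          int onDiskSize = out.size() - offset;
          // blockWritten() dequeues the chunk and indexes it by first key.
          w.blockWritten(offset, onDiskSize, onDiskSize);
        }
      }
    }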
http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
index 6c8260d..35458a2 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
@@ -394,13 +394,13 @@ public class HFile {
byte[] getLastKey();
- byte[] midkey() throws IOException;
+ Cell midkey() throws IOException;
long length();
long getEntries();
- byte[] getFirstKey();
+ Cell getFirstKey();
long indexSize();
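[Editor's note] Callers of HFile.Reader now get Cells back instead of serialized keys. A sketch of an adapted caller, using only methods shown in this commit (midkey() may legitimately return null, e.g. from HalfStoreFileReader above); the demo class is hypothetical:

    import java.io.IOException;

    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.CellUtil;
    import org.apache.hadoop.hbase.io.hfile.HFile;

    public class ReaderKeySketch {
      static String describe(HFile.Reader reader) throws IOException {
        Cell first = reader.getFirstKey();
        Cell mid = reader.midkey();
        return "first=" + (first == null ? "<empty file>" : CellUtil.getCellKeyAsString(first))
            + ", mid=" + (mid == null ? "<not splittable>" : CellUtil.getCellKeyAsString(mid));
      }
    }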
http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
index e6e1fff..50a5baa 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
@@ -37,6 +37,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
+import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
@@ -51,10 +52,11 @@ import org.apache.hadoop.util.StringUtils;
/**
* Provides functionality to write ({@link BlockIndexWriter}) and read
- * ({@link BlockIndexReader}) single-level and multi-level block indexes.
+ * ({@link org.apache.hadoop.hbase.io.hfile.BlockIndexReader})
+ * single-level and multi-level block indexes.
*
* Examples of how to use the block index writer can be found in
- * {@link org.apache.hadoop.hbase.util.CompoundBloomFilterWriter} and
+ * {@link org.apache.hadoop.hbase.io.hfile.CompoundBloomFilterWriter} and
* {@link HFileWriterImpl}. Examples of how to use the reader can be
* found in {@link HFileWriterImpl} and TestHFileBlockIndex.
*/
@@ -96,124 +98,179 @@ public class HFileBlockIndex {
2 * Bytes.SIZEOF_INT;
/**
- * The reader will always hold the root level index in the memory. Index
- * blocks at all other levels will be cached in the LRU cache in practice,
- * although this API does not enforce that.
- *
- * All non-root (leaf and intermediate) index blocks contain what we call a
- * "secondary index": an array of offsets to the entries within the block.
- * This allows us to do binary search for the entry corresponding to the
- * given key without having to deserialize the block.
+ * An implementation of the BlockIndexReader that deals with block keys that are plain
+ * byte[], like the meta block index or the Bloom chunk index for ROW blooms.
+ * It does not need a comparator; it works with Bytes.BYTES_RAWCOMPARATOR
*/
- public static class BlockIndexReader implements HeapSize {
- /** Needed doing lookup on blocks. */
- private final CellComparator comparator;
+ static class ByteArrayKeyBlockIndexReader extends BlockIndexReader {
- // Root-level data.
- // TODO : Convert these to Cells (ie) KeyValue.KeyOnlyKV
private byte[][] blockKeys;
- private long[] blockOffsets;
- private int[] blockDataSizes;
- private int rootCount = 0;
- // Mid-key metadata.
- private long midLeafBlockOffset = -1;
- private int midLeafBlockOnDiskSize = -1;
- private int midKeyEntry = -1;
+ public ByteArrayKeyBlockIndexReader(final int treeLevel,
+ final CachingBlockReader cachingBlockReader) {
+ this(treeLevel);
+ this.cachingBlockReader = cachingBlockReader;
+ }
- /** Pre-computed mid-key */
- private AtomicReference<byte[]> midKey = new AtomicReference<byte[]>();
+ public ByteArrayKeyBlockIndexReader(final int treeLevel) {
+ // Can be null for METAINDEX block
+ searchTreeLevel = treeLevel;
+ }
+
+ protected long calculateHeapSizeForBlockKeys(long heapSize) {
+ // Calculating the size of blockKeys
+ if (blockKeys != null) {
+ heapSize += ClassSize.REFERENCE;
+ // Adding array + references overhead
+ heapSize += ClassSize.align(ClassSize.ARRAY + blockKeys.length * ClassSize.REFERENCE);
+
+ // Adding bytes
+ for (byte[] key : blockKeys) {
+ heapSize += ClassSize.align(ClassSize.ARRAY + key.length);
+ }
+ }
+ return heapSize;
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return blockKeys.length == 0;
+ }
/**
- * The number of levels in the block index tree. One if there is only root
- * level, two for root and leaf levels, etc.
+ * @param i
+ * from 0 to {@link #getRootBlockCount() - 1}
*/
- private int searchTreeLevel;
+ public byte[] getRootBlockKey(int i) {
+ return blockKeys[i];
+ }
- /** A way to read {@link HFile} blocks at a given offset */
- private CachingBlockReader cachingBlockReader;
+ @Override
+ public BlockWithScanInfo loadDataBlockWithScanInfo(Cell key, HFileBlock currentBlock,
+ boolean cacheBlocks, boolean pread, boolean isCompaction,
+ DataBlockEncoding expectedDataBlockEncoding) throws IOException {
+ // Not needed for the byte[]-keyed index; it only serves meta and Bloom chunk lookups
+ return null;
+ }
+
+ @Override
+ public Cell midkey() throws IOException {
+ // A mid-key is not defined for the meta/Bloom chunk index
+ return null;
+ }
+
+ protected void initialize(int numEntries) {
+ blockKeys = new byte[numEntries][];
+ }
+
+ protected void add(final byte[] key, final long offset, final int dataSize) {
+ blockOffsets[rootCount] = offset;
+ blockKeys[rootCount] = key;
+ blockDataSizes[rootCount] = dataSize;
+ rootCount++;
+ }
+
+ @Override
+ public int rootBlockContainingKey(byte[] key, int offset, int length, CellComparator comp) {
+ int pos = Bytes.binarySearch(blockKeys, key, offset, length);
+ // pos is between -(blockKeys.length + 1) to blockKeys.length - 1, see
+ // binarySearch's javadoc.
+
+ if (pos >= 0) {
+ // This means this is an exact match with an element of blockKeys.
+ assert pos < blockKeys.length;
+ return pos;
+ }
+
+ // Otherwise, pos = -(i + 1), where blockKeys[i - 1] < key < blockKeys[i],
+ // and i is in [0, blockKeys.length]. We are returning j = i - 1 such that
+ // blockKeys[j] <= key < blockKeys[j + 1]. In particular, j = -1 if
+ // key < blockKeys[0], meaning the file does not contain the given key.
+
+ int i = -pos - 1;
+ assert 0 <= i && i <= blockKeys.length;
+ return i - 1;
+ }
- public BlockIndexReader(final CellComparator c, final int treeLevel,
+ @Override
+ public int rootBlockContainingKey(Cell key) {
+ // Should not be called on this because here it deals only with byte[]
+ throw new UnsupportedOperationException(
+ "Cannot search for a key that is of Cell type. Only plain byte array keys " +
+ "can be searched for");
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("size=" + rootCount).append("\n");
+ for (int i = 0; i < rootCount; i++) {
+ sb.append("key=").append(KeyValue.keyToString(blockKeys[i]))
+ .append("\n offset=").append(blockOffsets[i])
+ .append(", dataSize=" + blockDataSizes[i]).append("\n");
+ }
+ return sb.toString();
+ }
+
+ }
+
+ /**
+ * An implementation of the BlockIndexReader that deals with block keys that are the key
+ * part of a cell, like the data block index or the ROW_COL Bloom chunk index.
+ * This needs a CellComparator to work with the Cells
+ */
+ static class CellBasedKeyBlockIndexReader extends BlockIndexReader {
+
+ private Cell[] blockKeys;
+ /** Pre-computed mid-key */
+ private AtomicReference<Cell> midKey = new AtomicReference<Cell>();
+ /** Needed doing lookup on blocks. */
+ private CellComparator comparator;
+
+ public CellBasedKeyBlockIndexReader(final CellComparator c, final int treeLevel,
final CachingBlockReader cachingBlockReader) {
this(c, treeLevel);
this.cachingBlockReader = cachingBlockReader;
}
- public BlockIndexReader(final CellComparator c, final int treeLevel)
- {
+ public CellBasedKeyBlockIndexReader(final CellComparator c, final int treeLevel) {
// Can be null for METAINDEX block
comparator = c;
searchTreeLevel = treeLevel;
}
- /**
- * @return true if the block index is empty.
- */
- public boolean isEmpty() {
- return blockKeys.length == 0;
- }
+ protected long calculateHeapSizeForBlockKeys(long heapSize) {
+ if (blockKeys != null) {
+ heapSize += ClassSize.REFERENCE;
+ // Adding array + references overhead
+ heapSize += ClassSize.align(ClassSize.ARRAY + blockKeys.length * ClassSize.REFERENCE);
- /**
- * Verifies that the block index is non-empty and throws an
- * {@link IllegalStateException} otherwise.
- */
- public void ensureNonEmpty() {
- if (blockKeys.length == 0) {
- throw new IllegalStateException("Block index is empty or not loaded");
+ // Adding blockKeys
+ for (Cell key : blockKeys) {
+ heapSize += ClassSize.align(CellUtil.estimatedHeapSizeOf(key));
+ }
}
+ // Add comparator and the midkey atomicreference
+ heapSize += 2 * ClassSize.REFERENCE;
+ return heapSize;
}
- /**
- * Return the data block which contains this key. This function will only
- * be called when the HFile version is larger than 1.
- *
- * @param key the key we are looking for
- * @param currentBlock the current block, to avoid re-reading the same block
- * @param cacheBlocks
- * @param pread
- * @param isCompaction
- * @param expectedDataBlockEncoding the data block encoding the caller is
- * expecting the data block to be in, or null to not perform this
- * check and return the block irrespective of the encoding
- * @return reader a basic way to load blocks
- * @throws IOException
- */
- public HFileBlock seekToDataBlock(final Cell key, HFileBlock currentBlock, boolean cacheBlocks,
- boolean pread, boolean isCompaction, DataBlockEncoding expectedDataBlockEncoding)
- throws IOException {
- BlockWithScanInfo blockWithScanInfo = loadDataBlockWithScanInfo(key, currentBlock,
- cacheBlocks,
- pread, isCompaction, expectedDataBlockEncoding);
- if (blockWithScanInfo == null) {
- return null;
- } else {
- return blockWithScanInfo.getHFileBlock();
- }
+ @Override
+ public boolean isEmpty() {
+ return blockKeys.length == 0;
}
/**
- * Return the BlockWithScanInfo which contains the DataBlock with other scan
- * info such as nextIndexedKey. This function will only be called when the
- * HFile version is larger than 1.
- *
- * @param key
- * the key we are looking for
- * @param currentBlock
- * the current block, to avoid re-reading the same block
- * @param cacheBlocks
- * @param pread
- * @param isCompaction
- * @param expectedDataBlockEncoding the data block encoding the caller is
- * expecting the data block to be in, or null to not perform this
- * check and return the block irrespective of the encoding.
- * @return the BlockWithScanInfo which contains the DataBlock with other
- * scan info such as nextIndexedKey.
- * @throws IOException
+ * @param i
+ * from 0 to {@link #getRootBlockCount() - 1}
*/
+ public Cell getRootBlockKey(int i) {
+ return blockKeys[i];
+ }
+ @Override
public BlockWithScanInfo loadDataBlockWithScanInfo(Cell key, HFileBlock currentBlock,
- boolean cacheBlocks,
- boolean pread, boolean isCompaction, DataBlockEncoding expectedDataBlockEncoding)
- throws IOException {
+ boolean cacheBlocks, boolean pread, boolean isCompaction,
+ DataBlockEncoding expectedDataBlockEncoding) throws IOException {
int rootLevelIndex = rootBlockContainingKey(key);
if (rootLevelIndex < 0 || rootLevelIndex >= blockOffsets.length) {
return null;
@@ -227,7 +284,7 @@ public class HFileBlockIndex {
int currentOnDiskSize = blockDataSizes[rootLevelIndex];
if (rootLevelIndex < blockKeys.length - 1) {
- nextIndexedKey = new KeyValue.KeyOnlyKeyValue(blockKeys[rootLevelIndex + 1]);
+ nextIndexedKey = blockKeys[rootLevelIndex + 1];
} else {
nextIndexedKey = HConstants.NO_NEXT_INDEXED_KEY;
}
@@ -314,18 +371,12 @@ public class HFileBlockIndex {
return blockWithScanInfo;
}
- /**
- * An approximation to the {@link HFile}'s mid-key. Operates on block
- * boundaries, and does not go inside blocks. In other words, returns the
- * first key of the middle block of the file.
- *
- * @return the first key of the middle block
- */
- public byte[] midkey() throws IOException {
+ @Override
+ public Cell midkey() throws IOException {
if (rootCount == 0)
throw new IOException("HFile empty");
- byte[] targetMidKey = this.midKey.get();
+ Cell targetMidKey = this.midKey.get();
if (targetMidKey != null) {
return targetMidKey;
}
@@ -348,7 +399,8 @@ public class HFileBlockIndex {
keyRelOffset;
int keyOffset = Bytes.SIZEOF_INT * (numDataBlocks + 2) + keyRelOffset
+ SECONDARY_INDEX_ENTRY_OVERHEAD;
- targetMidKey = ByteBufferUtils.toBytes(b, keyOffset, keyLen);
+ byte[] bytes = ByteBufferUtils.toBytes(b, keyOffset, keyLen);
+ targetMidKey = new KeyValue.KeyOnlyKeyValue(bytes, 0, bytes.length);
} else {
// The middle of the root-level index.
targetMidKey = blockKeys[rootCount / 2];
@@ -358,14 +410,175 @@ public class HFileBlockIndex {
return targetMidKey;
}
+ protected void initialize(int numEntries) {
+ blockKeys = new Cell[numEntries];
+ }
+
/**
- * @param i from 0 to {@link #getRootBlockCount() - 1}
+ * Adds a new entry in the root block index. Only used when reading.
+ *
+ * @param key Last key in the block
+ * @param offset file offset where the block is stored
+ * @param dataSize the uncompressed data size
*/
- public byte[] getRootBlockKey(int i) {
- return blockKeys[i];
+ protected void add(final byte[] key, final long offset, final int dataSize) {
+ blockOffsets[rootCount] = offset;
+ // Create the blockKeys as Cells once when the reader is opened
+ blockKeys[rootCount] = new KeyValue.KeyOnlyKeyValue(key, 0, key.length);
+ blockDataSizes[rootCount] = dataSize;
+ rootCount++;
+ }
+
+ @Override
+ public int rootBlockContainingKey(final byte[] key, int offset, int length,
+ CellComparator comp) {
+ // This should always be called with a Cell, not with a byte[] key
+ throw new UnsupportedOperationException("Cannot search for a plain byte[] " +
+ "key. Only Cell-based keys can be searched for");
+ }
+
+ @Override
+ public int rootBlockContainingKey(Cell key) {
+ // Here the comparator should not be null as this happens for the root-level block
+ int pos = Bytes.binarySearch(blockKeys, key, comparator);
+ // pos is between -(blockKeys.length + 1) to blockKeys.length - 1, see
+ // binarySearch's javadoc.
+
+ if (pos >= 0) {
+ // This means this is an exact match with an element of blockKeys.
+ assert pos < blockKeys.length;
+ return pos;
+ }
+
+ // Otherwise, pos = -(i + 1), where blockKeys[i - 1] < key < blockKeys[i],
+ // and i is in [0, blockKeys.length]. We are returning j = i - 1 such that
+ // blockKeys[j] <= key < blockKeys[j + 1]. In particular, j = -1 if
+ // key < blockKeys[0], meaning the file does not contain the given key.
+
+ int i = -pos - 1;
+ assert 0 <= i && i <= blockKeys.length;
+ return i - 1;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("size=" + rootCount).append("\n");
+ for (int i = 0; i < rootCount; i++) {
+ sb.append("key=").append((blockKeys[i]))
+ .append("\n offset=").append(blockOffsets[i])
+ .append(", dataSize=" + blockDataSizes[i]).append("\n");
+ }
+ return sb.toString();
+ }
+ }
+ /**
+ * The reader will always hold the root level index in the memory. Index
+ * blocks at all other levels will be cached in the LRU cache in practice,
+ * although this API does not enforce that.
+ *
+ * All non-root (leaf and intermediate) index blocks contain what we call a
+ * "secondary index": an array of offsets to the entries within the block.
+ * This allows us to do binary search for the entry corresponding to the
+ * given key without having to deserialize the block.
+ */
+ static abstract class BlockIndexReader implements HeapSize {
+
+ protected long[] blockOffsets;
+ protected int[] blockDataSizes;
+ protected int rootCount = 0;
+
+ // Mid-key metadata.
+ protected long midLeafBlockOffset = -1;
+ protected int midLeafBlockOnDiskSize = -1;
+ protected int midKeyEntry = -1;
+
+ /**
+ * The number of levels in the block index tree. One if there is only root
+ * level, two for root and leaf levels, etc.
+ */
+ protected int searchTreeLevel;
+
+ /** A way to read {@link HFile} blocks at a given offset */
+ protected CachingBlockReader cachingBlockReader;
+
+ /**
+ * @return true if the block index is empty.
+ */
+ public abstract boolean isEmpty();
+
+ /**
+ * Verifies that the block index is non-empty and throws an
+ * {@link IllegalStateException} otherwise.
+ */
+ public void ensureNonEmpty() {
+ if (isEmpty()) {
+ throw new IllegalStateException("Block index is empty or not loaded");
+ }
}
/**
+ * Return the data block which contains this key. This function will only
+ * be called when the HFile version is larger than 1.
+ *
+ * @param key the key we are looking for
+ * @param currentBlock the current block, to avoid re-reading the same block
+ * @param cacheBlocks
+ * @param pread
+ * @param isCompaction
+ * @param expectedDataBlockEncoding the data block encoding the caller is
+ * expecting the data block to be in, or null to not perform this
+ * check and return the block irrespective of the encoding
+ * @return reader a basic way to load blocks
+ * @throws IOException
+ */
+ public HFileBlock seekToDataBlock(final Cell key, HFileBlock currentBlock, boolean cacheBlocks,
+ boolean pread, boolean isCompaction, DataBlockEncoding expectedDataBlockEncoding)
+ throws IOException {
+ BlockWithScanInfo blockWithScanInfo = loadDataBlockWithScanInfo(key, currentBlock,
+ cacheBlocks,
+ pread, isCompaction, expectedDataBlockEncoding);
+ if (blockWithScanInfo == null) {
+ return null;
+ } else {
+ return blockWithScanInfo.getHFileBlock();
+ }
+ }
+
+ /**
+ * Return the BlockWithScanInfo which contains the DataBlock with other scan
+ * info such as nextIndexedKey. This function will only be called when the
+ * HFile version is larger than 1.
+ *
+ * @param key
+ * the key we are looking for
+ * @param currentBlock
+ * the current block, to avoid re-reading the same block
+ * @param cacheBlocks
+ * @param pread
+ * @param isCompaction
+ * @param expectedDataBlockEncoding the data block encoding the caller is
+ * expecting the data block to be in, or null to not perform this
+ * check and return the block irrespective of the encoding.
+ * @return the BlockWithScanInfo which contains the DataBlock with other
+ * scan info such as nextIndexedKey.
+ * @throws IOException
+ */
+ public abstract BlockWithScanInfo loadDataBlockWithScanInfo(Cell key, HFileBlock currentBlock,
+ boolean cacheBlocks,
+ boolean pread, boolean isCompaction, DataBlockEncoding expectedDataBlockEncoding)
+ throws IOException;
+
+ /**
+ * An approximation to the {@link HFile}'s mid-key. Operates on block
+ * boundaries, and does not go inside blocks. In other words, returns the
+ * first key of the middle block of the file.
+ *
+ * @return the first key of the middle block
+ */
+ public abstract Cell midkey() throws IOException;
+
+ /**
* @param i from 0 to {@link #getRootBlockCount() - 1}
*/
public long getRootBlockOffset(int i) {
@@ -402,27 +615,8 @@ public class HFileBlockIndex {
// When we want to find the meta index block or bloom block for ROW bloom
// type Bytes.BYTES_RAWCOMPARATOR would be enough. For the ROW_COL bloom case we need the
// CellComparator.
- public int rootBlockContainingKey(final byte[] key, int offset, int length,
- CellComparator comp) {
- int pos = Bytes.binarySearch(blockKeys, key, offset, length, comp);
- // pos is between -(blockKeys.length + 1) to blockKeys.length - 1, see
- // binarySearch's javadoc.
-
- if (pos >= 0) {
- // This means this is an exact match with an element of blockKeys.
- assert pos < blockKeys.length;
- return pos;
- }
-
- // Otherwise, pos = -(i + 1), where blockKeys[i - 1] < key < blockKeys[i],
- // and i is in [0, blockKeys.length]. We are returning j = i - 1 such that
- // blockKeys[j] <= key < blockKeys[j + 1]. In particular, j = -1 if
- // key < blockKeys[0], meaning the file does not contain the given key.
-
- int i = -pos - 1;
- assert 0 <= i && i <= blockKeys.length;
- return i - 1;
- }
+ public abstract int rootBlockContainingKey(final byte[] key, int offset, int length,
+ CellComparator comp);
/**
* Finds the root-level index block containing the given key.
@@ -438,7 +632,7 @@ public class HFileBlockIndex {
// Bytes.BYTES_RAWCOMPARATOR would be enough. For the ROW_COL bloom case we
// need the CellComparator.
public int rootBlockContainingKey(final byte[] key, int offset, int length) {
- return rootBlockContainingKey(key, offset, length, comparator);
+ return rootBlockContainingKey(key, offset, length, null);
}
/**
@@ -447,41 +641,7 @@ public class HFileBlockIndex {
* @param key
* Key to find
*/
- public int rootBlockContainingKey(final Cell key) {
- // Here the comparator should not be null as this happens for the root-level block
- int pos = Bytes.binarySearch(blockKeys, key, comparator);
- // pos is between -(blockKeys.length + 1) to blockKeys.length - 1, see
- // binarySearch's javadoc.
-
- if (pos >= 0) {
- // This means this is an exact match with an element of blockKeys.
- assert pos < blockKeys.length;
- return pos;
- }
-
- // Otherwise, pos = -(i + 1), where blockKeys[i - 1] < key < blockKeys[i],
- // and i is in [0, blockKeys.length]. We are returning j = i - 1 such that
- // blockKeys[j] <= key < blockKeys[j + 1]. In particular, j = -1 if
- // key < blockKeys[0], meaning the file does not contain the given key.
-
- int i = -pos - 1;
- assert 0 <= i && i <= blockKeys.length;
- return i - 1;
- }
-
- /**
- * Adds a new entry in the root block index. Only used when reading.
- *
- * @param key Last key in the block
- * @param offset file offset where the block is stored
- * @param dataSize the uncompressed data size
- */
- private void add(final byte[] key, final long offset, final int dataSize) {
- blockOffsets[rootCount] = offset;
- blockKeys[rootCount] = key;
- blockDataSizes[rootCount] = dataSize;
- rootCount++;
- }
+ public abstract int rootBlockContainingKey(final Cell key);
/**
* The indexed key at the ith position in the nonRootIndex. The position starts at 0.
@@ -489,7 +649,7 @@ public class HFileBlockIndex {
* @param i the ith position
* @return The indexed key at the ith position in the nonRootIndex.
*/
- private byte[] getNonRootIndexedKey(ByteBuffer nonRootIndex, int i) {
+ protected byte[] getNonRootIndexedKey(ByteBuffer nonRootIndex, int i) {
int numEntries = nonRootIndex.getInt(0);
if (i < 0 || i >= numEntries) {
return null;
@@ -653,10 +813,9 @@ public class HFileBlockIndex {
* @param numEntries the number of root-level index entries
* @throws IOException
*/
- public void readRootIndex(DataInput in, final int numEntries)
- throws IOException {
+ public void readRootIndex(DataInput in, final int numEntries) throws IOException {
blockOffsets = new long[numEntries];
- blockKeys = new byte[numEntries][];
+ initialize(numEntries);
blockDataSizes = new int[numEntries];
// If index size is zero, no index was written.
@@ -670,6 +829,10 @@ public class HFileBlockIndex {
}
}
+ protected abstract void initialize(int numEntries);
+
+ protected abstract void add(final byte[] key, final long offset, final int dataSize);
+
/**
* Read in the root-level index from the given input stream. Must match
* what was written into the root level by
@@ -712,36 +875,15 @@ public class HFileBlockIndex {
}
@Override
- public String toString() {
- StringBuilder sb = new StringBuilder();
- sb.append("size=" + rootCount).append("\n");
- for (int i = 0; i < rootCount; i++) {
- sb.append("key=").append(KeyValue.keyToString(blockKeys[i]))
- .append("\n offset=").append(blockOffsets[i])
- .append(", dataSize=" + blockDataSizes[i]).append("\n");
- }
- return sb.toString();
- }
-
- @Override
public long heapSize() {
- long heapSize = ClassSize.align(6 * ClassSize.REFERENCE +
+ // The base BlockIndexReader no longer holds the blockKeys, comparator and midKey references
+ long heapSize = ClassSize.align(3 * ClassSize.REFERENCE +
2 * Bytes.SIZEOF_INT + ClassSize.OBJECT);
// Mid-key metadata.
heapSize += MID_KEY_METADATA_SIZE;
- // Calculating the size of blockKeys
- if (blockKeys != null) {
- // Adding array + references overhead
- heapSize += ClassSize.align(ClassSize.ARRAY + blockKeys.length
- * ClassSize.REFERENCE);
-
- // Adding bytes
- for (byte[] key : blockKeys) {
- heapSize += ClassSize.align(ClassSize.ARRAY + key.length);
- }
- }
+ heapSize = calculateHeapSizeForBlockKeys(heapSize);
if (blockOffsets != null) {
heapSize += ClassSize.align(ClassSize.ARRAY + blockOffsets.length
@@ -756,6 +898,7 @@ public class HFileBlockIndex {
return ClassSize.align(heapSize);
}
+ protected abstract long calculateHeapSizeForBlockKeys(long heapSize);
}
/**
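[Editor's note] The net effect of this refactor: BlockIndexReader becomes an abstract base holding the offsets, sizes, and tree-walking logic, with ByteArrayKeyBlockIndexReader for raw byte[] keys (meta index, ROW Bloom chunks) and CellBasedKeyBlockIndexReader for Cell keys (data index, ROW_COL Bloom chunks). A sketch of the selection logic, mirroring the CompoundBloomFilter constructor earlier in this commit (placed in the same package because the concrete readers are package-private; the helper class is hypothetical):

    package org.apache.hadoop.hbase.io.hfile;

    import java.io.DataInput;
    import java.io.IOException;

    import org.apache.hadoop.hbase.CellComparator;

    class BlockIndexReaderSelection {
      static HFileBlockIndex.BlockIndexReader openRootIndex(CellComparator comparator,
          int treeLevel, DataInput in, int numEntries) throws IOException {
        // A null comparator means the index keys are plain byte[]; otherwise
        // the keys are Cells and lookups need the CellComparator.
        HFileBlockIndex.BlockIndexReader index = (comparator == null)
            ? new HFileBlockIndex.ByteArrayKeyBlockIndexReader(treeLevel)
            : new HFileBlockIndex.CellBasedKeyBlockIndexReader(comparator, treeLevel);
        index.readRootIndex(in, numEntries);
        return index;
      }
    }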
http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java
index aac10f2..cb7dc62 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java
@@ -410,7 +410,7 @@ public class HFilePrettyPrinter extends Configured implements Tool {
}
try {
- System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));
+ System.out.println("Mid-key: " + (CellUtil.getCellKeyAsString(reader.midkey())));
} catch (Exception e) {
System.out.println ("Unable to retrieve the midkey");
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
index d184d42..642b6c7 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
@@ -39,7 +39,6 @@ import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.NoTagsKeyValue;
-import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.compress.Compression;
@@ -73,10 +72,10 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
private static final Log LOG = LogFactory.getLog(HFileReaderImpl.class);
/** Data block index reader keeping the root data index in memory */
- private HFileBlockIndex.BlockIndexReader dataBlockIndexReader;
+ private HFileBlockIndex.CellBasedKeyBlockIndexReader dataBlockIndexReader;
/** Meta block index reader -- always single level */
- private HFileBlockIndex.BlockIndexReader metaBlockIndexReader;
+ private HFileBlockIndex.ByteArrayKeyBlockIndexReader metaBlockIndexReader;
private final FixedFileTrailer trailer;
@@ -189,10 +188,9 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
// Comparator class name is stored in the trailer in version 2.
comparator = trailer.createComparator();
- dataBlockIndexReader = new HFileBlockIndex.BlockIndexReader(comparator,
+ dataBlockIndexReader = new HFileBlockIndex.CellBasedKeyBlockIndexReader(comparator,
trailer.getNumDataIndexLevels(), this);
- metaBlockIndexReader = new HFileBlockIndex.BlockIndexReader(
- null, 1);
+ metaBlockIndexReader = new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1);
// Parse load-on-open data.
@@ -309,7 +307,9 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
}
private String toStringFirstKey() {
- return KeyValue.keyToString(getFirstKey());
+ if (getFirstKey() == null)
+ return null;
+ return CellUtil.getCellKeyAsString(getFirstKey());
}
private String toStringLastKey() {
@@ -341,7 +341,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
* first KeyValue.
*/
@Override
- public byte [] getFirstKey() {
+ public Cell getFirstKey() {
if (dataBlockIndexReader == null) {
throw new BlockIndexNotLoadedException();
}
@@ -357,8 +357,9 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
*/
@Override
public byte[] getFirstRowKey() {
- byte[] firstKey = getFirstKey();
- return firstKey == null? null: KeyValueUtil.createKeyValueFromKey(firstKey).getRow();
+ Cell firstKey = getFirstKey();
+ // We have to copy the row part to form the row key alone
+ return firstKey == null? null: CellUtil.cloneRow(firstKey);
}
/**
@@ -1215,7 +1216,8 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
// Per meta key from any given file, synchronize reads for said block. This
// is OK to do for meta blocks because the meta block index is always
// single-level.
- synchronized (metaBlockIndexReader.getRootBlockKey(block)) {
+ synchronized (metaBlockIndexReader
+ .getRootBlockKey(block)) {
// Check cache for block. If found return.
long metaBlockOffset = metaBlockIndexReader.getRootBlockOffset(block);
BlockCacheKey cacheKey = new BlockCacheKey(name, metaBlockOffset);
@@ -1387,7 +1389,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
* @throws IOException
*/
@Override
- public byte[] midkey() throws IOException {
+ public Cell midkey() throws IOException {
return dataBlockIndexReader.midkey();
}
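
Taken together, the reader now hands out Cell rather than byte[] for both the first key and the midkey. A hypothetical caller-side sketch (helper class and method names are illustrative, not part of the patch):

    import java.io.IOException;

    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.CellUtil;
    import org.apache.hadoop.hbase.io.hfile.HFile;

    final class ReaderKeyDump {
      static void dump(HFile.Reader reader) throws IOException {
        Cell first = reader.getFirstKey();   // was byte[], now a Cell
        if (first == null) {
          return;                            // empty file, nothing to print
        }
        // Copy only the row portion, as getFirstRowKey() now does internally.
        byte[] row = CellUtil.cloneRow(first);
        System.out.println("first key: " + CellUtil.getCellKeyAsString(first)
            + " (row of " + row.length + " bytes)");
        // midkey() likewise returns a Cell after this change.
        System.out.println("midkey: " + CellUtil.getCellKeyAsString(reader.midkey()));
      }
    }
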
http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java
index ee2644d..f4eaaf9 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java
@@ -39,6 +39,7 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
@@ -597,12 +598,12 @@ public class HRegionFileSystem {
} else {
//check if smaller than first key
KeyValue splitKey = KeyValueUtil.createLastOnRow(splitRow);
- byte[] firstKey = f.createReader().getFirstKey();
+ Cell firstKey = f.createReader().getFirstKey();
// A null firstKey means the storefile is empty.
if (firstKey == null) {
return null;
}
- if (f.getReader().getComparator().compare(splitKey, firstKey, 0, firstKey.length) < 0) {
+ if (f.getReader().getComparator().compare(splitKey, firstKey) < 0) {
return null;
}
}
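
The comparator now takes the split key and the first key as two cells, with no offset/length bookkeeping. The guard above, restated as a standalone predicate under the same assumptions as the hunk (the helper name is illustrative):

    import java.io.IOException;

    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.KeyValue;
    import org.apache.hadoop.hbase.KeyValueUtil;
    import org.apache.hadoop.hbase.regionserver.StoreFile;

    final class SplitCheck {
      // True when every cell of splitRow sorts before the file's first key,
      // i.e. the bottom half of the split would be empty.
      static boolean splitsBelowFirstKey(StoreFile f, byte[] splitRow)
          throws IOException {
        KeyValue splitKey = KeyValueUtil.createLastOnRow(splitRow);
        Cell firstKey = f.createReader().getFirstKey();
        if (firstKey == null) {
          return true; // empty store file
        }
        return f.getReader().getComparator().compare(splitKey, firstKey) < 0;
      }
    }
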
http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
index 4de61ae..33d4e1e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
@@ -1838,9 +1838,8 @@ public class HStore implements Store {
return false;
}
// TODO: Cache these keys rather than make each time?
- byte [] fk = r.getFirstKey();
- if (fk == null) return false;
- KeyValue firstKV = KeyValueUtil.createKeyValueFromKey(fk, 0, fk.length);
+ Cell firstKV = r.getFirstKey();
+ if (firstKV == null) return false;
byte [] lk = r.getLastKey();
KeyValue lastKV = KeyValueUtil.createKeyValueFromKey(lk, 0, lk.length);
KeyValue firstOnRow = state.getTargetKey();
@@ -1884,9 +1883,9 @@ public class HStore implements Store {
*/
private boolean seekToScanner(final HFileScanner scanner,
final KeyValue firstOnRow,
- final KeyValue firstKV)
+ final Cell firstKV)
throws IOException {
- KeyValue kv = firstOnRow;
+ Cell kv = firstOnRow;
// If firstOnRow < firstKV, set to firstKV
if (this.comparator.compareRows(firstKV, firstOnRow) == 0) kv = firstKV;
int result = scanner.seekTo(kv);
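
Because KeyValue implements Cell, the existing KeyValue-typed arguments still flow into the widened Cell parameter without copying; a tiny, hypothetical demonstration of that widening:

    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.KeyValue;

    public class WideningDemo {
      public static void main(String[] args) {
        KeyValue firstOnRow = new KeyValue(new byte[] { 'r' }, new byte[] { 'f' },
            new byte[] { 'q' }, new byte[] { 'v' });
        Cell kv = firstOnRow; // no conversion needed: KeyValue is-a Cell
        System.out.println("type byte: " + kv.getTypeByte());
      }
    }
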
http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
index eba3689..fc94d3d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
@@ -678,21 +678,20 @@ public class StoreFile {
// Get first, last, and mid keys. Midkey is the key that starts block
// in middle of hfile. Has column and timestamp. Need to return just
// the row we want to split on as midkey.
- byte [] midkey = this.reader.midkey();
+ Cell midkey = this.reader.midkey();
if (midkey != null) {
- KeyValue mk = KeyValueUtil.createKeyValueFromKey(midkey, 0, midkey.length);
- byte [] fk = this.reader.getFirstKey();
- KeyValue firstKey = KeyValueUtil.createKeyValueFromKey(fk, 0, fk.length);
+ Cell firstKey = this.reader.getFirstKey();
byte [] lk = this.reader.getLastKey();
KeyValue lastKey = KeyValueUtil.createKeyValueFromKey(lk, 0, lk.length);
// if the midkey is the same as the first or last keys, we cannot (ever) split this region.
- if (comparator.compareRows(mk, firstKey) == 0 || comparator.compareRows(mk, lastKey) == 0) {
+ if (comparator.compareRows(midkey, firstKey) == 0
+ || comparator.compareRows(midkey, lastKey) == 0) {
if (LOG.isDebugEnabled()) {
LOG.debug("cannot split because midkey is the same as first or last row");
}
return null;
}
- return mk.getRow();
+ return CellUtil.cloneRow(midkey);
}
return null;
}
@@ -1371,8 +1370,7 @@ public class StoreFile {
.createLastOnRow(scan.getStartRow()) : KeyValueUtil.createLastOnRow(scan
.getStopRow());
// TODO this is in hot path? Optimize and avoid 2 extra object creations.
- KeyValue.KeyOnlyKeyValue firstKeyKV =
- new KeyValue.KeyOnlyKeyValue(this.getFirstKey(), 0, this.getFirstKey().length);
+ Cell firstKeyKV = this.getFirstKey();
KeyValue.KeyOnlyKeyValue lastKeyKV =
new KeyValue.KeyOnlyKeyValue(this.getLastKey(), 0, this.getLastKey().length);
boolean nonOverLapping = ((getComparator().compare(firstKeyKV, largestScanKeyValue)) > 0
@@ -1493,7 +1491,7 @@ public class StoreFile {
return reader.getLastRowKey();
}
- public byte[] midkey() throws IOException {
+ public Cell midkey() throws IOException {
return reader.midkey();
}
@@ -1513,7 +1511,7 @@ public class StoreFile {
return deleteFamilyCnt;
}
- public byte[] getFirstKey() {
+ public Cell getFirstKey() {
return reader.getFirstKey();
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterChunk.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterChunk.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterChunk.java
index a80a201..5b6cb36 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterChunk.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterChunk.java
@@ -29,7 +29,7 @@ import org.apache.hadoop.hbase.classification.InterfaceAudience;
import com.google.common.annotations.VisibleForTesting;
/**
- * The basic building block for the {@link CompoundBloomFilter}
+ * The basic building block for the {@link org.apache.hadoop.hbase.io.hfile.CompoundBloomFilter}
*/
@InterfaceAudience.Private
public class BloomFilterChunk implements BloomFilterBase {
http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterFactory.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterFactory.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterFactory.java
index aecbdf8..99951f0 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterFactory.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterFactory.java
@@ -28,6 +28,9 @@ import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
+import org.apache.hadoop.hbase.io.hfile.CompoundBloomFilter;
+import org.apache.hadoop.hbase.io.hfile.CompoundBloomFilterBase;
+import org.apache.hadoop.hbase.io.hfile.CompoundBloomFilterWriter;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.regionserver.BloomType;
http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilter.java
deleted file mode 100644
index 984742f..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilter.java
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.util;
-
-import java.io.DataInput;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.io.hfile.BlockType;
-import org.apache.hadoop.hbase.io.hfile.FixedFileTrailer;
-import org.apache.hadoop.hbase.io.hfile.HFile;
-import org.apache.hadoop.hbase.io.hfile.HFileBlock;
-import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex;
-
-/**
- * A Bloom filter implementation built on top of {@link BloomFilterChunk},
- * encapsulating a set of fixed-size Bloom filters written out at the time of
- * {@link org.apache.hadoop.hbase.io.hfile.HFile} generation into the data
- * block stream, and loaded on demand at query time. This class only provides
- * reading capabilities.
- */
-@InterfaceAudience.Private
-public class CompoundBloomFilter extends CompoundBloomFilterBase
- implements BloomFilter {
-
- /** Used to load chunks on demand */
- private HFile.Reader reader;
-
- private HFileBlockIndex.BlockIndexReader index;
-
- private int hashCount;
- private Hash hash;
-
- private long[] numQueriesPerChunk;
- private long[] numPositivesPerChunk;
-
- /**
- * De-serialization for compound Bloom filter metadata. Must be consistent
- * with what {@link CompoundBloomFilterWriter} does.
- *
- * @param meta serialized Bloom filter metadata without any magic blocks
- * @throws IOException
- */
- public CompoundBloomFilter(DataInput meta, HFile.Reader reader)
- throws IOException {
- this.reader = reader;
-
- totalByteSize = meta.readLong();
- hashCount = meta.readInt();
- hashType = meta.readInt();
- totalKeyCount = meta.readLong();
- totalMaxKeys = meta.readLong();
- numChunks = meta.readInt();
- byte[] comparatorClassName = Bytes.readByteArray(meta);
- // The writer would have returned 0 as the vint length for the case of
- // Bytes.BYTES_RAWCOMPARATOR. In such cases, do not initialize the
- // comparator; it can remain null.
- if (comparatorClassName.length != 0) {
- comparator = FixedFileTrailer.createComparator(Bytes.toString(comparatorClassName));
- }
-
- hash = Hash.getInstance(hashType);
- if (hash == null) {
- throw new IllegalArgumentException("Invalid hash type: " + hashType);
- }
- // We will pass null for ROW block
- index = new HFileBlockIndex.BlockIndexReader(comparator, 1);
- index.readRootIndex(meta, numChunks);
- }
-
- @Override
- public boolean contains(byte[] key, int keyOffset, int keyLength, ByteBuffer bloom) {
- // The result is computed in checkContains() so that the per-chunk stats
- // used in testing can be updated; on a broken filter it throws instead.
-
- int block = index.rootBlockContainingKey(key, keyOffset,
- keyLength);
- return checkContains(key, keyOffset, keyLength, block);
- }
-
- private boolean checkContains(byte[] key, int keyOffset, int keyLength, int block) {
- boolean result;
- if (block < 0) {
- result = false; // This key is not in the file.
- } else {
- HFileBlock bloomBlock;
- try {
- // We cache the block and use a positional read.
- bloomBlock = reader.readBlock(index.getRootBlockOffset(block),
- index.getRootBlockDataSize(block), true, true, false, true,
- BlockType.BLOOM_CHUNK, null);
- } catch (IOException ex) {
- // The Bloom filter is broken, turn it off.
- throw new IllegalArgumentException(
- "Failed to load Bloom block for key "
- + Bytes.toStringBinary(key, keyOffset, keyLength), ex);
- }
-
- ByteBuffer bloomBuf = bloomBlock.getBufferReadOnly();
- result = BloomFilterUtil.contains(key, keyOffset, keyLength,
- bloomBuf, bloomBlock.headerSize(),
- bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount);
- }
-
- if (numQueriesPerChunk != null && block >= 0) {
- // Update statistics. Only used in unit tests.
- ++numQueriesPerChunk[block];
- if (result)
- ++numPositivesPerChunk[block];
- }
-
- return result;
- }
-
- @Override
- public boolean contains(Cell keyCell, ByteBuffer bloom) {
- // The result is computed in checkContains(), which also updates the
- // per-chunk stats used in testing.
- int block = index.rootBlockContainingKey(keyCell);
- // TODO : This will be a true KeyValue for now. When off-heap support
- // comes in, we can add an else branch to work on the off-heap bytes.
- KeyValue kvKey = (KeyValue) keyCell;
- return checkContains(kvKey.getBuffer(), kvKey.getKeyOffset(), kvKey.getKeyLength(), block);
- }
-
- public boolean supportsAutoLoading() {
- return true;
- }
-
- public int getNumChunks() {
- return numChunks;
- }
-
- public void enableTestingStats() {
- numQueriesPerChunk = new long[numChunks];
- numPositivesPerChunk = new long[numChunks];
- }
-
- public String formatTestingStats() {
- StringBuilder sb = new StringBuilder();
- for (int i = 0; i < numChunks; ++i) {
- sb.append("chunk #");
- sb.append(i);
- sb.append(": queries=");
- sb.append(numQueriesPerChunk[i]);
- sb.append(", positives=");
- sb.append(numPositivesPerChunk[i]);
- sb.append(", positiveRatio=");
- sb.append(numPositivesPerChunk[i] * 1.0 / numQueriesPerChunk[i]);
- sb.append(";\n");
- }
- return sb.toString();
- }
-
- public long getNumQueriesForTesting(int chunk) {
- return numQueriesPerChunk[chunk];
- }
-
- public long getNumPositivesForTesting(int chunk) {
- return numPositivesPerChunk[chunk];
- }
-
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder();
- sb.append(BloomFilterUtil.formatStats(this));
- sb.append(BloomFilterUtil.STATS_RECORD_SEP +
- "Number of chunks: " + numChunks);
- sb.append(BloomFilterUtil.STATS_RECORD_SEP +
- ((comparator != null) ? "Comparator: "
- + comparator.getClass().getSimpleName() : "Comparator: "
- + Bytes.BYTES_RAWCOMPARATOR.getClass().getSimpleName()));
- return sb.toString();
- }
-
-}
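
This file is relocated rather than dropped: the io.hfile copy is the one imported by BloomFilterFactory above. A hedged sketch of driving the relocated reader-side filter (the helper name is illustrative; the constructor and contains() signatures are as shown in the deleted source):

    import java.io.DataInput;
    import java.io.IOException;

    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.io.hfile.CompoundBloomFilter;
    import org.apache.hadoop.hbase.io.hfile.HFile;

    final class BloomProbe {
      static boolean mightContain(DataInput meta, HFile.Reader reader, Cell key)
          throws IOException {
        // Deserializes the filter metadata and the root chunk index.
        CompoundBloomFilter cbf = new CompoundBloomFilter(meta, reader);
        // Chunks are loaded on demand through the reader (supportsAutoLoading()
        // returns true), so no pre-fetched bloom buffer is passed in.
        return cbf.contains(key, null);
      }
    }
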
http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilterBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilterBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilterBase.java
deleted file mode 100644
index 7c29ab2..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilterBase.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.util;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-
-import org.apache.hadoop.hbase.CellComparator;
-
-@InterfaceAudience.Private
-public class CompoundBloomFilterBase implements BloomFilterBase {
-
- /**
- * At read time, the total number of chunks. At write time, the number of
- * chunks created so far. The first chunk has an ID of 0, and the current
- * chunk has the ID of numChunks - 1.
- */
- protected int numChunks;
-
- /**
- * The Bloom filter version. There used to be a DynamicByteBloomFilter which
- * had version 2.
- */
- public static final int VERSION = 3;
-
- /** Target error rate for configuring the filter and for information */
- protected float errorRate;
-
- /** The total number of keys in all chunks */
- protected long totalKeyCount;
- protected long totalByteSize;
- protected long totalMaxKeys;
-
- /** Hash function type to use, as defined in {@link Hash} */
- protected int hashType;
- /** Comparator used to compare Bloom filter keys */
- protected CellComparator comparator;
-
- @Override
- public long getMaxKeys() {
- return totalMaxKeys;
- }
-
- @Override
- public long getKeyCount() {
- return totalKeyCount;
- }
-
- @Override
- public long getByteSize() {
- return totalByteSize;
- }
-
-}