You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by mj...@apache.org on 2017/06/20 18:55:50 UTC
accumulo git commit: ACCUMULO-4656 - clean up formatting
Repository: accumulo
Updated Branches:
refs/heads/1.8 e94cc38ce -> 5194ae723
ACCUMULO-4656 - clean up formatting
may have had a different formatter applied which shortened these comments
unrelated to this ticket, but need to close another GH PR since we don't have access, so closes #268
Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/5194ae72
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/5194ae72
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/5194ae72
Branch: refs/heads/1.8
Commit: 5194ae7233a8605a99f6c6254c25cbc578218f87
Parents: e94cc38
Author: Michael Wall <mj...@apache.org>
Authored: Tue Jun 20 14:55:33 2017 -0400
Committer: Michael Wall <mj...@apache.org>
Committed: Tue Jun 20 14:55:33 2017 -0400
----------------------------------------------------------------------
.../apache/accumulo/core/file/rfile/RFile.java | 60 +++++++-------------
1 file changed, 20 insertions(+), 40 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/accumulo/blob/5194ae72/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java b/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
index 4b19b75..2f08d24 100644
--- a/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
+++ b/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
@@ -89,43 +89,28 @@ public class RFile {
private static final int RINDEX_MAGIC = 0x20637474;
- static final int RINDEX_VER_8 = 8; // Added sample storage. There is a sample locality group for each locality
- // group. Sample are built using a Sampler and
- // sampler configuration. The Sampler and its configuration are stored in RFile.
- // Persisting the method of producing the
+ static final int RINDEX_VER_8 = 8; // Added sample storage. There is a sample locality group for each locality group. Sample are built using a Sampler and
+ // sampler configuration. The Sampler and its configuration are stored in RFile. Persisting the method of producing the
// sample allows a user of RFile to determine if the sample is useful.
//
- // Selected smaller keys for index by doing two things. First internal stats were
- // used to look for keys that were below
- // average in size for the index. Also keys that were statistically large were
- // excluded from the index. Second shorter keys
+ // Selected smaller keys for index by doing two things. First internal stats were used to look for keys that were below
+ // average in size for the index. Also keys that were statistically large were excluded from the index. Second shorter keys
// (that may not exist in data) were generated for the index.
- static final int RINDEX_VER_7 = 7; // Added support for prefix encoding and encryption. Before this change only
- // exact matches within a key field were deduped
- // for consecutive keys. After this change, if consecutive key fields have the
- // same prefix then the prefix is only stored
+ static final int RINDEX_VER_7 = 7; // Added support for prefix encoding and encryption. Before this change only exact matches within a key field were deduped
+ // for consecutive keys. After this change, if consecutive key fields have the same prefix then the prefix is only stored
// once.
- static final int RINDEX_VER_6 = 6; // Added support for multilevel indexes. Before this the index was one list with
- // an entry for each data block. For large
- // files, a large index needed to be read into memory before any seek could be
- // done. After this change the index is a fat
- // tree, and opening a large rfile is much faster. Like the previous version of
- // Rfile, each index node in the tree is kept
+ static final int RINDEX_VER_6 = 6; // Added support for multilevel indexes. Before this the index was one list with an entry for each data block. For large
+ // files, a large index needed to be read into memory before any seek could be done. After this change the index is a fat
+ // tree, and opening a large rfile is much faster. Like the previous version of Rfile, each index node in the tree is kept
// in memory serialized and used in its serialized form.
// static final int RINDEX_VER_5 = 5; // unreleased
- static final int RINDEX_VER_4 = 4; // Added support for seeking using serialized indexes. After this change index is
- // no longer deserialized when rfile opened.
- // Entire serialized index is read into memory as single byte array. For seeks,
- // serialized index is used to find blocks
- // (the binary search deserializes the specific entries its needs). This resulted
- // in less memory usage (no object overhead)
+ static final int RINDEX_VER_4 = 4; // Added support for seeking using serialized indexes. After this change index is no longer deserialized when rfile opened.
+ // Entire serialized index is read into memory as single byte array. For seeks, serialized index is used to find blocks
+ // (the binary search deserializes the specific entries its needs). This resulted in less memory usage (no object overhead)
// and faster open times for RFiles.
- static final int RINDEX_VER_3 = 3; // Initial released version of RFile. R is for relative encoding. A keys is
- // encoded relative to the previous key. The
- // initial version deduped key fields that were the same for consecutive keys.
- // For sorted data this is a common occurrence.
- // This version supports locality groups. Each locality group has an index
- // pointing to set of data blocks. Each data block
+ static final int RINDEX_VER_3 = 3; // Initial released version of RFile. R is for relative encoding. A keys is encoded relative to the previous key. The
+ // initial version deduped key fields that were the same for consecutive keys. For sorted data this is a common occurrence.
+ // This version supports locality groups. Each locality group has an index pointing to set of data blocks. Each data block
// contains relatively encoded keys and values.
// Buffer sample data so that many sample data blocks are stored contiguously.
@@ -381,8 +366,7 @@ public class RFile {
public void flushIfNeeded() throws IOException {
if (dataSize > sampleBufferSize) {
- // the reason to write out all but one key is so that closeBlock() can always eventually be called with
- // true
+ // the reason to write out all but one key is so that closeBlock() can always eventually be called with true
List<SampleEntry> subList = entries.subList(0, entries.size() - 1);
if (subList.size() > 0) {
@@ -460,8 +444,7 @@ public class RFile {
avergageKeySize = keyLenStats.getMean();
}
- // Possibly produce a shorter key that does not exist in data. Even if a key can be shortened, it may
- // not be below average.
+ // Possibly produce a shorter key that does not exist in data. Even if a key can be shortened, it may not be below average.
Key closeKey = KeyShortener.shorten(prevKey, key);
if ((closeKey.getSize() <= avergageKeySize || blockWriter.getRawSize() > maxBlockSize) && !isGiantKey(closeKey)) {
@@ -921,10 +904,8 @@ public class RFile {
}
if (entriesLeft == 0 && startKey.compareTo(getTopKey()) > 0 && startKey.compareTo(iiter.peekPrevious().getKey()) <= 0) {
- // In the empty space at the end of a block. This can occur when keys are shortened in the index
- // creating index entries that do not exist in the
- // block. These shortened index entires fall between the last key in a block and first key in the
- // next block, but may not exist in the data.
+ // In the empty space at the end of a block. This can occur when keys are shortened in the index creating index entries that do not exist in the
+ // block. These shortened index entires fall between the last key in a block and first key in the next block, but may not exist in the data.
// Just proceed to the next block.
reseek = false;
}
@@ -952,8 +933,7 @@ public class RFile {
}
if (iiter.hasPrevious())
- prevKey = new Key(iiter.peekPrevious().getKey()); // initially prevKey is the last key of the
- // prev block
+ prevKey = new Key(iiter.peekPrevious().getKey()); // initially prevKey is the last key of the prev block
else
prevKey = new Key(); // first block in the file, so set prev key to minimal key