You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by mj...@apache.org on 2017/06/20 18:25:07 UTC
[1/3] accumulo git commit: Update PrintInfo.java
Repository: accumulo
Updated Branches:
refs/heads/1.8 0dee0d854 -> e94cc38ce
Update PrintInfo.java
Update MultiLevelIndex.java
Update RFile.java
Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/5757c89c
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/5757c89c
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/5757c89c
Branch: refs/heads/1.8
Commit: 5757c89c2ef519a96c001e8451cea70152a2c8af
Parents: 0dee0d8
Author: matthpeterson <mp...@appliedtech-group.com>
Authored: Mon Jun 19 10:34:07 2017 -0400
Committer: Michael Wall <mj...@gmail.com>
Committed: Tue Jun 20 12:54:35 2017 -0400
----------------------------------------------------------------------
.../accumulo/core/file/rfile/MultiLevelIndex.java | 9 +++++++++
.../apache/accumulo/core/file/rfile/PrintInfo.java | 4 +++-
.../org/apache/accumulo/core/file/rfile/RFile.java | 16 ++++++++++++----
3 files changed, 24 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/accumulo/blob/5757c89c/core/src/main/java/org/apache/accumulo/core/file/rfile/MultiLevelIndex.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/file/rfile/MultiLevelIndex.java b/core/src/main/java/org/apache/accumulo/core/file/rfile/MultiLevelIndex.java
index f99560e..7ac253d 100644
--- a/core/src/main/java/org/apache/accumulo/core/file/rfile/MultiLevelIndex.java
+++ b/core/src/main/java/org/apache/accumulo/core/file/rfile/MultiLevelIndex.java
@@ -23,6 +23,7 @@ import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
+import java.io.PrintStream;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.Collections;
@@ -82,6 +83,14 @@ public class MultiLevelIndex {
}
}
+ public void printInfo(PrintStream out) {
+ out.println("Key: " + key.toString() +
+ " NumEntries: " + entries +
+ " Offset: " + offset +
+ " CompressedSize: " + compressedSize +
+ " RawSize: " + rawSize);
+ }
+
@Override
public void write(DataOutput out) throws IOException {
key.write(out);
http://git-wip-us.apache.org/repos/asf/accumulo/blob/5757c89c/core/src/main/java/org/apache/accumulo/core/file/rfile/PrintInfo.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/file/rfile/PrintInfo.java b/core/src/main/java/org/apache/accumulo/core/file/rfile/PrintInfo.java
index 6b94cee..8e388e5 100644
--- a/core/src/main/java/org/apache/accumulo/core/file/rfile/PrintInfo.java
+++ b/core/src/main/java/org/apache/accumulo/core/file/rfile/PrintInfo.java
@@ -55,6 +55,8 @@ public class PrintInfo implements KeywordExecutable {
boolean hash = false;
@Parameter(names = {"--histogram"}, description = "print a histogram of the key-value sizes")
boolean histogram = false;
+ @Parameter(names = {"--printIndex"}, description = "prints information about all the index entries")
+ boolean printIndex = false;
@Parameter(names = {"--useSample"}, description = "Use sample data for --dump, --vis, --histogram options")
boolean useSample = false;
@Parameter(names = {"--keyStats"}, description = "print key length statistics for index and all data")
@@ -153,7 +155,7 @@ public class PrintInfo implements KeywordExecutable {
if (opts.vis || opts.hash)
iter.registerMetrics(vmg);
- iter.printInfo();
+ iter.printInfo(opts.printIndex);
System.out.println();
org.apache.accumulo.core.file.rfile.bcfile.PrintInfo.main(new String[] {arg});
http://git-wip-us.apache.org/repos/asf/accumulo/blob/5757c89c/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java b/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
index 26343ba..7ffc5c8 100644
--- a/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
+++ b/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
@@ -284,7 +284,7 @@ public class RFile {
indexWriter.close(out);
}
- public void printInfo(boolean isSample) throws IOException {
+ public void printInfo(boolean isSample, boolean includeIndexDetails) throws IOException {
PrintStream out = System.out;
out.printf("%-24s : %s\n", (isSample ? "Sample " : "") + "Locality group ", (isDefaultLG ? "<DEFAULT>" : name));
if (version == RINDEX_VER_3 || version == RINDEX_VER_4 || version == RINDEX_VER_6 || version == RINDEX_VER_7) {
@@ -309,7 +309,11 @@ public class RFile {
long numKeys = 0;
IndexIterator countIter = indexReader.lookup(new Key());
while (countIter.hasNext()) {
- numKeys += countIter.next().getNumEntries();
+ IndexEntry indexEntry = countIter.next();
+ numKeys += indexEntry.getNumEntries();
+ if (includeIndexDetails) {
+ indexEntry.printInfo(out);
+ }
}
out.printf("\t%-22s : %,d\n", "Num entries", numKeys);
@@ -1380,12 +1384,16 @@ public class RFile {
}
public void printInfo() throws IOException {
+ printInfo(false);
+ }
+
+ public void printInfo(boolean includeIndexDetails) throws IOException {
System.out.printf("%-24s : %d\n", "RFile Version", rfileVersion);
System.out.println();
for (LocalityGroupMetadata lgm : localityGroups) {
- lgm.printInfo(false);
+ lgm.printInfo(false, includeIndexDetails);
}
if (sampleGroups.size() > 0) {
@@ -1397,7 +1405,7 @@ public class RFile {
System.out.println();
for (LocalityGroupMetadata lgm : sampleGroups) {
- lgm.printInfo(true);
+ lgm.printInfo(true, includeIndexDetails);
}
}
}
[2/3] accumulo git commit: ACCUMULO-4656 print all index levels
Posted by mj...@apache.org.
ACCUMULO-4656 print all index levels
Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/7543ea2b
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/7543ea2b
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/7543ea2b
Branch: refs/heads/1.8
Commit: 7543ea2b31977c20460c7622f605913d3f3ba9dc
Parents: 5757c89
Author: Keith Turner <kt...@apache.org>
Authored: Tue Jun 20 09:42:56 2017 -0400
Committer: Michael Wall <mj...@gmail.com>
Committed: Tue Jun 20 12:54:37 2017 -0400
----------------------------------------------------------------------
.../core/file/rfile/MultiLevelIndex.java | 50 ++++++++++++++++----
.../apache/accumulo/core/file/rfile/RFile.java | 11 +++--
2 files changed, 48 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/accumulo/blob/7543ea2b/core/src/main/java/org/apache/accumulo/core/file/rfile/MultiLevelIndex.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/file/rfile/MultiLevelIndex.java b/core/src/main/java/org/apache/accumulo/core/file/rfile/MultiLevelIndex.java
index 7ac253d..e129b7b 100644
--- a/core/src/main/java/org/apache/accumulo/core/file/rfile/MultiLevelIndex.java
+++ b/core/src/main/java/org/apache/accumulo/core/file/rfile/MultiLevelIndex.java
@@ -83,14 +83,6 @@ public class MultiLevelIndex {
}
}
- public void printInfo(PrintStream out) {
- out.println("Key: " + key.toString() +
- " NumEntries: " + entries +
- " Offset: " + offset +
- " CompressedSize: " + compressedSize +
- " RawSize: " + rawSize);
- }
-
@Override
public void write(DataOutput out) throws IOException {
key.write(out);
@@ -863,9 +855,49 @@ public class MultiLevelIndex {
getIndexInfo(rootBlock, sizes, counts);
}
+ private void printIndex(IndexBlock ib, String prefix, PrintStream out) throws IOException { // prints one line per entry of ib to out and, while ib.getLevel() > 0, recurses for each entry
+ List<IndexEntry> index = ib.getIndex();
+ // Build the "<prefix>Level: <level>" text once; every entry line of this block starts with it.
+ StringBuilder sb = new StringBuilder();
+ sb.append(prefix);
+
+ sb.append("Level: ");
+ sb.append(ib.getLevel());
+ // Remember where the shared text ends so the builder can be cut back to it for each entry.
+ int resetLen = sb.length();
+ // Prefix passed down when recursing: the current prefix with extra padding appended.
+ String recursePrefix = prefix + " ";
+
+ for (IndexEntry ie : index) {
+ // Discard the previous entry's fields, keeping only the shared prefix/level text.
+ sb.setLength(resetLen);
+
+ sb.append(" Key: ");
+ sb.append(ie.key);
+ sb.append(" NumEntries: ");
+ sb.append(ie.entries);
+ sb.append(" Offset: ");
+ sb.append(ie.offset);
+ sb.append(" CompressedSize: ");
+ sb.append(ie.compressedSize);
+ sb.append(" RawSize : ");
+ sb.append(ie.rawSize);
+
+ out.println(sb.toString());
+ // Above level 0, fetch the block for this entry via getIndexBlock (presumably its child block) and print it with the deeper prefix.
+ if (ib.getLevel() > 0) {
+ IndexBlock cib = getIndexBlock(ie);
+ printIndex(cib, recursePrefix, out);
+ }
+ }
+ }
+
+ public void printIndex(String prefix, PrintStream out) throws IOException { // public entry point: prints the index to out starting at rootBlock; every printed line begins with prefix
+ printIndex(rootBlock, prefix, out);
+ }
+
public Key getLastKey() {
return rootBlock.getIndex().get(rootBlock.getIndex().size() - 1).getKey();
}
}
-
}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/7543ea2b/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java b/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
index 7ffc5c8..3b11d51 100644
--- a/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
+++ b/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
@@ -311,13 +311,16 @@ public class RFile {
while (countIter.hasNext()) {
IndexEntry indexEntry = countIter.next();
numKeys += indexEntry.getNumEntries();
- if (includeIndexDetails) {
- indexEntry.printInfo(out);
- }
}
out.printf("\t%-22s : %,d\n", "Num entries", numKeys);
out.printf("\t%-22s : %s\n", "Column families", (isDefaultLG && columnFamilies == null ? "<UNKNOWN>" : columnFamilies.keySet()));
+
+ if (includeIndexDetails) {
+ out.printf("\t%-22s :\n", "Index Entries", lastKey);
+ String prefix = String.format("\t ", "");
+ indexReader.printIndex(prefix, out);
+ }
}
}
@@ -1386,7 +1389,7 @@ public class RFile {
public void printInfo() throws IOException {
printInfo(false);
}
-
+
public void printInfo(boolean includeIndexDetails) throws IOException {
System.out.printf("%-24s : %d\n", "RFile Version", rfileVersion);
[3/3] accumulo git commit: ACCUMULO-4656 - clean up warnings from
printf
Posted by mj...@apache.org.
ACCUMULO-4656 - clean up warnings from printf
closes #269. thanks @matthpeterson
Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/e94cc38c
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/e94cc38c
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/e94cc38c
Branch: refs/heads/1.8
Commit: e94cc38ce7769b59d52eb45423ca8c60ad3b4c1a
Parents: 7543ea2
Author: Michael Wall <mj...@apache.org>
Authored: Tue Jun 20 14:19:33 2017 -0400
Committer: Michael Wall <mj...@apache.org>
Committed: Tue Jun 20 14:24:30 2017 -0400
----------------------------------------------------------------------
.../apache/accumulo/core/file/rfile/RFile.java | 64 +++++++++++++-------
1 file changed, 42 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/accumulo/blob/e94cc38c/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java b/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
index 3b11d51..4b19b75 100644
--- a/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
+++ b/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
@@ -89,28 +89,43 @@ public class RFile {
private static final int RINDEX_MAGIC = 0x20637474;
- static final int RINDEX_VER_8 = 8; // Added sample storage. There is a sample locality group for each locality group. Sample are built using a Sampler and
- // sampler configuration. The Sampler and its configuration are stored in RFile. Persisting the method of producing the
+ static final int RINDEX_VER_8 = 8; // Added sample storage. There is a sample locality group for each locality
+ // group. Samples are built using a Sampler and
+ // sampler configuration. The Sampler and its configuration are stored in RFile.
+ // Persisting the method of producing the
// sample allows a user of RFile to determine if the sample is useful.
//
- // Selected smaller keys for index by doing two things. First internal stats were used to look for keys that were below
- // average in size for the index. Also keys that were statistically large were excluded from the index. Second shorter keys
+ // Selected smaller keys for index by doing two things. First internal stats were
+ // used to look for keys that were below
+ // average in size for the index. Also keys that were statistically large were
+ // excluded from the index. Second shorter keys
// (that may not exist in data) were generated for the index.
- static final int RINDEX_VER_7 = 7; // Added support for prefix encoding and encryption. Before this change only exact matches within a key field were deduped
- // for consecutive keys. After this change, if consecutive key fields have the same prefix then the prefix is only stored
+ static final int RINDEX_VER_7 = 7; // Added support for prefix encoding and encryption. Before this change only
+ // exact matches within a key field were deduped
+ // for consecutive keys. After this change, if consecutive key fields have the
+ // same prefix then the prefix is only stored
// once.
- static final int RINDEX_VER_6 = 6; // Added support for multilevel indexes. Before this the index was one list with an entry for each data block. For large
- // files, a large index needed to be read into memory before any seek could be done. After this change the index is a fat
- // tree, and opening a large rfile is much faster. Like the previous version of Rfile, each index node in the tree is kept
+ static final int RINDEX_VER_6 = 6; // Added support for multilevel indexes. Before this the index was one list with
+ // an entry for each data block. For large
+ // files, a large index needed to be read into memory before any seek could be
+ // done. After this change the index is a fat
+ // tree, and opening a large rfile is much faster. Like the previous version of
+ // Rfile, each index node in the tree is kept
// in memory serialized and used in its serialized form.
// static final int RINDEX_VER_5 = 5; // unreleased
- static final int RINDEX_VER_4 = 4; // Added support for seeking using serialized indexes. After this change index is no longer deserialized when rfile opened.
- // Entire serialized index is read into memory as single byte array. For seeks, serialized index is used to find blocks
- // (the binary search deserializes the specific entries its needs). This resulted in less memory usage (no object overhead)
+ static final int RINDEX_VER_4 = 4; // Added support for seeking using serialized indexes. After this change index is
+ // no longer deserialized when rfile opened.
+ // Entire serialized index is read into memory as single byte array. For seeks,
+ // serialized index is used to find blocks
+ // (the binary search deserializes the specific entries it needs). This resulted
+ // in less memory usage (no object overhead)
// and faster open times for RFiles.
- static final int RINDEX_VER_3 = 3; // Initial released version of RFile. R is for relative encoding. A keys is encoded relative to the previous key. The
- // initial version deduped key fields that were the same for consecutive keys. For sorted data this is a common occurrence.
- // This version supports locality groups. Each locality group has an index pointing to set of data blocks. Each data block
+ static final int RINDEX_VER_3 = 3; // Initial released version of RFile. R is for relative encoding. A keys is
+ // encoded relative to the previous key. The
+ // initial version deduped key fields that were the same for consecutive keys.
+ // For sorted data this is a common occurrence.
+ // This version supports locality groups. Each locality group has an index
+ // pointing to set of data blocks. Each data block
// contains relatively encoded keys and values.
// Buffer sample data so that many sample data blocks are stored contiguously.
@@ -317,8 +332,8 @@ public class RFile {
out.printf("\t%-22s : %s\n", "Column families", (isDefaultLG && columnFamilies == null ? "<UNKNOWN>" : columnFamilies.keySet()));
if (includeIndexDetails) {
- out.printf("\t%-22s :\n", "Index Entries", lastKey);
- String prefix = String.format("\t ", "");
+ out.printf("\t%-22s :\n", "Index Entries"); // heading row: the label goes through %-22s like the "Num entries" / "Column families" rows; lastKey is not part of it
+ String prefix = "\t "; // plain literal, nothing to format; passed to printIndex as the line prefix
indexReader.printIndex(prefix, out);
}
}
@@ -366,7 +381,8 @@ public class RFile {
public void flushIfNeeded() throws IOException {
if (dataSize > sampleBufferSize) {
- // the reason to write out all but one key is so that closeBlock() can always eventually be called with true
+ // the reason to write out all but one key is so that closeBlock() can always eventually be called with
+ // true
List<SampleEntry> subList = entries.subList(0, entries.size() - 1);
if (subList.size() > 0) {
@@ -444,7 +460,8 @@ public class RFile {
avergageKeySize = keyLenStats.getMean();
}
- // Possibly produce a shorter key that does not exist in data. Even if a key can be shortened, it may not be below average.
+ // Possibly produce a shorter key that does not exist in data. Even if a key can be shortened, it may
+ // not be below average.
Key closeKey = KeyShortener.shorten(prevKey, key);
if ((closeKey.getSize() <= avergageKeySize || blockWriter.getRawSize() > maxBlockSize) && !isGiantKey(closeKey)) {
@@ -904,8 +921,10 @@ public class RFile {
}
if (entriesLeft == 0 && startKey.compareTo(getTopKey()) > 0 && startKey.compareTo(iiter.peekPrevious().getKey()) <= 0) {
- // In the empty space at the end of a block. This can occur when keys are shortened in the index creating index entries that do not exist in the
- // block. These shortened index entires fall between the last key in a block and first key in the next block, but may not exist in the data.
+ // In the empty space at the end of a block. This can occur when keys are shortened in the index
+ // creating index entries that do not exist in the
+ // block. These shortened index entries fall between the last key in a block and first key in the
+ // next block, but may not exist in the data.
// Just proceed to the next block.
reseek = false;
}
@@ -933,7 +952,8 @@ public class RFile {
}
if (iiter.hasPrevious())
- prevKey = new Key(iiter.peekPrevious().getKey()); // initially prevKey is the last key of the prev block
+ prevKey = new Key(iiter.peekPrevious().getKey()); // initially prevKey is the last key of the
+ // prev block
else
prevKey = new Key(); // first block in the file, so set prev key to minimal key