You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by to...@apache.org on 2011/10/21 20:45:32 UTC
svn commit: r1187494 - in /hbase/trunk: CHANGES.txt
src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java
Author: todd
Date: Fri Oct 21 18:45:31 2011
New Revision: 1187494
URL: http://svn.apache.org/viewvc?rev=1187494&view=rev
Log:
HBASE-3929 Add option to HFile tool to produce basic stats
Modified:
hbase/trunk/CHANGES.txt
hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java
Modified: hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/trunk/CHANGES.txt?rev=1187494&r1=1187493&r2=1187494&view=diff
==============================================================================
--- hbase/trunk/CHANGES.txt (original)
+++ hbase/trunk/CHANGES.txt Fri Oct 21 18:45:31 2011
@@ -649,6 +649,8 @@ Release 0.92.0 - Unreleased
HBASE-4486 Improve Javadoc for HTableDescriptor (Akash Ashok)
HBASE-4604 hbase.client.TestHTablePool could start a single
cluster instead of one per method (nkeywal)
+ HBASE-3929 Add option to HFile tool to produce basic stats (Matteo
+ Bertozzi and todd via todd)
TASKS
@@ -4276,4 +4278,4 @@ Below are the list of changes before 200
(Edward Yoon via Stack).
61. HADOOP-1709 Make HRegionInterface more like that of HTable
HADOOP-1725 Client find of table regions should not include offlined, split parents
-=
\ No newline at end of file
+=
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java?rev=1187494&r1=1187493&r2=1187494&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java Fri Oct 21 18:45:31 2011
@@ -64,6 +64,7 @@ public class HFilePrettyPrinter {
private boolean printKey;
private boolean shouldPrintMeta;
private boolean printBlocks;
+ private boolean printStats;
private boolean checkRow;
private boolean checkFamily;
@@ -88,6 +89,7 @@ public class HFilePrettyPrinter {
"File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/.META./12/34");
options.addOption("r", "region", true,
"Region to scan. Pass region name; e.g. '.META.,,1'");
+ options.addOption("s", "stats", false, "Print statistics");
}
public boolean parseOptions(String args[]) throws ParseException,
@@ -105,6 +107,7 @@ public class HFilePrettyPrinter {
printKey = cmd.hasOption("e") || printValue;
shouldPrintMeta = cmd.hasOption("m");
printBlocks = cmd.hasOption("b");
+ printStats = cmd.hasOption("s");
checkRow = cmd.hasOption("k");
checkFamily = cmd.hasOption("a");
@@ -189,12 +192,14 @@ public class HFilePrettyPrinter {
Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
- if (verbose || printKey || checkRow || checkFamily) {
+ KeyValueStatsCollector fileStats = null;
+ if (verbose || printKey || checkRow || checkFamily || printStats) {
// scan over file and read key/value's and check if requested
HFileScanner scanner = reader.getScanner(false, false, false);
- scanner.seekTo();
- scanKeysValues(file, scanner);
+ fileStats = new KeyValueStatsCollector();
+ if (scanner.seekTo())
+ scanKeysValues(file, fileStats, scanner);
}
// print meta data
@@ -207,14 +212,23 @@ public class HFilePrettyPrinter {
System.out.println(reader.getDataBlockIndexReader());
}
+ if (printStats) {
+ fileStats.finish();
+ System.out.println("Stats:\n" + fileStats);
+ }
+
reader.close();
}
- private void scanKeysValues(Path file, HFileScanner scanner)
+ private void scanKeysValues(Path file, KeyValueStatsCollector fileStats, HFileScanner scanner)
throws IOException {
KeyValue pkv = null;
do {
KeyValue kv = scanner.getKeyValue();
+ // collect stats
+ if (printStats) {
+ fileStats.collect(kv);
+ }
// dump key value
if (printKey) {
System.out.print("K: " + kv);
@@ -305,4 +319,83 @@ public class HFilePrettyPrinter {
}
}
+  /**
+   * Running summary (count, min, max, mean) of a stream of long values.
+   */
+  private static class LongStats {
+    // The extremes start at the opposite ends of the long range so that the
+    // first collected value always replaces both of them.
+    private long min = Long.MAX_VALUE;
+    private long max = Long.MIN_VALUE;
+    private long sum = 0;
+    private long count = 0;
+
+    /**
+     * Folds one observation into the summary.
+     *
+     * @param d the value to record
+     */
+    void collect(long d) {
+      min = Math.min(min, d);
+      max = Math.max(max, d);
+      sum += d;
+      count++;
+    }
+
+    /**
+     * Renders the summary as tab-separated "name: value" pairs.
+     *
+     * If nothing has been collected, min and max still hold their initial
+     * sentinel values and the mean is the floating-point result of 0/0 (NaN).
+     */
+    @Override
+    public String toString() {
+      return "count: " + count +
+          "\tmin: " + min +
+          "\tmax: " + max +
+          "\tmean: " + ((double)sum/count);
+    }
+  }
+
+  /**
+   * Accumulates key/value length statistics and per-row size statistics for
+   * the KeyValues passed to {@link #collect(KeyValue)}.
+   *
+   * A row boundary is detected only by comparing each KeyValue's row with the
+   * row of the KeyValue collected immediately before it, so a row whose cells
+   * are not fed contiguously is counted as several rows. Call
+   * {@link #finish()} after the last KeyValue so the final row is included.
+   */
+  private static class KeyValueStatsCollector {
+    LongStats keyLen = new LongStats();
+    LongStats valLen = new LongStats();
+    LongStats rowSizeBytes = new LongStats();
+    LongStats rowSizeCols = new LongStats();
+
+    // Running totals for the row currently being accumulated.
+    long curRowBytes = 0;
+    long curRowCols = 0;
+
+    // Row key of the largest row (by bytes) completed so far.
+    byte[] biggestRow = null;
+
+    // Last KeyValue seen; null until the first call to collect().
+    private KeyValue prevKV = null;
+    // Byte size of the row currently recorded in biggestRow.
+    private long maxRowBytes = 0;
+
+    /**
+     * Records one KeyValue. If its row differs from the previous KeyValue's
+     * row, the previous row's totals are flushed first.
+     *
+     * @param kv the next KeyValue to account for
+     */
+    public void collect(KeyValue kv) {
+      keyLen.collect(kv.getKeyLength());
+      valLen.collect(kv.getValueLength());
+      if (prevKV != null &&
+          KeyValue.COMPARATOR.compareRows(prevKV, kv) != 0) {
+        // new row: prevKV still refers to the last cell of the finished row
+        collectRow();
+      }
+      curRowBytes += kv.getLength();
+      curRowCols++;
+      prevKV = kv;
+    }
+
+    /**
+     * Flushes the totals of the row that just ended into the row statistics
+     * and resets the running counters.
+     */
+    private void collectRow() {
+      rowSizeBytes.collect(curRowBytes);
+      rowSizeCols.collect(curRowCols);
+
+      if (curRowBytes > maxRowBytes && prevKV != null) {
+        biggestRow = prevKV.getRow();
+        // Remember the new maximum. Without this assignment maxRowBytes
+        // stays at 0, every non-empty row passes the comparison above, and
+        // biggestRow ends up being the last row instead of the largest one.
+        maxRowBytes = curRowBytes;
+      }
+
+      curRowBytes = 0;
+      curRowCols = 0;
+    }
+
+    /**
+     * Flushes the trailing row, if any cells have been accumulated for it.
+     */
+    public void finish() {
+      if (curRowCols > 0) {
+        collectRow();
+      }
+    }
+
+    /**
+     * Formats the collected statistics, one per line, or a fixed message
+     * when no KeyValue was ever collected.
+     */
+    @Override
+    public String toString() {
+      if (prevKV == null) {
+        return "no data available for statistics";
+      }
+
+      return
+          "Key length: " + keyLen + "\n" +
+          "Val length: " + valLen + "\n" +
+          "Row size (bytes): " + rowSizeBytes + "\n" +
+          "Row size (columns): " + rowSizeCols + "\n" +
+          "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
+    }
+  }
}