You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2009/08/15 01:36:05 UTC
svn commit: r804408 - in /hadoop/hbase/branches/0.20: CHANGES.txt
src/java/org/apache/hadoop/hbase/io/hfile/HFile.java
Author: stack
Date: Fri Aug 14 23:36:04 2009
New Revision: 804408
URL: http://svn.apache.org/viewvc?rev=804408&view=rev
Log:
HBASE-1766 Add advanced features to HFile.main() to be able to analyze storefile problems
Modified:
hadoop/hbase/branches/0.20/CHANGES.txt
hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/io/hfile/HFile.java
Modified: hadoop/hbase/branches/0.20/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/CHANGES.txt?rev=804408&r1=804407&r2=804408&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/CHANGES.txt (original)
+++ hadoop/hbase/branches/0.20/CHANGES.txt Fri Aug 14 23:36:04 2009
@@ -317,6 +317,8 @@
HBASE-1745 [tools] Tool to kick region out of inTransistion
HBASE-1757 REST server runs out of fds
HBASE-1768 REST server has upper limit of 5k PUT
+ HBASE-1766 Add advanced features to HFile.main() to be able to analyze
+ storefile problems
IMPROVEMENTS
HBASE-1089 Add count of regions on filesystem to master UI; add percentage
Modified: hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/io/hfile/HFile.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/io/hfile/HFile.java?rev=804408&r1=804407&r2=804408&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/io/hfile/HFile.java (original)
+++ hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/io/hfile/HFile.java Fri Aug 14 23:36:04 2009
@@ -31,18 +31,27 @@
import java.util.List;
import java.util.Map;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.PosixParser;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.io.HbaseMapWritable;
import org.apache.hadoop.hbase.io.HeapSize;
+import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.compress.Compressor;
@@ -1594,65 +1603,156 @@
return (int)(l & 0x00000000ffffffffL);
}
-
+ /**
+ * Returns all files belonging to the given region directory. Sub-directories
+ * of a family directory are skipped. Returns an empty list, never null, when
+ * nothing is found or a directory listing comes back as null.
+ *
+ * @param fs The file system reference.
+ * @param regionDir The region directory to scan.
+ * @return The list of files found.
+ * @throws IOException When scanning the files fails.
+ */
+ static List<Path> getStoreFiles(FileSystem fs, Path regionDir)
+ throws IOException {
+ List<Path> res = new ArrayList<Path>();
+ PathFilter dirFilter = new FSUtils.DirFilter(fs);
+ FileStatus[] familyDirs = fs.listStatus(regionDir, dirFilter);
+ // NOTE(review): on older Hadoop releases listStatus may hand back null
+ // instead of throwing when the path is missing; treat that as "no files"
+ // so callers get the empty list the contract promises.
+ if (familyDirs == null) {
+ return res;
+ }
+ for (FileStatus dir : familyDirs) {
+ FileStatus[] files = fs.listStatus(dir.getPath());
+ if (files == null) {
+ // family directory could not be listed; nothing to add for it
+ continue;
+ }
+ for (FileStatus file : files) {
+ if (!file.isDir()) {
+ res.add(file.getPath());
+ }
+ }
+ }
+ return res;
+ }
+
+ /**
+ * Command line tool to inspect one or more HFiles.
+ * <p>
+ * Supported options:
+ * <ul>
+ * <li><code>-f, --file &lt;path&gt;</code> file to scan</li>
+ * <li><code>-r, --region &lt;name&gt;</code> region whose store files are scanned</li>
+ * <li><code>-p, --printkv</code> print key/value pairs</li>
+ * <li><code>-m, --printmeta</code> print meta data of file</li>
+ * <li><code>-k, --checkrow</code> warn when a row sorts before its predecessor</li>
+ * <li><code>-a, --checkfamily</code> warn when a key/value family does not match
+ * the file name or the previous key/value</li>
+ * <li><code>-v, --verbose</code> verbose output</li>
+ * </ul>
+ * With no arguments the usage is printed and the JVM exits with -1. Any
+ * exception raised while parsing or scanning is caught and its stack trace
+ * printed.
+ *
+ * @param args Command line arguments, see above.
+ * @throws IOException Declared for compatibility with earlier versions.
+ */
public static void main(String []args) throws IOException {
- if (args.length < 1) {
- System.out.println("usage: <filename> -- dumps hfile stats");
- return;
- }
-
- HBaseConfiguration conf = new HBaseConfiguration();
-
- FileSystem fs = FileSystem.get(conf);
-
- Path path = new Path(args[0]);
-
- if (!fs.exists(path)) {
- System.out.println("File doesnt exist: " + path);
- return;
- }
-
- HFile.Reader reader = new HFile.Reader(fs, path, null, false);
- Map<byte[],byte[]> fileInfo = reader.loadFileInfo();
-
- // scan thru and count the # of unique rows.
-// HashSet<Integer> rows = new HashSet<Integer>(reader.getEntries()/4);
-// long start = System.currentTimeMillis();
-// HFileScanner scanner = reader.getScanner();
-// HStoreKey hsk;
-// scanner.seekTo();
-// do {
-// hsk = new HStoreKey(scanner.getKey());
-// rows.add(Bytes.hashCode(hsk.getRow()));
-// } while (scanner.next());
-// long end = System.currentTimeMillis();
-
-
- HFileScanner scanner = reader.getScanner();
- scanner.seekTo();
- KeyValue kv;
- do {
- kv = scanner.getKeyValue();
- System.out.println("K: " + Bytes.toStringBinary(kv.getKey()) +
- " V: " + Bytes.toStringBinary(kv.getValue()));
- } while (scanner.next());
-
- System.out.println("Block index size as per heapsize: " + reader.indexSize());
- System.out.println(reader.toString());
- System.out.println(reader.getTrailerInfo());
- System.out.println("Fileinfo:");
- for ( Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
- System.out.print(Bytes.toString(e.getKey()) + " = " );
-
- if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY"))==0) {
- long seqid = Bytes.toLong(e.getValue());
- System.out.println(seqid);
- } else {
- System.out.println(Bytes.toStringBinary(e.getValue()));
+ try {
+ // create options
+ Options options = new Options();
+ options.addOption("v", "verbose", false, "verbose output");
+ options.addOption("p", "printkv", false, "print key/value pairs");
+ options.addOption("m", "printmeta", false, "print meta data of file");
+ options.addOption("k", "checkrow", false, "enable row order check");
+ options.addOption("a", "checkfamily", false, "enable family check");
+ options.addOption("f", "file", true, "file to scan");
+ options.addOption("r", "region", true, "region to scan");
+ if (args.length == 0) {
+ HelpFormatter formatter = new HelpFormatter();
+ formatter.printHelp("HFile ", options, true);
+ System.exit(-1);
+ }
+ CommandLineParser parser = new PosixParser();
+ CommandLine cmd = parser.parse(options, args);
+ boolean verbose = cmd.hasOption("v");
+ boolean printKeyValue = cmd.hasOption("p");
+ boolean printMeta = cmd.hasOption("m");
+ boolean checkRow = cmd.hasOption("k");
+ boolean checkFamily = cmd.hasOption("a");
+ // get configuration, file system and get list of files
+ HBaseConfiguration conf = new HBaseConfiguration();
+ FileSystem fs = FileSystem.get(conf);
+ ArrayList<Path> files = new ArrayList<Path>();
+ if (cmd.hasOption("f")) {
+ files.add(new Path(cmd.getOptionValue("f")));
+ }
+ if (cmd.hasOption("r")) {
+ String regionName = cmd.getOptionValue("r");
+ byte[] rn = Bytes.toBytes(regionName);
+ byte[][] hri = HRegionInfo.parseRegionName(rn);
+ Path rootDir = FSUtils.getRootDir(conf);
+ Path tableDir = new Path(rootDir, Bytes.toString(hri[0]));
+ int enc = HRegionInfo.encodeRegionName(rn);
+ Path regionDir = new Path(tableDir, Integer.toString(enc));
+ if (verbose) System.out.println("region dir -> " + regionDir);
+ List<Path> regionFiles = getStoreFiles(fs, regionDir);
+ System.out.println("Number of region files found -> " +
+ regionFiles.size());
+ if (verbose) {
+ int i = 1;
+ for (Path p : regionFiles) {
+ System.out.println("Found file[" + i++ + "] -> " + p);
+ }
+ }
+ files.addAll(regionFiles);
}
-
+ // iterate over all files found
+ System.out.println("\nStart scan of files...\n");
+ for (Path file : files) {
+ if (verbose) System.out.println("Scanning -> " + file);
+ if (!fs.exists(file)) {
+ System.err.println("ERROR, file doesnt exist: " + file);
+ continue;
+ }
+ // create reader and load file info
+ HFile.Reader reader = new HFile.Reader(fs, file, null, false);
+ try {
+ Map<byte[],byte[]> fileInfo = reader.loadFileInfo();
+ // scan over file and read key/value's and check if requested
+ HFileScanner scanner = reader.getScanner();
+ // seekTo() returns false when the file holds no entries; in that case
+ // the loop body must not run at all, otherwise a missing KeyValue
+ // would be dereferenced and an empty file reported as one entry.
+ boolean hasKv = scanner.seekTo();
+ KeyValue pkv = null;
+ int count = 0;
+ while (hasKv) {
+ KeyValue kv = scanner.getKeyValue();
+ // dump key value
+ if (printKeyValue) {
+ System.out.println("K: " + Bytes.toStringBinary(kv.getKey()) +
+ " V: " + Bytes.toStringBinary(kv.getValue()));
+ }
+ // check if rows are in order
+ if (checkRow && pkv != null) {
+ if (Bytes.compareTo(pkv.getRow(), kv.getRow()) > 0) {
+ System.err.println("WARNING, previous row is greater then" +
+ " current row\n\tfilename -> " + file +
+ "\n\tprevious -> " + Bytes.toStringBinary(pkv.getKey()) +
+ "\n\tcurrent -> " + Bytes.toStringBinary(kv.getKey()));
+ }
+ }
+ // check if families are consistent
+ if (checkFamily) {
+ String fam = Bytes.toString(kv.getFamily());
+ if (!file.toString().contains(fam)) {
+ System.err.println("WARNING, filename does not match kv family," +
+ "\n\tfilename -> " + file +
+ "\n\tkeyvalue -> " + Bytes.toStringBinary(kv.getKey()));
+ }
+ if (pkv != null && Bytes.compareTo(pkv.getFamily(), kv.getFamily()) != 0) {
+ System.err.println("WARNING, previous kv has different family" +
+ " compared to current key\n\tfilename -> " + file +
+ "\n\tprevious -> " + Bytes.toStringBinary(pkv.getKey()) +
+ "\n\tcurrent -> " + Bytes.toStringBinary(kv.getKey()));
+ }
+ }
+ pkv = kv;
+ count++;
+ hasKv = scanner.next();
+ }
+ if (verbose || printKeyValue) {
+ System.out.println("Scanned kv count -> " + count);
+ }
+ // print meta data
+ if (printMeta) {
+ System.out.println("Block index size as per heapsize: " + reader.indexSize());
+ System.out.println(reader.toString());
+ System.out.println(reader.getTrailerInfo());
+ System.out.println("Fileinfo:");
+ for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
+ System.out.print(Bytes.toString(e.getKey()) + " = " );
+ if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY"))==0) {
+ long seqid = Bytes.toLong(e.getValue());
+ System.out.println(seqid);
+ } else {
+ System.out.println(Bytes.toStringBinary(e.getValue()));
+ }
+ }
+ }
+ } finally {
+ // release the reader even when scanning this file fails part-way
+ reader.close();
+ }
+ }
+ System.out.println("\nDone.");
+ } catch (Exception e) {
+ e.printStackTrace();
}
-
- reader.close();
}
+
}