You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/11 04:07:21 UTC

svn commit: r1181423 - in /hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase: io/hfile/HFile.java regionserver/Store.java regionserver/StoreFile.java

Author: nspiegelberg
Date: Tue Oct 11 02:07:21 2011
New Revision: 1181423

URL: http://svn.apache.org/viewvc?rev=1181423&view=rev
Log:
HFile pretty printer fix; and some misc cleanup of int to long

Summary:
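(i) If -m (meta only) was the only option requested, the HFile pretty printer
still scanned the whole file. The key/value scan now runs only when verbose
output, key/value printing, or a row/family check is requested.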

(ii) Changed a number of ints to longs in HFile for future-proofing. The HFile
FixedFileTrailer still stores the number of keys as an "int". This is harmless:
the code does support more than 2G keys per file; the stored entry count is
simply capped at Integer.MAX_VALUE, so stats reporting can be slightly off.
Changing the on-disk trailer format is deferred to a future revision.

Test Plan:
Will run unit tests.

Ran HFile pretty printer on a large test file, and confirmed it wasn't going
into the long scan loop.

DiffCamp Revision: 175196
Reviewed By: nspiegelberg
CC: nspiegelberg
Revert Plan:
OK

Modified:
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java?rev=1181423&r1=1181422&r2=1181423&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java Tue Oct 11 02:07:21 2011
@@ -233,7 +233,7 @@ public class HFile {
     private long totalBytes = 0;
 
     // Total # of key/value entries, ie: how many times add() was called.
-    private int entryCount = 0;
+    private long entryCount = 0;
 
     // Used calculating average key and value lengths.
     private long keylength = 0;
@@ -678,7 +678,18 @@ public class HFile {
       trailer.metaIndexCount = metaNames.size();
 
       trailer.totalUncompressedBytes = totalBytes;
-      trailer.entryCount = entryCount;
+
+      // the entryCount in the HFile is currently only used for
+      // reporting, and for bloom calculations. This fix only
+      // avoids the counter from wrapping around to -ve values.
+      // If/when we change the FixedFileTrailer format in future,
+      // we can modify the entryCount to a long.
+      if (entryCount > Integer.MAX_VALUE)
+      {
+        trailer.entryCount = Integer.MAX_VALUE;
+      } else {
+        trailer.entryCount = (int)entryCount;
+      }
 
       trailer.compressionCodec = this.compressAlgo.ordinal();
 
@@ -1156,7 +1167,7 @@ public class HFile {
     /**
      * @return number of KV entries in this HFile
      */
-    public int getEntries() {
+    public long getEntries() {
       if (!this.isFileInfoLoaded()) {
         throw new RuntimeException("File info not loaded");
       }
@@ -1189,7 +1200,7 @@ public class HFile {
     /**
      * @return number of K entries in this HFile's filter.  Returns KV count if no filter.
      */
-    public int getFilterEntries() {
+    public long getFilterEntries() {
       return getEntries();
     }
 
@@ -1524,6 +1535,12 @@ public class HFile {
     // How many meta block index entries (aka: meta block count)
     int metaIndexCount;
     long totalUncompressedBytes;
+
+    // Note: An HFile today can contain more than Integer.MAX_VALUE keys.
+    // However, the entryCount (not being used for much today) will
+    // cap out at Integer.MAX_VALUE.
+    // If/when we change the trailer format, we should change the
+    // entryCount datatype to a long.
     int entryCount;
     int compressionCodec;
     int version = 1;
@@ -1925,6 +1942,7 @@ public class HFile {
         }
         files.addAll(regionFiles);
       }
+
       // iterate over all files found
       for (Path file : files) {
         if (verbose) System.out.println("Scanning -> " + file);
@@ -1934,49 +1952,54 @@ public class HFile {
         }
         // create reader and load file info
         HFile.Reader reader = new HFile.Reader(fs, file, null, false);
+
         Map<byte[],byte[]> fileInfo = reader.loadFileInfo();
-        // scan over file and read key/value's and check if requested
-        HFileScanner scanner = reader.getScanner(false, false);
-        scanner.seekTo();
-        KeyValue pkv = null;
         int count = 0;
-        do {
-          KeyValue kv = scanner.getKeyValue();
-          // dump key value
-          if (printKeyValue) {
-            System.out.println("K: " + kv +
-              " V: " + Bytes.toStringBinary(kv.getValue()));
-          }
-          // check if rows are in order
-          if (checkRow && pkv != null) {
-            if (Bytes.compareTo(pkv.getRow(), kv.getRow()) > 0) {
-              System.err.println("WARNING, previous row is greater then" +
-                " current row\n\tfilename -> " + file +
-                "\n\tprevious -> " + Bytes.toStringBinary(pkv.getKey()) +
-                "\n\tcurrent  -> " + Bytes.toStringBinary(kv.getKey()));
+        if (verbose || printKeyValue || checkRow || checkFamily) {
+          // scan over file and read key/value's and check if requested
+          HFileScanner scanner = reader.getScanner(false, false);
+          scanner.seekTo();
+          KeyValue pkv = null;
+          do {
+            KeyValue kv = scanner.getKeyValue();
+            // dump key value
+            if (printKeyValue) {
+              System.out.println("K: " + kv +
+                  " V: " + Bytes.toStringBinary(kv.getValue()));
             }
-          }
-          // check if families are consistent
-          if (checkFamily) {
-            String fam = Bytes.toString(kv.getFamily());
-            if (!file.toString().contains(fam)) {
-              System.err.println("WARNING, filename does not match kv family," +
-                "\n\tfilename -> " + file +
-                "\n\tkeyvalue -> " + Bytes.toStringBinary(kv.getKey()));
+            // check if rows are in order
+            if (checkRow && pkv != null) {
+              if (Bytes.compareTo(pkv.getRow(), kv.getRow()) > 0) {
+                System.err.println("WARNING, previous row is greater then" +
+                    " current row\n\tfilename -> " + file +
+                    "\n\tprevious -> " + Bytes.toStringBinary(pkv.getKey()) +
+                    "\n\tcurrent  -> " + Bytes.toStringBinary(kv.getKey()));
+              }
             }
-            if (pkv != null && Bytes.compareTo(pkv.getFamily(), kv.getFamily()) != 0) {
-              System.err.println("WARNING, previous kv has different family" +
-                " compared to current key\n\tfilename -> " + file +
-                "\n\tprevious -> " +  Bytes.toStringBinary(pkv.getKey()) +
-                "\n\tcurrent  -> " + Bytes.toStringBinary(kv.getKey()));
+            // check if families are consistent
+            if (checkFamily) {
+              String fam = Bytes.toString(kv.getFamily());
+              if (!file.toString().contains(fam)) {
+                System.err.println("WARNING, filename does not match kv family," +
+                    "\n\tfilename -> " + file +
+                    "\n\tkeyvalue -> " + Bytes.toStringBinary(kv.getKey()));
+              }
+              if (pkv != null && Bytes.compareTo(pkv.getFamily(), kv.getFamily()) != 0) {
+                System.err.println("WARNING, previous kv has different family" +
+                    " compared to current key\n\tfilename -> " + file +
+                    "\n\tprevious -> " +  Bytes.toStringBinary(pkv.getKey()) +
+                    "\n\tcurrent  -> " + Bytes.toStringBinary(kv.getKey()));
+              }
             }
-          }
-          pkv = kv;
-          count++;
-        } while (scanner.next());
+            pkv = kv;
+            count++;
+          } while (scanner.next());
+        }
+
         if (verbose || printKeyValue) {
           System.out.println("Scanned kv count -> " + count);
         }
+
         // print meta data
         if (printMeta) {
           System.out.println("Block index size as per heapsize: " + reader.indexSize());

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java?rev=1181423&r1=1181422&r2=1181423&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java Tue Oct 11 02:07:21 2011
@@ -507,7 +507,7 @@ public class Store implements HeapSize {
   /*
    * @return Writer for a new StoreFile in the tmp dir.
    */
-  private StoreFile.Writer createWriterInTmp(int maxKeyCount)
+  private StoreFile.Writer createWriterInTmp(long maxKeyCount)
   throws IOException {
     return StoreFile.createWriter(this.fs, region.getTmpDir(), this.blocksize,
         this.compression, this.comparator, this.conf,
@@ -844,13 +844,13 @@ public class Store implements HeapSize {
                                final boolean majorCompaction, final long maxId)
       throws IOException {
     // calculate maximum key count after compaction (for blooms)
-    int maxKeyCount = 0;
+    long maxKeyCount = 0;
     for (StoreFile file : filesToCompact) {
       StoreFile.Reader r = file.getReader();
       if (r != null) {
         // NOTE: getFilterEntries could cause under-sized blooms if the user
         //       switches bloom type (e.g. from ROW to ROWCOL)
-        int keyCount = (r.getBloomFilterType() == family.getBloomFilterType())
+        long keyCount = (r.getBloomFilterType() == family.getBloomFilterType())
             ? r.getFilterEntries() : r.getEntries();
         maxKeyCount += keyCount;
         LOG.info("Compacting: " + file + "; keyCount = " + keyCount + "; Bloom Type = " + r.getBloomFilterType().toString());

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java?rev=1181423&r1=1181422&r2=1181423&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java Tue Oct 11 02:07:21 2011
@@ -553,7 +553,7 @@ public class StoreFile {
                                               final KeyValue.KVComparator c,
                                               final Configuration conf,
                                               BloomType bloomType,
-                                              int maxKeySize)
+                                              long maxKeySize)
       throws IOException {
 
     if (!fs.exists(dir)) {
@@ -685,7 +685,7 @@ public class StoreFile {
      */
     public Writer(FileSystem fs, Path path, int blocksize,
         Compression.Algorithm compress, final Configuration conf,
-        final KVComparator comparator, BloomType bloomType, int maxKeys)
+        final KVComparator comparator, BloomType bloomType, long maxKeys)
         throws IOException {
 
       writer = new HFile.Writer(
@@ -711,7 +711,7 @@ public class StoreFile {
 
         if (maxKeys < tooBig) {
           try {
-            bloom = new ByteBloomFilter(maxKeys, err,
+            bloom = new ByteBloomFilter((int)maxKeys, err,
                 Hash.getHashType(conf), maxFold);
             bloom.allocBloom();
             bt = bloomType;
@@ -1037,7 +1037,7 @@ public class StoreFile {
       }
     }
 
-    public int getFilterEntries() {
+    public long getFilterEntries() {
       return (this.bloomFilter != null) ? this.bloomFilter.getKeyCount()
           : reader.getFilterEntries();
     }
@@ -1062,7 +1062,7 @@ public class StoreFile {
       return reader.length();
     }
 
-    public int getEntries() {
+    public long getEntries() {
       return reader.getEntries();
     }