Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/11 04:07:42 UTC

svn commit: r1181428 - /hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java

Author: nspiegelberg
Date: Tue Oct 11 02:07:42 2011
New Revision: 1181428

URL: http://svn.apache.org/viewvc?rev=1181428&view=rev
Log:
Minor Refactoring: Compaction Algorithm

Summary:
Changed the new compaction algorithm to use a recursive relation like the
original one.  This allows us to get rid of the funky 'normalSkew' logic.
Also, ensure that compactionThreshold >= 2 so that a compaction always
merges at least two files.
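
For reference, here is a minimal standalone sketch of that recursive relation
(plain Java; the class and method names are illustrative only, not the actual
Store internals):

public class SlidingSumSketch {
  // sumSize[i] covers the window fileSizes[i, i+maxFilesToCompact-1), i.e.
  // at most maxFilesToCompact-1 files, matching the relation in the diff.
  public static long[] slidingSums(long[] fileSizes, int maxFilesToCompact) {
    int n = fileSizes.length;
    long[] sumSize = new long[n];
    for (int i = n - 1; i >= 0; --i) {
      int tooFar = i + maxFilesToCompact - 1;
      sumSize[i] = fileSizes[i]
                 + ((i + 1  < n) ? sumSize[i + 1]    : 0)
                 - ((tooFar < n) ? fileSizes[tooFar] : 0);
    }
    return sumSize;
  }

  public static void main(String[] args) {
    long[] sizes = { 100, 50, 23, 12, 12 };  // older ----> newer
    // With maxFilesToCompact = 3 each window spans at most 2 files:
    // prints [150, 73, 35, 24, 12]
    System.out.println(java.util.Arrays.toString(slidingSums(sizes, 3)));
  }
}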

Test Plan:
mvn clean test
cluster testing

DiffCamp Revision: 176571
Reviewed By: kannan
CC: nspiegelberg, kannan
Revert Plan:
OK

Modified:
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java?rev=1181428&r1=1181427&r2=1181428&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java Tue Oct 11 02:07:42 2011
@@ -173,8 +173,8 @@ public class Store implements HeapSize {
 
     // By default, we compact if an HStore has more than
     // MIN_COMMITS_FOR_COMPACTION map files
-    this.compactionThreshold =
-      conf.getInt("hbase.hstore.compactionThreshold", 3);
+    this.compactionThreshold = Math.max(2,
+      conf.getInt("hbase.hstore.compactionThreshold", 3));
 
     // Check if this is in-memory store
     this.inMemory = family.isInMemory();
@@ -589,13 +589,13 @@ public class Store implements HeapSize {
    * <p>We don't want to hold the structureLock for the whole time, as a compact()
    * can be lengthy and we want to allow cache-flushes during this period.
    *
-   * @param mc True to force a major compaction regardless of thresholds
+   * @param forceMajor True to force a major compaction regardless of thresholds
    * @return row to split around if a split is needed, null otherwise
    * @throws IOException
    */
-  StoreSize compact(final boolean mc) throws IOException {
+  StoreSize compact(final boolean forceMajor) throws IOException {
     boolean forceSplit = this.region.shouldSplit(false);
-    boolean majorcompaction = mc;
+    boolean majorcompaction = forceMajor;
     synchronized (compactLock) {
       this.lastCompactSize = 0;
 
@@ -619,8 +619,8 @@ public class Store implements HeapSize {
         return checkSplit(forceSplit);
       }
 
-      // get store file sizes for incremental compacting selection.
-      /* normal skew:
+      /* get store file sizes for incremental compacting selection.
+       * normal skew:
        *
        *         older ----> newer
        *     _
@@ -631,10 +631,10 @@ public class Store implements HeapSize {
        *    | |  | |  | |  | | | | | |
        *    | |  | |  | |  | | | | | |
        */
-      boolean normalSkew = true;
       int countOfFiles = filesToCompact.size();
       long [] fileSizes = new long[countOfFiles];
-      for (int i = 0; i < countOfFiles; i++) {
+      long [] sumSize = new long[countOfFiles];
+      for (int i = countOfFiles-1; i >= 0; --i) {
         StoreFile file = filesToCompact.get(i);
         Path path = file.getPath();
         if (path == null) {
@@ -647,11 +647,11 @@ public class Store implements HeapSize {
           return null;
         }
         fileSizes[i] = file.getReader().length();
-
-        // normal case: file[i] < file[i-1] unless they unconditionally compact
-        if (normalSkew && i > 0) {
-          normalSkew = fileSizes[i] < Math.max(fileSizes[i-1],minCompactSize);
-        }
+        // calculate the sum of fileSizes[i, i+maxFilesToCompact-1) for the selection algorithm below
+        int tooFar = i + this.maxFilesToCompact - 1;
+        sumSize[i] = fileSizes[i]
+                   + ((i+1    < countOfFiles) ? sumSize[i+1]      : 0)
+                   - ((tooFar < countOfFiles) ? fileSizes[tooFar] : 0);
       }
 
       // never run major compaction if we have too many files to avoid OOM
@@ -662,47 +662,30 @@ public class Store implements HeapSize {
       long totalSize = 0;
       if (!majorcompaction && !references) {
         // we're doing a minor compaction, let's see what files are applicable
-        int start = countOfFiles;
-        int end = countOfFiles;
-        if (normalSkew) {
-          // Start at the newest file
-          while (start > 0) {
-            /* Stop when you find a file too big to consider for compaction.
-             * This means it is:
-             *   (1) an already-compacted, large file (i.e. > minCompactSize)
-             *   (2) compactRatio times larger than sum(other_compact_files)
-             * Given normal skew, any older files will also be too large
-             */
-            if (fileSizes[start-1] >
-                Math.max(minCompactSize, (long)(totalSize * compactRatio))) {
-              break;
-            }
-            // Include the tested file
-            --start;
-            totalSize += fileSizes[start];
-
-            // Only allow a certain number of files.
-            if (end - start > this.maxFilesToCompact) {
-              /* If fileSizes.size() >> maxFilesToCompact, we will recurse on
-               * compact().  First consider to oldest files to avoid a
-               * situation where we always compact the last X and the last file
-               * becomes an aggregate of the previous compactions.
-               */
-              --end;
-              totalSize -= fileSizes[end-1];
-            }
-          }
-        } else {
-          // our normal operation skew is off. probably because someone
-          // imported a StoreFile.  unconditionally compact to restore order
-          start = Math.max(end - this.maxFilesToCompact, 0);
-          for (int pos = start; pos < end; ++pos) {
-            totalSize += fileSizes[pos];
-          }
-          LOG.info("StoreFiles in " + this.storeNameStr
-              + " do not follow normal operational skew.  "
-              + "Unconditionally compacting to restore order.");
-        }
+        int start = 0;
+        double r = this.compactRatio;
+
+        /* Start at the oldest file and stop when you find the first file that
+         * meets compaction criteria:
+         *   (1) a recently-flushed, small file (i.e. <= minCompactSize)
+         *      OR
+         *   (2) within the compactRatio of sum(newer_files)
+         * Given normal skew, any newer files will also meet these criteria.
+         *
+         * Additional Note:
+         * If fileSizes.length >> maxFilesToCompact, we will recurse on
+         * compact().  Consider the oldest files first to avoid a
+         * situation where we always compact [end-threshold,end) and the
+         * last file becomes an aggregate of the previous compactions.
+         */
+        while(countOfFiles - start >= this.compactionThreshold &&
+              fileSizes[start] >
+                Math.max(minCompactSize, (long)(sumSize[start+1] * r))) {
+          ++start;
+        }
+        int end = Math.min(countOfFiles, start + this.maxFilesToCompact);
+        totalSize = fileSizes[start]
+                  + ((start+1 < countOfFiles) ? sumSize[start+1] : 0);
 
         // if we don't have enough files to compact, just wait
         if (end - start < this.compactionThreshold) {
@@ -856,7 +839,10 @@ public class Store implements HeapSize {
         long keyCount = (r.getBloomFilterType() == family.getBloomFilterType())
             ? r.getFilterEntries() : r.getEntries();
         maxKeyCount += keyCount;
-        LOG.info("Compacting: " + file + "; keyCount = " + keyCount + "; Bloom Type = " + r.getBloomFilterType().toString());
+        LOG.info("Compacting: " + file +
+          "; keyCount = " + keyCount +
+          "; Bloom Type = " + r.getBloomFilterType().toString() +
+          "; Size = " + StringUtils.humanReadableInt(r.length()) );
       }
     }
     LOG.info("Estimated total keyCount for output of compaction = " + maxKeyCount);