You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ec...@apache.org on 2013/06/10 20:04:28 UTC

svn commit: r1491546 - /hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java

Author: eclark
Date: Mon Jun 10 18:04:28 2013
New Revision: 1491546

URL: http://svn.apache.org/r1491546
Log:
HBASE-8283 Backport HBASE-7842 Add compaction policy that explores more storefile groups to 0.94

Modified:
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java

Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java?rev=1491546&r1=1491545&r2=1491546&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java Mon Jun 10 18:04:28 2013
@@ -1553,9 +1553,6 @@ public class Store extends SchemaConfigu
 
     if (!majorcompaction &&
         !hasReferences(compactSelection.getFilesToCompact())) {
-      // we're doing a minor compaction, let's see what files are applicable
-      int start = 0;
-      double r = compactSelection.getCompactSelectionRatio();
 
       // remove bulk import files that request to be excluded from minors
       compactSelection.getFilesToCompact().removeAll(Collections2.filter(
@@ -1576,25 +1573,48 @@ public class Store extends SchemaConfigu
         compactSelection.emptyFileList();
         return compactSelection;
       }
-
-      /* TODO: add sorting + unit test back in when HBASE-2856 is fixed
-      // Sort files by size to correct when normal skew is altered by bulk load.
-      Collections.sort(filesToCompact, StoreFile.Comparators.FILE_SIZE);
-       */
-
-      // get store file sizes for incremental compacting selection.
-      int countOfFiles = compactSelection.getFilesToCompact().size();
-      long [] fileSizes = new long[countOfFiles];
-      long [] sumSize = new long[countOfFiles];
-      for (int i = countOfFiles-1; i >= 0; --i) {
-        StoreFile file = compactSelection.getFilesToCompact().get(i);
-        fileSizes[i] = file.getReader().length();
-        // calculate the sum of fileSizes[i,i+maxFilesToCompact-1) for algo
-        int tooFar = i + this.maxFilesToCompact - 1;
-        sumSize[i] = fileSizes[i]
-                   + ((i+1    < countOfFiles) ? sumSize[i+1]      : 0)
-                   - ((tooFar < countOfFiles) ? fileSizes[tooFar] : 0);
+      if (conf.getBoolean("hbase.hstore.useExploringCompation", false)) {
+        compactSelection = exploringCompactionSelection(compactSelection);
+      } else {
+        compactSelection = defaultCompactionSelection(compactSelection);
       }
+    } else {
+      if(majorcompaction) {
+        if (compactSelection.getFilesToCompact().size() > this.maxFilesToCompact) {
+          LOG.debug("Warning, compacting more than " + this.maxFilesToCompact +
+            " files, probably because of a user-requested major compaction");
+          if(priority != PRIORITY_USER) {
+            LOG.error("Compacting more than max files on a non user-requested compaction");
+          }
+        }
+      } else if (compactSelection.getFilesToCompact().size() > this.maxFilesToCompact) {
+        // all files included in this compaction, up to max
+        int pastMax = compactSelection.getFilesToCompact().size() - this.maxFilesToCompact;
+        compactSelection.getFilesToCompact().subList(0, pastMax).clear();
+      }
+    }
+    return compactSelection;
+  }
+
+  private CompactSelection defaultCompactionSelection(CompactSelection compactSelection) {
+    // we're doing a minor compaction, let's see what files are applicable
+    int start = 0;
+
+    double r = compactSelection.getCompactSelectionRatio();
+
+    // get store file sizes for incremental compacting selection.
+    int countOfFiles = compactSelection.getFilesToCompact().size();
+    long [] fileSizes = new long[countOfFiles];
+    long [] sumSize = new long[countOfFiles];
+    for (int i = countOfFiles-1; i >= 0; --i) {
+      StoreFile file = compactSelection.getFilesToCompact().get(i);
+      fileSizes[i] = file.getReader().length();
+      // calculate the sum of fileSizes[i,i+maxFilesToCompact-1) for algo
+      int tooFar = i + this.maxFilesToCompact - 1;
+      sumSize[i] = fileSizes[i]
+          + ((i+1    < countOfFiles) ? sumSize[i+1]      : 0)
+          - ((tooFar < countOfFiles) ? fileSizes[tooFar] : 0);
+    }
 
       /* Start at the oldest file and stop when you find the first file that
        * meets compaction criteria:
@@ -1609,43 +1629,143 @@ public class Store extends SchemaConfigu
        * situation where we always compact [end-threshold,end).  Then, the
        * last file becomes an aggregate of the previous compactions.
        */
-      while(countOfFiles - start >= this.minFilesToCompact &&
-            fileSizes[start] >
-              Math.max(minCompactSize, (long)(sumSize[start+1] * r))) {
-        ++start;
-      }
-      int end = Math.min(countOfFiles, start + this.maxFilesToCompact);
-      long totalSize = fileSizes[start]
-                     + ((start+1 < countOfFiles) ? sumSize[start+1] : 0);
-      compactSelection = compactSelection.getSubList(start, end);
-
-      // if we don't have enough files to compact, just wait
-      if (compactSelection.getFilesToCompact().size() < this.minFilesToCompact) {
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("Skipped compaction of " + this
+    while(countOfFiles - start >= this.minFilesToCompact &&
+        fileSizes[start] >
+            Math.max(minCompactSize, (long)(sumSize[start+1] * r))) {
+      ++start;
+    }
+    int end = Math.min(countOfFiles, start + this.maxFilesToCompact);
+    long totalSize = fileSizes[start]
+        + ((start+1 < countOfFiles) ? sumSize[start+1] : 0);
+    compactSelection = compactSelection.getSubList(start, end);
+
+    // if we don't have enough files to compact, just wait
+    if (compactSelection.getFilesToCompact().size() < this.minFilesToCompact) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Skipped compaction of " + this
             + ".  Only " + (end - start) + " file(s) of size "
             + StringUtils.humanReadableInt(totalSize)
             + " have met compaction criteria.");
-        }
-        compactSelection.emptyFileList();
-        return compactSelection;
       }
-    } else {
-      if(majorcompaction) {
-        if (compactSelection.getFilesToCompact().size() > this.maxFilesToCompact) {
-          LOG.debug("Warning, compacting more than " + this.maxFilesToCompact +
-            " files, probably because of a user-requested major compaction");
-          if(priority != PRIORITY_USER) {
-            LOG.error("Compacting more than max files on a non user-requested compaction");
-          }
+      compactSelection.emptyFileList();
+      return compactSelection;
+    }
+    return compactSelection;
+  }
+
+  private CompactSelection exploringCompactionSelection(CompactSelection compactSelection) {
+
+    List<StoreFile> candidates = compactSelection.getFilesToCompact();
+    int futureFiles = filesCompacting.isEmpty() ? 0 : 1;
+    boolean mayBeStuck = (candidates.size() - filesCompacting.size() + futureFiles)
+        >= blockingStoreFileCount;
+    // Start off choosing nothing.
+    List<StoreFile> bestSelection = new ArrayList<StoreFile>(0);
+    List<StoreFile> smallest = new ArrayList<StoreFile>(0);
+    long bestSize = 0;
+    long smallestSize = Long.MAX_VALUE;
+    double r = compactSelection.getCompactSelectionRatio();
+
+    // Consider every starting place.
+    for (int startIndex = 0; startIndex < candidates.size(); startIndex++) {
+      // Consider every different sub list permutation in between start and end with min files.
+      for (int currentEnd = startIndex + minFilesToCompact - 1;
+           currentEnd < candidates.size(); currentEnd++) {
+        List<StoreFile> potentialMatchFiles = candidates.subList(startIndex, currentEnd + 1);
+
+        // Sanity checks
+        if (potentialMatchFiles.size() < minFilesToCompact) {
+          continue;
+        }
+        if (potentialMatchFiles.size() > maxFilesToCompact) {
+          continue;
+        }
+
+        // Compute the total size of files that will
+        // have to be read if this set of files is compacted.
+        long size = getCompactionSize(potentialMatchFiles);
+
+        // Store the smallest set of files.  This stored set of files will be used
+        // if it looks like the algorithm is stuck.
+        if (size < smallestSize) {
+          smallest = potentialMatchFiles;
+          smallestSize = size;
+        }
+
+        if (size >= minCompactSize
+            && !filesInRatio(potentialMatchFiles, r)) {
+          continue;
+        }
+
+        if (size > maxCompactSize) {
+          continue;
+        }
+
+        // Keep if this gets rid of more files.  Or the same number of files for less io.
+        if (potentialMatchFiles.size() > bestSelection.size()
+            || (potentialMatchFiles.size() == bestSelection.size() && size < bestSize)) {
+          bestSelection = potentialMatchFiles;
+          bestSize = size;
         }
-      } else if (compactSelection.getFilesToCompact().size() > this.maxFilesToCompact) {
-        // all files included in this compaction, up to max
-        int pastMax = compactSelection.getFilesToCompact().size() - this.maxFilesToCompact;
-        compactSelection.getFilesToCompact().subList(0, pastMax).clear();
       }
     }
+
+    if (bestSelection.size() == 0 && mayBeStuck) {
+      smallest = new ArrayList<StoreFile>(smallest);
+      compactSelection.getFilesToCompact().clear();
+      compactSelection.getFilesToCompact().addAll(smallest);
+    } else {
+      bestSelection = new ArrayList<StoreFile>(bestSelection);
+      compactSelection.getFilesToCompact().clear();
+      compactSelection.getFilesToCompact().addAll(bestSelection);
+    }
+
     return compactSelection;
+
+  }
+
+  /**
+   * Check that all files satisfy the ratio
+   *
+   * @param files set of files to examine.
+   * @param currentRatio The ratio
+   * @return if all files are in ratio.
+   */
+  private boolean filesInRatio(final List<StoreFile> files, final double currentRatio) {
+    if (files.size() < 2) {
+      return true;
+    }
+    long totalFileSize = 0;
+    for (int i = 0; i < files.size(); i++) {
+      totalFileSize += files.get(i).getReader().length();
+    }
+    for (int i = 0; i < files.size(); i++) {
+      long singleFileSize = files.get(i).getReader().length();
+      long sumAllOtherFilesize = totalFileSize - singleFileSize;
+
+      if ((singleFileSize > sumAllOtherFilesize * currentRatio)
+          && (sumAllOtherFilesize >= this.minCompactSize)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Get the number of bytes a proposed compaction would have to read.
+   *
+   * @param files Set of files in a proposed compaction.
+   * @return size in bytes.
+   */
+  private long getCompactionSize(final List<StoreFile> files) {
+    long size = 0;
+    if (files == null) {
+      return size;
+    }
+    for (StoreFile f : files) {
+      size += f.getReader().length();
+    }
+    return size;
   }
 
   /**