Posted to commits@hbase.apache.org by st...@apache.org on 2008/11/18 00:08:56 UTC

svn commit: r718430 - in /hadoop/hbase/trunk: CHANGES.txt src/java/org/apache/hadoop/hbase/regionserver/HStore.java src/test/org/apache/hadoop/hbase/regionserver/TestCompaction.java

Author: stack
Date: Mon Nov 17 15:08:56 2008
New Revision: 718430

URL: http://svn.apache.org/viewvc?rev=718430&view=rev
Log:
HBASE-947 [Optimization] Major compaction should remove deletes as well as the deleted cell
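
In short: before this change, a major compaction pruned only excess versions
and expired cells, so delete tombstones and the cells they masked were copied
through to the compacted store file; afterwards, a major compaction drops both
the tombstone and the exact cell it masks. A minimal sketch of the rule, using
illustrative stand-in types (a flattened key string and a tombstone flag)
rather than the real HStoreKey/HLogEdit API; version and TTL pruning are
omitted:

  import java.util.ArrayList;
  import java.util.List;

  // Hypothetical model of the new major-compaction rule. The real code
  // compares HStoreKey row/column/timestamp and asks HLogEdit.isDeleted().
  final class MajorCompactionSketch {
    static final class Cell {
      final String key;        // row/column/timestamp flattened for brevity
      final boolean tombstone; // true if this entry is a delete marker
      Cell(final String key, final boolean tombstone) {
        this.key = key;
        this.tombstone = tombstone;
      }
    }

    // Cells arrive sorted, with the newest store file's entry first for any
    // duplicated key, mirroring the reversed reader order in HStore.compact().
    static List<Cell> majorCompact(final List<Cell> cells) {
      final List<Cell> out = new ArrayList<Cell>();
      Cell lastDelete = null;
      for (final Cell c : cells) {
        if (lastDelete != null && c.key.equals(lastDelete.key)) {
          continue;            // masked by a tombstone with the exact same key
        }
        if (c.tombstone) {
          lastDelete = c;      // remember the tombstone so it masks the cell it deletes...
          continue;            // ...and drop the tombstone itself (the new behavior)
        }
        out.add(c);            // live cell survives the major compaction
      }
      return out;
    }
  }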

Modified:
    hadoop/hbase/trunk/CHANGES.txt
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java
    hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestCompaction.java

Modified: hadoop/hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=718430&r1=718429&r2=718430&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Mon Nov 17 15:08:56 2008
@@ -124,6 +124,8 @@
    HBASE-999   Up versions on historian and keep history of deleted regions for a
                while rather than delete immediately
    HBASE-938   Major compaction period is not checked periodically
+   HBASE-947   [Optimization] Major compaction should remove deletes as well as
+               the deleted cell
         
   NEW FEATURES
    HBASE-875   Use MurmurHash instead of JenkinsHash [in bloomfilters]

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java?rev=718430&r1=718429&r2=718430&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java Mon Nov 17 15:08:56 2008
@@ -435,7 +435,6 @@
       curfile = new HStoreFile(conf, fs, basedir, this.info,
         family.getName(), fid, reference);
       long storeSeqId = -1;
-      boolean majorCompaction = false;
       try {
         storeSeqId = curfile.loadInfo(fs);
         if (storeSeqId > this.maxSeqId) {
@@ -1044,11 +1043,45 @@
   }
   
   /*
+   * @param r List to reverse
+   * @return A reversed array of the content of <code>r</code>
+   */
+  private MapFile.Reader [] reverse(final List<MapFile.Reader> r) {
+    List<MapFile.Reader> copy = new ArrayList<MapFile.Reader>(r);
+    Collections.reverse(copy);
+    return copy.toArray(new MapFile.Reader[0]);
+  }
+
+  /*
+   * @param rdrs List of readers
+   * @param keys Current keys
+   * @param done Which readers are done
+   * @return Index of the reader with the lowest current key, or -1 if all done
+   */
+  private int getLowestKey(final MapFile.Reader [] rdrs,
+      final HStoreKey [] keys, final boolean [] done) {
+    int lowestKey = -1;
+    for (int i = 0; i < rdrs.length; i++) {
+      if (done[i]) {
+        continue;
+      }
+      if (lowestKey < 0) {
+        lowestKey = i;
+      } else {
+        if (keys[i].compareTo(keys[lowestKey]) < 0) {
+          lowestKey = i;
+        }
+      }
+    }
+    return lowestKey;
+  }
+
+  /*
    * Compact a list of MapFile.Readers into MapFile.Writer.
    * 
-   * We work by iterating through the readers in parallel. We always increment
-   * the lowest-ranked one. Updates to a single row/column will appear ranked
-   * by timestamp.
+   * We work by iterating through the readers in parallel, newest store file
+   * first. We always increment the lowest-ranked one. Updates to a single
+   * row/column will appear ranked by timestamp.
    * @param compactedOut Where to write compaction.
    * @param pReaders List of readers sorted oldest to newest.
    * @param majorCompaction True to force a major compaction regardless of
@@ -1058,14 +1091,12 @@
   private void compact(final MapFile.Writer compactedOut,
       final List<MapFile.Reader> pReaders, final boolean majorCompaction)
   throws IOException {
-    // Reverse order so newest is first.
-    List<MapFile.Reader> copy = new ArrayList<MapFile.Reader>(pReaders);
-    Collections.reverse(copy);
-    MapFile.Reader[] rdrs = copy.toArray(new MapFile.Reader[0]);
+    // Reverse order so newest store file is first.
+    MapFile.Reader[] rdrs = reverse(pReaders);
     try {
-      HStoreKey[] keys = new HStoreKey[rdrs.length];
-      ImmutableBytesWritable[] vals = new ImmutableBytesWritable[rdrs.length];
-      boolean[] done = new boolean[rdrs.length];
+      HStoreKey [] keys = new HStoreKey[rdrs.length];
+      ImmutableBytesWritable [] vals = new ImmutableBytesWritable[rdrs.length];
+      boolean [] done = new boolean[rdrs.length];
       for(int i = 0; i < rdrs.length; i++) {
         keys[i] = new HStoreKey(HConstants.EMPTY_BYTE_ARRAY, this.info);
         vals[i] = new ImmutableBytesWritable();
@@ -1085,56 +1116,67 @@
 
       long now = System.currentTimeMillis();
       int timesSeen = 0;
-      byte [] lastRow = null;
-      byte [] lastColumn = null;
+      HStoreKey lastSeen = new HStoreKey();
+      HStoreKey lastDelete = null;
       while (numDone < done.length) {
-        int smallestKey = -1;
-        for (int i = 0; i < rdrs.length; i++) {
-          if (done[i]) {
-            continue;
-          }
-          if (smallestKey < 0) {
-            smallestKey = i;
-          } else {
-            if (keys[i].compareTo(keys[smallestKey]) < 0) {
-              smallestKey = i;
-            }
-          }
-        }
-        HStoreKey sk = keys[smallestKey];
-        if (HStoreKey.equalsTwoRowKeys(info,lastRow, sk.getRow())
-            && Bytes.equals(lastColumn, sk.getColumn())) {
+        // Get lowest key in all store files.
+        int lowestKey = getLowestKey(rdrs, keys, done);
+        HStoreKey sk = keys[lowestKey];
+        // If it's the same row and column as the last key, increment timesSeen.
+        if (HStoreKey.equalsTwoRowKeys(info, lastSeen.getRow(), sk.getRow())
+            && Bytes.equals(lastSeen.getColumn(), sk.getColumn())) {
           timesSeen++;
+          // Reset lastDelete unless the timestamp matches exactly -- lastDelete
+          // only stops the exact same key making it out to the compacted file.
+          if (lastDelete != null &&
+              lastDelete.getTimestamp() != sk.getTimestamp()) {
+            lastDelete = null;
+          }
         } else {
           timesSeen = 1;
+          lastDelete = null;
         }
 
         // Don't write empty rows or columns.  Only remove cells on major
        // compaction.  Remove if expired or > VERSIONS
         if (sk.getRow().length != 0 && sk.getColumn().length != 0) {
-          boolean expired = false;
-          if (!majorCompaction ||
-              (timesSeen <= family.getMaxVersions() &&
-                !(expired = isExpired(sk, ttl, now)))) {
-              compactedOut.append(sk, vals[smallestKey]);
-          }
-          if (expired) {
-            // HBASE-855 remove one from timesSeen because it did not make it
-            // past expired check -- don't count against max versions.
-            timesSeen--;
+          ImmutableBytesWritable value = vals[lowestKey];
+          if (!majorCompaction) {
+            // Write out all values if not a major compaction.
+            compactedOut.append(sk, value);
+          } else {
+            boolean expired = false;
+            boolean deleted = false;
+            if (timesSeen <= family.getMaxVersions() &&
+                !(expired = isExpired(sk, ttl, now))) {
+              // If this key matches the last-seen delete key, skip it.
+              if (lastDelete != null && sk.equals(lastDelete)) {
+                deleted = true;
+              } else if (HLogEdit.isDeleted(value.get())) {
+                // If a deleted value, skip
+                deleted = true;
+                lastDelete = new HStoreKey(sk);
+              } else {
+                compactedOut.append(sk, vals[lowestKey]);
+              }
+            }
+            if (expired || deleted) {
+              // HBASE-855 remove one from timesSeen because it did not make it
+              // past the expired/deleted checks -- don't count against max versions.
+              timesSeen--;
+            }
           }
         }
 
         // Update last-seen items
-        lastRow = sk.getRow();
-        lastColumn = sk.getColumn();
+        lastSeen = new HStoreKey(sk);
 
         // Advance the smallest key.  If that reader's all finished, then 
         // mark it as done.
-        if (!rdrs[smallestKey].next(keys[smallestKey], vals[smallestKey])) {
-          done[smallestKey] = true;
-          rdrs[smallestKey].close();
-          rdrs[smallestKey] = null;
+        if (!rdrs[lowestKey].next(keys[lowestKey], vals[lowestKey])) {
+          done[lowestKey] = true;
+          rdrs[lowestKey].close();
+          rdrs[lowestKey] = null;
           numDone++;
         }
       }
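
The loop body above now takes its merge cursor from the extracted
getLowestKey() helper: among the readers that still have a current key, pick
the index whose key sorts first. A self-contained generic sketch of the same
selection (the class and method names and the Comparable shape are
illustrative, not part of the patch):

  // Generic stand-in for getLowestKey(): return the index of the smallest
  // current key among readers that are not yet exhausted, or -1 if all done.
  final class MergeFrontSketch {
    static <K extends Comparable<K>> int lowestKey(final K[] keys,
        final boolean[] done) {
      int lowest = -1;
      for (int i = 0; i < keys.length; i++) {
        if (done[i]) {
          continue;            // this reader has no current key
        }
        if (lowest < 0 || keys[i].compareTo(keys[lowest]) < 0) {
          lowest = i;          // first live reader, or a strictly smaller key
        }
      }
      return lowest;
    }
  }

Note the strict comparison: on a tie the earlier index wins, and because the
reader array was reversed to put the newest store file first, the newest copy
of a duplicated key is always consumed before the older ones. That ordering is
what lets the lastDelete bookkeeping see a tombstone before the cell it masks.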

Modified: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestCompaction.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestCompaction.java?rev=718430&r1=718429&r2=718430&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestCompaction.java (original)
+++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestCompaction.java Mon Nov 17 15:08:56 2008
@@ -103,34 +103,39 @@
     assertTrue(cellValues.length == 3);
     r.flushcache();
     r.compactStores();
-    // Always 3 version if that is what max versions is.
+    // Always 3 versions if that is what max versions is.
     byte [] secondRowBytes = START_KEY.getBytes(HConstants.UTF8_ENCODING);
     // Increment the least significant character so we get to next row.
     secondRowBytes[START_KEY_BYTES.length - 1]++;
     cellValues = r.get(secondRowBytes, COLUMN_FAMILY_TEXT, -1, 100/*Too many*/);
-    LOG.info("Count of " + Bytes.toString(secondRowBytes) + ": " + cellValues.length);
+    LOG.info("Count of " + Bytes.toString(secondRowBytes) + ": " +
+      cellValues.length);
     assertTrue(cellValues.length == 3);
 
     // Now add deletes to memcache and then flush it.  That will put us over
     // the compaction threshold of 3 store files.  Compacting these store files
     // should result in a compacted store file that has no references to the
     // deleted row.
-    r.deleteAll(STARTROW, COLUMN_FAMILY_TEXT, System.currentTimeMillis(), null);
+    r.deleteAll(secondRowBytes, COLUMN_FAMILY_TEXT, System.currentTimeMillis(),
+      null);
     // Assert deleted.
-    assertNull(r.get(STARTROW, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/));
+    assertNull(r.get(secondRowBytes, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/));
     r.flushcache();
-    assertNull(r.get(STARTROW, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/));
+    assertNull(r.get(secondRowBytes, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/));
     // Add a bit of data and flush.  Start adding at 'bbb'.
     createSmallerStoreFile(this.r);
     r.flushcache();
-    // Assert that the first row is still deleted.
-    cellValues = r.get(STARTROW, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/);
-    assertNull(r.get(STARTROW, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/));
+    // Assert that the second row is still deleted.
+    cellValues = r.get(secondRowBytes, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/);
+    assertNull(r.get(secondRowBytes, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/));
     // Force major compaction.
     r.compactStores(true);
     assertEquals(r.getStore(COLUMN_FAMILY_TEXT).getStorefiles().size(), 1);
-    assertNull(r.get(STARTROW, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/));
-    // Make sure the store files do have some 'aaa' keys in them.
+    assertNull(r.get(secondRowBytes, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/));
+    // Make sure the store files do have some 'aaa' keys in them -- exactly 3.
+    // Also, that compacted store files do not have any secondRowBytes because
+    // they were deleted.
+    int count = 0;
     boolean containsStartRow = false;
     for (MapFile.Reader reader: this.r.stores.
         get(Bytes.mapKey(COLUMN_FAMILY_TEXT_MINUS_COLON)).getReaders()) {
@@ -140,14 +145,16 @@
       while(reader.next(key, val)) {
         if (Bytes.equals(key.getRow(), STARTROW)) {
           containsStartRow = true;
-          break;
+          count++;
+        } else {
+          // After major compaction, none of these rows should remain in the
+          // compacted file.
+          assertFalse(Bytes.equals(key.getRow(), secondRowBytes));
         }
       }
-      if (containsStartRow) {
-        break;
-      }
     }
     assertTrue(containsStartRow);
+    assertTrue(count == 3);
     // Do a simple TTL test.
     final int ttlInSeconds = 1;
     for (HStore store: this.r.stores.values()) {
@@ -155,6 +162,11 @@
     }
     Thread.sleep(ttlInSeconds * 1000);
     r.compactStores(true);
+    count = count();
+    assertTrue(count == 0);
+  }
+  
+  private int count() throws IOException {
     int count = 0;
     for (MapFile.Reader reader: this.r.stores.
         get(Bytes.mapKey(COLUMN_FAMILY_TEXT_MINUS_COLON)).getReaders()) {
@@ -165,7 +177,7 @@
         count++;
       }
     }
-    assertTrue(count == 0);
+    return count;
   }
 
   private void createStoreFile(final HRegion region) throws IOException {
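
For reference, the new count() helper in the test is a plain full scan over
every MapFile.Reader in the store. A hedged standalone version of the same
pattern (ReaderCountSketch and countEntries are illustrative names; the test
obtains its readers from this.r.stores and allocates fresh key/value buffers
per reader):

  import java.io.IOException;
  import org.apache.hadoop.hbase.HStoreKey;
  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
  import org.apache.hadoop.io.MapFile;

  // Illustrative helper, not from the patch: count every entry across a set
  // of already-open readers.
  final class ReaderCountSketch {
    static int countEntries(final MapFile.Reader[] readers) throws IOException {
      int count = 0;
      final HStoreKey key = new HStoreKey();
      final ImmutableBytesWritable val = new ImmutableBytesWritable();
      for (final MapFile.Reader reader : readers) {
        reader.reset();        // rewind in case the reader was already consumed
        while (reader.next(key, val)) {
          count++;             // one entry per cell in the store file
        }
      }
      return count;
    }
  }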