Posted to commits@hbase.apache.org by st...@apache.org on 2008/08/31 06:20:00 UTC

svn commit: r690634 - in /hadoop/hbase/branches/0.2: CHANGES.txt src/java/org/apache/hadoop/hbase/regionserver/HRegion.java src/java/org/apache/hadoop/hbase/regionserver/HStore.java src/test/org/apache/hadoop/hbase/regionserver/TestCompaction.java

Author: stack
Date: Sat Aug 30 21:19:59 2008
New Revision: 690634

URL: http://svn.apache.org/viewvc?rev=690634&view=rev
Log:
HBASE-855 compaction can return fewer versions than it should in some cases

Modified:
    hadoop/hbase/branches/0.2/CHANGES.txt
    hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
    hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HStore.java
    hadoop/hbase/branches/0.2/src/test/org/apache/hadoop/hbase/regionserver/TestCompaction.java

Modified: hadoop/hbase/branches/0.2/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.2/CHANGES.txt?rev=690634&r1=690633&r2=690634&view=diff
==============================================================================
--- hadoop/hbase/branches/0.2/CHANGES.txt (original)
+++ hadoop/hbase/branches/0.2/CHANGES.txt Sat Aug 30 21:19:59 2008
@@ -47,6 +47,8 @@
                one time (Billy Pearson via Stack)
    HBASE-858   HBase-799 Created an error in util.migration.v5.HColumnDescriptor
                (Jean-Daniel Cryans via Stack)
+   HBASE-855   compaction can return fewer versions than it should in some cases
+               (Billy Pearson via Stack)
 
   IMPROVEMENTS
    HBASE-801  When a table haven't disable, shell could response in a "user

Modified: hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java?rev=690634&r1=690633&r2=690634&view=diff
==============================================================================
--- hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java (original)
+++ hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java Sat Aug 30 21:19:59 2008
@@ -870,7 +870,7 @@
     return compactStores(false);
   }
 
-  /**
+  /*
    * Called by compaction thread and after region is opened to compact the
    * HStores if necessary.
    *
@@ -885,7 +885,8 @@
    * @return mid key if split is needed
    * @throws IOException
    */
-  private byte [] compactStores(final boolean majorCompaction) throws IOException {
+  byte [] compactStores(final boolean majorCompaction)
+  throws IOException {
     splitsAndClosesLock.readLock().lock();
     try {
       byte [] midKey = null;
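
Both halves of this change go together: the comment drops from javadoc to a plain block comment, and compactStores(boolean) becomes package-private so tests in org.apache.hadoop.hbase.regionserver can force a major compaction directly. A minimal sketch of such a call, reusing the fixtures that appear in the TestCompaction diff further down (r, COLUMN_FAMILY_TEXT and the JUnit asserts come from that test and are not defined here):

    // Hypothetical fragment from a test in the same package as HRegion.
    r.flushcache();
    byte [] midKey = r.compactStores(true);   // true forces a major compaction
    // Per the comment above, midKey is non-null only if the region wants a split.
    assertEquals(1, r.getStore(COLUMN_FAMILY_TEXT).getStorefiles().size());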

Modified: hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HStore.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HStore.java?rev=690634&r1=690633&r2=690634&view=diff
==============================================================================
--- hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HStore.java (original)
+++ hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HStore.java Sat Aug 30 21:19:59 2008
@@ -821,7 +821,8 @@
         // size of the second, skip the largest, and continue to next...,
         // until we meet the compactionThreshold limit.
         for (point = 0; point < compactionThreshold - 1; point++) {
-          if (fileSizes[point] < fileSizes[point + 1] * 2 && maxFilesToCompact < (countOfFiles - point)) {
+          if (fileSizes[point] < fileSizes[point + 1] * 2 &&
+              maxFilesToCompact < (countOfFiles - point)) {
             break;
           }
           skipped += fileSizes[point];
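
The hunk above only rewraps the long condition, but the loop it touches is the store-file selection heuristic the surrounding comment describes: walk the files largest-first and skip a file while it is at least twice the size of the next one (the second clause also keeps skipping while no more than maxFilesToCompact files remain). A standalone sketch of the same loop with made-up sizes (class name, sizes and thresholds are illustrative only, not HBase code):

    import java.util.Arrays;

    public class CompactSelectionSketch {
      public static void main(String[] args) {
        // Store file sizes, largest first (hypothetical numbers).
        long[] fileSizes = {900L, 400L, 90L, 80L, 70L};
        int countOfFiles = fileSizes.length;
        int compactionThreshold = 3;   // illustrative threshold
        int maxFilesToCompact = 10;    // illustrative cap

        long skipped = 0;
        int point;
        for (point = 0; point < compactionThreshold - 1; point++) {
          // Same break test as the patch: stop skipping only when the current
          // file is smaller than twice the next AND more than maxFilesToCompact
          // files still remain.
          if (fileSizes[point] < fileSizes[point + 1] * 2 &&
              maxFilesToCompact < (countOfFiles - point)) {
            break;
          }
          skipped += fileSizes[point];
        }
        System.out.println("skipped " + point + " file(s), " + skipped + " bytes of " +
            Arrays.toString(fileSizes));
      }
    }

With these numbers the loop skips the two large files and leaves the three smallest for compaction.
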
@@ -917,10 +918,10 @@
   private void compact(final MapFile.Writer compactedOut,
       final List<MapFile.Reader> pReaders, final boolean majorCompaction)
   throws IOException {
-    // Reverse order so we newest is first.
+    // Reverse order so newest is first.
     List<MapFile.Reader> copy = new ArrayList<MapFile.Reader>(pReaders);
     Collections.reverse(copy);
-    MapFile.Reader[] rdrs = pReaders.toArray(new MapFile.Reader[copy.size()]);
+    MapFile.Reader[] rdrs = copy.toArray(new MapFile.Reader[0]);
     try {
       HStoreKey[] keys = new HStoreKey[rdrs.length];
       ImmutableBytesWritable[] vals = new ImmutableBytesWritable[rdrs.length];
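
Besides the comment fix, this hunk repairs a real slip: the reader array was built from the original pReaders list, so the Collections.reverse done just above was silently ignored. A tiny standalone illustration of why the copy matters (Strings stand in for MapFile.Reader; not HBase code):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Collections;
    import java.util.List;

    public class ReverseReadersSketch {
      public static void main(String[] args) {
        List<String> pReaders = Arrays.asList("oldest", "middle", "newest");
        List<String> copy = new ArrayList<String>(pReaders);
        Collections.reverse(copy);
        // Before the fix the array came from pReaders, discarding the reverse.
        String[] rdrs = copy.toArray(new String[0]);
        System.out.println(Arrays.toString(rdrs));   // [newest, middle, oldest]
      }
    }

When two files carry the same key, the merge loop keeps the value from the first reader that has it, so newest-first ordering is what lets the most recent write (or delete) win in the compacted file.
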
@@ -947,11 +948,6 @@
       byte [] lastRow = null;
       byte [] lastColumn = null;
       while (numDone < done.length) {
-        // Find the reader with the smallest key.  If two files have same key
-        // but different values -- i.e. one is delete and other is non-delete
-        // value -- we will find the first, the one that was written later and
-        // therefore the one whose value should make it out to the compacted
-        // store file.
         int smallestKey = -1;
         for (int i = 0; i < rdrs.length; i++) {
           if (done[i]) {
@@ -970,7 +966,7 @@
             && Bytes.equals(lastColumn, sk.getColumn())) {
           timesSeen++;
         } else {
-          timesSeen = 0;
+          timesSeen = 1;
         }
 
         // Added majorCompaction here to make sure all versions make it to 
@@ -980,10 +976,13 @@
           // Keep old versions until we have maxVersions worth.
           // Then just skip them.
           if (sk.getRow().length != 0 && sk.getColumn().length != 0) {
-            // Only write out objects which have a non-zero length key and
-            // value
+            // Only write out objects with non-zero length key and value
             if (!isExpired(sk, ttl, now)) {
               compactedOut.append(sk, vals[smallestKey]);
+            } else {
+              // HBASE-855 remove one from timesSeen because it did not make it
+              // past expired check -- don't count against max versions.
+              timesSeen--;
             }
           }
         }
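
The last two hunks above carry the actual HBASE-855 fix: timesSeen now starts at 1 on a fresh row/column, so it genuinely counts versions seen, and a cell that fails the TTL check hands its slot back by decrementing timesSeen instead of counting against maxVersions. A standalone sketch of that bookkeeping (Cell, the TTL value and maxVersions are illustrative stand-ins rather than HBase types, and the surrounding check this diff does not show is reduced to a plain timesSeen <= maxVersions test):

    import java.util.Arrays;
    import java.util.List;

    public class VersionCountSketch {
      static class Cell {
        final String row, column; final long ts;
        Cell(String row, String column, long ts) { this.row = row; this.column = column; this.ts = ts; }
      }

      public static void main(String[] args) {
        long now = System.currentTimeMillis();
        long ttl = 60 * 1000L;        // one-minute TTL, hypothetical
        int maxVersions = 3;
        // Cells for one row/column arrive newest-first, as in the compaction loop.
        List<Cell> cells = Arrays.asList(
            new Cell("row1", "colfam:q", now - 10),
            new Cell("row1", "colfam:q", now - 20),
            new Cell("row1", "colfam:q", now - 30),
            new Cell("row1", "colfam:q", now - 40),
            new Cell("row1", "colfam:q", now - ttl - 1));   // expired, oldest

        String lastRow = null, lastColumn = null;
        int timesSeen = 0;
        for (Cell c : cells) {
          if (c.row.equals(lastRow) && c.column.equals(lastColumn)) {
            timesSeen++;
          } else {
            timesSeen = 1;            // was 0 before this patch
          }
          if (timesSeen <= maxVersions) {
            if (c.ts > now - ttl) {   // crude stand-in for isExpired()
              System.out.println("keep " + c.column + " @ " + c.ts);
            } else {
              timesSeen--;            // expired cell gives its version slot back
            }
          }
          lastRow = c.row;
          lastColumn = c.column;
        }
      }
    }

Run as written this keeps exactly the three newest cells; with the pre-patch starting value of 0 the same sketch would let a fourth version through, which is what the tightened assertions in the TestCompaction diff below check. The decrement branch is the other half: an expired cell that lands inside the version window no longer uses up one of the maxVersions slots, so compaction stops returning fewer live versions than it should in some cases.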

Modified: hadoop/hbase/branches/0.2/src/test/org/apache/hadoop/hbase/regionserver/TestCompaction.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.2/src/test/org/apache/hadoop/hbase/regionserver/TestCompaction.java?rev=690634&r1=690633&r2=690634&view=diff
==============================================================================
--- hadoop/hbase/branches/0.2/src/test/org/apache/hadoop/hbase/regionserver/TestCompaction.java (original)
+++ hadoop/hbase/branches/0.2/src/test/org/apache/hadoop/hbase/regionserver/TestCompaction.java Sat Aug 30 21:19:59 2008
@@ -96,8 +96,7 @@
     }
     // Add more content.  Now there are about 5 versions of each column.
    // Default is that there are only 3 (MAXVERSIONS) versions allowed per column.
-    // Assert > 3 and then after compaction, assert that only 3 versions
-    // available.
+    // Assert == 3 when we ask for versions.
     addContent(new HRegionIncommon(r), Bytes.toString(COLUMN_FAMILY));
     Cell[] cellValues = 
       r.get(STARTROW, COLUMN_FAMILY_TEXT, 100 /*Too many*/);
@@ -105,49 +104,32 @@
     assertTrue(cellValues.length == 3);
     r.flushcache();
     r.compactStores();
-    assertEquals(r.getStore(COLUMN_FAMILY_TEXT).getStorefiles().size(), 1);
-    // Now assert that there are 4 versions of a record only: thats the
-    // 3 versions that should be in the compacted store and then the one more
-    // we added when we flushed. But could be 3 only if the flush happened
-    // before the compaction started though we tried to have the threads run
-    // concurrently (On hudson this happens).
     byte [] secondRowBytes = START_KEY.getBytes(HConstants.UTF8_ENCODING);
     // Increment the least significant character so we get to next row.
     secondRowBytes[START_KEY_BYTES.length - 1]++;
     cellValues = r.get(secondRowBytes, COLUMN_FAMILY_TEXT, 100/*Too many*/);
     LOG.info("Count of " + Bytes.toString(secondRowBytes) + ": " + cellValues.length);
-    // Commented out because fails on an hp+ubuntu single-processor w/ 1G and
-    // "Intel(R) Pentium(R) 4 CPU 3.20GHz" though passes on all local
-    // machines and even on hudson.  On said machine, its reporting in the
-    // LOG line above that there are 3 items in row so it should pass the
-    // below test.
-    assertTrue(cellValues.length == 3 || cellValues.length == 4);
+    // Always 3 versions if that is what max versions is.
+    assertTrue(cellValues.length == 3);
 
     // Now add deletes to memcache and then flush it.  That will put us over
     // the compaction threshold of 3 store files.  Compacting these store files
     // should result in a compacted store file that has no references to the
     // deleted row.
     r.deleteAll(STARTROW, COLUMN_FAMILY_TEXT, System.currentTimeMillis());
-    // Now, before compacting, remove all instances of the first row so can
-    // verify that it is removed as we compact.
     // Assert all deleted.
     assertNull(r.get(STARTROW, COLUMN_FAMILY_TEXT, 100 /*Too many*/));
     r.flushcache();
-    assertEquals(r.getStore(COLUMN_FAMILY_TEXT).getStorefiles().size(), 2);
     assertNull(r.get(STARTROW, COLUMN_FAMILY_TEXT, 100 /*Too many*/));
-    // Add a bit of data and flush it so we for sure have the compaction limit
-    // for store files.  Usually by this time we will have but if compaction
-    // included the flush that ran 'concurrently', there may be just the
-    // compacted store and the flush above when we added deletes.  Add more
-    // content to be certain.
+    // Add a bit of data and flush. Start adding at 'aaa'
     createSmallerStoreFile(this.r);
     r.flushcache();
-    assertEquals(r.getStore(COLUMN_FAMILY_TEXT).getStorefiles().size(), 3);
-    r.compactStores();
-    assertEquals(r.getStore(COLUMN_FAMILY_TEXT).getStorefiles().size(), 2);
     // Assert that the first row is still deleted.
-    cellValues = r.get(STARTROW, COLUMN_FAMILY_TEXT, 100 /*Too many*/);
-    assertNull(cellValues);
+    assertNull(r.get(STARTROW, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/));
+    // Force major compaction.
+    r.compactStores(true);
+    assertEquals(r.getStore(COLUMN_FAMILY_TEXT).getStorefiles().size(), 1);
+    assertNull(r.get(STARTROW, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/));
     // Make sure the store files do have some 'aaa' keys in them.
     boolean containsStartRow = false;
     for (MapFile.Reader reader: this.r.stores.