You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by br...@apache.org on 2008/06/18 18:28:24 UTC

svn commit: r669211 - in /hadoop/hbase/trunk: CHANGES.txt src/java/org/apache/hadoop/hbase/regionserver/HStore.java src/test/org/apache/hadoop/hbase/regionserver/TestGet2.java

Author: bryanduxbury
Date: Wed Jun 18 09:28:23 2008
New Revision: 669211

URL: http://svn.apache.org/viewvc?rev=669211&view=rev
Log:
HBASE-694 HStore.rowAtOrBeforeFromMapFile() fails to locate the row if # of mapfiles >= 2
-Added new test to TestGet2 to highlight multi-storefile getClosestBefore issue
-Removed erroneous return that caused 2nd and subsequent mapfiles to be skipped
-Split HStore#rowKeyAtOrBeforeFromMapfile into two sub-methods for readability

Modified:
    hadoop/hbase/trunk/CHANGES.txt
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java
    hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestGet2.java

Modified: hadoop/hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=669211&r1=669210&r2=669211&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Wed Jun 18 09:28:23 2008
@@ -55,6 +55,8 @@
    HBASE-686   MemcacheScanner didn't return the first row(if it exists),
                because HScannerInterface's output incorrect (LN via Jim Kellerman)
    HBASE-691   get* and getScanner are different in how they treat column parameter
+   HBASE-694   HStore.rowAtOrBeforeFromMapFile() fails to locate the row if # of mapfiles >= 2
+               (Rong-En Fan via Bryan)
    
   IMPROVEMENTS
    HBASE-559   MR example job to count table rows

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java?rev=669211&r1=669210&r2=669211&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java Wed Jun 18 09:28:23 2008
@@ -1547,7 +1547,7 @@
     HStoreKey searchKey = null;
     ImmutableBytesWritable readval = new ImmutableBytesWritable();
     HStoreKey readkey = new HStoreKey();
-    HStoreKey strippedKey = null;
+
     synchronized(map) {
       // don't bother with the rest of this if the file is empty
       map.reset();
@@ -1556,171 +1556,194 @@
       }
 
       long now = System.currentTimeMillis();
+      
       // if there aren't any candidate keys yet, we'll do some things slightly
       // different 
       if (candidateKeys.isEmpty()) {
-        searchKey = new HStoreKey(row);
-        
-        // if the row we're looking for is past the end of this mapfile, just
-        // save time and add the last key to the candidates.
-        HStoreKey finalKey = new HStoreKey(); 
-        map.finalKey(finalKey);
-        if (Bytes.compareTo(finalKey.getRow(), row) < 0) {
-          candidateKeys.put(stripTimestamp(finalKey), 
-            new Long(finalKey.getTimestamp()));
-          return;
-        }
-        
-        // seek to the exact row, or the one that would be immediately before it
-        readkey = (HStoreKey)map.getClosest(searchKey, readval, true);
-      
-        if (readkey == null) {
-          // didn't find anything that would match, so return
-          return;
-        }
-      
-        do {
-          // if we have an exact match on row, and it's not a delete, save this
-          // as a candidate key
-          if (Bytes.equals(readkey.getRow(), row)) {
-            if (!HLogEdit.isDeleted(readval.get())) {
-              if (ttl == HConstants.FOREVER || 
-                    now < readkey.getTimestamp() + ttl) {
-                candidateKeys.put(stripTimestamp(readkey), 
-                  new Long(readkey.getTimestamp()));
-              } else {
-                if (LOG.isDebugEnabled()) {
-                  LOG.debug("rowAtOrBeforeFromMapFile:" + readkey +
-                    ": expired, skipped");
-                }
-              }
-            }
-          } else if (Bytes.compareTo(readkey.getRow(), row) > 0 ) {
-            // if the row key we just read is beyond the key we're searching for,
-            // then we're done. return.
-            return;
+        rowKeyFromMapFileEmptyKeys(map, row, candidateKeys, now);
+      } else {
+        rowKeyAtOrBeforeExistingCandKeys(map, row, candidateKeys, now);
+      }
+    }
+  }
+  
+  private void rowKeyFromMapFileEmptyKeys(MapFile.Reader map, byte[] row, 
+    SortedMap<HStoreKey, Long> candidateKeys, long now) 
+  throws IOException {
+    
+    HStoreKey searchKey = new HStoreKey(row);
+    ImmutableBytesWritable readval = new ImmutableBytesWritable();
+    HStoreKey readkey = new HStoreKey();
+    
+    // if the row we're looking for is past the end of this mapfile, just
+    // save time and add the last key to the candidates.
+    HStoreKey finalKey = new HStoreKey(); 
+    map.finalKey(finalKey);
+    if (Bytes.compareTo(finalKey.getRow(), row) < 0) {
+      candidateKeys.put(stripTimestamp(finalKey), 
+        new Long(finalKey.getTimestamp()));
+      return;
+    }
+    
+    // seek to the exact row, or the one that would be immediately before it
+    readkey = (HStoreKey)map.getClosest(searchKey, readval, true);
+  
+    if (readkey == null) {
+      // didn't find anything that would match, so return
+      return;
+    }
+  
+    do {
+      // if we have an exact match on row, and it's not a delete, save this
+      // as a candidate key
+      if (Bytes.equals(readkey.getRow(), row)) {
+        if (!HLogEdit.isDeleted(readval.get())) {
+          if (ttl == HConstants.FOREVER || 
+                now < readkey.getTimestamp() + ttl) {
+            candidateKeys.put(stripTimestamp(readkey), 
+              new Long(readkey.getTimestamp()));
           } else {
-            // so, the row key doesn't match, but we haven't gone past the row
-            // we're seeking yet, so this row is a candidate for closest
-            // (assuming that it isn't a delete).
-            if (!HLogEdit.isDeleted(readval.get())) {
-              if (ttl == HConstants.FOREVER || 
-                      now < readkey.getTimestamp() + ttl) {
-                candidateKeys.put(stripTimestamp(readkey), 
-                  new Long(readkey.getTimestamp()));
-              } else {
-                if (LOG.isDebugEnabled()) {
-                  LOG.debug("rowAtOrBeforeFromMapFile:" + readkey +
-                    ": expired, skipped");
-                }
-              }
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("rowAtOrBeforeFromMapFile:" + readkey +
+                ": expired, skipped");
             }
-          }        
-        } while(map.next(readkey, readval));
-      
-        // arriving here just means that we consumed the whole rest of the map
-        // without going "past" the key we're searching for. we can just fall
-        // through here.
+          }
+        }
+      } else if (Bytes.compareTo(readkey.getRow(), row) > 0 ) {
+        // if the row key we just read is beyond the key we're searching for,
+        // then we're done. return.
+        return;
       } else {
-        // if there are already candidate keys, we need to start our search 
-        // at the earliest possible key so that we can discover any possible
-        // deletes for keys between the start and the search key.
-        searchKey = new HStoreKey(candidateKeys.firstKey().getRow());
-              
-        // if the row we're looking for is past the end of this mapfile, just
-        // save time and add the last key to the candidates.
-        HStoreKey finalKey = new HStoreKey(); 
-        map.finalKey(finalKey);
-        if (Bytes.compareTo(finalKey.getRow(), searchKey.getRow()) < 0) {
-          strippedKey = stripTimestamp(finalKey);
-          
-          // if the candidate keys has a cell like this one already,
-          // then we might want to update the timestamp we're using on it
-          if (candidateKeys.containsKey(strippedKey)) {
-            long bestCandidateTs = 
-              candidateKeys.get(strippedKey).longValue();
-            if (bestCandidateTs < finalKey.getTimestamp()) {
-              candidateKeys.put(strippedKey, new Long(finalKey.getTimestamp()));
-            } 
+        // so, the row key doesn't match, but we haven't gone past the row
+        // we're seeking yet, so this row is a candidate for closest
+        // (assuming that it isn't a delete).
+        if (!HLogEdit.isDeleted(readval.get())) {
+          if (ttl == HConstants.FOREVER || 
+                  now < readkey.getTimestamp() + ttl) {
+            candidateKeys.put(stripTimestamp(readkey), 
+              new Long(readkey.getTimestamp()));
           } else {
-            // otherwise, this is a new key, so put it up as a candidate
-            candidateKeys.put(strippedKey, new Long(finalKey.getTimestamp()));
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("rowAtOrBeforeFromMapFile:" + readkey +
+                ": expired, skipped");
+            }
           }
         }
-        return;
-      }
-      
-      // seek to the exact row, or the one that would be immediately before it
-      readkey = (HStoreKey)map.getClosest(searchKey, readval, true);
-      
-      if (readkey == null) {
-        // didn't find anything that would match, so return
-        return;
+      }        
+    } while(map.next(readkey, readval));
+  
+    // arriving here just means that we consumed the whole rest of the map
+    // without going "past" the key we're searching for. we can just fall
+    // through here.
+  }
+  
+  
+  private void rowKeyAtOrBeforeExistingCandKeys(MapFile.Reader map, byte[] row,
+    SortedMap<HStoreKey, Long> candidateKeys, long now) 
+  throws IOException {
+    
+    HStoreKey strippedKey = null;
+    ImmutableBytesWritable readval = new ImmutableBytesWritable();
+    HStoreKey readkey = new HStoreKey();
+
+
+    // if there are already candidate keys, we need to start our search 
+    // at the earliest possible key so that we can discover any possible
+    // deletes for keys between the start and the search key.
+    HStoreKey searchKey = new HStoreKey(candidateKeys.firstKey().getRow());
+
+    // if the row we're looking for is past the end of this mapfile, just
+    // save time and add the last key to the candidates.
+    HStoreKey finalKey = new HStoreKey(); 
+    map.finalKey(finalKey);
+    if (Bytes.compareTo(finalKey.getRow(), searchKey.getRow()) < 0) {
+      strippedKey = stripTimestamp(finalKey);
+
+      // if the candidate keys has a cell like this one already,
+      // then we might want to update the timestamp we're using on it
+      if (candidateKeys.containsKey(strippedKey)) {
+        long bestCandidateTs = 
+          candidateKeys.get(strippedKey).longValue();
+        if (bestCandidateTs < finalKey.getTimestamp()) {
+          candidateKeys.put(strippedKey, new Long(finalKey.getTimestamp()));
+        } 
+      } else {
+        // otherwise, this is a new key, so put it up as a candidate
+        candidateKeys.put(strippedKey, new Long(finalKey.getTimestamp()));
       }
-      
-      do {
-        // if we have an exact match on row, and it's not a delete, save this
-        // as a candidate key
-        if (Bytes.equals(readkey.getRow(), row)) {
-          strippedKey = stripTimestamp(readkey);
-          if (!HLogEdit.isDeleted(readval.get())) {
-            if (ttl == HConstants.FOREVER || 
-                    now < readkey.getTimestamp() + ttl) {
-              candidateKeys.put(strippedKey,
+      return;
+    }
+
+    // seek to the exact row, or the one that would be immediately before it
+    readkey = (HStoreKey)map.getClosest(searchKey, readval, true);
+
+    if (readkey == null) {
+      // didn't find anything that would match, so return
+      return;
+    }
+
+    do {
+      // if we have an exact match on row, and it's not a delete, save this
+      // as a candidate key
+      if (Bytes.equals(readkey.getRow(), row)) {
+        strippedKey = stripTimestamp(readkey);
+        if (!HLogEdit.isDeleted(readval.get())) {
+          if (ttl == HConstants.FOREVER || 
+              now < readkey.getTimestamp() + ttl) {
+            candidateKeys.put(strippedKey,
                 new Long(readkey.getTimestamp()));
-            } else {
-              if (LOG.isDebugEnabled()) {
-                LOG.debug("rowAtOrBeforeFromMapFile: " + readkey +
-                  ": expired, skipped");
-              }
-            }
           } else {
-            // if the candidate keys contain any that might match by timestamp,
-            // then check for a match and remove it if it's too young to 
-            // survive the delete 
-            if (candidateKeys.containsKey(strippedKey)) {
-              long bestCandidateTs =
-                candidateKeys.get(strippedKey).longValue();
-              if (bestCandidateTs <= readkey.getTimestamp()) {
-                candidateKeys.remove(strippedKey);
-              } 
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("rowAtOrBeforeFromMapFile: " + readkey +
+                  ": expired, skipped");
             }
           }
-        } else if (Bytes.compareTo(readkey.getRow(), row) > 0 ) {
-          // if the row key we just read is beyond the key we're searching for,
-          // then we're done. return.
-          return;
         } else {
-          strippedKey = stripTimestamp(readkey);
-          
-          // so, the row key doesn't match, but we haven't gone past the row
-          // we're seeking yet, so this row is a candidate for closest 
-          // (assuming that it isn't a delete).
-          if (!HLogEdit.isDeleted(readval.get())) {
-            if (ttl == HConstants.FOREVER || 
-                    now < readkey.getTimestamp() + ttl) {
-              candidateKeys.put(strippedKey, readkey.getTimestamp());
-            } else {
-              if (LOG.isDebugEnabled()) {
-                LOG.debug("rowAtOrBeforeFromMapFile: " + readkey +
-                  ": expired, skipped");
-              }
-            }
+          // if the candidate keys contain any that might match by timestamp,
+          // then check for a match and remove it if it's too young to 
+          // survive the delete 
+          if (candidateKeys.containsKey(strippedKey)) {
+            long bestCandidateTs =
+              candidateKeys.get(strippedKey).longValue();
+            if (bestCandidateTs <= readkey.getTimestamp()) {
+              candidateKeys.remove(strippedKey);
+            } 
+          }
+        }
+      } else if (Bytes.compareTo(readkey.getRow(), row) > 0 ) {
+        // if the row key we just read is beyond the key we're searching for,
+        // then we're done. return.
+        return;
+      } else {
+        strippedKey = stripTimestamp(readkey);
+
+        // so, the row key doesn't match, but we haven't gone past the row
+        // we're seeking yet, so this row is a candidate for closest 
+        // (assuming that it isn't a delete).
+        if (!HLogEdit.isDeleted(readval.get())) {
+          if (ttl == HConstants.FOREVER || 
+              now < readkey.getTimestamp() + ttl) {
+            candidateKeys.put(strippedKey, readkey.getTimestamp());
           } else {
-            // if the candidate keys contain any that might match by timestamp,
-            // then check for a match and remove it if it's too young to 
-            // survive the delete 
-            if (candidateKeys.containsKey(strippedKey)) {
-              long bestCandidateTs = 
-                candidateKeys.get(strippedKey).longValue();
-              if (bestCandidateTs <= readkey.getTimestamp()) {
-                candidateKeys.remove(strippedKey);
-              } 
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("rowAtOrBeforeFromMapFile: " + readkey +
+                  ": expired, skipped");
             }
-          }      
-        }
-      } while(map.next(readkey, readval));
-    }
+          }
+        } else {
+          // if the candidate keys contain any that might match by timestamp,
+          // then check for a match and remove it if it's too young to 
+          // survive the delete 
+          if (candidateKeys.containsKey(strippedKey)) {
+            long bestCandidateTs = 
+              candidateKeys.get(strippedKey).longValue();
+            if (bestCandidateTs <= readkey.getTimestamp()) {
+              candidateKeys.remove(strippedKey);
+            } 
+          }
+        }      
+      }
+    } while(map.next(readkey, readval));    
   }
   
   static HStoreKey stripTimestamp(HStoreKey key) {
@@ -1889,4 +1912,4 @@
       return copy;
     }
   }
-}
+}
\ No newline at end of file

Modified: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestGet2.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestGet2.java?rev=669211&r1=669210&r2=669211&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestGet2.java (original)
+++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestGet2.java Wed Jun 18 09:28:23 2008
@@ -250,6 +250,71 @@
     }
   }
 
+  /** For HBASE-694 */
+  public void testGetClosestRowBefore2() throws IOException{
+
+    HRegion region = null;
+    BatchUpdate batchUpdate = null;
+    
+    try {
+      HTableDescriptor htd = createTableDescriptor(getName());
+      region = createNewHRegion(htd, null, null);
+     
+      // set up some test data
+      String t10 = "010";
+      String t20 = "020";
+      String t30 = "030";
+      String t40 = "040";
+      
+      batchUpdate = new BatchUpdate(t10);
+      batchUpdate.put(COLUMNS[0], "t10 bytes".getBytes());
+      region.batchUpdate(batchUpdate);
+      
+      batchUpdate = new BatchUpdate(t30);
+      batchUpdate.put(COLUMNS[0], "t30 bytes".getBytes());
+      region.batchUpdate(batchUpdate);
+      
+      batchUpdate = new BatchUpdate(t40);
+      batchUpdate.put(COLUMNS[0], "t40 bytes".getBytes());
+      region.batchUpdate(batchUpdate);
+
+      // try finding "035"
+      String t35 = "035";
+      Map<byte [], Cell> results = 
+        region.getClosestRowBefore(Bytes.toBytes(t35));
+      assertEquals(new String(results.get(COLUMNS[0]).getValue()), "t30 bytes");
+
+      region.flushcache();
+
+      // try finding "035"
+      results = region.getClosestRowBefore(Bytes.toBytes(t35));
+      assertEquals(new String(results.get(COLUMNS[0]).getValue()), "t30 bytes");
+
+      batchUpdate = new BatchUpdate(t20);
+      batchUpdate.put(COLUMNS[0], "t20 bytes".getBytes());
+      region.batchUpdate(batchUpdate);
+      
+      // try finding "035"
+      results = region.getClosestRowBefore(Bytes.toBytes(t35));
+      assertEquals(new String(results.get(COLUMNS[0]).getValue()), "t30 bytes");
+      
+      region.flushcache();
+
+      // try finding "035"
+      results = region.getClosestRowBefore(Bytes.toBytes(t35));
+      assertEquals(new String(results.get(COLUMNS[0]).getValue()), "t30 bytes");
+    } finally {
+      if (region != null) {
+        try {
+          region.close();
+        } catch (Exception e) {
+          e.printStackTrace();
+        }
+        region.getLog().closeAndDelete();
+      }
+    }
+  }
+
   /**
    * For HBASE-40
    */