You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2008/08/13 06:15:59 UTC

svn commit: r685432 - in /hadoop/hbase: branches/0.2/ branches/0.2/src/java/org/apache/hadoop/hbase/master/ branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/ branches/0.2/src/test/org/apache/hadoop/hbase/regionserver/ trunk/ trunk/src/java/or...

Author: stack
Date: Tue Aug 12 21:15:58 2008
New Revision: 685432

URL: http://svn.apache.org/viewvc?rev=685432&view=rev
Log:
HBASE-808,809 MAX_VERSIONS not respected, and Deletall doesn't and inserts after delete don't work as expected

Modified:
    hadoop/hbase/branches/0.2/CHANGES.txt
    hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/master/RegionManager.java
    hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
    hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HStore.java
    hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/Memcache.java
    hadoop/hbase/branches/0.2/src/test/org/apache/hadoop/hbase/regionserver/TestGet2.java
    hadoop/hbase/trunk/CHANGES.txt
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/Memcache.java
    hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestGet2.java

Modified: hadoop/hbase/branches/0.2/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.2/CHANGES.txt?rev=685432&r1=685431&r2=685432&view=diff
==============================================================================
--- hadoop/hbase/branches/0.2/CHANGES.txt (original)
+++ hadoop/hbase/branches/0.2/CHANGES.txt Tue Aug 12 21:15:58 2008
@@ -18,7 +18,11 @@
    HBASE-813   Add a row counter in the new shell (Jean-Daniel Cryans via Stack)
    HBASE-824   Bug in Hlog we print array of byes for region name
                (Billy Pearson via Stack)
-
+   HBASE-825   Master logs showing byte [] in place of string in logging
+               (Billy Pearson via Stack)
+   HBASE-808,809 MAX_VERSIONS not respected, and Deletall doesn't and inserts
+               after delete don't work as expected
+               (Jean-Daniel Cryans via Stack)
 
   IMPROVEMENTS
    HBASE-801  When a table haven't disable, shell could response in a "user

Modified: hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/master/RegionManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/master/RegionManager.java?rev=685432&r1=685431&r2=685432&view=diff
==============================================================================
--- hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/master/RegionManager.java (original)
+++ hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/master/RegionManager.java Tue Aug 12 21:15:58 2008
@@ -418,7 +418,7 @@
       }
       
       if (isClosing(currentRegion.getRegionName())) {
-        LOG.info("Skipping region " + currentRegion.getRegionName() 
+        LOG.info("Skipping region " + currentRegion.getRegionNameAsString() 
           + " because it is already closing.");
         continue;
       }

Modified: hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java?rev=685432&r1=685431&r2=685432&view=diff
==============================================================================
--- hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java (original)
+++ hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java Tue Aug 12 21:15:58 2008
@@ -1144,7 +1144,6 @@
     if (this.closed.get()) {
       throw new IOException("Region " + this + " closed");
     }
-
     // Make sure this is a valid row and valid column
     checkRow(row);
     checkColumn(column);
@@ -1267,10 +1266,9 @@
     }
   }
 
-  /**
+  /*
    * Get <code>versions</code> keys matching the origin key's
-   * row/column/timestamp and those of an older vintage
-   * Default access so can be accessed out of {@link HRegionServer}.
+   * row/column/timestamp and those of an older vintage.
    * @param origin Where to start searching.
    * @param versions How many versions to return. Pass
    * {@link HConstants.ALL_VERSIONS} to retrieve all.
@@ -1288,11 +1286,12 @@
       storesToCheck = new ArrayList<HStore>(1);
       storesToCheck.add(getStore(origin.getColumn()));
     }
+    long now = System.currentTimeMillis();
     for (HStore targetStore: storesToCheck) {
       if (targetStore != null) {
         // Pass versions without modification since in the store getKeys, it
         // includes the size of the passed <code>keys</code> array when counting.
-        List<HStoreKey> r = targetStore.getKeys(origin, versions);
+        List<HStoreKey> r = targetStore.getKeys(origin, versions, now);
         if (r != null) {
           keys.addAll(r);
         }
@@ -1466,6 +1465,9 @@
     checkReadOnly();
     Integer lid = obtainRowLock(row);
     try {
+      // Delete ALL verisons rather than MAX_VERSIONS.  If we just did
+      // MAX_VERSIONS, then if 2* MAX_VERSION cells, subsequent gets would
+      // get old stuff.
       deleteMultiple(row, column, ts, ALL_VERSIONS);
     } finally {
       releaseRowLock(lid);
@@ -1481,11 +1483,12 @@
   public void deleteAll(final byte [] row, final long ts)
   throws IOException {
     checkReadOnly();
-    Integer lid = obtainRowLock(row);    
+    Integer lid = obtainRowLock(row);
+    long now = System.currentTimeMillis();
     try {
-      for (HStore store : stores.values()){
+      for (HStore store : stores.values()) {
         List<HStoreKey> keys = store.getKeys(new HStoreKey(row, ts),
-          ALL_VERSIONS);
+          ALL_VERSIONS, now);
         TreeMap<HStoreKey, byte []> edits = new TreeMap<HStoreKey, byte []>();
         for (HStoreKey key: keys) {
           edits.put(key, HLogEdit.deleteBytes.get());
@@ -1509,12 +1512,14 @@
   public void deleteFamily(byte [] row, byte [] family, long timestamp)
   throws IOException{
     checkReadOnly();
-    Integer lid = obtainRowLock(row);    
+    Integer lid = obtainRowLock(row);
+    long now = System.currentTimeMillis();
     try {
       // find the HStore for the column family
       HStore store = getStore(family);
       // find all the keys that match our criteria
-      List<HStoreKey> keys = store.getKeys(new HStoreKey(row, timestamp), ALL_VERSIONS);
+      List<HStoreKey> keys = store.getKeys(new HStoreKey(row, timestamp),
+          ALL_VERSIONS, now);
       // delete all the cells
       TreeMap<HStoreKey, byte []> edits = new TreeMap<HStoreKey, byte []>();
       for (HStoreKey key: keys) {
@@ -1526,11 +1531,10 @@
     }
   }
   
-  /**
+  /*
    * Delete one or many cells.
    * Used to support {@link #deleteAll(byte [], byte [], long)} and deletion of
    * latest cell.
-   * 
    * @param row
    * @param column
    * @param ts Timestamp to start search on.

Modified: hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HStore.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HStore.java?rev=685432&r1=685431&r2=685432&view=diff
==============================================================================
--- hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HStore.java (original)
+++ hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HStore.java Tue Aug 12 21:15:58 2008
@@ -970,52 +970,6 @@
   }
 
   /*
-   * Check if this is cell is deleted.
-   * If a memcache and a deletes, check key does not have an entry filled.
-   * Otherwise, check value is not the <code>HGlobals.deleteBytes</code> value.
-   * If passed value IS deleteBytes, then it is added to the passed
-   * deletes map.
-   * @param hsk
-   * @param value
-   * @param checkMemcache true if the memcache should be consulted
-   * @param deletes Map keyed by column with a value of timestamp. Can be null.
-   * If non-null and passed value is HGlobals.deleteBytes, then we add to this
-   * map.
-   * @return True if this is a deleted cell.  Adds the passed deletes map if
-   * passed value is HGlobals.deleteBytes.
-  */
-  private boolean isDeleted(final HStoreKey hsk, final byte [] value,
-      final boolean checkMemcache, final Map<byte [], List<Long>> deletes) {
-    if (checkMemcache && memcache.isDeleted(hsk)) {
-      return true;
-    }
-    List<Long> timestamps =
-      (deletes == null) ? null: deletes.get(hsk.getColumn());
-    if (timestamps != null &&
-        timestamps.contains(Long.valueOf(hsk.getTimestamp()))) {
-      return true;
-    }
-    if (value == null) {
-      // If a null value, shouldn't be in here.  Mark it as deleted cell.
-      return true;
-    }
-    if (!HLogEdit.isDeleted(value)) {
-      return false;
-    }
-    // Cell has delete value.  Save it into deletes.
-    if (deletes != null) {
-      if (timestamps == null) {
-        timestamps = new ArrayList<Long>();
-        deletes.put(hsk.getColumn(), timestamps);
-      }
-      // We know its not already in the deletes array else we'd have returned
-      // earlier so no need to test if timestamps already has this value.
-      timestamps.add(Long.valueOf(hsk.getTimestamp()));
-    }
-    return true;
-  }
-  
-  /*
    * It's assumed that the compactLock  will be acquired prior to calling this 
    * method!  Otherwise, it is not thread-safe!
    *
@@ -1223,6 +1177,19 @@
       toArray(new MapFile.Reader[this.readers.size()]);
   }
 
+  /*
+   * @param wantedVersions How many versions were asked for.
+   * @return wantedVersions or this families' MAX_VERSIONS.
+   */
+  private int versionsToReturn(final int wantedVersions) {
+    if (wantedVersions <= 0) {
+      throw new IllegalArgumentException("Number of versions must be > 0");
+    }
+    // Make sure we do not return more than maximum versions for this store.
+    return wantedVersions > this.family.getMaxVersions()?
+      this.family.getMaxVersions(): wantedVersions;
+  }
+  
   /**
    * Get the value for the indicated HStoreKey.  Grab the target value and the 
    * previous <code>numVersions - 1</code> values, as well.
@@ -1233,37 +1200,38 @@
    * @return values for the specified versions
    * @throws IOException
    */
-  Cell[] get(HStoreKey key, int numVersions) throws IOException {
-    if (numVersions <= 0) {
-      throw new IllegalArgumentException("Number of versions must be > 0");
-    }
-    
-    this.lock.readLock().lock();
+  Cell[] get(final HStoreKey key, final int numVersions) throws IOException {
+    // This code below is very close to the body of the getKeys method.  Any 
+    // changes in the flow below should also probably be done in getKeys.
+    // TODO: Refactor so same code used.
     long now = System.currentTimeMillis();
+    int versions = versionsToReturn(numVersions);
+    // Keep a list of deleted cell keys.  We need this because as we go through
+    // the memcache and store files, the cell with the delete marker may be
+    // in one store and the old non-delete cell value in a later store.
+    // If we don't keep around the fact that the cell was deleted in a newer
+    // record, we end up returning the old value if user is asking for more
+    // than one version. This List of deletes should not be large since we
+    // are only keeping rows and columns that match those set on the get and
+    // which have delete values.  If memory usage becomes an issue, could
+    // redo as bloom filter.
+    Set<HStoreKey> deletes = new HashSet<HStoreKey>();
+    this.lock.readLock().lock();
     try {
       // Check the memcache
-      List<Cell> results = this.memcache.get(key, numVersions);
+      List<Cell> results = this.memcache.get(key, versions, deletes, now);
       // If we got sufficient versions from memcache, return. 
-      if (results.size() == numVersions) {
+      if (results.size() == versions) {
         return results.toArray(new Cell[results.size()]);
       }
-
-      // Keep a list of deleted cell keys.  We need this because as we go through
-      // the store files, the cell with the delete marker may be in one file and
-      // the old non-delete cell value in a later store file. If we don't keep
-      // around the fact that the cell was deleted in a newer record, we end up
-      // returning the old value if user is asking for more than one version.
-      // This List of deletes should not be large since we are only keeping rows
-      // and columns that match those set on the scanner and which have delete
-      // values.  If memory usage becomes an issue, could redo as bloom filter.
-      Map<byte [], List<Long>> deletes =
-        new TreeMap<byte [], List<Long>>(Bytes.BYTES_COMPARATOR);
-      // This code below is very close to the body of the getKeys method.
       MapFile.Reader[] maparray = getReaders();
-      for(int i = maparray.length - 1; i >= 0; i--) {
+      // Returned array is sorted with the most recent addition last.
+      for(int i = maparray.length - 1;
+          i >= 0 && !hasEnoughVersions(versions, results); i--) {
         MapFile.Reader map = maparray[i];
         synchronized(map) {
           map.reset();
+          // Do the priming read
           ImmutableBytesWritable readval = new ImmutableBytesWritable();
           HStoreKey readkey = (HStoreKey)map.getClosest(key, readval);
           if (readkey == null) {
@@ -1276,32 +1244,17 @@
           if (!readkey.matchesRowCol(key)) {
             continue;
           }
-          if (!isDeleted(readkey, readval.get(), true, deletes)) {
-            if (!isExpired(readkey, ttl, now)) {
-              results.add(new Cell(readval.get(), readkey.getTimestamp()));
-            }
-            // Perhaps only one version is wanted.  I could let this
-            // test happen later in the for loop test but it would cost
-            // the allocation of an ImmutableBytesWritable.
-            if (hasEnoughVersions(numVersions, results)) {
-              break;
-            }
+          if (get(readkey, readval.get(), versions, results, deletes, now)) {
+            break;
           }
           for (readval = new ImmutableBytesWritable();
-              map.next(readkey, readval) &&
-              readkey.matchesRowCol(key) &&
-              !hasEnoughVersions(numVersions, results);
+              map.next(readkey, readval) && readkey.matchesRowCol(key);
               readval = new ImmutableBytesWritable()) {
-            if (!isDeleted(readkey, readval.get(), true, deletes)) {
-              if (!isExpired(readkey, ttl, now)) {
-                results.add(new Cell(readval.get(), readkey.getTimestamp()));
-              }
+            if (get(readkey, readval.get(), versions, results, deletes, now)) {
+              break;
             }
           }
         }
-        if (hasEnoughVersions(numVersions, results)) {
-          break;
-        }
       }
       return results.size() == 0 ?
         null : results.toArray(new Cell[results.size()]);
@@ -1310,54 +1263,83 @@
     }
   }
   
-  /**
+  /*
+   * Look at one key/value.
+   * @param key
+   * @param value
+   * @param versions
+   * @param results
+   * @param deletes
+   * @param now
+   * @return True if we have enough versions.
+   */
+  private boolean get(final HStoreKey key, final byte [] value,
+      final int versions, final List<Cell> results,
+      final Set<HStoreKey> deletes, final long now) {
+    if (!HLogEdit.isDeleted(value)) {
+      if (notExpiredAndNotInDeletes(this.ttl, key, now, deletes)) {
+        results.add(new Cell(value, key.getTimestamp()));
+      }
+      // Perhaps only one version is wanted.  I could let this
+      // test happen later in the for loop test but it would cost
+      // the allocation of an ImmutableBytesWritable.
+      if (hasEnoughVersions(versions, results)) {
+        return true;
+      }
+    } else {
+      // Is this copy necessary?
+      deletes.add(new HStoreKey(key));
+    }
+    return false;
+  }
+
+  /*
    * Small method to check if we are over the max number of versions
    * or we acheived this family max versions. 
    * The later happens when we have the situation described in HBASE-621.
-   * @param numVersions
-   * @param results
+   * @param versions
+   * @param c
    * @return 
    */
-  private boolean hasEnoughVersions(final int numVersions,
-      final List<Cell> results) {
-    return (results.size() >= numVersions || results.size() >= family
-            .getMaxVersions());
+  private boolean hasEnoughVersions(final int versions, final List<Cell> c) {
+    return c.size() >= versions;
   }
 
   /**
    * Get <code>versions</code> keys matching the origin key's
-   * row/column/timestamp and those of an older vintage
+   * row/column/timestamp and those of an older vintage.
    * Default access so can be accessed out of {@link HRegionServer}.
    * @param origin Where to start searching.
-   * @param versions How many versions to return. Pass
-   * {@link HConstants.ALL_VERSIONS} to retrieve all. Versions will include
-   * size of passed <code>allKeys</code> in its count.
-   * @param allKeys List of keys prepopulated by keys we found in memcache.
-   * This method returns this passed list with all matching keys found in
-   * stores appended.
-   * @return The passed <code>allKeys</code> with <code>versions</code> of
-   * matching keys found in store files appended.
+   * @param numVersions How many versions to return. Pass
+   * {@link HConstants.ALL_VERSIONS} to retrieve all.
+   * @param now
+   * @return Matching keys.
    * @throws IOException
    */
-  List<HStoreKey> getKeys(final HStoreKey origin, final int versions)
+  List<HStoreKey> getKeys(final HStoreKey origin, final int versions,
+    final long now)
   throws IOException {
-      
-    List<HStoreKey> keys = this.memcache.getKeys(origin, versions);
-    if (keys.size() >= versions) {
-      return keys;
-    }
-    
-    // This code below is very close to the body of the get method.
+    // This code below is very close to the body of the get method.  Any 
+    // changes in the flow below should also probably be done in get.  TODO:
+    // Refactor so same code used.
+    Set<HStoreKey> deletes = new HashSet<HStoreKey>();
     this.lock.readLock().lock();
-    long now = System.currentTimeMillis();
     try {
+      // Check the memcache
+      List<HStoreKey> keys =
+        this.memcache.getKeys(origin, versions, deletes, now);
+      // If we got sufficient versions from memcache, return. 
+      if (keys.size() >= versions) {
+        return keys;
+      }
       MapFile.Reader[] maparray = getReaders();
-      for(int i = maparray.length - 1; i >= 0; i--) {
+      // Returned array is sorted with the most recent addition last.
+      for(int i = maparray.length - 1;
+          i >= 0 && keys.size() < versions; i--) {
         MapFile.Reader map = maparray[i];
         synchronized(map) {
           map.reset();
-          
-          // do the priming read
+          // Do the priming read
           ImmutableBytesWritable readval = new ImmutableBytesWritable();
           HStoreKey readkey = (HStoreKey)map.getClosest(origin, readval);
           if (readkey == null) {
@@ -1367,24 +1349,23 @@
             // BEFORE.
             continue;
           }
-          
-          do{
+          do {
             // if the row matches, we might want this one.
             if (rowMatches(origin, readkey)) {
               // if the cell matches, then we definitely want this key.
               if (cellMatches(origin, readkey)) {
-                // store the key if it isn't deleted or superceeded by what's
+                // Store the key if it isn't deleted or superceeded by what's
                 // in the memcache
-                if (!isDeleted(readkey, readval.get(), false, null) &&
-                    !keys.contains(readkey)) {
-                  if (!isExpired(readkey, ttl, now)) {
+                if (!HLogEdit.isDeleted(readval.get())) {
+                  if (notExpiredAndNotInDeletes(this.ttl, readkey, now, deletes)) {
                     keys.add(new HStoreKey(readkey));
                   }
-
-                  // if we've collected enough versions, then exit the loop.
                   if (keys.size() >= versions) {
                     break;
                   }
+                } else {
+                  // Is this copy necessary?
+                  deletes.add(new HStoreKey(readkey));
                 }
               } else {
                 // the cell doesn't match, but there might be more with different
@@ -1398,7 +1379,6 @@
           } while (map.next(readkey, readval)); // advance to the next key
         }
       }
-      
       return keys;
     } finally {
       this.lock.readLock().unlock();
@@ -1446,7 +1426,8 @@
         rowAtOrBeforeFromMapFile(maparray[i], row, candidateKeys, deletes);
       }
       // Return the best key from candidateKeys
-      return candidateKeys.isEmpty()? null: candidateKeys.lastKey().getRow();
+      byte [] result = candidateKeys.isEmpty()? null: candidateKeys.lastKey().getRow();
+      return result;
     } finally {
       this.lock.readLock().unlock();
     }
@@ -1527,10 +1508,11 @@
    */
   static boolean notExpiredAndNotInDeletes(final long ttl,
       final HStoreKey hsk, final long now, final Set<HStoreKey> deletes) {
-    return !isExpired(hsk, ttl, now) && !deletes.contains(hsk);
+    return !isExpired(hsk, ttl, now) &&
+      (deletes == null || !deletes.contains(hsk));
   }
   
-  private static boolean isExpired(final HStoreKey hsk, final long ttl,
+  static boolean isExpired(final HStoreKey hsk, final long ttl,
       final long now) {
     boolean result = ttl != HConstants.FOREVER && now > hsk.getTimestamp() + ttl;
     if (result && LOG.isDebugEnabled()) {

Modified: hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/Memcache.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/Memcache.java?rev=685432&r1=685431&r2=685432&view=diff
==============================================================================
--- hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/Memcache.java (original)
+++ hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/Memcache.java Tue Aug 12 21:15:58 2008
@@ -52,7 +52,7 @@
 class Memcache {
   private final Log LOG = LogFactory.getLog(this.getClass().getName());
   
-  private long ttl;
+  private final long ttl;
 
   // Note that since these structures are always accessed with a lock held,
   // so no additional synchronization is required.
@@ -70,16 +70,15 @@
   /**
    * Default constructor. Used for tests.
    */
-  public Memcache()
-  {
-    ttl = HConstants.FOREVER;
+  public Memcache() {
+    this.ttl = HConstants.FOREVER;
   }
 
   /**
    * Constructor.
    * @param ttl The TTL for cache entries, in milliseconds.
    */
-  public Memcache(long ttl) {
+  public Memcache(final long ttl) {
     this.ttl = ttl;
   }
 
@@ -180,16 +179,29 @@
    * @return An array of byte arrays ordered by timestamp.
    */
   List<Cell> get(final HStoreKey key, final int numVersions) {
+    return get(key, numVersions, null, System.currentTimeMillis());
+  }
+  
+  /**
+   * Look back through all the backlog TreeMaps to find the target.
+   * @param key
+   * @param numVersions
+   * @param deletes
+   * @param now
+   * @return An array of byte arrays ordered by timestamp.
+   */
+  List<Cell> get(final HStoreKey key, final int numVersions,
+      final Set<HStoreKey> deletes, final long now) {
     this.lock.readLock().lock();
     try {
       List<Cell> results;
-      // The synchronizations here are because internalGet iterates
+      // The synchronizations here are because the below get iterates
       synchronized (this.memcache) {
-        results = internalGet(this.memcache, key, numVersions);
+        results = get(this.memcache, key, numVersions, deletes, now);
       }
       synchronized (this.snapshot) {
         results.addAll(results.size(),
-          internalGet(this.snapshot, key, numVersions - results.size()));
+          get(this.snapshot, key, numVersions - results.size(), deletes, now));
       }
       return results;
     } finally {
@@ -308,10 +320,7 @@
                   now < itKey.getTimestamp() + ttl) {
               results.put(itCol, new Cell(val, itKey.getTimestamp()));
             } else {
-              victims.add(itKey);
-              if (LOG.isDebugEnabled()) {
-                LOG.debug("internalGetFull: " + itKey + ": expired, skipped");
-              }
+              addVictim(victims, itKey);
             }
           }
         }
@@ -399,11 +408,7 @@
               if (deletedOrExpiredRow == null) {
                 deletedOrExpiredRow = new HStoreKey(found_key);
               }
-              victims.add(found_key);
-              if (LOG.isDebugEnabled()) {
-                LOG.debug("internalGetRowKeyAtOrBefore:" + found_key +
-                  " expired, skipped");
-              }
+              addVictim(victims, found_key);
             }
           }
         }
@@ -525,7 +530,7 @@
     return new HStoreKey(key.getRow(), key.getColumn());
   }
   
-  /**
+  /*
    * Examine a single map for the desired key.
    *
    * TODO - This is kinda slow.  We need a data structure that allows for 
@@ -534,42 +539,51 @@
    * @param map
    * @param key
    * @param numVersions
+   * @param deletes
    * @return Ordered list of items found in passed <code>map</code>.  If no
    * matching values, returns an empty list (does not return null).
    */
-  private ArrayList<Cell> internalGet(
-      final SortedMap<HStoreKey, byte []> map, final HStoreKey key,
-      final int numVersions) {
-
+  private ArrayList<Cell> get(final SortedMap<HStoreKey, byte []> map,
+      final HStoreKey key, final int numVersions, final Set<HStoreKey> deletes,
+      final long now) {
     ArrayList<Cell> result = new ArrayList<Cell>();
-    
-    // TODO: If get is of a particular version -- numVersions == 1 -- we
-    // should be able to avoid all of the tailmap creations and iterations
-    // below.
-    long now = System.currentTimeMillis();
     List<HStoreKey> victims = new ArrayList<HStoreKey>();
-    SortedMap<HStoreKey, byte []> tailMap = map.tailMap(key);
-    for (Map.Entry<HStoreKey, byte []> es: tailMap.entrySet()) {
-      HStoreKey itKey = es.getKey();
-      if (itKey.matchesRowCol(key)) {
-        if (!HLogEdit.isDeleted(es.getValue())) { 
-          // Filter out expired results
-          if (ttl == HConstants.FOREVER ||
-                now < itKey.getTimestamp() + ttl) {
-            result.add(new Cell(tailMap.get(itKey), itKey.getTimestamp()));
-            if (numVersions > 0 && result.size() >= numVersions) {
-              break;
+    // Handle special case where only one version wanted.
+    if (numVersions == 1) {
+      byte [] value = map.get(key);
+      if (!isDeleted(value)) {
+        // Filter out expired results
+        if (HStore.notExpiredAndNotInDeletes(ttl, key, now, deletes)) {
+          result.add(new Cell(value, key.getTimestamp()));
+        } else {
+          addVictim(victims, key);
+        }
+      } else {
+        deletes.add(key);
+      }
+    } else {
+      SortedMap<HStoreKey, byte[]> tailMap = map.tailMap(key);
+      for (Map.Entry<HStoreKey, byte[]> es : tailMap.entrySet()) {
+        HStoreKey itKey = es.getKey();
+        if (itKey.matchesRowCol(key)) {
+          if (!isDeleted(es.getValue())) {
+            // Filter out expired results
+            if (HStore.notExpiredAndNotInDeletes(ttl, itKey, now, deletes)) {
+              result.add(new Cell(tailMap.get(itKey), itKey.getTimestamp()));
+              if (numVersions > 0 && result.size() >= numVersions) {
+                break;
+              }
+            } else {
+              addVictim(victims, itKey);
             }
           } else {
-            victims.add(itKey);
-            if (LOG.isDebugEnabled()) {
-              LOG.debug("internalGet: " + itKey + ": expired, skipped");
-            }
+            // Cell holds a delete value.
+            deletes.add(itKey);
           }
+        } else {
+          // By L.N. HBASE-684, map is sorted, so we can't find match any more.
+          break;
         }
-      } else {
-        // By L.N. HBASE-684, map is sorted, so we can't find match any more.
-        break;
       }
     }
     // Remove expired victims from the map.
@@ -579,30 +593,43 @@
     return result;
   }
 
+  /*
+   * Add <code>key</code> to the list of 'victims'.
+   * @param victims
+   * @param key
+   */
+  private void addVictim(final List<HStoreKey> victims, final HStoreKey key) {
+    victims.add(key);
+    if (LOG.isDebugEnabled()) {
+      LOG.debug(key + ": expired or in deletes, skipped");
+    }
+  }
+
   /**
    * Get <code>versions</code> keys matching the origin key's
-   * row/column/timestamp and those of an older vintage
-   * Default access so can be accessed out of {@link HRegionServer}.
+   * row/column/timestamp and those of an older vintage.
    * @param origin Where to start searching.
    * @param versions How many versions to return. Pass
    * {@link HConstants.ALL_VERSIONS} to retrieve all.
+   * @param now
+   * @param deletes Accumulating list of deletes
    * @return Ordered list of <code>versions</code> keys going from newest back.
    * @throws IOException
    */
-  List<HStoreKey> getKeys(final HStoreKey origin, final int versions) {
+  List<HStoreKey> getKeys(final HStoreKey origin, final int versions,
+      final Set<HStoreKey> deletes, final long now) {
     this.lock.readLock().lock();
     try {
       List<HStoreKey> results;
       synchronized (memcache) {
-        results = internalGetKeys(this.memcache, origin, versions);
+        results = getKeys(this.memcache, origin, versions, deletes, now);
       }
       synchronized (snapshot) {
-        results.addAll(results.size(), internalGetKeys(snapshot, origin,
+        results.addAll(results.size(), getKeys(snapshot, origin,
             versions == HConstants.ALL_VERSIONS ? versions :
-              (versions - results.size())));
+              (versions - results.size()), deletes, now));
       }
       return results;
-      
     } finally {
       this.lock.readLock().unlock();
     }
@@ -612,21 +639,20 @@
    * @param origin Where to start searching.
    * @param versions How many versions to return. Pass
    * {@link HConstants.ALL_VERSIONS} to retrieve all.
+   * @param now
+   * @param deletes
    * @return List of all keys that are of the same row and column and of
    * equal or older timestamp.  If no keys, returns an empty List. Does not
    * return null.
    */
-  private List<HStoreKey> internalGetKeys(
-      final SortedMap<HStoreKey, byte []> map, final HStoreKey origin,
-      final int versions) {
-
-    long now = System.currentTimeMillis();
+  private List<HStoreKey> getKeys(final SortedMap<HStoreKey,
+      byte []> map, final HStoreKey origin, final int versions,
+      final Set<HStoreKey> deletes, final long now) {
     List<HStoreKey> result = new ArrayList<HStoreKey>();
     List<HStoreKey> victims = new ArrayList<HStoreKey>();
     SortedMap<HStoreKey, byte []> tailMap = map.tailMap(origin);
     for (Map.Entry<HStoreKey, byte []> es: tailMap.entrySet()) {
       HStoreKey key = es.getKey();
-  
       // if there's no column name, then compare rows and timestamps
       if (origin.getColumn() != null && origin.getColumn().length == 0) {
         // if the current and origin row don't match, then we can jump
@@ -639,38 +665,34 @@
         if (key.getTimestamp() > origin.getTimestamp()) {
           continue;
         }
-      }
-      else{ // compare rows and columns
+      } else { // compare rows and columns
         // if the key doesn't match the row and column, then we're done, since 
         // all the cells are ordered.
         if (!key.matchesRowCol(origin)) {
           break;
         }
       }
-      if (!HLogEdit.isDeleted(es.getValue())) {
-        if (ttl == HConstants.FOREVER || now < key.getTimestamp() + ttl) {
+      if (!isDeleted(es.getValue())) {
+        if (HStore.notExpiredAndNotInDeletes(this.ttl, key, now, deletes)) {
           result.add(key);
-        } else {
-          victims.add(key);
-          if (LOG.isDebugEnabled()) {
-            LOG.debug("internalGetKeys: " + key + ": expired, skipped");
+          if (versions > 0 && result.size() >= versions) {
+            break;
           }
+        } else {
+          addVictim(victims, key);
         }
-        if (result.size() >= versions) {
-          // We have enough results.  Return.
-          break;
-        }
+      } else {
+        // Delete
+        deletes.add(key);
       }
     }
-
     // Clean expired victims from the map.
-    for (HStoreKey v: victims)
+    for (HStoreKey v: victims) {
        map.remove(v);
-
+    }
     return result;
   }
 
-
   /**
    * @param key
    * @return True if an entry and its content is {@link HGlobals.deleteBytes}.
@@ -678,7 +700,16 @@
    * the cell has been deleted.
    */
   boolean isDeleted(final HStoreKey key) {
-    return HLogEdit.isDeleted(this.memcache.get(key));
+    return isDeleted(this.memcache.get(key)) ||
+      (this.snapshot != null && isDeleted(this.snapshot.get(key)));
+  }
+  
+  /*
+   * @param b Cell value.
+   * @return True if this is a delete value.
+   */
+  private boolean isDeleted(final byte [] b) {
+    return HLogEdit.isDeleted(b);
   }
 
   /**

Modified: hadoop/hbase/branches/0.2/src/test/org/apache/hadoop/hbase/regionserver/TestGet2.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.2/src/test/org/apache/hadoop/hbase/regionserver/TestGet2.java?rev=685432&r1=685431&r2=685432&view=diff
==============================================================================
--- hadoop/hbase/branches/0.2/src/test/org/apache/hadoop/hbase/regionserver/TestGet2.java (original)
+++ hadoop/hbase/branches/0.2/src/test/org/apache/hadoop/hbase/regionserver/TestGet2.java Tue Aug 12 21:15:58 2008
@@ -27,6 +27,7 @@
 
 import org.apache.hadoop.dfs.MiniDFSCluster;
 import org.apache.hadoop.hbase.HBaseTestCase;
+import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HStoreKey;
 import org.apache.hadoop.hbase.HTableDescriptor;
@@ -61,6 +62,76 @@
   }
 
   /**
+   * Test for HBASE-808 and HBASE-809.
+   * @throws Exception
+   */
+  public void testMaxVersionsAndDeleting() throws Exception {
+    HRegion region = null;
+    try {
+      HTableDescriptor htd = createTableDescriptor(getName());
+      region = createNewHRegion(htd, null, null);
+      
+      byte [] column = COLUMNS[0];
+      for (int i = 0; i < 100; i++) {
+        addToRow(region, T00, column, i, T00.getBytes());
+      }
+      checkVersions(region, T00, column);
+      // Flush and retry.
+      region.flushcache();
+      checkVersions(region, T00, column);
+      
+      // Now delete all then retry
+      region.deleteAll(Bytes.toBytes(T00), System.currentTimeMillis());
+      Cell [] cells = region.get(Bytes.toBytes(T00), column,
+        HColumnDescriptor.DEFAULT_VERSIONS);
+      assertTrue(cells == null);
+      region.flushcache();
+      cells = region.get(Bytes.toBytes(T00), column,
+          HColumnDescriptor.DEFAULT_VERSIONS);
+      assertTrue(cells == null);
+      
+      // Now add back the rows
+      for (int i = 0; i < 100; i++) {
+        addToRow(region, T00, column, i, T00.getBytes());
+      }
+      // Run same verifications.
+      checkVersions(region, T00, column);
+      // Flush and retry.
+      region.flushcache();
+      checkVersions(region, T00, column);
+    } finally {
+      if (region != null) {
+        try {
+          region.close();
+        } catch (Exception e) {
+          e.printStackTrace();
+        }
+        region.getLog().closeAndDelete();
+      }
+    }
+  }
+  
+  private void addToRow(final HRegion r, final String row, final byte [] column,
+      final long ts, final byte [] bytes)
+  throws IOException {
+    BatchUpdate batchUpdate = new BatchUpdate(row, ts);
+    batchUpdate.put(column, bytes);
+    r.batchUpdate(batchUpdate);
+  }
+
+  private void checkVersions(final HRegion region, final String row,
+      final byte [] column)
+  throws IOException {
+    byte [] r = Bytes.toBytes(row);
+    Cell [] cells = region.get(r, column, 100);
+    assertTrue(cells.length == HColumnDescriptor.DEFAULT_VERSIONS);
+    cells = region.get(r, column, 1);
+    assertTrue(cells.length == 1);
+    cells = region.get(r, column, HConstants.ALL_VERSIONS);
+    assertTrue(cells.length == HColumnDescriptor.DEFAULT_VERSIONS);
+  }
+  
+  /**
    * Test file of multiple deletes and with deletes as final key.
    * @see <a href="https://issues.apache.org/jira/browse/HBASE-751">HBASE-751</a>
    */

Modified: hadoop/hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=685432&r1=685431&r2=685432&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Tue Aug 12 21:15:58 2008
@@ -20,6 +20,9 @@
                (Billy Pearson via Stack)
    HBASE-825   Master logs showing byte [] in place of string in logging
                (Billy Pearson via Stack)
+   HBASE-808,809 MAX_VERSIONS not respected, and Deletall doesn't and inserts
+               after delete don't work as expected
+               (Jean-Daniel Cryans via Stack)
 
   IMPROVEMENTS
    HBASE-801  When a table haven't disable, shell could response in a "user

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java?rev=685432&r1=685431&r2=685432&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java Tue Aug 12 21:15:58 2008
@@ -1144,7 +1144,6 @@
     if (this.closed.get()) {
       throw new IOException("Region " + this + " closed");
     }
-
     // Make sure this is a valid row and valid column
     checkRow(row);
     checkColumn(column);
@@ -1267,10 +1266,9 @@
     }
   }
 
-  /**
+  /*
    * Get <code>versions</code> keys matching the origin key's
-   * row/column/timestamp and those of an older vintage
-   * Default access so can be accessed out of {@link HRegionServer}.
+   * row/column/timestamp and those of an older vintage.
    * @param origin Where to start searching.
    * @param versions How many versions to return. Pass
    * {@link HConstants.ALL_VERSIONS} to retrieve all.
@@ -1288,11 +1286,12 @@
       storesToCheck = new ArrayList<HStore>(1);
       storesToCheck.add(getStore(origin.getColumn()));
     }
+    long now = System.currentTimeMillis();
     for (HStore targetStore: storesToCheck) {
       if (targetStore != null) {
         // Pass versions without modification since in the store getKeys, it
         // includes the size of the passed <code>keys</code> array when counting.
-        List<HStoreKey> r = targetStore.getKeys(origin, versions);
+        List<HStoreKey> r = targetStore.getKeys(origin, versions, now);
         if (r != null) {
           keys.addAll(r);
         }
@@ -1469,6 +1468,9 @@
     checkReadOnly();
     Integer lid = getLock(lockid,row);
     try {
+      // Delete ALL verisons rather than MAX_VERSIONS.  If we just did
+      // MAX_VERSIONS, then if 2* MAX_VERSION cells, subsequent gets would
+      // get old stuff.
       deleteMultiple(row, column, ts, ALL_VERSIONS);
     } finally {
       if(lockid == null) releaseRowLock(lid);
@@ -1486,11 +1488,12 @@
       final Integer lockid)
   throws IOException {
     checkReadOnly();
-    Integer lid = getLock(lockid,row);
+    Integer lid = getLock(lockid, row);
+    long now = System.currentTimeMillis();
     try {
-      for (HStore store : stores.values()){
+      for (HStore store : stores.values()) {
         List<HStoreKey> keys = store.getKeys(new HStoreKey(row, ts),
-          ALL_VERSIONS);
+          ALL_VERSIONS, now);
         TreeMap<HStoreKey, byte []> edits = new TreeMap<HStoreKey, byte []>();
         for (HStoreKey key: keys) {
           edits.put(key, HLogEdit.deleteBytes.get());
@@ -1516,12 +1519,14 @@
       final Integer lockid)
   throws IOException{
     checkReadOnly();
-    Integer lid = getLock(lockid,row);
+    Integer lid = getLock(lockid, row);
+    long now = System.currentTimeMillis();
     try {
       // find the HStore for the column family
       HStore store = getStore(family);
       // find all the keys that match our criteria
-      List<HStoreKey> keys = store.getKeys(new HStoreKey(row, timestamp), ALL_VERSIONS);
+      List<HStoreKey> keys = store.getKeys(new HStoreKey(row, timestamp),
+        ALL_VERSIONS, now);
       // delete all the cells
       TreeMap<HStoreKey, byte []> edits = new TreeMap<HStoreKey, byte []>();
       for (HStoreKey key: keys) {
@@ -1533,11 +1538,10 @@
     }
   }
   
-  /**
+  /*
    * Delete one or many cells.
    * Used to support {@link #deleteAll(byte [], byte [], long)} and deletion of
    * latest cell.
-   * 
    * @param row
    * @param column
    * @param ts Timestamp to start search on.
@@ -1809,7 +1813,7 @@
   private Integer getLock(Integer lockid, byte [] row) 
   throws IOException {
     Integer lid = null;
-    if(lockid == null) {
+    if (lockid == null) {
       lid = obtainRowLock(row);
     } else {
       if(!isRowLocked(lockid)) {

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java?rev=685432&r1=685431&r2=685432&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java Tue Aug 12 21:15:58 2008
@@ -970,52 +970,6 @@
   }
 
   /*
-   * Check if this is cell is deleted.
-   * If a memcache and a deletes, check key does not have an entry filled.
-   * Otherwise, check value is not the <code>HGlobals.deleteBytes</code> value.
-   * If passed value IS deleteBytes, then it is added to the passed
-   * deletes map.
-   * @param hsk
-   * @param value
-   * @param checkMemcache true if the memcache should be consulted
-   * @param deletes Map keyed by column with a value of timestamp. Can be null.
-   * If non-null and passed value is HGlobals.deleteBytes, then we add to this
-   * map.
-   * @return True if this is a deleted cell.  Adds the passed deletes map if
-   * passed value is HGlobals.deleteBytes.
-  */
-  private boolean isDeleted(final HStoreKey hsk, final byte [] value,
-      final boolean checkMemcache, final Map<byte [], List<Long>> deletes) {
-    if (checkMemcache && memcache.isDeleted(hsk)) {
-      return true;
-    }
-    List<Long> timestamps =
-      (deletes == null) ? null: deletes.get(hsk.getColumn());
-    if (timestamps != null &&
-        timestamps.contains(Long.valueOf(hsk.getTimestamp()))) {
-      return true;
-    }
-    if (value == null) {
-      // If a null value, shouldn't be in here.  Mark it as deleted cell.
-      return true;
-    }
-    if (!HLogEdit.isDeleted(value)) {
-      return false;
-    }
-    // Cell has delete value.  Save it into deletes.
-    if (deletes != null) {
-      if (timestamps == null) {
-        timestamps = new ArrayList<Long>();
-        deletes.put(hsk.getColumn(), timestamps);
-      }
-      // We know its not already in the deletes array else we'd have returned
-      // earlier so no need to test if timestamps already has this value.
-      timestamps.add(Long.valueOf(hsk.getTimestamp()));
-    }
-    return true;
-  }
-  
-  /*
    * It's assumed that the compactLock  will be acquired prior to calling this 
    * method!  Otherwise, it is not thread-safe!
    *
@@ -1223,6 +1177,19 @@
       toArray(new MapFile.Reader[this.readers.size()]);
   }
 
+  /*
+   * @param wantedVersions How many versions were asked for.
+   * @return wantedVersions or this families' MAX_VERSIONS.
+   */
+  private int versionsToReturn(final int wantedVersions) {
+    if (wantedVersions <= 0) {
+      throw new IllegalArgumentException("Number of versions must be > 0");
+    }
+    // Make sure we do not return more than maximum versions for this store.
+    return wantedVersions > this.family.getMaxVersions()?
+      this.family.getMaxVersions(): wantedVersions;
+  }
+  
   /**
    * Get the value for the indicated HStoreKey.  Grab the target value and the 
    * previous <code>numVersions - 1</code> values, as well.
@@ -1233,37 +1200,38 @@
    * @return values for the specified versions
    * @throws IOException
    */
-  Cell[] get(HStoreKey key, int numVersions) throws IOException {
-    if (numVersions <= 0) {
-      throw new IllegalArgumentException("Number of versions must be > 0");
-    }
-    
-    this.lock.readLock().lock();
+  Cell[] get(final HStoreKey key, final int numVersions) throws IOException {
+    // This code below is very close to the body of the getKeys method.  Any 
+    // changes in the flow below should also probably be done in getKeys.
+    // TODO: Refactor so same code used.
     long now = System.currentTimeMillis();
+    int versions = versionsToReturn(numVersions);
+    // Keep a list of deleted cell keys.  We need this because as we go through
+    // the memcache and store files, the cell with the delete marker may be
+    // in one store and the old non-delete cell value in a later store.
+    // If we don't keep around the fact that the cell was deleted in a newer
+    // record, we end up returning the old value if user is asking for more
+    // than one version. This List of deletes should not be large since we
+    // are only keeping rows and columns that match those set on the get and
+    // which have delete values.  If memory usage becomes an issue, could
+    // redo as bloom filter.
+    Set<HStoreKey> deletes = new HashSet<HStoreKey>();
+    this.lock.readLock().lock();
     try {
       // Check the memcache
-      List<Cell> results = this.memcache.get(key, numVersions);
+      List<Cell> results = this.memcache.get(key, versions, deletes, now);
       // If we got sufficient versions from memcache, return. 
-      if (results.size() == numVersions) {
+      if (results.size() == versions) {
         return results.toArray(new Cell[results.size()]);
       }
-
-      // Keep a list of deleted cell keys.  We need this because as we go through
-      // the store files, the cell with the delete marker may be in one file and
-      // the old non-delete cell value in a later store file. If we don't keep
-      // around the fact that the cell was deleted in a newer record, we end up
-      // returning the old value if user is asking for more than one version.
-      // This List of deletes should not be large since we are only keeping rows
-      // and columns that match those set on the scanner and which have delete
-      // values.  If memory usage becomes an issue, could redo as bloom filter.
-      Map<byte [], List<Long>> deletes =
-        new TreeMap<byte [], List<Long>>(Bytes.BYTES_COMPARATOR);
-      // This code below is very close to the body of the getKeys method.
       MapFile.Reader[] maparray = getReaders();
-      for(int i = maparray.length - 1; i >= 0; i--) {
+      // Returned array is sorted with the most recent addition last.
+      for(int i = maparray.length - 1;
+          i >= 0 && !hasEnoughVersions(versions, results); i--) {
         MapFile.Reader map = maparray[i];
         synchronized(map) {
           map.reset();
+          // Do the priming read
           ImmutableBytesWritable readval = new ImmutableBytesWritable();
           HStoreKey readkey = (HStoreKey)map.getClosest(key, readval);
           if (readkey == null) {
@@ -1276,32 +1244,17 @@
           if (!readkey.matchesRowCol(key)) {
             continue;
           }
-          if (!isDeleted(readkey, readval.get(), true, deletes)) {
-            if (!isExpired(readkey, ttl, now)) {
-              results.add(new Cell(readval.get(), readkey.getTimestamp()));
-            }
-            // Perhaps only one version is wanted.  I could let this
-            // test happen later in the for loop test but it would cost
-            // the allocation of an ImmutableBytesWritable.
-            if (hasEnoughVersions(numVersions, results)) {
-              break;
-            }
+          if (get(readkey, readval.get(), versions, results, deletes, now)) {
+            break;
           }
           for (readval = new ImmutableBytesWritable();
-              map.next(readkey, readval) &&
-              readkey.matchesRowCol(key) &&
-              !hasEnoughVersions(numVersions, results);
+              map.next(readkey, readval) && readkey.matchesRowCol(key);
               readval = new ImmutableBytesWritable()) {
-            if (!isDeleted(readkey, readval.get(), true, deletes)) {
-              if (!isExpired(readkey, ttl, now)) {
-                results.add(new Cell(readval.get(), readkey.getTimestamp()));
-              }
+            if (get(readkey, readval.get(), versions, results, deletes, now)) {
+              break;
             }
           }
         }
-        if (hasEnoughVersions(numVersions, results)) {
-          break;
-        }
       }
       return results.size() == 0 ?
         null : results.toArray(new Cell[results.size()]);
@@ -1310,54 +1263,83 @@
     }
   }
   
-  /**
+  /*
+   * Look at one key/value.
+   * @param key
+   * @param value
+   * @param versions
+   * @param results
+   * @param deletes
+   * @param now
+   * @return True if we have enough versions.
+   */
+  private boolean get(final HStoreKey key, final byte [] value,
+      final int versions, final List<Cell> results,
+      final Set<HStoreKey> deletes, final long now) {
+    if (!HLogEdit.isDeleted(value)) {
+      if (notExpiredAndNotInDeletes(this.ttl, key, now, deletes)) {
+        results.add(new Cell(value, key.getTimestamp()));
+      }
+      // Perhaps only one version is wanted.  I could let this
+      // test happen later in the for loop test but it would cost
+      // the allocation of an ImmutableBytesWritable.
+      if (hasEnoughVersions(versions, results)) {
+        return true;
+      }
+    } else {
+      // Is this copy necessary?
+      deletes.add(new HStoreKey(key));
+    }
+    return false;
+  }
+
+  /*
    * Small method to check if we are over the max number of versions
    * or we acheived this family max versions. 
    * The later happens when we have the situation described in HBASE-621.
-   * @param numVersions
-   * @param results
+   * @param versions
+   * @param c
    * @return 
    */
-  private boolean hasEnoughVersions(final int numVersions,
-      final List<Cell> results) {
-    return (results.size() >= numVersions || results.size() >= family
-            .getMaxVersions());
+  private boolean hasEnoughVersions(final int versions, final List<Cell> c) {
+    return c.size() >= versions;
   }
 
   /**
    * Get <code>versions</code> keys matching the origin key's
-   * row/column/timestamp and those of an older vintage
+   * row/column/timestamp and those of an older vintage.
    * Default access so can be accessed out of {@link HRegionServer}.
    * @param origin Where to start searching.
-   * @param versions How many versions to return. Pass
-   * {@link HConstants.ALL_VERSIONS} to retrieve all. Versions will include
-   * size of passed <code>allKeys</code> in its count.
-   * @param allKeys List of keys prepopulated by keys we found in memcache.
-   * This method returns this passed list with all matching keys found in
-   * stores appended.
-   * @return The passed <code>allKeys</code> with <code>versions</code> of
-   * matching keys found in store files appended.
+   * @param numVersions How many versions to return. Pass
+   * {@link HConstants.ALL_VERSIONS} to retrieve all.
+   * @param now
+   * @return Matching keys.
    * @throws IOException
    */
-  List<HStoreKey> getKeys(final HStoreKey origin, final int versions)
+  List<HStoreKey> getKeys(final HStoreKey origin, final int versions,
+    final long now)
   throws IOException {
-      
-    List<HStoreKey> keys = this.memcache.getKeys(origin, versions);
-    if (keys.size() >= versions) {
-      return keys;
-    }
-    
-    // This code below is very close to the body of the get method.
+    // This code below is very close to the body of the get method.  Any 
+    // changes in the flow below should also probably be done in get.  TODO:
+    // Refactor so same code used.
+    Set<HStoreKey> deletes = new HashSet<HStoreKey>();
     this.lock.readLock().lock();
-    long now = System.currentTimeMillis();
     try {
+      // Check the memcache
+      List<HStoreKey> keys =
+        this.memcache.getKeys(origin, versions, deletes, now);
+      // If we got sufficient versions from memcache, return. 
+      if (keys.size() >= versions) {
+        return keys;
+      }
       MapFile.Reader[] maparray = getReaders();
-      for(int i = maparray.length - 1; i >= 0; i--) {
+      // Returned array is sorted with the most recent addition last.
+      for(int i = maparray.length - 1;
+          i >= 0 && keys.size() < versions; i--) {
         MapFile.Reader map = maparray[i];
         synchronized(map) {
           map.reset();
-          
-          // do the priming read
+          // Do the priming read
           ImmutableBytesWritable readval = new ImmutableBytesWritable();
           HStoreKey readkey = (HStoreKey)map.getClosest(origin, readval);
           if (readkey == null) {
@@ -1367,24 +1349,23 @@
             // BEFORE.
             continue;
           }
-          
-          do{
+          do {
             // if the row matches, we might want this one.
             if (rowMatches(origin, readkey)) {
               // if the cell matches, then we definitely want this key.
               if (cellMatches(origin, readkey)) {
-                // store the key if it isn't deleted or superceeded by what's
+                // Store the key if it isn't deleted or superceeded by what's
                 // in the memcache
-                if (!isDeleted(readkey, readval.get(), false, null) &&
-                    !keys.contains(readkey)) {
-                  if (!isExpired(readkey, ttl, now)) {
+                if (!HLogEdit.isDeleted(readval.get())) {
+                  if (notExpiredAndNotInDeletes(this.ttl, readkey, now, deletes)) {
                     keys.add(new HStoreKey(readkey));
                   }
-
-                  // if we've collected enough versions, then exit the loop.
                   if (keys.size() >= versions) {
                     break;
                   }
+                } else {
+                  // Is this copy necessary?
+                  deletes.add(new HStoreKey(readkey));
                 }
               } else {
                 // the cell doesn't match, but there might be more with different
@@ -1398,7 +1379,6 @@
           } while (map.next(readkey, readval)); // advance to the next key
         }
       }
-      
       return keys;
     } finally {
       this.lock.readLock().unlock();
@@ -1446,7 +1426,8 @@
         rowAtOrBeforeFromMapFile(maparray[i], row, candidateKeys, deletes);
       }
       // Return the best key from candidateKeys
-      return candidateKeys.isEmpty()? null: candidateKeys.lastKey().getRow();
+      byte [] result = candidateKeys.isEmpty()? null: candidateKeys.lastKey().getRow();
+      return result;
     } finally {
       this.lock.readLock().unlock();
     }
@@ -1527,10 +1508,11 @@
    */
   static boolean notExpiredAndNotInDeletes(final long ttl,
       final HStoreKey hsk, final long now, final Set<HStoreKey> deletes) {
-    return !isExpired(hsk, ttl, now) && !deletes.contains(hsk);
+    return !isExpired(hsk, ttl, now) &&
+      (deletes == null || !deletes.contains(hsk));
   }
   
-  private static boolean isExpired(final HStoreKey hsk, final long ttl,
+  static boolean isExpired(final HStoreKey hsk, final long ttl,
       final long now) {
     boolean result = ttl != HConstants.FOREVER && now > hsk.getTimestamp() + ttl;
     if (result && LOG.isDebugEnabled()) {

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/Memcache.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/Memcache.java?rev=685432&r1=685431&r2=685432&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/Memcache.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/Memcache.java Tue Aug 12 21:15:58 2008
@@ -52,7 +52,7 @@
 class Memcache {
   private final Log LOG = LogFactory.getLog(this.getClass().getName());
   
-  private long ttl;
+  private final long ttl;
 
   // Note that since these structures are always accessed with a lock held,
   // so no additional synchronization is required.
@@ -70,16 +70,15 @@
   /**
    * Default constructor. Used for tests.
    */
-  public Memcache()
-  {
-    ttl = HConstants.FOREVER;
+  public Memcache() {
+    this.ttl = HConstants.FOREVER;
   }
 
   /**
    * Constructor.
    * @param ttl The TTL for cache entries, in milliseconds.
    */
-  public Memcache(long ttl) {
+  public Memcache(final long ttl) {
     this.ttl = ttl;
   }
 
@@ -180,16 +179,29 @@
    * @return An array of byte arrays ordered by timestamp.
    */
   List<Cell> get(final HStoreKey key, final int numVersions) {
+    return get(key, numVersions, null, System.currentTimeMillis());
+  }
+  
+  /**
+   * Look back through all the backlog TreeMaps to find the target.
+   * @param key
+   * @param numVersions
+   * @param deletes
+   * @param now
+   * @return An array of byte arrays ordered by timestamp.
+   */
+  List<Cell> get(final HStoreKey key, final int numVersions,
+      final Set<HStoreKey> deletes, final long now) {
     this.lock.readLock().lock();
     try {
       List<Cell> results;
-      // The synchronizations here are because internalGet iterates
+      // The synchronizations here are because the below get iterates
       synchronized (this.memcache) {
-        results = internalGet(this.memcache, key, numVersions);
+        results = get(this.memcache, key, numVersions, deletes, now);
       }
       synchronized (this.snapshot) {
         results.addAll(results.size(),
-          internalGet(this.snapshot, key, numVersions - results.size()));
+          get(this.snapshot, key, numVersions - results.size(), deletes, now));
       }
       return results;
     } finally {
@@ -308,10 +320,7 @@
                   now < itKey.getTimestamp() + ttl) {
               results.put(itCol, new Cell(val, itKey.getTimestamp()));
             } else {
-              victims.add(itKey);
-              if (LOG.isDebugEnabled()) {
-                LOG.debug("internalGetFull: " + itKey + ": expired, skipped");
-              }
+              addVictim(victims, itKey);
             }
           }
         }
@@ -399,11 +408,7 @@
               if (deletedOrExpiredRow == null) {
                 deletedOrExpiredRow = new HStoreKey(found_key);
               }
-              victims.add(found_key);
-              if (LOG.isDebugEnabled()) {
-                LOG.debug("internalGetRowKeyAtOrBefore:" + found_key +
-                  " expired, skipped");
-              }
+              addVictim(victims, found_key);
             }
           }
         }
@@ -525,7 +530,7 @@
     return new HStoreKey(key.getRow(), key.getColumn());
   }
   
-  /**
+  /*
    * Examine a single map for the desired key.
    *
    * TODO - This is kinda slow.  We need a data structure that allows for 
@@ -534,42 +539,51 @@
    * @param map
    * @param key
    * @param numVersions
+   * @param deletes
    * @return Ordered list of items found in passed <code>map</code>.  If no
    * matching values, returns an empty list (does not return null).
    */
-  private ArrayList<Cell> internalGet(
-      final SortedMap<HStoreKey, byte []> map, final HStoreKey key,
-      final int numVersions) {
-
+  private ArrayList<Cell> get(final SortedMap<HStoreKey, byte []> map,
+      final HStoreKey key, final int numVersions, final Set<HStoreKey> deletes,
+      final long now) {
     ArrayList<Cell> result = new ArrayList<Cell>();
-    
-    // TODO: If get is of a particular version -- numVersions == 1 -- we
-    // should be able to avoid all of the tailmap creations and iterations
-    // below.
-    long now = System.currentTimeMillis();
     List<HStoreKey> victims = new ArrayList<HStoreKey>();
-    SortedMap<HStoreKey, byte []> tailMap = map.tailMap(key);
-    for (Map.Entry<HStoreKey, byte []> es: tailMap.entrySet()) {
-      HStoreKey itKey = es.getKey();
-      if (itKey.matchesRowCol(key)) {
-        if (!HLogEdit.isDeleted(es.getValue())) { 
-          // Filter out expired results
-          if (ttl == HConstants.FOREVER ||
-                now < itKey.getTimestamp() + ttl) {
-            result.add(new Cell(tailMap.get(itKey), itKey.getTimestamp()));
-            if (numVersions > 0 && result.size() >= numVersions) {
-              break;
+    // Handle special case where only one version wanted.
+    if (numVersions == 1) {
+      byte [] value = map.get(key);
+      if (!isDeleted(value)) {
+        // Filter out expired results
+        if (HStore.notExpiredAndNotInDeletes(ttl, key, now, deletes)) {
+          result.add(new Cell(value, key.getTimestamp()));
+        } else {
+          addVictim(victims, key);
+        }
+      } else {
+        deletes.add(key);
+      }
+    } else {
+      SortedMap<HStoreKey, byte[]> tailMap = map.tailMap(key);
+      for (Map.Entry<HStoreKey, byte[]> es : tailMap.entrySet()) {
+        HStoreKey itKey = es.getKey();
+        if (itKey.matchesRowCol(key)) {
+          if (!isDeleted(es.getValue())) {
+            // Filter out expired results
+            if (HStore.notExpiredAndNotInDeletes(ttl, itKey, now, deletes)) {
+              result.add(new Cell(tailMap.get(itKey), itKey.getTimestamp()));
+              if (numVersions > 0 && result.size() >= numVersions) {
+                break;
+              }
+            } else {
+              addVictim(victims, itKey);
             }
           } else {
-            victims.add(itKey);
-            if (LOG.isDebugEnabled()) {
-              LOG.debug("internalGet: " + itKey + ": expired, skipped");
-            }
+            // Cell holds a delete value.
+            deletes.add(itKey);
           }
+        } else {
+          // By L.N. HBASE-684, map is sorted, so we can't find match any more.
+          break;
         }
-      } else {
-        // By L.N. HBASE-684, map is sorted, so we can't find match any more.
-        break;
       }
     }
     // Remove expired victims from the map.
@@ -579,30 +593,43 @@
     return result;
   }
 
+  /*
+   * Add <code>key</code> to the list of 'victims'.
+   * @param victims
+   * @param key
+   */
+  private void addVictim(final List<HStoreKey> victims, final HStoreKey key) {
+    victims.add(key);
+    if (LOG.isDebugEnabled()) {
+      LOG.debug(key + ": expired or in deletes, skipped");
+    }
+  }
+
   /**
    * Get <code>versions</code> keys matching the origin key's
-   * row/column/timestamp and those of an older vintage
-   * Default access so can be accessed out of {@link HRegionServer}.
+   * row/column/timestamp and those of an older vintage.
    * @param origin Where to start searching.
    * @param versions How many versions to return. Pass
    * {@link HConstants.ALL_VERSIONS} to retrieve all.
+   * @param now
+   * @param deletes Accumulating list of deletes
    * @return Ordered list of <code>versions</code> keys going from newest back.
    * @throws IOException
    */
-  List<HStoreKey> getKeys(final HStoreKey origin, final int versions) {
+  List<HStoreKey> getKeys(final HStoreKey origin, final int versions,
+      final Set<HStoreKey> deletes, final long now) {
     this.lock.readLock().lock();
     try {
       List<HStoreKey> results;
       synchronized (memcache) {
-        results = internalGetKeys(this.memcache, origin, versions);
+        results = getKeys(this.memcache, origin, versions, deletes, now);
       }
       synchronized (snapshot) {
-        results.addAll(results.size(), internalGetKeys(snapshot, origin,
+        results.addAll(results.size(), getKeys(snapshot, origin,
             versions == HConstants.ALL_VERSIONS ? versions :
-              (versions - results.size())));
+              (versions - results.size()), deletes, now));
       }
       return results;
-      
     } finally {
       this.lock.readLock().unlock();
     }
@@ -612,21 +639,20 @@
    * @param origin Where to start searching.
    * @param versions How many versions to return. Pass
    * {@link HConstants.ALL_VERSIONS} to retrieve all.
+   * @param now
+   * @param deletes
    * @return List of all keys that are of the same row and column and of
    * equal or older timestamp.  If no keys, returns an empty List. Does not
    * return null.
    */
-  private List<HStoreKey> internalGetKeys(
-      final SortedMap<HStoreKey, byte []> map, final HStoreKey origin,
-      final int versions) {
-
-    long now = System.currentTimeMillis();
+  private List<HStoreKey> getKeys(final SortedMap<HStoreKey,
+      byte []> map, final HStoreKey origin, final int versions,
+      final Set<HStoreKey> deletes, final long now) {
     List<HStoreKey> result = new ArrayList<HStoreKey>();
     List<HStoreKey> victims = new ArrayList<HStoreKey>();
     SortedMap<HStoreKey, byte []> tailMap = map.tailMap(origin);
     for (Map.Entry<HStoreKey, byte []> es: tailMap.entrySet()) {
       HStoreKey key = es.getKey();
-  
       // if there's no column name, then compare rows and timestamps
       if (origin.getColumn() != null && origin.getColumn().length == 0) {
         // if the current and origin row don't match, then we can jump
@@ -639,38 +665,34 @@
         if (key.getTimestamp() > origin.getTimestamp()) {
           continue;
         }
-      }
-      else{ // compare rows and columns
+      } else { // compare rows and columns
         // if the key doesn't match the row and column, then we're done, since 
         // all the cells are ordered.
         if (!key.matchesRowCol(origin)) {
           break;
         }
       }
-      if (!HLogEdit.isDeleted(es.getValue())) {
-        if (ttl == HConstants.FOREVER || now < key.getTimestamp() + ttl) {
+      if (!isDeleted(es.getValue())) {
+        if (HStore.notExpiredAndNotInDeletes(this.ttl, key, now, deletes)) {
           result.add(key);
-        } else {
-          victims.add(key);
-          if (LOG.isDebugEnabled()) {
-            LOG.debug("internalGetKeys: " + key + ": expired, skipped");
+          if (versions > 0 && result.size() >= versions) {
+            break;
           }
+        } else {
+          addVictim(victims, key);
         }
-        if (result.size() >= versions) {
-          // We have enough results.  Return.
-          break;
-        }
+      } else {
+        // Delete
+        deletes.add(key);
       }
     }
-
     // Clean expired victims from the map.
-    for (HStoreKey v: victims)
+    for (HStoreKey v: victims) {
        map.remove(v);
-
+    }
     return result;
   }
 
-
   /**
    * @param key
    * @return True if an entry and its content is {@link HGlobals.deleteBytes}.
@@ -678,7 +700,16 @@
    * the cell has been deleted.
    */
   boolean isDeleted(final HStoreKey key) {
-    return HLogEdit.isDeleted(this.memcache.get(key));
+    return isDeleted(this.memcache.get(key)) ||
+      (this.snapshot != null && isDeleted(this.snapshot.get(key)));
+  }
+  
+  /*
+   * @param b Cell value.
+   * @return True if this is a delete value.
+   */
+  private boolean isDeleted(final byte [] b) {
+    return HLogEdit.isDeleted(b);
   }
 
   /**

Modified: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestGet2.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestGet2.java?rev=685432&r1=685431&r2=685432&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestGet2.java (original)
+++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestGet2.java Tue Aug 12 21:15:58 2008
@@ -27,6 +27,7 @@
 
 import org.apache.hadoop.dfs.MiniDFSCluster;
 import org.apache.hadoop.hbase.HBaseTestCase;
+import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HStoreKey;
 import org.apache.hadoop.hbase.HTableDescriptor;
@@ -61,6 +62,76 @@
   }
 
   /**
+   * Test for HBASE-808 and HBASE-809.
+   * @throws Exception
+   */
+  public void testMaxVersionsAndDeleting() throws Exception {
+    HRegion region = null;
+    try {
+      HTableDescriptor htd = createTableDescriptor(getName());
+      region = createNewHRegion(htd, null, null);
+      
+      byte [] column = COLUMNS[0];
+      for (int i = 0; i < 100; i++) {
+        addToRow(region, T00, column, i, T00.getBytes());
+      }
+      checkVersions(region, T00, column);
+      // Flush and retry.
+      region.flushcache();
+      checkVersions(region, T00, column);
+      
+      // Now delete all then retry
+      region.deleteAll(Bytes.toBytes(T00), System.currentTimeMillis(), null);
+      Cell [] cells = region.get(Bytes.toBytes(T00), column,
+        HColumnDescriptor.DEFAULT_VERSIONS);
+      assertTrue(cells == null);
+      region.flushcache();
+      cells = region.get(Bytes.toBytes(T00), column,
+          HColumnDescriptor.DEFAULT_VERSIONS);
+      assertTrue(cells == null);
+      
+      // Now add back the rows
+      for (int i = 0; i < 100; i++) {
+        addToRow(region, T00, column, i, T00.getBytes());
+      }
+      // Run same verifications.
+      checkVersions(region, T00, column);
+      // Flush and retry.
+      region.flushcache();
+      checkVersions(region, T00, column);
+    } finally {
+      if (region != null) {
+        try {
+          region.close();
+        } catch (Exception e) {
+          e.printStackTrace();
+        }
+        region.getLog().closeAndDelete();
+      }
+    }
+  }
+  
+  private void addToRow(final HRegion r, final String row, final byte [] column,
+      final long ts, final byte [] bytes)
+  throws IOException {
+    BatchUpdate batchUpdate = new BatchUpdate(row, ts);
+    batchUpdate.put(column, bytes);
+    r.batchUpdate(batchUpdate, null);
+  }
+
+  private void checkVersions(final HRegion region, final String row,
+      final byte [] column)
+  throws IOException {
+    byte [] r = Bytes.toBytes(row);
+    Cell [] cells = region.get(r, column, 100);
+    assertTrue(cells.length == HColumnDescriptor.DEFAULT_VERSIONS);
+    cells = region.get(r, column, 1);
+    assertTrue(cells.length == 1);
+    cells = region.get(r, column, HConstants.ALL_VERSIONS);
+    assertTrue(cells.length == HColumnDescriptor.DEFAULT_VERSIONS);
+  }
+  
+  /**
    * Test file of multiple deletes and with deletes as final key.
    * @see <a href="https://issues.apache.org/jira/browse/HBASE-751">HBASE-751</a>
    */