Posted to commits@hbase.apache.org by ji...@apache.org on 2008/06/21 02:37:49 UTC

svn commit: r670104 - in /hadoop/hbase/branches/0.1: CHANGES.txt src/java/org/apache/hadoop/hbase/HStore.java src/test/org/apache/hadoop/hbase/TestScanMultipleVersions.java

Author: jimk
Date: Fri Jun 20 17:37:49 2008
New Revision: 670104

URL: http://svn.apache.org/viewvc?rev=670104&view=rev
Log:
HBASE-613   Timestamp-anchored scanning fails to find all records
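
The fix below teaches the store scanner to advance past versions that are newer
than the scan's anchor timestamp before judging a row, since versions of a cell
are sorted newest-first. A minimal, self-contained sketch of that selection rule
(illustrative only; the class and method names here are hypothetical, not the
HStore API):

    public class TimestampAnchorExample {
      // Version timestamps arrive sorted descending (newest first). Return the
      // newest timestamp that is <= the requested anchor, or -1 if none qualifies.
      static long pickVersion(long[] timestampsDesc, long anchor) {
        for (long ts : timestampsDesc) {
          if (ts <= anchor) {
            return ts;
          }
          // ts > anchor: this version is too new; keep looking for an older one.
        }
        return -1L;
      }

      public static void main(String[] args) {
        long[] versions = { 1000L, 100L };                  // two versions per row, as in the new test
        System.out.println(pickVersion(versions, 10000L));  // 1000 (test case 2)
        System.out.println(pickVersion(versions, 500L));    // 100  (test case 4)
      }
    }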

Added:
    hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/TestScanMultipleVersions.java
Modified:
    hadoop/hbase/branches/0.1/CHANGES.txt
    hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HStore.java

Modified: hadoop/hbase/branches/0.1/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/CHANGES.txt?rev=670104&r1=670103&r2=670104&view=diff
==============================================================================
--- hadoop/hbase/branches/0.1/CHANGES.txt (original)
+++ hadoop/hbase/branches/0.1/CHANGES.txt Fri Jun 20 17:37:49 2008
@@ -23,7 +23,7 @@
                reading performance after break it (LN via Stack)
    HBASE-686   MemcacheScanner didn't return the first row(if it exists),
                because HScannerInterface's output incorrect (LN via Jim Kellerman)
-
+   HBASE-613   Timestamp-anchored scanning fails to find all records
 
 Release 0.1.2 - 05/13/2008
 

Modified: hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HStore.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HStore.java?rev=670104&r1=670103&r2=670104&view=diff
==============================================================================
--- hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HStore.java (original)
+++ hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HStore.java Fri Jun 20 17:37:49 2008
@@ -271,36 +271,49 @@
      * The returned object should map column names to byte arrays (byte[]).
      * @param key
      * @param results
+     * @return most recent timestamp found
      */
-    void getFull(HStoreKey key, Map<Text, Long> deletes, 
+    long getFull(HStoreKey key, Map<Text, Long> deletes, 
       SortedMap<Text, byte[]> results) {
+      long rowtime = -1L;
       
       this.mc_lock.readLock().lock();
       try {
         synchronized (mc) {
-          internalGetFull(mc, key, deletes, results);
+          long ts = internalGetFull(mc, key, deletes, results);
+          if (ts != HConstants.LATEST_TIMESTAMP && ts > rowtime) {
+            rowtime = ts;
+          }
         }
         synchronized (snapshot) {
-          internalGetFull(snapshot, key, deletes, results);
+          long ts = internalGetFull(snapshot, key, deletes, results);
+          if (ts != HConstants.LATEST_TIMESTAMP && ts > rowtime) {
+            rowtime = ts;
+          }
         }
-
+        return rowtime;
       } finally {
         this.mc_lock.readLock().unlock();
       }
     }
 
-    private void internalGetFull(SortedMap<HStoreKey, byte []> map, HStoreKey key, 
+    private long internalGetFull(SortedMap<HStoreKey, byte []> map, HStoreKey key, 
       Map<Text, Long> deletes, SortedMap<Text, byte []> results) {
 
       if (map.isEmpty() || key == null) {
-        return;
+        return -1L;
       }
 
+      long rowtime = -1L;
       SortedMap<HStoreKey, byte []> tailMap = map.tailMap(key);
       for (Map.Entry<HStoreKey, byte []> es: tailMap.entrySet()) {
         HStoreKey itKey = es.getKey();
         Text itCol = itKey.getColumn();
         if (results.get(itCol) == null && key.matchesWithoutColumn(itKey)) {
+          if (itKey.getTimestamp() != HConstants.LATEST_TIMESTAMP &&
+              itKey.getTimestamp() > rowtime) {
+            rowtime = itKey.getTimestamp();
+          }
           byte [] val = tailMap.get(itKey);
 
           if (HLogEdit.isDeleted(val)) {
@@ -316,6 +329,7 @@
           break;
         }
       }
+      return rowtime;
     }
 
     /**
@@ -631,6 +645,7 @@
        if (results.size() > 0) {
          results.clear();
        }
+       long ts = -1L;
        while (results.size() <= 0 && this.currentRow != null) {
          if (deletes.size() > 0) {
            deletes.clear();
@@ -640,7 +655,7 @@
          }
          key.setRow(this.currentRow);
          key.setVersion(this.timestamp);
-         getFull(key, deletes, rowResults);
+         ts = getFull(key, deletes, rowResults);
          for (Text column: deletes.keySet()) {
            rowResults.put(column, HLogEdit.deleteBytes.get());
          }
@@ -662,6 +677,12 @@
          }
          this.currentRow = getNextRow(this.currentRow);
        }
+       // Set the timestamp to the largest one for the row if we would otherwise
+       // return HConstants.LATEST_TIMESTAMP
+       if (key.getTimestamp() == HConstants.LATEST_TIMESTAMP &&
+           ts != -1L) {
+         key.setVersion(ts);
+       }
        return results.size() > 0;
      }
       
@@ -2512,7 +2533,7 @@
      // Advance the readers to the first pos.
      for (i = 0; i < sfsReaders.length; i++) {
        keys[i] = new HStoreKey();
-       if (firstRow.getLength() != 0) {
+       if (firstRow != null && firstRow.getLength() != 0) {
          if (findFirstRow(i, firstRow)) {
            continue;
          }
@@ -2561,7 +2582,6 @@
        if (viableRow.getRow() != null) {
          key.setRow(viableRow.getRow());
          key.setVersion(viableRow.getTimestamp());
-         key.setColumn(new Text(""));
          for (int i = 0; i < keys.length; i++) {
            // Fetch the data
            while ((keys[i] != null)
@@ -2612,9 +2632,21 @@
      Text viableRow = null;
      long viableTimestamp = -1;
      for(int i = 0; i < keys.length; i++) {
+       // The first key that we find that matches may have a timestamp greater
+       // than the one we're looking for. We have to advance to see if there
+       // is an older version present, since timestamps are sorted descending
+       while (keys[i] != null &&
+           keys[i].getTimestamp() > this.timestamp &&
+           columnMatch(i) &&
+           getNext(i)) {
+         if (columnMatch(i)) {
+           break;
+         }
+       }
        if((keys[i] != null)
-           && (columnMatch(i))
-           && (keys[i].getTimestamp() <= this.timestamp)
+           // If we get here and keys[i] is not null, we already know that the
+           // column matches and the timestamp of the row is less than or equal
+           // to this.timestamp, so we do not need to test that here
            && ((viableRow == null)
                || (keys[i].getRow().compareTo(viableRow) < 0)
                || ((keys[i].getRow().compareTo(viableRow) == 0)

Added: hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/TestScanMultipleVersions.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/TestScanMultipleVersions.java?rev=670104&view=auto
==============================================================================
--- hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/TestScanMultipleVersions.java (added)
+++ hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/TestScanMultipleVersions.java Fri Jun 20 17:37:49 2008
@@ -0,0 +1,242 @@
+/**
+ * Copyright 2008 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase;
+
+import java.nio.ByteBuffer;
+import java.util.Random;
+import java.util.TreeMap;
+
+import org.apache.hadoop.dfs.MiniDFSCluster;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+
+import org.apache.hadoop.hbase.io.BatchUpdate;
+
+/**
+ * Regression test for HBASE-613
+ */
+public class TestScanMultipleVersions extends HBaseTestCase {
+  private final Text TABLE_NAME = new Text("TestScanMultipleVersions");
+  private final HRegionInfo[] INFOS = new HRegionInfo[2];
+  private final HRegion[] REGIONS = new HRegion[2];
+  // Row keys
+  private final Text[] ROWS = new Text[] {
+      new Text("row_0200"),
+      new Text("row_0800")
+  };
+
+  private final long[] TIMESTAMPS = new long[] {
+      100L,
+      1000L
+  };
+  private final Random rand = new Random();
+  private HTableDescriptor desc = null;
+  private Path rootdir = null;
+  private MiniDFSCluster dfsCluster = null;
+
+  /** {@inheritDoc} */
+  @Override
+  public void setUp() throws Exception {
+    // Create table description
+
+    this.desc = new HTableDescriptor(TABLE_NAME.toString());
+    this.desc.addFamily(new HColumnDescriptor(HConstants.COLUMN_FAMILY_STR));
+
+    // Region 0 will contain the key range [,row_0500)
+    INFOS[0] = new HRegionInfo(this.desc, HConstants.EMPTY_START_ROW,
+        new Text("row_0500"));
+    // Region 1 will contain the key range [row_0500,)
+    INFOS[1] = new HRegionInfo(this.desc, new Text("row_0500"),
+          HConstants.EMPTY_TEXT);
+    
+    // start HDFS
+    dfsCluster = new MiniDFSCluster(conf, 2, true, (String[])null);
+    try {
+      // Set the hbase.rootdir to be the home directory in mini dfs.
+      this.conf.set(HConstants.HBASE_DIR, 
+          dfsCluster.getFileSystem().getHomeDirectory().toString());
+      fs = dfsCluster.getFileSystem();
+      this.rootdir = fs.makeQualified(new Path(conf.get(HConstants.HBASE_DIR)));
+      fs.mkdirs(this.rootdir);
+
+      // Create root region
+      HRegion root = HRegion.createHRegion(HRegionInfo.rootRegionInfo,
+          this.rootdir, this.conf);
+      // Create meta region
+      HRegion meta = HRegion.createHRegion(HRegionInfo.firstMetaRegionInfo,
+          this.rootdir, this.conf);
+      // Insert meta into root region
+      HRegion.addRegionToMETA(root, meta);
+      // Create the regions
+      for (int i = 0; i < REGIONS.length; i++) {
+        REGIONS[i] =
+          HRegion.createHRegion(this.INFOS[i], this.rootdir, this.conf);
+        // Insert data
+        for (int j = 0; j < TIMESTAMPS.length; j++) {
+          BatchUpdate b = new BatchUpdate(rand.nextLong());
+          long id = b.startUpdate(ROWS[i]);
+          b.put(id, HConstants.COLUMN_FAMILY, toBytes(TIMESTAMPS[j]));
+          REGIONS[i].batchUpdate(TIMESTAMPS[j], b);
+        }
+        // Insert the region we created into the meta
+        HRegion.addRegionToMETA(meta, REGIONS[i]);
+        // Close region
+        REGIONS[i].close();
+        REGIONS[i].getLog().closeAndDelete();
+      }
+
+      // Close root and meta regions
+      root.close();
+      root.getLog().closeAndDelete();
+      meta.close();
+      meta.getLog().closeAndDelete();
+      // Call super.setUp() last. Otherwise we get a local file system.
+      super.setUp();
+    } catch (Exception e) {
+      if (dfsCluster != null) {
+        StaticTestEnvironment.shutdownDfs(dfsCluster);
+        dfsCluster = null;
+      }
+      throw e;
+    }
+  }
+
+  /**
+   * @throws Exception
+   */
+  public void testScanMultipleVersions() throws Exception {
+    // Now start HBase
+    MiniHBaseCluster cluster = new MiniHBaseCluster(conf, 1, dfsCluster, false);
+    try {
+      // At this point we have created multiple regions and both HDFS and HBase
+      // are running. There are 5 cases we have to test. Each is described below.
+
+      HTable t = new HTable(conf, TABLE_NAME);
+
+      // Case 1: scan with LATEST_TIMESTAMP. Should get two rows
+
+      int count = 0;
+      HScannerInterface s = t.obtainScanner(HConstants.COLUMN_FAMILY_ARRAY,
+          HConstants.EMPTY_START_ROW);
+      try {
+        HStoreKey key = new HStoreKey();
+        TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>();
+        while (s.next(key, results)) {
+          count += 1;
+        }
+        assertEquals("Number of rows should be 2", 2, count);
+      } finally {
+        s.close();
+      }
+
+      // Case 2: scan with a timestamp greater than the most recent timestamp
+      // (in this case > 1000 and < LATEST_TIMESTAMP). Should get 2 rows.
+
+      count = 0;
+      s = t.obtainScanner(HConstants.COLUMN_FAMILY_ARRAY,
+          HConstants.EMPTY_START_ROW, 10000L);
+      try {
+        HStoreKey key = new HStoreKey();
+        TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>();
+        while (s.next(key, results)) {
+          count += 1;
+        }
+        assertEquals("Number of rows should be 2", 2, count);
+      } finally {
+        s.close();
+      }
+
+      // Case 3: scan with timestamp equal to most recent timestamp
+      // (in this case == 1000). Should get 2 rows.
+
+      count = 0;
+      s = t.obtainScanner(HConstants.COLUMN_FAMILY_ARRAY,
+          HConstants.EMPTY_START_ROW, 1000L);
+      try {
+        HStoreKey key = new HStoreKey();
+        TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>();
+        while (s.next(key, results)) {
+          count += 1;
+        }
+        assertEquals("Number of rows should be 2", 2, count);
+      } finally {
+        s.close();
+      }
+
+      // Case 4: scan with timestamp greater than first timestamp but less than
+      // second timestamp (100 < timestamp < 1000). Should get 2 rows.
+
+      count = 0;
+      s = t.obtainScanner(HConstants.COLUMN_FAMILY_ARRAY,
+          HConstants.EMPTY_START_ROW, 500L);
+      try {
+        HStoreKey key = new HStoreKey();
+        TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>();
+        while (s.next(key, results)) {
+          count += 1;
+        }
+        assertEquals("Number of rows should be 2", 2, count);
+      } finally {
+        s.close();
+      }
+
+      // Case 5: scan with timestamp equal to first timestamp (100)
+      // Should get 2 rows.
+
+      count = 0;
+      s = t.obtainScanner(HConstants.COLUMN_FAMILY_ARRAY,
+          HConstants.EMPTY_START_ROW, 100L);
+      try {
+        HStoreKey key = new HStoreKey();
+        TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>();
+        while (s.next(key, results)) {
+          count += 1;
+        }
+        assertEquals("Number of rows should be 2", 2, count);
+      } finally {
+        s.close();
+      }
+    } finally {
+      cluster.shutdown();
+    }
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public void tearDown() throws Exception {
+    if (dfsCluster != null) {
+      StaticTestEnvironment.shutdownDfs(dfsCluster);
+      dfsCluster = null;
+    }
+    super.tearDown();
+  }
+  
+  /*
+   * Convert a long value to a byte array
+   * @param val
+   * @return the byte array
+   */
+  private static byte[] toBytes(final long val) {
+    ByteBuffer bb = ByteBuffer.allocate(Long.SIZE/Byte.SIZE);
+    bb.putLong(val);
+    return bb.array();
+  }
+}
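
For reference, the 8-byte values written by the toBytes() helper above can be
read back with the matching ByteBuffer call; a small sketch of the inverse
(not part of the committed test):

    // Inverse of toBytes(): reconstruct the long from the stored 8-byte array.
    private static long fromBytes(final byte[] bytes) {
      return ByteBuffer.wrap(bytes).getLong();
    }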