Posted to commits@hbase.apache.org by ji...@apache.org on 2008/06/21 02:37:49 UTC
svn commit: r670104 - in /hadoop/hbase/branches/0.1: CHANGES.txt
src/java/org/apache/hadoop/hbase/HStore.java
src/test/org/apache/hadoop/hbase/TestScanMultipleVersions.java
Author: jimk
Date: Fri Jun 20 17:37:49 2008
New Revision: 670104
URL: http://svn.apache.org/viewvc?rev=670104&view=rev
Log:
HBASE-613 Timestamp-anchored scanning fails to find all records
Added:
hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/TestScanMultipleVersions.java
Modified:
hadoop/hbase/branches/0.1/CHANGES.txt
hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HStore.java
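In short, HBASE-613 covers two related scanner defects. First, a scan opened
at HConstants.LATEST_TIMESTAMP reported LATEST_TIMESTAMP back to the caller
instead of the row's actual modification time; the memcache getFull() path
now returns the most recent concrete timestamp it saw so the scanner can
substitute it. Second, the store-file scanner gave up on a row as soon as the
first matching key was newer than the requested timestamp, even though older
versions, which sort after it in descending timestamp order, might still
qualify; it now advances past the newer versions. A minimal sketch of that
second point, deliberately decoupled from HStore internals (the method name
and arguments are illustrative, not the committed API):

    // Given one row's timestamps sorted descending, find the newest version
    // at or below the requested timestamp. A scanner that stops at the first
    // (newest) key misses the row whenever that key is newer than the target.
    static long newestAtOrBelow(long[] descendingTimestamps, long target) {
      for (long ts : descendingTimestamps) {
        if (ts <= target) {
          return ts;   // first acceptable version, since order is descending
        }
        // ts > target: skip this newer version instead of giving up on the row
      }
      return -1L;      // no version at or below the target
    }

For example, newestAtOrBelow(new long[] {1000L, 100L}, 500L) returns 100L;
the pre-fix behavior of stopping at 1000L would skip the row entirely, which
is exactly what the new test below exercises with timestamps 100 and 1000.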
Modified: hadoop/hbase/branches/0.1/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/CHANGES.txt?rev=670104&r1=670103&r2=670104&view=diff
==============================================================================
--- hadoop/hbase/branches/0.1/CHANGES.txt (original)
+++ hadoop/hbase/branches/0.1/CHANGES.txt Fri Jun 20 17:37:49 2008
@@ -23,7 +23,7 @@
reading performance after break it (LN via Stack)
HBASE-686 MemcacheScanner didn't return the first row(if it exists),
because HScannerInterface's output incorrect (LN via Jim Kellerman)
-
+ HBASE-613 Timestamp-anchored scanning fails to find all records
Release 0.1.2 - 05/13/2008
Modified: hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HStore.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HStore.java?rev=670104&r1=670103&r2=670104&view=diff
==============================================================================
--- hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HStore.java (original)
+++ hadoop/hbase/branches/0.1/src/java/org/apache/hadoop/hbase/HStore.java Fri Jun 20 17:37:49 2008
@@ -271,36 +271,49 @@
* The returned object should map column names to byte arrays (byte[]).
* @param key
* @param results
+ * @return most recent timestamp found
*/
- void getFull(HStoreKey key, Map<Text, Long> deletes,
+ long getFull(HStoreKey key, Map<Text, Long> deletes,
SortedMap<Text, byte[]> results) {
+ long rowtime = -1L;
this.mc_lock.readLock().lock();
try {
synchronized (mc) {
- internalGetFull(mc, key, deletes, results);
+ long ts = internalGetFull(mc, key, deletes, results);
+ if (ts != HConstants.LATEST_TIMESTAMP && ts > rowtime) {
+ rowtime = ts;
+ }
}
synchronized (snapshot) {
- internalGetFull(snapshot, key, deletes, results);
+ long ts = internalGetFull(snapshot, key, deletes, results);
+ if (ts != HConstants.LATEST_TIMESTAMP && ts > rowtime) {
+ rowtime = ts;
+ }
}
-
+ return rowtime;
} finally {
this.mc_lock.readLock().unlock();
}
}
- private void internalGetFull(SortedMap<HStoreKey, byte []> map, HStoreKey key,
+ private long internalGetFull(SortedMap<HStoreKey, byte []> map, HStoreKey key,
Map<Text, Long> deletes, SortedMap<Text, byte []> results) {
if (map.isEmpty() || key == null) {
- return;
+ return -1L;
}
+ long rowtime = -1L;
SortedMap<HStoreKey, byte []> tailMap = map.tailMap(key);
for (Map.Entry<HStoreKey, byte []> es: tailMap.entrySet()) {
HStoreKey itKey = es.getKey();
Text itCol = itKey.getColumn();
if (results.get(itCol) == null && key.matchesWithoutColumn(itKey)) {
+ if (itKey.getTimestamp() != HConstants.LATEST_TIMESTAMP &&
+ itKey.getTimestamp() > rowtime) {
+ rowtime = itKey.getTimestamp();
+ }
byte [] val = tailMap.get(itKey);
if (HLogEdit.isDeleted(val)) {
@@ -316,6 +329,7 @@
break;
}
}
+ return rowtime;
}
/**
@@ -631,6 +645,7 @@
if (results.size() > 0) {
results.clear();
}
+ long ts = -1L;
while (results.size() <= 0 && this.currentRow != null) {
if (deletes.size() > 0) {
deletes.clear();
@@ -640,7 +655,7 @@
}
key.setRow(this.currentRow);
key.setVersion(this.timestamp);
- getFull(key, deletes, rowResults);
+ ts = getFull(key, deletes, rowResults);
for (Text column: deletes.keySet()) {
rowResults.put(column, HLogEdit.deleteBytes.get());
}
@@ -662,6 +677,12 @@
}
this.currentRow = getNextRow(this.currentRow);
}
+ // Set the timestamp to the largest one for the row if we would otherwise
+ // return HConstants.LATEST_TIMESTAMP
+ if (key.getTimestamp() == HConstants.LATEST_TIMESTAMP &&
+ ts != -1L) {
+ key.setVersion(ts);
+ }
return results.size() > 0;
}
@@ -2512,7 +2533,7 @@
// Advance the readers to the first pos.
for (i = 0; i < sfsReaders.length; i++) {
keys[i] = new HStoreKey();
- if (firstRow.getLength() != 0) {
+ if (firstRow != null && firstRow.getLength() != 0) {
if (findFirstRow(i, firstRow)) {
continue;
}
@@ -2561,7 +2582,6 @@
if (viableRow.getRow() != null) {
key.setRow(viableRow.getRow());
key.setVersion(viableRow.getTimestamp());
- key.setColumn(new Text(""));
for (int i = 0; i < keys.length; i++) {
// Fetch the data
while ((keys[i] != null)
@@ -2612,9 +2632,21 @@
Text viableRow = null;
long viableTimestamp = -1;
for(int i = 0; i < keys.length; i++) {
+ // The first key that we find that matches may have a timestamp greater
+ // than the one we're looking for. We have to advance to see if there
+ // is an older version present, since timestamps are sorted descending
+ while (keys[i] != null &&
+ keys[i].getTimestamp() > this.timestamp &&
+ columnMatch(i) &&
+ getNext(i)) {
+ if (columnMatch(i)) {
+ break;
+ }
+ }
if((keys[i] != null)
- && (columnMatch(i))
- && (keys[i].getTimestamp() <= this.timestamp)
+ // If we get here and keys[i] is not null, we already know that the
+ // column matches and the timestamp of the row is less than or equal
+ // to this.timestamp, so we do not need to test that here
&& ((viableRow == null)
|| (keys[i].getRow().compareTo(viableRow) < 0)
|| ((keys[i].getRow().compareTo(viableRow) == 0)
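The other half of the change is the LATEST_TIMESTAMP substitution in the
memcache scanner above: getFull() now reports the newest concrete timestamp
it found for the row, and next() writes it into the returned key when the
caller scanned at HConstants.LATEST_TIMESTAMP (which is Long.MAX_VALUE). A
small sketch of that substitution rule, with illustrative names rather than
the committed API:

    // Report the row's real modification time in place of the
    // LATEST_TIMESTAMP sentinel; pass concrete requests through unchanged.
    static long resolveReportedTimestamp(long requested, long newestSeen) {
      final long LATEST = Long.MAX_VALUE; // stand-in for HConstants.LATEST_TIMESTAMP
      if (requested == LATEST && newestSeen != -1L) {
        return newestSeen;  // e.g. a row last written at 1000L reports 1000L
      }
      return requested;     // newestSeen of -1L means nothing concrete was found
    }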
Added: hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/TestScanMultipleVersions.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/TestScanMultipleVersions.java?rev=670104&view=auto
==============================================================================
--- hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/TestScanMultipleVersions.java (added)
+++ hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/TestScanMultipleVersions.java Fri Jun 20 17:37:49 2008
@@ -0,0 +1,242 @@
+/**
+ * Copyright 2008 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase;
+
+import java.nio.ByteBuffer;
+import java.util.Random;
+import java.util.TreeMap;
+
+import org.apache.hadoop.dfs.MiniDFSCluster;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+
+import org.apache.hadoop.hbase.io.BatchUpdate;
+
+/**
+ * Regression test for HBASE-613
+ */
+public class TestScanMultipleVersions extends HBaseTestCase {
+ private final Text TABLE_NAME = new Text("TestScanMultipleVersions");
+ private final HRegionInfo[] INFOS = new HRegionInfo[2];
+ private final HRegion[] REGIONS = new HRegion[2];
+ // Row keys
+ private final Text[] ROWS = new Text[] {
+ new Text("row_0200"),
+ new Text("row_0800")
+ };
+
+ private final long[] TIMESTAMPS = new long[] {
+ 100L,
+ 1000L
+ };
+ private final Random rand = new Random();
+ private HTableDescriptor desc = null;
+ private Path rootdir = null;
+ private MiniDFSCluster dfsCluster = null;
+
+ /** {@inheritDoc} */
+ @Override
+ public void setUp() throws Exception {
+ // Create table description
+
+ this.desc = new HTableDescriptor(TABLE_NAME.toString());
+ this.desc.addFamily(new HColumnDescriptor(HConstants.COLUMN_FAMILY_STR));
+
+ // Region 0 will contain the key range [,row_0500)
+ INFOS[0] = new HRegionInfo(this.desc, HConstants.EMPTY_START_ROW,
+ new Text("row_0500"));
+ // Region 1 will contain the key range [row_0500,)
+ INFOS[1] = new HRegionInfo(this.desc, new Text("row_0500"),
+ HConstants.EMPTY_TEXT);
+
+ // start HDFS
+ dfsCluster = new MiniDFSCluster(conf, 2, true, (String[])null);
+ try {
+ // Set the hbase.rootdir to be the home directory in mini dfs.
+ this.conf.set(HConstants.HBASE_DIR,
+ dfsCluster.getFileSystem().getHomeDirectory().toString());
+ fs = dfsCluster.getFileSystem();
+ this.rootdir = fs.makeQualified(new Path(conf.get(HConstants.HBASE_DIR)));
+ fs.mkdirs(this.rootdir);
+
+ // Create root region
+ HRegion root = HRegion.createHRegion(HRegionInfo.rootRegionInfo,
+ this.rootdir, this.conf);
+ // Create meta region
+ HRegion meta = HRegion.createHRegion(HRegionInfo.firstMetaRegionInfo,
+ this.rootdir, this.conf);
+ // Insert meta into root region
+ HRegion.addRegionToMETA(root, meta);
+ // Create the regions
+ for (int i = 0; i < REGIONS.length; i++) {
+ REGIONS[i] =
+ HRegion.createHRegion(this.INFOS[i], this.rootdir, this.conf);
+ // Insert data
+ for (int j = 0; j < TIMESTAMPS.length; j++) {
+ BatchUpdate b = new BatchUpdate(rand.nextLong());
+ long id = b.startUpdate(ROWS[i]);
+ b.put(id, HConstants.COLUMN_FAMILY, toBytes(TIMESTAMPS[j]));
+ REGIONS[i].batchUpdate(TIMESTAMPS[j], b);
+ }
+ // Insert the region we created into the meta
+ HRegion.addRegionToMETA(meta, REGIONS[i]);
+ // Close region
+ REGIONS[i].close();
+ REGIONS[i].getLog().closeAndDelete();
+ }
+
+ // Close root and meta regions
+ root.close();
+ root.getLog().closeAndDelete();
+ meta.close();
+ meta.getLog().closeAndDelete();
+ // Call super.setUp() last. Otherwise we get a local file system.
+ super.setUp();
+ } catch (Exception e) {
+ if (dfsCluster != null) {
+ StaticTestEnvironment.shutdownDfs(dfsCluster);
+ dfsCluster = null;
+ }
+ throw e;
+ }
+ }
+
+ /**
+ * @throws Exception
+ */
+ public void testScanMultipleVersions() throws Exception {
+ // Now start HBase
+ MiniHBaseCluster cluster = new MiniHBaseCluster(conf, 1, dfsCluster, false);
+ try {
+ // At this point we have created multiple regions and both HDFS and HBase
+ // are running. There are 5 cases we have to test. Each is described below.
+
+ HTable t = new HTable(conf, TABLE_NAME);
+
+ // Case 1: scan with LATEST_TIMESTAMP. Should get two rows
+
+ int count = 0;
+ HScannerInterface s = t.obtainScanner(HConstants.COLUMN_FAMILY_ARRAY,
+ HConstants.EMPTY_START_ROW);
+ try {
+ HStoreKey key = new HStoreKey();
+ TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>();
+ while (s.next(key, results)) {
+ count += 1;
+ }
+ assertEquals("Number of rows should be 2", 2, count);
+ } finally {
+ s.close();
+ }
+
+ // Case 2: Scan with a timestamp greater than most recent timestamp
+ // (in this case > 1000 and < LATEST_TIMESTAMP). Should get 2 rows.
+
+ count = 0;
+ s = t.obtainScanner(HConstants.COLUMN_FAMILY_ARRAY,
+ HConstants.EMPTY_START_ROW, 10000L);
+ try {
+ HStoreKey key = new HStoreKey();
+ TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>();
+ while (s.next(key, results)) {
+ count += 1;
+ }
+ assertEquals("Number of rows should be 2", 2, count);
+ } finally {
+ s.close();
+ }
+
+ // Case 3: scan with timestamp equal to most recent timestamp
+ // (in this case == 1000). Should get 2 rows.
+
+ count = 0;
+ s = t.obtainScanner(HConstants.COLUMN_FAMILY_ARRAY,
+ HConstants.EMPTY_START_ROW, 1000L);
+ try {
+ HStoreKey key = new HStoreKey();
+ TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>();
+ while (s.next(key, results)) {
+ count += 1;
+ }
+ assertEquals("Number of rows should be 2", 2, count);
+ } finally {
+ s.close();
+ }
+
+ // Case 4: scan with timestamp greater than first timestamp but less than
+ // second timestamp (100 < timestamp < 1000). Should get 2 rows.
+
+ count = 0;
+ s = t.obtainScanner(HConstants.COLUMN_FAMILY_ARRAY,
+ HConstants.EMPTY_START_ROW, 500L);
+ try {
+ HStoreKey key = new HStoreKey();
+ TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>();
+ while (s.next(key, results)) {
+ count += 1;
+ }
+ assertEquals("Number of rows should be 2", 2, count);
+ } finally {
+ s.close();
+ }
+
+ // Case 5: scan with timestamp equal to first timestamp (100)
+ // Should get 2 rows.
+
+ count = 0;
+ s = t.obtainScanner(HConstants.COLUMN_FAMILY_ARRAY,
+ HConstants.EMPTY_START_ROW, 100L);
+ try {
+ HStoreKey key = new HStoreKey();
+ TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>();
+ while (s.next(key, results)) {
+ count += 1;
+ }
+ assertEquals("Number of rows should be 2", 2, count);
+ } finally {
+ s.close();
+ }
+ } finally {
+ cluster.shutdown();
+ }
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ public void tearDown() throws Exception {
+ if (dfsCluster != null) {
+ StaticTestEnvironment.shutdownDfs(dfsCluster);
+ dfsCluster = null;
+ }
+ super.tearDown();
+ }
+
+ /*
+ * Convert a long value to a byte array
+ * @param val
+ * @return the byte array
+ */
+ private static byte[] toBytes(final long val) {
+ ByteBuffer bb = ByteBuffer.allocate(Long.SIZE/Byte.SIZE);
+ bb.putLong(val);
+ return bb.array();
+ }
+}
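For completeness, a decoding counterpart to the toBytes() helper above
(hypothetical, not part of this commit) would let an extended version of the
test assert the stored values rather than only counting rows:

    // Hypothetical inverse of toBytes(): recover the long written by putLong.
    private static long fromBytes(final byte[] bytes) {
      return ByteBuffer.wrap(bytes).getLong();
    }

with, for instance, assertEquals(TIMESTAMPS[j], fromBytes(value)) against the
cell returned for each row.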