You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/26 18:04:05 UTC
svn commit: r1189282 - in /hbase/branches/0.92: CHANGES.txt
src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java
Author: nspiegelberg
Date: Wed Oct 26 16:04:05 2011
New Revision: 1189282
URL: http://svn.apache.org/viewvc?rev=1189282&view=rev
Log:
HBASE-4645 Edits Log recovery losing data across column families
Modified:
hbase/branches/0.92/CHANGES.txt
hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java
Modified: hbase/branches/0.92/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/CHANGES.txt?rev=1189282&r1=1189281&r2=1189282&view=diff
==============================================================================
--- hbase/branches/0.92/CHANGES.txt (original)
+++ hbase/branches/0.92/CHANGES.txt Wed Oct 26 16:04:05 2011
@@ -372,6 +372,7 @@ Release 0.92.0 - Unreleased
HBASE-4670 Fix javadoc warnings
HBASE-4367 Deadlock in MemStore flusher due to JDK internally synchronizing
on current thread
+ HBASE-4645 Edits Log recovery losing data across column families
TESTS
HBASE-4492 TestRollingRestart fails intermittently
Modified: hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java?rev=1189282&r1=1189281&r2=1189282&view=diff
==============================================================================
--- hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java (original)
+++ hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java Wed Oct 26 16:04:05 2011
@@ -396,20 +396,39 @@ public class HRegion implements HeapSize
status.setStatus("Cleaning up temporary data from old regions");
cleanupTmpDir();
- // Load in all the HStores. Get maximum seqid.
+ // Load in all the HStores.
+ // Get minimum of the maxSeqId across all the store.
+ //
+ // Context: During replay we want to ensure that we do not lose any data. So, we
+ // have to be conservative in how we replay logs. For each store, we calculate
+ // the maxSeqId up to which the store was flushed. But, since different stores
+ // could have a different maxSeqId, we choose the
+ // minimum across all the stores.
+ // This could potentially result in duplication of data for stores that are ahead
+ // of others. ColumnTrackers in the ScanQueryMatchers do the de-duplication, so we
+ // do not have to worry.
+ // TODO: If there is a store that was never flushed in a long time, we could replay
+ // a lot of data. Currently, this is not a problem because we flush all the stores at
+ // the same time. If we move to per-cf flushing, we might want to revisit this and send
+ // in a vector of maxSeqIds instead of sending in a single number, which has to be the
+ // min across all the max.
+ long minSeqId = -1;
long maxSeqId = -1;
for (HColumnDescriptor c : this.htableDescriptor.getFamilies()) {
status.setStatus("Instantiating store for column family " + c);
Store store = instantiateHStore(this.tableDir, c);
this.stores.put(c.getName(), store);
long storeSeqId = store.getMaxSequenceId();
- if (storeSeqId > maxSeqId) {
+ if (minSeqId == -1 || storeSeqId < minSeqId) {
+ minSeqId = storeSeqId;
+ }
+ if (maxSeqId == -1 || storeSeqId > maxSeqId) {
maxSeqId = storeSeqId;
}
}
// Recover any edits if available.
- maxSeqId = replayRecoveredEditsIfAny(
- this.regiondir, maxSeqId, reporter, status);
+ maxSeqId = Math.max(maxSeqId, replayRecoveredEditsIfAny(
+ this.regiondir, minSeqId, reporter, status));
status.setStatus("Cleaning up detritus from prior splits");
// Get rid of any splits or merges that were lost in-progress. Clean out
Modified: hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java?rev=1189282&r1=1189281&r2=1189282&view=diff
==============================================================================
--- hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java (original)
+++ hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java Wed Oct 26 16:04:05 2011
@@ -276,24 +276,20 @@ public class TestWALReplay {
Result result = region.get(g, null);
assertEquals(countPerFamily * htd.getFamilies().size(),
result.size());
- // Now close the region, split the log, reopen the region and assert that
- // replay of log has no effect, that our seqids are calculated correctly so
+ // Now close the region (without flush), split the log, reopen the region and assert that
+ // replay of log has the correct effect, that our seqids are calculated correctly so
// all edits in logs are seen as 'stale'/old.
- region.close();
+ region.close(true);
wal.close();
runWALSplit(this.conf);
HLog wal2 = createWAL(this.conf);
- HRegion region2 = new HRegion(basedir, wal2, this.fs, this.conf, hri, htd, null) {
- @Override
- protected boolean restoreEdit(Store s, KeyValue kv) {
- super.restoreEdit(s, kv);
- throw new RuntimeException("Called when it should not have been!");
- }
- };
+ HRegion region2 = new HRegion(basedir, wal2, this.fs, this.conf, hri, htd, null);
long seqid2 = region2.initialize();
// HRegionServer usually does this. It knows the largest seqid across all regions.
wal2.setSequenceNumber(seqid2);
assertTrue(seqid + result.size() < seqid2);
+ final Result result1b = region2.get(g, null);
+ assertEquals(result.size(), result1b.size());
// Next test. Add more edits, then 'crash' this region by stealing its wal
// out from under it and assert that replay of the log adds the edits back
@@ -344,6 +340,88 @@ public class TestWALReplay {
}
/**
+ * Test that we recover correctly when there is a failure in between the
+ * flushes. i.e. Some stores got flushed but others did not.
+ *
+ * Unfortunately, there is no easy hook to flush at a store level. The way
+ * we get around this is by flushing at the region level, and then deleting
+ * the recently flushed store file for one of the Stores. This would put us
+ * back in the situation where all but that store got flushed and the region
+ * died.
+ *
+ * We restart Region again, and verify that the edits were replayed.
+ *
+ * @throws IOException
+ * @throws IllegalAccessException
+ * @throws NoSuchFieldException
+ * @throws IllegalArgumentException
+ * @throws SecurityException
+ */
+ @Test
+ public void testReplayEditsAfterPartialFlush()
+ throws IOException, SecurityException, IllegalArgumentException,
+ NoSuchFieldException, IllegalAccessException, InterruptedException {
+ final String tableNameStr = "testReplayEditsWrittenViaHRegion";
+ final HRegionInfo hri = createBasic3FamilyHRegionInfo(tableNameStr);
+ final Path basedir = new Path(this.hbaseRootDir, tableNameStr);
+ deleteDir(basedir);
+ final byte[] rowName = Bytes.toBytes(tableNameStr);
+ final int countPerFamily = 10;
+ final HTableDescriptor htd = createBasic3FamilyHTD(tableNameStr);
+ HRegion region3 = HRegion.createHRegion(hri,
+ hbaseRootDir, this.conf, htd);
+
+ // Write countPerFamily edits into the three families. Do a flush on one
+ // of the families during the load of edits so its seqid is not same as
+ // others to test we do right thing when different seqids.
+ HLog wal = createWAL(this.conf);
+ HRegion region = new HRegion(basedir, wal, this.fs, this.conf, hri, htd, null);
+ long seqid = region.initialize();
+ // HRegionServer usually does this. It knows the largest seqid across all regions.
+ wal.setSequenceNumber(seqid);
+ for (HColumnDescriptor hcd: htd.getFamilies()) {
+ addRegionEdits(rowName, hcd.getName(), countPerFamily, this.ee, region, "x");
+ }
+
+ // Now assert edits made it in.
+ final Get g = new Get(rowName);
+ Result result = region.get(g, null);
+ assertEquals(countPerFamily * htd.getFamilies().size(),
+ result.size());
+
+ // Let us flush the region
+ region.flushcache();
+ region.close(true);
+ wal.close();
+
+ // delete the store files in the second column family to simulate a failure
+ // in between the flushcache();
+ // we have 3 families. killing the middle one ensures that taking the maximum
+ // will make us fail.
+ int cf_count = 0;
+ for (HColumnDescriptor hcd: htd.getFamilies()) {
+ cf_count++;
+ if (cf_count == 2) {
+ this.fs.delete(new Path(region.getRegionDir(), Bytes.toString(hcd.getName()))
+ , true);
+ }
+ }
+
+
+ // Let us try to split and recover
+ runWALSplit(this.conf);
+ HLog wal2 = createWAL(this.conf);
+ HRegion region2 = new HRegion(basedir, wal2, this.fs, this.conf, hri, htd, null);
+ long seqid2 = region2.initialize();
+ // HRegionServer usually does this. It knows the largest seqid across all regions.
+ wal2.setSequenceNumber(seqid2);
+ assertTrue(seqid + result.size() < seqid2);
+
+ final Result result1b = region2.get(g, null);
+ assertEquals(result.size(), result1b.size());
+ }
+
+ /**
* Create an HRegion with the result of a HLog split and test we only see the
* good edits
* @throws Exception