You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2011/07/14 02:27:15 UTC
svn commit: r1146526 - in /hbase/trunk: ./
src/main/java/org/apache/hadoop/hbase/master/
src/main/java/org/apache/hadoop/hbase/master/handler/
src/main/java/org/apache/hadoop/hbase/regionserver/
src/test/java/org/apache/hadoop/hbase/catalog/ src/test/j...
Author: stack
Date: Thu Jul 14 00:27:14 2011
New Revision: 1146526
URL: http://svn.apache.org/viewvc?rev=1146526&view=rev
Log:
HBASE-3872 Hole in split transaction rollback; edits to .META. need to be rolled back even if it seems like they didn't make it
Modified:
hbase/trunk/CHANGES.txt
hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerServices.java
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java
hbase/trunk/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java
hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java
hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransaction.java
Modified: hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/trunk/CHANGES.txt?rev=1146526&r1=1146525&r2=1146526&view=diff
==============================================================================
--- hbase/trunk/CHANGES.txt (original)
+++ hbase/trunk/CHANGES.txt Thu Jul 14 00:27:14 2011
@@ -409,6 +409,8 @@ Release 0.90.4 - Unreleased
HBASE-4033 The shutdown RegionServer could be added to
AssignmentManager.servers again (Jieshan Bean)
HBASE-4088 npes in server shutdown
+ HBASE-3872 Hole in split transaction rollback; edits to .META. need
+ to be rolled back even if it seems like they didn't make it
IMPROVEMENT
HBASE-3882 hbase-config.sh needs to be updated so it can auto-detects the
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java?rev=1146526&r1=1146525&r2=1146526&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java Thu Jul 14 00:27:14 2011
@@ -44,6 +44,7 @@ import org.apache.hadoop.hbase.client.Re
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.Store;
import org.apache.hadoop.hbase.regionserver.StoreFile;
+import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Writables;
@@ -161,16 +162,15 @@ class CatalogJanitor extends Chore {
* the filesystem.
* @throws IOException
*/
- boolean cleanParent(final HRegionInfo parent,
- Result rowContent)
+ boolean cleanParent(final HRegionInfo parent, Result rowContent)
throws IOException {
boolean result = false;
// Run checks on each daughter split.
- boolean hasReferencesA =
+ Pair<Boolean, Boolean> a =
checkDaughter(parent, rowContent, HConstants.SPLITA_QUALIFIER);
- boolean hasReferencesB =
+ Pair<Boolean, Boolean> b =
checkDaughter(parent, rowContent, HConstants.SPLITB_QUALIFIER);
- if (!hasReferencesA && !hasReferencesB) {
+ if ((a.getFirst() && !a.getSecond()) && (b.getFirst() && !b.getSecond())) {
LOG.debug("Deleting region " + parent.getRegionNameAsString() +
" because daughter splits no longer hold references");
// This latter regionOffline should not be necessary but is done for now
@@ -197,14 +197,26 @@ class CatalogJanitor extends Chore {
* @param parent
* @param rowContent
* @param qualifier
- * @return True if this daughter still has references to the parent.
+ * @return A pair where the first boolean says whether or not the daughter
+ * region directory exists in the filesystem and then the second boolean says
+ * whether the daughter has references to the parent.
* @throws IOException
*/
- boolean checkDaughter(final HRegionInfo parent,
+ Pair<Boolean, Boolean> checkDaughter(final HRegionInfo parent,
final Result rowContent, final byte [] qualifier)
throws IOException {
HRegionInfo hri = getDaughterRegionInfo(rowContent, qualifier);
- return hasReferences(parent, rowContent, hri, qualifier);
+ Pair<Boolean, Boolean> result =
+ checkDaughterInFs(parent, rowContent, hri, qualifier);
+ if (result.getFirst() && !result.getSecond()) {
+ // Remove daughter from the parent IFF the daughter region exists in FS.
+ // If there is no daughter region in the filesystem, must be because of
+ // a failed split. The ServerShutdownHandler will do the fixup. Don't
+ // do any deletes in here that could intefere with ServerShutdownHandler
+ // fixup
+ removeDaughterFromParent(parent, hri, qualifier);
+ }
+ return result;
}
/**
@@ -242,23 +254,35 @@ class CatalogJanitor extends Chore {
/**
* Checks if a daughter region -- either splitA or splitB -- still holds
* references to parent. If not, removes reference to the split from
- * the parent meta region row so we don't check it any more.
+ * the parent meta region row so we don't check it any more. Also checks
+ * daughter region exists in the filesytem.
* @param parent Parent region name.
* @param rowContent Keyed content of the parent row in meta region.
* @param split Which column family.
* @param qualifier Which of the daughters to look at, splitA or splitB.
- * @return True if still has references to parent.
+ * @return A pair where the first boolean says whether or not the daughter
+ * region directory exists in the filesystem and then the second boolean says
+ * whether the daughter has references to the parent.
* @throws IOException
*/
- boolean hasReferences(final HRegionInfo parent,
+ Pair<Boolean, Boolean> checkDaughterInFs(final HRegionInfo parent,
final Result rowContent, final HRegionInfo split,
final byte [] qualifier)
throws IOException {
- boolean result = false;
- if (split == null) return result;
+ boolean references = false;
+ boolean exists = false;
+ if (split == null) {
+ return new Pair<Boolean, Boolean>(Boolean.FALSE, Boolean.FALSE);
+ }
FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
Path rootdir = this.services.getMasterFileSystem().getRootDir();
Path tabledir = new Path(rootdir, split.getTableNameAsString());
+ Path regiondir = new Path(tabledir, split.getEncodedName());
+ exists = fs.exists(regiondir);
+ if (!exists) {
+ LOG.warn("Daughter regiondir does not exist: " + regiondir.toString());
+ return new Pair<Boolean, Boolean>(exists, Boolean.FALSE);
+ }
HTableDescriptor parentDescriptor = getTableDescriptor(parent.getTableName());
for (HColumnDescriptor family: parentDescriptor.getFamilies()) {
@@ -275,18 +299,16 @@ class CatalogJanitor extends Chore {
);
if (ps != null && ps.length > 0) {
- result = true;
+ references = true;
break;
}
}
- if (!result) {
- removeDaughterFromParent(parent, split, qualifier);
- }
- return result;
+ return new Pair<Boolean, Boolean>(Boolean.valueOf(exists),
+ Boolean.valueOf(references));
}
private HTableDescriptor getTableDescriptor(byte[] tableName)
throws TableExistsException, FileNotFoundException, IOException {
return this.services.getTableDescriptors().get(Bytes.toString(tableName));
}
-}
\ No newline at end of file
+}
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java?rev=1146526&r1=1146525&r2=1146526&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java Thu Jul 14 00:27:14 2011
@@ -237,11 +237,12 @@ public class ServerShutdownHandler exten
*/
static void fixupDaughters(final Result result,
final AssignmentManager assignmentManager,
- final CatalogTracker catalogTracker) throws IOException {
+ final CatalogTracker catalogTracker)
+ throws IOException {
fixupDaughter(result, HConstants.SPLITA_QUALIFIER, assignmentManager,
- catalogTracker);
+ catalogTracker);
fixupDaughter(result, HConstants.SPLITB_QUALIFIER, assignmentManager,
- catalogTracker);
+ catalogTracker);
}
/**
@@ -282,8 +283,8 @@ public class ServerShutdownHandler exten
}
/**
- * Look for presence of the daughter OR of a split of the daughter. Daughter
- * could have been split over on regionserver before a run of the
+ * Look for presence of the daughter OR of a split of the daughter in .META.
+ * Daughter could have been split over on regionserver before a run of the
* catalogJanitor had chance to clear reference from parent.
* @param daughter Daughter region to search for.
* @throws IOException
@@ -350,4 +351,4 @@ public class ServerShutdownHandler exten
return false;
}
}
-}
+}
\ No newline at end of file
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=1146526&r1=1146525&r2=1146526&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Thu Jul 14 00:27:14 2011
@@ -1380,7 +1380,8 @@ public class HRegionServer implements HR
public void postOpenDeployTasks(final HRegion r, final CatalogTracker ct,
final boolean daughter)
throws KeeperException, IOException {
- LOG.info("HRS.PostOpenDeployTasks");
+ LOG.info("Post open deploy tasks for region=" + r.getRegionNameAsString() +
+ ", daughter=" + daughter);
// Do checks to see if we need to compact (references or too many files)
for (Store s : r.getStores().values()) {
if (s.hasReferences() || s.needsCompaction()) {
@@ -1393,32 +1394,23 @@ public class HRegionServer implements HR
LOG.info("addToOnlineRegions is done" + r.getRegionInfo());
// Update ZK, ROOT or META
if (r.getRegionInfo().isRootRegion()) {
-
- LOG.info("setRootLocation");
RootLocationEditor.setRootLocation(getZooKeeper(),
this.serverNameFromMasterPOV);
} else if (r.getRegionInfo().isMetaRegion()) {
- LOG.info("updateMetaLocation");
-
MetaEditor.updateMetaLocation(ct, r.getRegionInfo(),
this.serverNameFromMasterPOV);
} else {
- LOG.info("updateMetaLocation 111");
-
if (daughter) {
- LOG.info("updateMetaLocation 22");
-
// If daughter of a split, update whole row, not just location.
MetaEditor.addDaughter(ct, r.getRegionInfo(),
this.serverNameFromMasterPOV);
} else {
- LOG.info("updateMetaLocation 33");
-
MetaEditor.updateRegionLocation(ct, r.getRegionInfo(),
this.serverNameFromMasterPOV);
}
}
- LOG.info("END HRS.PostOpenDeployTasks");
+ LOG.info("Done with post open deploy taks for region=" +
+ r.getRegionNameAsString() + ", daughter=" + daughter);
}
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerServices.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerServices.java?rev=1146526&r1=1146525&r2=1146526&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerServices.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerServices.java Thu Jul 14 00:27:14 2011
@@ -24,7 +24,6 @@ import java.io.IOException;
import org.apache.hadoop.hbase.catalog.CatalogTracker;
import org.apache.hadoop.hbase.ipc.HBaseRpcMetrics;
import org.apache.hadoop.hbase.regionserver.wal.HLog;
-import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.zookeeper.KeeperException;
import java.util.Set;
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java?rev=1146526&r1=1146525&r2=1146526&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java Thu Jul 14 00:27:14 2011
@@ -62,15 +62,20 @@ class SplitRequest implements Runnable {
st.execute(this.server, this.server);
} catch (Exception e) {
try {
- LOG.info("Running rollback of failed split of " + parent + "; "
- + e.getMessage());
- st.rollback(this.server, this.server);
- LOG.info("Successful rollback of failed split of " + parent);
+ LOG.info("Running rollback/cleanup of failed split of " +
+ parent.getRegionNameAsString() + "; " + e.getMessage());
+ if (st.rollback(this.server, this.server)) {
+ LOG.info("Successful rollback of failed split of " +
+ parent.getRegionNameAsString());
+ } else {
+ this.server.abort("Abort; we got an error after point-of-no-return");
+ }
} catch (RuntimeException ee) {
- // If failed rollback, kill server to avoid having a hole in table.
- LOG.info("Failed rollback of failed split of "
- + parent.getRegionNameAsString() + " -- aborting server", ee);
- this.server.abort("Failed split");
+ String msg = "Failed rollback of failed split of " +
+ parent.getRegionNameAsString() + " -- aborting server";
+ // If failed rollback, kill this server to avoid having a hole in table.
+ LOG.info(msg, ee);
+ this.server.abort(msg);
}
return;
}
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java?rev=1146526&r1=1146525&r2=1146526&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java Thu Jul 14 00:27:14 2011
@@ -60,18 +60,18 @@ import com.google.common.util.concurrent
/**
* Executes region split as a "transaction". Call {@link #prepare()} to setup
- * the transaction, {@link #execute(OnlineRegions)} to run the transaction and
- * {@link #rollback(OnlineRegions)} to cleanup if execute fails.
+ * the transaction, {@link #execute(Server, RegionServerServices)} to run the
+ * transaction and {@link #rollback(OnlineRegions)} to cleanup if execute fails.
*
* <p>Here is an example of how you would use this class:
* <pre>
* SplitTransaction st = new SplitTransaction(this.conf, parent, midKey)
* if (!st.prepare()) return;
* try {
- * st.execute(myOnlineRegions);
+ * st.execute(server, services);
* } catch (IOException ioe) {
* try {
- * st.rollback(myOnlineRegions);
+ * st.rollback(server, services);
* return;
* } catch (RuntimeException e) {
* myAbortable.abort("Failed split, abort");
@@ -101,7 +101,9 @@ public class SplitTransaction {
private final byte [] splitrow;
/**
- * Types to add to the transaction journal
+ * Types to add to the transaction journal.
+ * Each enum is a step in the split transaction. Used to figure how much
+ * we need to rollback.
*/
enum JournalEntry {
/**
@@ -127,7 +129,13 @@ public class SplitTransaction {
/**
* Started in on the creation of the second daughter region.
*/
- STARTED_REGION_B_CREATION
+ STARTED_REGION_B_CREATION,
+ /**
+ * Point of no return.
+ * If we got here, then transaction is not recoverable other than by
+ * crashing out the regionserver.
+ */
+ PONR
}
/*
@@ -137,7 +145,7 @@ public class SplitTransaction {
/**
* Constructor
- * @param services So we can online new servces. If null, we'll skip onlining
+ * @param services So we can online new regions. If null, we'll skip onlining
* (Useful testing).
* @param c Configuration to use running split
* @param r Region to split
@@ -156,7 +164,7 @@ public class SplitTransaction {
*/
public boolean prepare() {
if (this.parent.isClosed() || this.parent.isClosing()) return false;
- // Split key can be false if this region is unsplittable; i.e. has refs.
+ // Split key can be null if this region is unsplittable; i.e. has refs.
if (this.splitrow == null) return false;
HRegionInfo hri = this.parent.getRegionInfo();
parent.prepareToSplit();
@@ -196,13 +204,14 @@ public class SplitTransaction {
/**
* Run the transaction.
- * @param server Hosting server instance.
+ * @param server Hosting server instance. Can be null when testing (won't try
+ * and update in zk if a null server)
* @param services Used to online/offline regions.
- * @throws IOException If thrown, transaction failed. Call {@link #rollback(OnlineRegions)}
+ * @throws IOException If thrown, transaction failed. Call {@link #rollback(Server, RegionServerServices)}
* @return Regions created
* @throws KeeperException
* @throws NodeExistsException
- * @see #rollback(OnlineRegions)
+ * @see #rollback(Server, RegionServerServices)
*/
public PairOfSameType<HRegion> execute(final Server server,
final RegionServerServices services)
@@ -227,7 +236,7 @@ public class SplitTransaction {
this.fileSplitTimeout);
// Set ephemeral SPLITTING znode up in zk. Mocked servers sometimes don't
- // have zookeeper so don't do zk stuff if zookeeper is null
+ // have zookeeper so don't do zk stuff if server or zookeeper is null
if (server != null && server.getZooKeeper() != null) {
try {
this.znodeVersion = createNodeSplitting(server.getZooKeeper(),
@@ -259,13 +268,13 @@ public class SplitTransaction {
}
this.journal.add(JournalEntry.OFFLINED_PARENT);
- // TODO: If the below were multithreaded would we complete steps in less
- // elapsed time? St.Ack 20100920
-
- splitStoreFiles(this.splitdir, hstoreFilesToSplit);
+ // TODO: If splitStoreFiles were multithreaded would we complete steps in
+ // less elapsed time? St.Ack 20100920
+ //
// splitStoreFiles creates daughter region dirs under the parent splits dir
// Nothing to unroll here if failure -- clean up of CREATE_SPLIT_DIR will
// clean this up.
+ splitStoreFiles(this.splitdir, hstoreFilesToSplit);
// Log to the journal that we are creating region A, the first daughter
// region. We could fail halfway through. If we do, we could have left
@@ -278,27 +287,38 @@ public class SplitTransaction {
this.journal.add(JournalEntry.STARTED_REGION_B_CREATION);
HRegion b = createDaughterRegion(this.hri_b, this.parent.rsServices);
- // Edit parent in meta
+ // Edit parent in meta. Offlines parent region and adds splita and splitb.
if (!testing) {
MetaEditor.offlineParentInMeta(server.getCatalogTracker(),
this.parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo());
}
- // This is the point of no return. We are committed to the split now. We
- // have still the daughter regions to open but meta has been changed.
- // If we fail from here on out, we cannot rollback so, we'll just abort.
- if (!testing) {
+ // This is the point of no return. Adding subsequent edits to .META. as we
+ // do below when we do the daugther opens adding each to .META. can fail in
+ // various interesting ways the most interesting of which is a timeout
+ // BUT the edits all go through (See HBASE-3872). IF we reach the POWR
+ // then subsequent failures need to crash out this regionserver; the
+ // server shutdown processing should be able to fix-up the incomplete split.
+ this.journal.add(JournalEntry.PONR);
// Open daughters in parallel.
- DaughterOpener aOpener = new DaughterOpener(server, services, a);
- DaughterOpener bOpener = new DaughterOpener(server, services, b);
- aOpener.start();
- bOpener.start();
- try {
- aOpener.join();
- bOpener.join();
- } catch (InterruptedException e) {
- server.abort("Exception running daughter opens", e);
- }
+ DaughterOpener aOpener = new DaughterOpener(server, services, a);
+ DaughterOpener bOpener = new DaughterOpener(server, services, b);
+ aOpener.start();
+ bOpener.start();
+ try {
+ aOpener.join();
+ bOpener.join();
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ throw new IOException("Interrupted " + e.getMessage());
+ }
+ if (aOpener.getException() != null) {
+ throw new IOException("Failed " +
+ aOpener.getName(), aOpener.getException());
+ }
+ if (bOpener.getException() != null) {
+ throw new IOException("Failed " +
+ bOpener.Name(), bOpener.getException());
}
// Tell master about split by updating zk. If we fail, abort.
@@ -328,12 +348,10 @@ public class SplitTransaction {
if (e instanceof InterruptedException) {
Thread.currentThread().interrupt();
}
- server.abort("Failed telling master about split", e);
+ throw new IOException("Failed telling master about split", e);
}
}
-
-
// Coprocessor callback
if (this.parent.getCoprocessorHost() != null) {
this.parent.getCoprocessorHost().postSplit(a,b);
@@ -345,10 +363,15 @@ public class SplitTransaction {
return new PairOfSameType<HRegion>(a, b);
}
+ /*
+ * Open daughter region in its own thread.
+ * If we fail, abort this hosting server.
+ */
class DaughterOpener extends Thread {
private final RegionServerServices services;
private final Server server;
private final HRegion r;
+ private Throwable t = null;
DaughterOpener(final Server s, final RegionServerServices services,
final HRegion r) {
@@ -359,13 +382,20 @@ public class SplitTransaction {
this.r = r;
}
+ /**
+ * @return Null if open succeeded else exception that causes us fail open.
+ * Call it after this thread exits else you may get wrong view on result.
+ */
+ Throwable getException() {
+ return this.t;
+ }
+
@Override
public void run() {
try {
openDaughterRegion(this.server, this.services, r);
} catch (Throwable t) {
- this.server.abort("Failed open of daughter " +
- this.r.getRegionInfo().getRegionNameAsString(), t);
+ this.t = t;
}
}
}
@@ -373,7 +403,7 @@ public class SplitTransaction {
/**
* Open daughter regions, add them to online list and update meta.
* @param server
- * @param services
+ * @param services Can be null when testing.
* @param daughter
* @throws IOException
* @throws KeeperException
@@ -381,20 +411,22 @@ public class SplitTransaction {
void openDaughterRegion(final Server server,
final RegionServerServices services, final HRegion daughter)
throws IOException, KeeperException {
- if (server.isStopped() || services.isStopping()) {
+ boolean stopping = services != null && services.isStopping();
+ if (server.isStopped() || stopping) {
MetaEditor.addDaughter(server.getCatalogTracker(),
daughter.getRegionInfo(), null);
LOG.info("Not opening daughter " +
daughter.getRegionInfo().getRegionNameAsString() +
- " because stopping=" + services.isStopping() + ", stopped=" +
- server.isStopped());
+ " because stopping=" + stopping + ", stopped=" + server.isStopped());
return;
}
HRegionInfo hri = daughter.getRegionInfo();
LoggingProgressable reporter =
new LoggingProgressable(hri, server.getConfiguration());
HRegion r = daughter.openHRegion(reporter);
- services.postOpenDeployTasks(r, server.getCatalogTracker(), true);
+ if (services != null) {
+ services.postOpenDeployTasks(r, server.getCatalogTracker(), true);
+ }
}
static class LoggingProgressable implements CancelableProgressable {
@@ -598,16 +630,19 @@ public class SplitTransaction {
}
/**
- * @param or Object that can online/offline parent region. Can be passed null
- * by unit tests.
- * @return The region we were splitting
+ * @param server Hosting server instance (May be null when testing).
+ * @param services
* @throws IOException If thrown, rollback failed. Take drastic action.
+ * @return True if we successfully rolled back, false if we got to the point
+ * of no return and so now need to abort the server to minimize damage.
*/
- public void rollback(final Server server, final OnlineRegions or)
+ public boolean rollback(final Server server, final RegionServerServices services)
throws IOException {
+ boolean result = true;
FileSystem fs = this.parent.getFilesystem();
ListIterator<JournalEntry> iterator =
this.journal.listIterator(this.journal.size());
+ // Iterate in reverse.
while (iterator.hasPrevious()) {
JournalEntry je = iterator.previous();
switch(je) {
@@ -642,13 +677,19 @@ public class SplitTransaction {
break;
case OFFLINED_PARENT:
- if (or != null) or.addToOnlineRegions(this.parent);
+ if (services != null) services.addToOnlineRegions(this.parent);
break;
+ case PONR:
+ // We got to the point-of-no-return so we need to just abort. Return
+ // immediately.
+ return false;
+
default:
throw new RuntimeException("Unhandled journal entry: " + je);
}
}
+ return result;
}
HRegionInfo getFirstDaughter() {
Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java?rev=1146526&r1=1146525&r2=1146526&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java Thu Jul 14 00:27:14 2011
@@ -113,7 +113,7 @@ public class TestCatalogTracker {
@Test public void testThatIfMETAMovesWeAreNotified()
throws IOException, InterruptedException, KeeperException {
HConnection connection = Mockito.mock(HConnection.class);
- final CatalogTracker ct = constructAndStartCatalogTracker(connection);
+ constructAndStartCatalogTracker(connection);
try {
RootLocationEditor.setRootLocation(this.watcher,
new ServerName("example.com", 1234, System.currentTimeMillis()));
Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java?rev=1146526&r1=1146525&r2=1146526&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java Thu Jul 14 00:27:14 2011
@@ -293,6 +293,14 @@ public class TestCatalogJanitor {
assertFalse(janitor.cleanParent(parent, r));
// Remove the reference file and try again.
assertTrue(fs.delete(p, true));
+ // We will fail!!! Because split b is empty, which is right... we should
+ // not remove parent if daughters do not exist in fs.
+ assertFalse(janitor.cleanParent(parent, r));
+ // Put in place daughter dir for b... that should make it so parent gets
+ // cleaned up.
+ storedir = Store.getStoreHomedir(tabledir, splitb.getEncodedName(),
+ htd.getColumnFamilies()[0].getName());
+ assertTrue(fs.mkdirs(storedir));
assertTrue(janitor.cleanParent(parent, r));
}
}
Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransaction.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransaction.java?rev=1146526&r1=1146525&r2=1146526&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransaction.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransaction.java Thu Jul 14 00:27:14 2011
@@ -42,6 +42,7 @@ import org.apache.hadoop.hbase.client.Sc
import org.apache.hadoop.hbase.regionserver.wal.HLog;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.PairOfSameType;
+import org.apache.zookeeper.KeeperException;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
@@ -84,6 +85,46 @@ public class TestSplitTransaction {
this.fs.delete(this.testdir, true);
}
+ @Test public void testFailAfterPONR() throws IOException, KeeperException {
+ final int rowcount = TEST_UTIL.loadRegion(this.parent, CF);
+ assertTrue(rowcount > 0);
+ int parentRowCount = countRows(this.parent);
+ assertEquals(rowcount, parentRowCount);
+
+ // Start transaction.
+ SplitTransaction st = prepareGOOD_SPLIT_ROW();
+ SplitTransaction spiedUponSt = spy(st);
+ Mockito.doThrow(new MockedFailedDaughterOpen()).
+ when(spiedUponSt).openDaughterRegion((Server)Mockito.anyObject(),
+ (RegionServerServices)Mockito.anyObject(), (HRegion)Mockito.anyObject());
+
+ // Run the execute. Look at what it returns.
+ boolean expectedException = false;
+ Server mockServer = Mockito.mock(Server.class);
+ when(mockServer.getConfiguration()).thenReturn(TEST_UTIL.getConfiguration());
+ try {
+ spiedUponSt.execute(mockServer, null);
+ } catch (IOException e) {
+ if (e.getCause() != null &&
+ e.getCause() instanceof MockedFailedDaughterOpen) {
+ expectedException = true;
+ }
+ }
+ assertTrue(expectedException);
+ // Run rollback returns that we should restart.
+ assertFalse(spiedUponSt.rollback(null, null));
+ // Make sure that region a and region b are still in the filesystem, that
+ // they have not been removed; this is supposed to be the case if we go
+ // past point of no return.
+ Path tableDir = this.parent.getRegionDir().getParent();
+ Path daughterADir =
+ new Path(tableDir, spiedUponSt.getFirstDaughter().getEncodedName());
+ Path daughterBDir =
+ new Path(tableDir, spiedUponSt.getSecondDaughter().getEncodedName());
+ assertTrue(TEST_UTIL.getTestFileSystem().exists(daughterADir));
+ assertTrue(TEST_UTIL.getTestFileSystem().exists(daughterBDir));
+ }
+
/**
* Test straight prepare works. Tries to split on {@link #GOOD_SPLIT_ROW}
* @throws IOException
@@ -190,7 +231,7 @@ public class TestSplitTransaction {
}
assertTrue(expectedException);
// Run rollback
- spiedUponSt.rollback(null, null);
+ assertTrue(spiedUponSt.rollback(null, null));
// Assert I can scan parent.
int parentRowCount2 = countRows(this.parent);
@@ -228,6 +269,7 @@ public class TestSplitTransaction {
*/
@SuppressWarnings("serial")
private class MockedFailedDaughterCreation extends IOException {}
+ private class MockedFailedDaughterOpen extends IOException {}
private int countRows(final HRegion r) throws IOException {
int rowcount = 0;