You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by jd...@apache.org on 2010/12/18 01:13:13 UTC
svn commit: r1050542 - in /hbase/trunk: ./
src/main/java/org/apache/hadoop/hbase/master/
src/main/java/org/apache/hadoop/hbase/regionserver/
src/main/java/org/apache/hadoop/hbase/regionserver/wal/
src/main/java/org/apache/hadoop/hbase/replication/regio...
Author: jdcryans
Date: Sat Dec 18 00:13:13 2010
New Revision: 1050542
URL: http://svn.apache.org/viewvc?rev=1050542&view=rev
Log:
HBASE-3366 WALObservers should be notified before the lock
HBASE-3367 Failed log split not retried
HBASE-3370 ReplicationSource.openReader fails to locate HLogs when they
aren't split yet
HBASE-3371 Race in TestReplication can make it fail
HBASE-3372 HRS shouldn't print a full stack for ServerNotRunningException
Added:
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/OrphanHLogAfterSplitException.java
Modified:
hbase/trunk/CHANGES.txt
hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java
hbase/trunk/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java
hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java
hbase/trunk/src/test/java/org/apache/hadoop/hbase/replication/TestReplication.java
Modified: hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/trunk/CHANGES.txt?rev=1050542&r1=1050541&r2=1050542&view=diff
==============================================================================
--- hbase/trunk/CHANGES.txt (original)
+++ hbase/trunk/CHANGES.txt Sat Dec 18 00:13:13 2010
@@ -790,6 +790,11 @@ Release 0.90.0 - Unreleased
location in .META. is possible
HBASE-3368 Split message can come in before region opened message; results
in 'Region has been PENDING_CLOSE for too long' cycle
+ HBASE-3366 WALObservers should be notified before the lock
+ HBASE-3367 Failed log split not retried
+ HBASE-3370 ReplicationSource.openReader fails to locate HLogs when they
+ aren't split yet
+ HBASE-3371 Race in TestReplication can make it fail
IMPROVEMENTS
@@ -1274,6 +1279,7 @@ Release 0.90.0 - Unreleased
HBASE-3303 Lower hbase.regionserver.handler.count from 25 back to 10
HBASE-2467 Concurrent flushers in HLog sync using HDFS-895
HBASE-3349 Pass HBase configuration to HttpServer
+ HBASE-3372 HRS shouldn't print a full stack for ServerNotRunningException
NEW FEATURES
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java?rev=1050542&r1=1050541&r2=1050542&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java Sat Dec 18 00:13:13 2010
@@ -41,6 +41,7 @@ import org.apache.hadoop.hbase.regionser
import org.apache.hadoop.hbase.regionserver.Store;
import org.apache.hadoop.hbase.regionserver.wal.HLog;
import org.apache.hadoop.hbase.regionserver.wal.HLogSplitter;
+import org.apache.hadoop.hbase.regionserver.wal.OrphanHLogAfterSplitException;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
@@ -190,7 +191,12 @@ public class MasterFileSystem {
Path logDir = new Path(this.rootdir, HLog.getHLogDirectoryName(serverName));
try {
HLogSplitter splitter = HLogSplitter.createLogSplitter(conf);
- splitter.splitLog(this.rootdir, logDir, oldLogDir, this.fs, conf);
+ try {
+ splitter.splitLog(this.rootdir, logDir, oldLogDir, this.fs, conf);
+ } catch (OrphanHLogAfterSplitException e) {
+ LOG.warn("Retrying splitting because of:", e);
+ splitter.splitLog(this.rootdir, logDir, oldLogDir, this.fs, conf);
+ }
splitTime = splitter.getTime();
splitLogSize = splitter.getSize();
} catch (IOException e) {
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=1050542&r1=1050541&r2=1050542&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Sat Dec 18 00:13:13 2010
@@ -103,6 +103,7 @@ import org.apache.hadoop.hbase.ipc.HMast
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.ipc.Invocation;
import org.apache.hadoop.hbase.ipc.RpcServer;
+import org.apache.hadoop.hbase.ipc.ServerNotRunningException;
import org.apache.hadoop.hbase.regionserver.Leases.LeaseStillHeldException;
import org.apache.hadoop.hbase.regionserver.handler.CloseMetaHandler;
import org.apache.hadoop.hbase.regionserver.handler.CloseRegionHandler;
@@ -1419,7 +1420,13 @@ public class HRegionServer implements HR
masterAddress.getInetSocketAddress(), this.conf, -1,
this.rpcTimeout, this.rpcTimeout);
} catch (IOException e) {
- LOG.warn("Unable to connect to master. Retrying. Error was:", e);
+ e = e instanceof RemoteException ?
+ ((RemoteException)e).unwrapRemoteException() : e;
+ if (e instanceof ServerNotRunningException) {
+ LOG.info("Master isn't available yet, retrying");
+ } else {
+ LOG.warn("Unable to connect to master. Retrying. Error was:", e);
+ }
sleeper.sleep();
}
}
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java?rev=1050542&r1=1050541&r2=1050542&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java Sat Dec 18 00:13:13 2010
@@ -493,6 +493,13 @@ public class HLog implements Syncable {
nextHdfsOut =
((SequenceFileLogWriter)nextWriter).getDFSCOutputStream();
}
+ // Tell our listeners that a new log was created
+ if (!this.listeners.isEmpty()) {
+ for (WALObserver i : this.listeners) {
+ i.logRolled(newPath);
+ }
+ }
+
synchronized (updateLock) {
// Clean up current writer.
Path oldFile = cleanupCurrentWriter(currentFilenum);
@@ -509,12 +516,6 @@ public class HLog implements Syncable {
this.numEntries.set(0);
this.logRollRequested = false;
}
- // Tell our listeners that a new log was created
- if (!this.listeners.isEmpty()) {
- for (WALObserver i : this.listeners) {
- i.logRolled(newPath);
- }
- }
// Can we delete any of the old log files?
if (this.outputfiles.size() > 0) {
if (this.lastSeqWritten.isEmpty()) {
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java?rev=1050542&r1=1050541&r2=1050542&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java Sat Dec 18 00:13:13 2010
@@ -287,7 +287,8 @@ public class HLogSplitter {
}
if (fs.listStatus(srcDir).length > processedLogs.size()
+ corruptedLogs.size()) {
- throw new IOException("Discovered orphan hlog after split. Maybe "
+ throw new OrphanHLogAfterSplitException(
+ "Discovered orphan hlog after split. Maybe the "
+ "HRegionServer was not dead when we started");
}
archiveLogs(corruptedLogs, processedLogs, oldLogDir, fs, conf);
Added: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/OrphanHLogAfterSplitException.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/OrphanHLogAfterSplitException.java?rev=1050542&view=auto
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/OrphanHLogAfterSplitException.java (added)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/OrphanHLogAfterSplitException.java Sat Dec 18 00:13:13 2010
@@ -0,0 +1,40 @@
+/**
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver.wal;
+
+import java.io.IOException;
+
+public class OrphanHLogAfterSplitException extends IOException {
+
+ /**
+ * Create this exception without a message
+ */
+ public OrphanHLogAfterSplitException() {
+ super();
+ }
+
+ /**
+ * Create this exception with a message
+ * @param message why it failed
+ */
+ public OrphanHLogAfterSplitException(String message) {
+ super(message);
+ }
+}
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java?rev=1050542&r1=1050541&r2=1050542&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java Sat Dec 18 00:13:13 2010
@@ -438,17 +438,20 @@ public class ReplicationSource extends T
// We didn't find the log in the archive directory, look if it still
// exists in the dead RS folder (there could be a chain of failures
// to look at)
- for (int i = this.deadRegionServers.length - 1; i > 0; i--) {
+ LOG.info("NB dead servers : " + deadRegionServers.length);
+ for (int i = this.deadRegionServers.length - 1; i >= 0; i--) {
+
Path deadRsDirectory =
- new Path(this.manager.getLogDir(), this.deadRegionServers[i]);
+ new Path(manager.getLogDir().getParent(), this.deadRegionServers[i]);
Path possibleLogLocation =
new Path(deadRsDirectory, currentPath.getName());
+ LOG.info("Possible location " + possibleLogLocation.toUri().toString());
if (this.manager.getFs().exists(possibleLogLocation)) {
// We found the right new location
LOG.info("Log " + this.currentPath + " still exists at " +
possibleLogLocation);
// Breaking here will make us sleep since reader is null
- break;
+ return true;
}
}
// TODO What happens if the log was missing from every single location?
Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java?rev=1050542&r1=1050541&r2=1050542&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java Sat Dec 18 00:13:13 2010
@@ -599,6 +599,7 @@ public class HBaseTestingUtility {
Delete del = new Delete(res.getRow());
table.delete(del);
}
+ resScan = table.getScanner(scan);
return table;
}
Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java?rev=1050542&r1=1050541&r2=1050542&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java Sat Dec 18 00:13:13 2010
@@ -157,7 +157,7 @@ public class TestHLogSplit {
assertEquals(parentOfParent, HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
}
- @Test(expected = IOException.class)
+ @Test(expected = OrphanHLogAfterSplitException.class)
public void testSplitFailsIfNewHLogGetsCreatedAfterSplitStarted()
throws IOException {
AtomicBoolean stop = new AtomicBoolean(false);
Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/replication/TestReplication.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/replication/TestReplication.java?rev=1050542&r1=1050541&r2=1050542&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/replication/TestReplication.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/replication/TestReplication.java Sat Dec 18 00:13:13 2010
@@ -44,6 +44,7 @@ import org.apache.hadoop.hbase.client.Sc
import org.apache.hadoop.hbase.client.replication.ReplicationAdmin;
import org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication;
import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.mapreduce.Job;
@@ -126,7 +127,6 @@ public class TestReplication {
utility2.startMiniCluster(2);
HTableDescriptor table = new HTableDescriptor(tableName);
- table.setDeferredLogFlush(false);
HColumnDescriptor fam = new HColumnDescriptor(famName);
fam.setScope(HConstants.REPLICATION_SCOPE_GLOBAL);
table.addFamily(fam);
@@ -153,8 +153,15 @@ public class TestReplication {
*/
@Before
public void setUp() throws Exception {
+
+ // Starting and stopping replication can make us miss new logs,
+ // rolling like this makes sure the most recent one gets added to the queue
+ for ( JVMClusterUtil.RegionServerThread r :
+ utility1.getHBaseCluster().getRegionServerThreads()) {
+ r.getRegionServer().getWAL().rollWriter();
+ }
utility1.truncateTable(tableName);
- // truncating the table will send on Delete per row to the slave cluster
+ // truncating the table will send one Delete per row to the slave cluster
// in an async fashion, which is why we cannot just call truncateTable on
// utility2 since late writes could make it to the slave in some way.
// Instead, we truncate the first table and wait for all the Deletes to
@@ -172,6 +179,7 @@ public class TestReplication {
if (lastCount < res.length) {
i--; // Don't increment timeout if we make progress
}
+ lastCount = res.length;
LOG.info("Still got " + res.length + " rows");
Thread.sleep(SLEEP_TIME);
} else {