You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by to...@apache.org on 2011/08/30 21:27:24 UTC
svn commit: r1163345 - in /hbase/trunk: ./
src/main/java/org/apache/hadoop/hbase/ipc/
src/main/java/org/apache/hadoop/hbase/master/
src/main/java/org/apache/hadoop/hbase/monitoring/
src/main/java/org/apache/hadoop/hbase/regionserver/ src/test/java/org/...
Author: todd
Date: Tue Aug 30 19:27:24 2011
New Revision: 1163345
URL: http://svn.apache.org/viewvc?rev=1163345&view=rev
Log:
HBASE-4275 RS should communicate fatal "aborts" back to the master
Added:
hbase/trunk/src/main/java/org/apache/hadoop/hbase/monitoring/MemoryBoundedLogMessageBuffer.java
hbase/trunk/src/test/java/org/apache/hadoop/hbase/monitoring/TestMemoryBoundedLogMessageBuffer.java
Modified:
hbase/trunk/CHANGES.txt
hbase/trunk/src/main/java/org/apache/hadoop/hbase/ipc/HMasterRegionInterface.java
hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
Modified: hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/trunk/CHANGES.txt?rev=1163345&r1=1163344&r2=1163345&view=diff
==============================================================================
--- hbase/trunk/CHANGES.txt (original)
+++ hbase/trunk/CHANGES.txt Tue Aug 30 19:27:24 2011
@@ -422,6 +422,7 @@ Release 0.91.0 - Unreleased
HBASE-4291 Improve display of regions in transition in UI to be more
readable (todd)
HBASE-4281 Add facility to dump current state of all executors (todd)
+ HBASE-4275 RS should communicate fatal "aborts" back to the master (todd)
TASKS
HBASE-3559 Move report of split to master OFF the heartbeat channel
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/ipc/HMasterRegionInterface.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/ipc/HMasterRegionInterface.java?rev=1163345&r1=1163344&r2=1163345&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/ipc/HMasterRegionInterface.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/ipc/HMasterRegionInterface.java Tue Aug 30 19:27:24 2011
@@ -61,4 +61,12 @@ public interface HMasterRegionInterface
*/
public void regionServerReport(byte [] sn, HServerLoad hsl)
throws IOException;
-}
\ No newline at end of file
+
+ /**
+ * Called by a region server to report a fatal error that is causing
+ * it to abort.
+ * @param sn {@link ServerName#getBytes()}
+ * @param errorMessage informative text to expose in the master logs and UI
+ */
+ public void reportRSFatalError(byte [] sn, String errorMessage);
+}
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1163345&r1=1163344&r2=1163345&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Tue Aug 30 19:27:24 2011
@@ -73,6 +73,7 @@ import org.apache.hadoop.hbase.master.ha
import org.apache.hadoop.hbase.master.handler.TableModifyFamilyHandler;
import org.apache.hadoop.hbase.master.handler.CreateTableHandler;
import org.apache.hadoop.hbase.master.metrics.MasterMetrics;
+import org.apache.hadoop.hbase.monitoring.MemoryBoundedLogMessageBuffer;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskMonitor;
import org.apache.hadoop.hbase.regionserver.HRegion;
@@ -156,6 +157,11 @@ implements HMasterInterface, HMasterRegi
private CatalogTracker catalogTracker;
// Cluster status zk tracker and local setter
private ClusterStatusTracker clusterStatusTracker;
+
+ // buffer for "fatal error" notices from region servers
+ // in the cluster. This is only used for assisting
+ // operations/debugging.
+ private MemoryBoundedLogMessageBuffer rsFatals;
// This flag is for stopping this Master instance. Its set when we are
// stopping or aborting
@@ -223,6 +229,8 @@ implements HMasterInterface, HMasterRegi
this.isa = this.rpcServer.getListenerAddress();
this.serverName = new ServerName(this.isa.getHostName(),
this.isa.getPort(), System.currentTimeMillis());
+ this.rsFatals = new MemoryBoundedLogMessageBuffer(
+ conf.getLong("hbase.master.buffer.for.rs.fatals", 1*1024*1024));
// initialize server principal (if using secure Hadoop)
User.login(conf, "hbase.master.keytab.file",
@@ -759,6 +767,15 @@ implements HMasterInterface, HMasterRegi
}
}
+ @Override
+ public void reportRSFatalError(byte [] sn, String errorText) {
+ ServerName serverName = new ServerName(sn);
+ String msg = "Region server " + serverName + " reported a fatal error:\n"
+ + errorText;
+ LOG.error(msg);
+ rsFatals.add(msg);
+ }
+
public boolean isMasterRunning() {
return !isStopped();
}
@@ -1207,6 +1224,10 @@ implements HMasterInterface, HMasterRegi
public AssignmentManager getAssignmentManager() {
return this.assignmentManager;
}
+
+ public MemoryBoundedLogMessageBuffer getRegionServerFatalLogBuffer() {
+ return rsFatals;
+ }
@Override
public void shutdown() {
Added: hbase/trunk/src/main/java/org/apache/hadoop/hbase/monitoring/MemoryBoundedLogMessageBuffer.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/monitoring/MemoryBoundedLogMessageBuffer.java?rev=1163345&view=auto
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/monitoring/MemoryBoundedLogMessageBuffer.java (added)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/monitoring/MemoryBoundedLogMessageBuffer.java Tue Aug 30 19:27:24 2011
@@ -0,0 +1,114 @@
+/**
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.monitoring;
+
+import java.io.PrintWriter;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.LinkedList;
+import java.util.List;
+
+import com.google.common.base.Charsets;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+
+/**
+ * A size-bounded repository of alerts, which are kept
+ * in a linked list. Alerts can be added, and they will
+ * automatically be removed one by one when the specified heap
+ * usage is exhausted.
+ */
+public class MemoryBoundedLogMessageBuffer {
+ private final long maxSizeBytes;
+ private long usage = 0;
+ private LinkedList<LogMessage> messages;
+
+ public MemoryBoundedLogMessageBuffer(long maxSizeBytes) {
+ Preconditions.checkArgument(
+ maxSizeBytes > 0);
+ this.maxSizeBytes = maxSizeBytes;
+ this.messages = Lists.newLinkedList();
+ }
+
+ /**
+ * Append the given message to this buffer, automatically evicting
+ * older messages until the desired memory limit is achieved.
+ */
+ public synchronized void add(String messageText) {
+ LogMessage message = new LogMessage(messageText, System.currentTimeMillis());
+
+ usage += message.estimateHeapUsage();
+ messages.add(message);
+ while (usage > maxSizeBytes) {
+ LogMessage removed = messages.remove();
+ usage -= removed.estimateHeapUsage();
+ assert usage >= 0;
+ }
+ }
+
+ /**
+ * Dump the contents of the buffer to the given stream.
+ */
+ public synchronized void dumpTo(PrintWriter out) {
+ SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
+
+ for (LogMessage msg : messages) {
+ out.write(df.format(new Date(msg.timestamp)));
+ out.write(" ");
+ out.println(new String(msg.message, Charsets.UTF_8));
+ }
+ }
+
+ synchronized List<LogMessage> getMessages() {
+ // defensive copy
+ return Lists.newArrayList(messages);
+ }
+
+ /**
+ * Estimate the number of bytes this buffer is currently
+ * using.
+ */
+ synchronized long estimateHeapUsage() {
+ return usage;
+ }
+
+ private static class LogMessage {
+ /** the error text, encoded in bytes to save memory */
+ public final byte[] message;
+ public final long timestamp;
+
+ /**
+ * Completely non-scientific estimate of how much one of these
+ * objects takes, along with the LinkedList overhead. This doesn't
+ * need to be exact, since we don't expect a ton of these alerts.
+ */
+ private static final long BASE_USAGE=100;
+
+ public LogMessage(String message, long timestamp) {
+ this.message = message.getBytes(Charsets.UTF_8);
+ this.timestamp = timestamp;
+ }
+
+ public long estimateHeapUsage() {
+ return message.length + BASE_USAGE;
+ }
+ }
+
+}
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=1163345&r1=1163344&r2=1163345&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Tue Aug 30 19:27:24 2011
@@ -1471,16 +1471,29 @@ public class HRegionServer implements HR
* the exception that caused the abort, or null
*/
public void abort(String reason, Throwable cause) {
+ String msg = "ABORTING region server " + this + ": " + reason;
if (cause != null) {
- LOG.fatal("ABORTING region server " + this + ": " + reason, cause);
+ LOG.fatal(msg, cause);
} else {
- LOG.fatal("ABORTING region server " + this + ": " + reason);
+ LOG.fatal(msg);
}
this.abortRequested = true;
this.reservedSpace.clear();
if (this.metrics != null) {
LOG.info("Dump of metrics: " + this.metrics);
}
+ // Do our best to report our abort to the master, but this may not work
+ try {
+ if (cause != null) {
+ msg += "\nCause:\n" + StringUtils.stringifyException(cause);
+ }
+ if (hbaseMaster != null) {
+ hbaseMaster.reportRSFatalError(
+ this.serverNameFromMasterPOV.getBytes(), msg);
+ }
+ } catch (Throwable t) {
+ LOG.warn("Unable to report fatal error to master", t);
+ }
stop(reason);
}
Added: hbase/trunk/src/test/java/org/apache/hadoop/hbase/monitoring/TestMemoryBoundedLogMessageBuffer.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/monitoring/TestMemoryBoundedLogMessageBuffer.java?rev=1163345&view=auto
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/monitoring/TestMemoryBoundedLogMessageBuffer.java (added)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/monitoring/TestMemoryBoundedLogMessageBuffer.java Tue Aug 30 19:27:24 2011
@@ -0,0 +1,72 @@
+/**
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.monitoring;
+
+import static org.junit.Assert.*;
+
+import java.io.PrintWriter;
+import java.io.StringWriter;
+
+import org.junit.Test;
+
+/**
+ * Test case for the MemoryBoundedLogMessageBuffer utility.
+ * Ensures that it uses no more memory than it's supposed to,
+ * and that it properly deals with multibyte encodings.
+ */
+public class TestMemoryBoundedLogMessageBuffer {
+
+ private static final long TEN_KB = 10 * 1024;
+ private static final String JP_TEXT = "こんにちは";
+
+ @Test
+ public void testBuffer() {
+ MemoryBoundedLogMessageBuffer buf =
+ new MemoryBoundedLogMessageBuffer(TEN_KB);
+
+ for (int i = 0; i < 1000; i++) {
+ buf.add("hello " + i);
+ }
+ assertTrue("Usage too big: " + buf.estimateHeapUsage(),
+ buf.estimateHeapUsage() < TEN_KB);
+ assertTrue("Too many retained: " + buf.getMessages().size(),
+ buf.getMessages().size() < 100);
+ StringWriter sw = new StringWriter();
+ buf.dumpTo(new PrintWriter(sw));
+ String dump = sw.toString();
+ System.out.println(dump);
+ assertFalse("The early log messages should be evicted",
+ dump.contains("hello 1\n"));
+ assertTrue("The late log messages should be retained",
+ dump.contains("hello 999\n"));
+ }
+
+ @Test
+ public void testNonAsciiEncoding() {
+ MemoryBoundedLogMessageBuffer buf =
+ new MemoryBoundedLogMessageBuffer(TEN_KB);
+
+ buf.add(JP_TEXT);
+ StringWriter sw = new StringWriter();
+ buf.dumpTo(new PrintWriter(sw));
+ String dump = sw.toString();
+ assertTrue(dump.contains(JP_TEXT));
+ }
+}