You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by to...@apache.org on 2011/08/30 21:27:24 UTC

svn commit: r1163345 - in /hbase/trunk: ./ src/main/java/org/apache/hadoop/hbase/ipc/ src/main/java/org/apache/hadoop/hbase/master/ src/main/java/org/apache/hadoop/hbase/monitoring/ src/main/java/org/apache/hadoop/hbase/regionserver/ src/test/java/org/...

Author: todd
Date: Tue Aug 30 19:27:24 2011
New Revision: 1163345

URL: http://svn.apache.org/viewvc?rev=1163345&view=rev
Log:
HBASE-4275  RS should communicate fatal "aborts" back to the master

Added:
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/monitoring/MemoryBoundedLogMessageBuffer.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/monitoring/TestMemoryBoundedLogMessageBuffer.java
Modified:
    hbase/trunk/CHANGES.txt
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/ipc/HMasterRegionInterface.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java

Modified: hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/trunk/CHANGES.txt?rev=1163345&r1=1163344&r2=1163345&view=diff
==============================================================================
--- hbase/trunk/CHANGES.txt (original)
+++ hbase/trunk/CHANGES.txt Tue Aug 30 19:27:24 2011
@@ -422,6 +422,7 @@ Release 0.91.0 - Unreleased
    HBASE-4291  Improve display of regions in transition in UI to be more
                readable (todd)
    HBASE-4281  Add facility to dump current state of all executors (todd)
+   HBASE-4275  RS should communicate fatal "aborts" back to the master (todd)
 
   TASKS
    HBASE-3559  Move report of split to master OFF the heartbeat channel

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/ipc/HMasterRegionInterface.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/ipc/HMasterRegionInterface.java?rev=1163345&r1=1163344&r2=1163345&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/ipc/HMasterRegionInterface.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/ipc/HMasterRegionInterface.java Tue Aug 30 19:27:24 2011
@@ -61,4 +61,12 @@ public interface HMasterRegionInterface 
    */
   public void regionServerReport(byte [] sn, HServerLoad hsl)
   throws IOException;
-}
\ No newline at end of file
+  
+  /**
+   * Called by a region server to tell the master about the fatal error that
+   * is causing it to abort; unlike regionServerReport, no IOException is declared.
+   * @param sn the reporting server's name, as produced by {@link ServerName#getBytes()}
+   * @param errorMessage informative text to expose in the master logs and UI
+   */
+  public void reportRSFatalError(byte [] sn, String errorMessage);
+}

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1163345&r1=1163344&r2=1163345&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Tue Aug 30 19:27:24 2011
@@ -73,6 +73,7 @@ import org.apache.hadoop.hbase.master.ha
 import org.apache.hadoop.hbase.master.handler.TableModifyFamilyHandler;
 import org.apache.hadoop.hbase.master.handler.CreateTableHandler;
 import org.apache.hadoop.hbase.master.metrics.MasterMetrics;
+import org.apache.hadoop.hbase.monitoring.MemoryBoundedLogMessageBuffer;
 import org.apache.hadoop.hbase.monitoring.MonitoredTask;
 import org.apache.hadoop.hbase.monitoring.TaskMonitor;
 import org.apache.hadoop.hbase.regionserver.HRegion;
@@ -156,6 +157,11 @@ implements HMasterInterface, HMasterRegi
   private CatalogTracker catalogTracker;
   // Cluster status zk tracker and local setter
   private ClusterStatusTracker clusterStatusTracker;
+  
+  // buffer for "fatal error" notices from region servers
+  // in the cluster. This is only used for assisting
+  // operations/debugging.
+  private MemoryBoundedLogMessageBuffer rsFatals;
 
   // This flag is for stopping this Master instance.  Its set when we are
   // stopping or aborting
@@ -223,6 +229,8 @@ implements HMasterInterface, HMasterRegi
     this.isa = this.rpcServer.getListenerAddress();
     this.serverName = new ServerName(this.isa.getHostName(),
       this.isa.getPort(), System.currentTimeMillis());
+    this.rsFatals = new MemoryBoundedLogMessageBuffer(
+        conf.getLong("hbase.master.buffer.for.rs.fatals", 1*1024*1024));
 
     // initialize server principal (if using secure Hadoop)
     User.login(conf, "hbase.master.keytab.file",
@@ -759,6 +767,15 @@ implements HMasterInterface, HMasterRegi
     }
   }
 
+  @Override // master-side handler for a region server's fatal-error report
+  public void reportRSFatalError(byte [] sn, String errorText) {
+    ServerName serverName = new ServerName(sn); // local; shadows the master's own serverName field
+    String msg = "Region server " + serverName + " reported a fatal error:\n"
+        + errorText;
+    LOG.error(msg); // surface it in the master's own log
+    rsFatals.add(msg); // and keep it in the size-bounded rsFatals buffer
+  }
+
   public boolean isMasterRunning() {
     return !isStopped();
   }
@@ -1207,6 +1224,10 @@ implements HMasterInterface, HMasterRegi
   public AssignmentManager getAssignmentManager() {
     return this.assignmentManager;
   }
+  
+  public MemoryBoundedLogMessageBuffer getRegionServerFatalLogBuffer() {
+    return rsFatals; // the live buffer itself, not a copy
+  }
 
   @Override
   public void shutdown() {

Added: hbase/trunk/src/main/java/org/apache/hadoop/hbase/monitoring/MemoryBoundedLogMessageBuffer.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/monitoring/MemoryBoundedLogMessageBuffer.java?rev=1163345&view=auto
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/monitoring/MemoryBoundedLogMessageBuffer.java (added)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/monitoring/MemoryBoundedLogMessageBuffer.java Tue Aug 30 19:27:24 2011
@@ -0,0 +1,114 @@
+/**
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.monitoring;
+
+import java.io.PrintWriter;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.LinkedList;
+import java.util.List;
+
+import com.google.common.base.Charsets;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+
+/**
+ * A size-bounded buffer of timestamped log messages, kept oldest-first
+ * in a linked list. Each message is charged an estimated heap cost; once
+ * the running total exceeds the configured limit, messages are evicted
+ * from the head, one by one. All four buffer methods are synchronized.
+ */
+public class MemoryBoundedLogMessageBuffer {
+  private final long maxSizeBytes; // eviction threshold, in estimated bytes
+  private long usage = 0; // sum of estimateHeapUsage() over retained messages
+  private LinkedList<LogMessage> messages; // head = oldest, tail = newest
+  
+  public MemoryBoundedLogMessageBuffer(long maxSizeBytes) {
+    Preconditions.checkArgument(
+        maxSizeBytes > 0); // the limit must be strictly positive
+    this.maxSizeBytes = maxSizeBytes;
+    this.messages = Lists.newLinkedList();
+  }
+  
+  /**
+   * Append the given message, stamped with the current time, then evict from
+   * the head until usage fits the limit (an oversized message evicts itself).
+   */
+  public synchronized void add(String messageText) {
+    LogMessage message = new LogMessage(messageText, System.currentTimeMillis());
+    // charge the newcomer first so the loop below sees the post-insert total
+    usage += message.estimateHeapUsage();
+    messages.add(message);
+    while (usage > maxSizeBytes) {
+      LogMessage removed = messages.remove(); // takes the head, i.e. the oldest
+      usage -= removed.estimateHeapUsage();
+      assert usage >= 0;
+    }
+  }
+  
+  /**
+   * Write the retained messages, oldest first, one "timestamp text" line each.
+   */
+  public synchronized void dumpTo(PrintWriter out) {
+    SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
+    // the formatter is local to this call, so nothing is shared between dumps
+    for (LogMessage msg : messages) {
+      out.write(df.format(new Date(msg.timestamp)));
+      out.write(" ");
+      out.println(new String(msg.message, Charsets.UTF_8)); // decode the stored bytes
+    }
+  }
+  
+  synchronized List<LogMessage> getMessages() { // package-private accessor
+    // defensive copy, so callers never touch the live list
+    return Lists.newArrayList(messages);
+  }
+ 
+  /**
+   * Report the running total of the per-message estimates; this is the
+   * bookkeeping figure that drives eviction, not a measured heap size.
+   */
+  synchronized long estimateHeapUsage() {
+    return usage;
+  }
+  
+  private static class LogMessage {
+    /** the message text, stored as UTF-8 bytes to save memory */
+    public final byte[] message;
+    public final long timestamp; // epoch millis supplied by the creator
+    
+    /**
+     * Completely non-scientific estimate of how much one of these
+     * objects takes, along with the LinkedList overhead. This doesn't
+     * need to be exact, since we don't expect a ton of these alerts.
+     */
+    private static final long BASE_USAGE=100;
+    
+    public LogMessage(String message, long timestamp) {
+      this.message = message.getBytes(Charsets.UTF_8);
+      this.timestamp = timestamp;
+    }
+    
+    public long estimateHeapUsage() {
+      return message.length + BASE_USAGE; // encoded length plus the fixed overhead guess
+    }
+  }
+
+}

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=1163345&r1=1163344&r2=1163345&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Tue Aug 30 19:27:24 2011
@@ -1471,16 +1471,29 @@ public class HRegionServer implements HR
    *          the exception that caused the abort, or null
    */
   public void abort(String reason, Throwable cause) {
+    String msg = "ABORTING region server " + this + ": " + reason; // reused for the report below
     if (cause != null) {
-      LOG.fatal("ABORTING region server " + this + ": " + reason, cause);
+      LOG.fatal(msg, cause);
     } else {
-      LOG.fatal("ABORTING region server " + this + ": " + reason);
+      LOG.fatal(msg);
     }
     this.abortRequested = true;
     this.reservedSpace.clear();
     if (this.metrics != null) {
       LOG.info("Dump of metrics: " + this.metrics);
     }
+    // Best effort: tell the master why we are aborting; a failure here is only logged
+    try {
+      if (cause != null) {
+        msg += "\nCause:\n" + StringUtils.stringifyException(cause); // extended after logging
+      }
+      if (hbaseMaster != null) { // skip the report when no master reference is set
+        hbaseMaster.reportRSFatalError(
+            this.serverNameFromMasterPOV.getBytes(), msg);
+      }
+    } catch (Throwable t) { // a failed report must not prevent stop(reason)
+      LOG.warn("Unable to report fatal error to master", t);
+    }
     stop(reason);
   }
 

Added: hbase/trunk/src/test/java/org/apache/hadoop/hbase/monitoring/TestMemoryBoundedLogMessageBuffer.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/monitoring/TestMemoryBoundedLogMessageBuffer.java?rev=1163345&view=auto
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/monitoring/TestMemoryBoundedLogMessageBuffer.java (added)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/monitoring/TestMemoryBoundedLogMessageBuffer.java Tue Aug 30 19:27:24 2011
@@ -0,0 +1,72 @@
+/**
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.monitoring;
+
+import static org.junit.Assert.*;
+
+import java.io.PrintWriter;
+import java.io.StringWriter;
+
+import org.junit.Test;
+
+/**
+ * Test case for the MemoryBoundedLogMessageBuffer utility.
+ * Ensures that it uses no more memory than it's supposed to,
+ * and that it properly deals with multibyte encodings.
+ */
+public class TestMemoryBoundedLogMessageBuffer {
+
+  private static final long TEN_KB = 10 * 1024;
+  private static final String JP_TEXT = "こんにちは";
+  
+  @Test // overfills a 10 KB buffer and checks that only the newest messages survive
+  public void testBuffer() {
+    MemoryBoundedLogMessageBuffer buf =
+      new MemoryBoundedLogMessageBuffer(TEN_KB);
+    // add far more messages than a 10 KB budget can retain
+    for (int i = 0; i < 1000; i++) {
+      buf.add("hello " + i);
+    }
+    assertTrue("Usage too big: " + buf.estimateHeapUsage(),
+        buf.estimateHeapUsage() < TEN_KB);
+    assertTrue("Too many retained: " + buf.getMessages().size(),
+        buf.getMessages().size() < 100);
+    StringWriter sw = new StringWriter();
+    buf.dumpTo(new PrintWriter(sw));
+    String dump = sw.toString(); // lines end with line.separator (println), not always "\n"
+    System.out.println(dump);
+    assertFalse("The early log messages should be evicted",
+        dump.contains("hello 1" + System.getProperty("line.separator")));
+    assertTrue("The late log messages should be retained",
+        dump.contains("hello 999" + System.getProperty("line.separator")));
+  }
+  
+  @Test // a multibyte message must come back from dumpTo() unchanged
+  public void testNonAsciiEncoding() {
+    MemoryBoundedLogMessageBuffer buf =
+      new MemoryBoundedLogMessageBuffer(TEN_KB);
+    // a single short message, well within the budget
+    buf.add(JP_TEXT);
+    StringWriter sw = new StringWriter();
+    buf.dumpTo(new PrintWriter(sw));
+    String dump = sw.toString();
+    assertTrue(dump.contains(JP_TEXT));
+  }
+}