You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by bu...@apache.org on 2015/02/10 22:16:17 UTC

[1/2] hbase git commit: HBASE-13000 Backport print hdfs pipeline when hlog's sync is slow (Liu Shaohui)

Repository: hbase
Updated Branches:
  refs/heads/0.98 a1342abbb -> 8a5cdf997


HBASE-13000 Backport print hdfs pipeline when hlog's sync is slow (Liu Shaohui)

Amending-Author: Sean Busbey <bu...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/53e41dde
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/53e41dde
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/53e41dde

Branch: refs/heads/0.98
Commit: 53e41dde7128fb12cbec2b2a427d62f6e97ea8e2
Parents: a1342ab
Author: Sean Busbey <bu...@apache.org>
Authored: Tue Jul 1 15:05:03 2014 -0700
Committer: Sean Busbey <bu...@apache.org>
Committed: Tue Feb 10 00:21:16 2015 -0600

----------------------------------------------------------------------
 .../hadoop/hbase/regionserver/wal/FSHLog.java   | 68 +++++++++++++++++++-
 1 file changed, 67 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/53e41dde/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java
index b528871..0c5d025 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java
@@ -61,6 +61,7 @@ import org.apache.hadoop.hbase.util.DrainBarrier;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.HasThread;
+import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.util.StringUtils;
 import org.cloudera.htrace.Trace;
 import org.cloudera.htrace.TraceScope;
@@ -110,6 +111,8 @@ import com.google.common.annotations.VisibleForTesting;
 class FSHLog implements HLog, Syncable {
   static final Log LOG = LogFactory.getLog(FSHLog.class);
 
+  private static final int DEFAULT_SLOW_SYNC_TIME_MS = 100; // in ms
+
   private final FileSystem fs;
   private final Path rootDir;
   private final Path dir;
@@ -136,6 +139,9 @@ class FSHLog implements HLog, Syncable {
   // rollWriter will be triggered
   private int minTolerableReplication;
   private Method getNumCurrentReplicas; // refers to DFSOutputStream.getNumCurrentReplicas
+  private final Method getPipeLine; // refers to DFSOutputStream.getPipeLine
+  private final int slowSyncNs;
+
   final static Object [] NO_ARGS = new Object []{};
 
   /** The barrier used to ensure that close() waits for all log rolls and flushes to finish. */
@@ -416,8 +422,12 @@ class FSHLog implements HLog, Syncable {
     // rollWriter sets this.hdfs_out if it can.
     rollWriter();
 
+    this.slowSyncNs =
+        1000000 * conf.getInt("hbase.regionserver.hlog.slowsync.ms",
+          DEFAULT_SLOW_SYNC_TIME_MS);
     // handle the reflection necessary to call getNumCurrentReplicas()
     this.getNumCurrentReplicas = getGetNumCurrentReplicas(this.hdfs_out);
+    this.getPipeLine = getGetPipeline(this.hdfs_out);
 
     final String n = Thread.currentThread().getName();
 
@@ -1250,7 +1260,16 @@ class FSHLog implements HLog, Syncable {
 
             this.isSyncing = false;
           }
-          metrics.finishSync(EnvironmentEdgeManager.currentTimeMillis() - now);
+          final long took = EnvironmentEdgeManager.currentTimeMillis() - now;
+          metrics.finishSync(took);
+          if (took > (slowSyncNs/1000000)) {
+            String msg =
+                new StringBuilder().append("Slow sync cost: ")
+                    .append(took).append(" ms, current pipeline: ")
+                    .append(Arrays.toString(getPipeLine())).toString();
+            Trace.addTimelineAnnotation(msg);
+            LOG.info(msg);
+          }
 
           // 3. wake up AsyncNotifier to notify(wake-up) all pending 'put'
           // handler threads on 'sync()'
@@ -1696,4 +1715,51 @@ class FSHLog implements HLog, Syncable {
       System.exit(-1);
     }
   }
+
+  /**
+   * Find the 'getPipeline' on the passed <code>os</code> stream.
+   * @return Method or null.
+   */
+  private Method getGetPipeline(final FSDataOutputStream os) {
+    Method m = null;
+    if (os != null) {
+      Class<? extends OutputStream> wrappedStreamClass = os.getWrappedStream()
+          .getClass();
+      try {
+        m = wrappedStreamClass.getDeclaredMethod("getPipeline",
+          new Class<?>[] {});
+        m.setAccessible(true);
+      } catch (NoSuchMethodException e) {
+        LOG.info("FileSystem's output stream doesn't support"
+            + " getPipeline; not available; fsOut="
+            + wrappedStreamClass.getName());
+      } catch (SecurityException e) {
+        LOG.info(
+          "Doesn't have access to getPipeline on "
+              + "FileSystems's output stream ; fsOut="
+              + wrappedStreamClass.getName(), e);
+        m = null; // could happen on setAccessible()
+      }
+    }
+    return m;
+  }
+
+  /**
+   * This method gets the pipeline for the current HLog.
+   * @return
+   */
+  DatanodeInfo[] getPipeLine() {
+    if (this.getPipeLine != null && this.hdfs_out != null) {
+      Object repl;
+      try {
+        repl = this.getPipeLine.invoke(getOutputStream(), NO_ARGS);
+        if (repl instanceof DatanodeInfo[]) {
+          return ((DatanodeInfo[]) repl);
+        }
+      } catch (Exception e) {
+        LOG.info("Get pipeline failed", e);
+      }
+    }
+    return new DatanodeInfo[0];
+  }
 }


[2/2] hbase git commit: HBASE-12997 print wal pipeline on low replication.

Posted by bu...@apache.org.
HBASE-12997 print wal pipeline on low replication.

Conflicts:
	hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/8a5cdf99
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/8a5cdf99
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/8a5cdf99

Branch: refs/heads/0.98
Commit: 8a5cdf9971da782374846547097180af5c7b66f4
Parents: 53e41dd
Author: Sean Busbey <bu...@apache.org>
Authored: Mon Feb 9 16:30:05 2015 -0600
Committer: Sean Busbey <bu...@apache.org>
Committed: Tue Feb 10 00:25:56 2015 -0600

----------------------------------------------------------------------
 .../java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java     | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/8a5cdf99/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java
index 0c5d025..fad4d29 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java
@@ -1403,7 +1403,8 @@ class FSHLog implements HLog, Syncable {
             LOG.warn("HDFS pipeline error detected. " + "Found "
                 + numCurrentReplicas + " replicas but expecting no less than "
                 + this.minTolerableReplication + " replicas. "
-                + " Requesting close of hlog.");
+                + " Requesting close of hlog. current pipeline: "
+                + Arrays.toString(getPipeLine()));
             logRollNeeded = true;
             // If rollWriter is requested, increase consecutiveLogRolls. Once it
             // is larger than lowReplicationRollLimit, disable the