You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by mb...@apache.org on 2015/10/05 06:10:11 UTC

[2/3] hbase git commit: HBASE-14547 Add more debug/trace to zk-procedure

HBASE-14547 Add more debug/trace to zk-procedure


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/86b505c2
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/86b505c2
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/86b505c2

Branch: refs/heads/branch-1
Commit: 86b505c2245bda30a8a4d8108db072876a3c2de6
Parents: b9bfd6f
Author: Matteo Bertozzi <ma...@cloudera.com>
Authored: Fri Oct 2 15:37:42 2015 -0700
Committer: Matteo Bertozzi <ma...@cloudera.com>
Committed: Sun Oct 4 20:56:01 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hbase/procedure/Procedure.java       |  1 +
 .../hbase/procedure/ProcedureCoordinator.java   | 14 ++++++++-
 .../hadoop/hbase/procedure/ProcedureMember.java |  5 +++-
 .../procedure/ZKProcedureCoordinatorRpcs.java   | 31 +++++++++++++-------
 .../hadoop/hbase/procedure/ZKProcedureUtil.java |  8 +++++
 .../snapshot/FlushSnapshotSubprocedure.java     |  1 +
 .../snapshot/RegionServerSnapshotManager.java   |  2 +-
 7 files changed, 49 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/86b505c2/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/Procedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/Procedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/Procedure.java
index 198bcdd..0279a60 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/Procedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/Procedure.java
@@ -213,6 +213,7 @@ public class Procedure implements Callable<Void>, ForeignExceptionListener {
       sendGlobalBarrierReached();
 
       // wait for all members to report barrier release
+      LOG.debug("Waiting for all members to 'release'");
       waitForLatch(releasedBarrierLatch, monitor, wakeFrequency, "released");
 
       // make sure we didn't get an error during in barrier execution and release

http://git-wip-us.apache.org/repos/asf/hbase/blob/86b505c2/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureCoordinator.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureCoordinator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureCoordinator.java
index d55fc85..b7e0c04 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureCoordinator.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureCoordinator.java
@@ -174,6 +174,7 @@ public class ProcedureCoordinator {
     // kick off the procedure's execution in a separate thread
     try {
       if (this.procedures.putIfAbsent(procName, proc) == null) {
+        LOG.debug("Submitting procedure " + procName);
         this.pool.submit(proc);
         return true;
       } else {
@@ -200,11 +201,16 @@ public class ProcedureCoordinator {
   void rpcConnectionFailure(final String message, final IOException cause) {
     Collection<Procedure> toNotify = procedures.values();
 
+    boolean isTraceEnabled = LOG.isTraceEnabled();
+    LOG.debug("received connection failure: " + message, cause);
     for (Procedure proc : toNotify) {
       if (proc == null) {
         continue;
       }
       // notify the elements, if they aren't null
+      if (isTraceEnabled) {
+        LOG.trace("connection failure - notify procedure: " + proc.getName());
+      }
       proc.receive(new ForeignException(proc.getName(), cause));
     }
   }
@@ -215,6 +221,7 @@ public class ProcedureCoordinator {
    * @param reason serialized information about the abort
    */
   public void abortProcedure(String procName, ForeignException reason) {
+    LOG.debug("abort procedure " + procName, reason);
     // if we know about the Procedure, notify it
     Procedure proc = procedures.get(procName);
     if (proc == null) {
@@ -272,7 +279,9 @@ public class ProcedureCoordinator {
       LOG.warn("Member '"+ member +"' is trying to acquire an unknown procedure '"+ procName +"'");
       return;
     }
-
+    if (LOG.isTraceEnabled()) {
+      LOG.trace("Member '"+ member +"' acquired procedure '"+ procName +"'");
+    }
     proc.barrierAcquiredByMember(member);
   }
 
@@ -289,6 +298,9 @@ public class ProcedureCoordinator {
       LOG.warn("Member '"+ member +"' is trying to release an unknown procedure '"+ procName +"'");
       return;
     }
+    if (LOG.isTraceEnabled()) {
+      LOG.trace("Member '"+ member +"' released procedure '"+ procName +"'");
+    }
     proc.barrierReleasedByMember(member, dataFromMember);
   }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/86b505c2/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureMember.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureMember.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureMember.java
index 1f22022..485821e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureMember.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureMember.java
@@ -176,9 +176,12 @@ public class ProcedureMember implements Closeable {
    public void receivedReachedGlobalBarrier(String procName) {
      Subprocedure subproc = subprocs.get(procName);
      if (subproc == null) {
-       LOG.warn("Unexpected reached glabal barrier message for Sub-Procedure '" + procName + "'");
+       LOG.warn("Unexpected reached globa barrier message for Sub-Procedure '" + procName + "'");
        return;
      }
+     if (LOG.isTraceEnabled()) {
+      LOG.trace("reached global barrier message for Sub-Procedure '" + procName + "'");
+     }
      subproc.receiveReachedGlobalBarrier();
    }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/86b505c2/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureCoordinatorRpcs.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureCoordinatorRpcs.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureCoordinatorRpcs.java
index fedd595..085d642 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureCoordinatorRpcs.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureCoordinatorRpcs.java
@@ -84,8 +84,9 @@ public class ZKProcedureCoordinatorRpcs implements ProcedureCoordinatorRpcs {
       // If we get an abort node watch triggered here, we'll go complete creating the acquired
       // znode but then handle the acquire znode and bail out
     } catch (KeeperException e) {
-      LOG.error("Failed to watch abort", e);
-      throw new IOException("Failed while watching abort node:" + abortNode, e);
+      String msg = "Failed while watching abort node:" + abortNode;
+      LOG.error(msg, e);
+      throw new IOException(msg, e);
     }
 
     // create the acquire barrier
@@ -104,7 +105,9 @@ public class ZKProcedureCoordinatorRpcs implements ProcedureCoordinatorRpcs {
         }
       }
     } catch (KeeperException e) {
-      throw new IOException("Failed while creating acquire node:" + acquire, e);
+      String msg = "Failed while creating acquire node:" + acquire;
+      LOG.error(msg, e);
+      throw new IOException(msg, e);
     }
   }
 
@@ -124,9 +127,10 @@ public class ZKProcedureCoordinatorRpcs implements ProcedureCoordinatorRpcs {
           // ProtobufUtil.isPBMagicPrefix will check null
           if (dataFromMember != null && dataFromMember.length > 0) {
             if (!ProtobufUtil.isPBMagicPrefix(dataFromMember)) {
-              throw new IOException(
-                "Failed to get data from finished node or data is illegally formatted: "
-                    + znode);
+              String msg =
+                "Failed to get data from finished node or data is illegally formatted: " + znode;
+              LOG.error(msg);
+              throw new IOException(msg);
             } else {
               dataFromMember = Arrays.copyOfRange(dataFromMember, ProtobufUtil.lengthOfPBMagic(),
                 dataFromMember.length);
@@ -138,9 +142,13 @@ public class ZKProcedureCoordinatorRpcs implements ProcedureCoordinatorRpcs {
         }
       }
     } catch (KeeperException e) {
-      throw new IOException("Failed while creating reached node:" + reachedNode, e);
+      String msg = "Failed while creating reached node:" + reachedNode;
+      LOG.error(msg, e);
+      throw new IOException(msg, e);
     } catch (InterruptedException e) {
-      throw new InterruptedIOException("Interrupted while creating reached node:" + reachedNode);
+      String msg = "Interrupted while creating reached node:" + reachedNode;
+      LOG.error(msg, e);
+      throw new InterruptedIOException(msg);
     }
   }
 
@@ -162,7 +170,9 @@ public class ZKProcedureCoordinatorRpcs implements ProcedureCoordinatorRpcs {
         // children trickling in
         stillGettingNotifications = true;
       } catch (KeeperException e) {
-        throw new IOException("Failed to complete reset procedure " + procName, e);
+        String msg = "Failed to complete reset procedure " + procName;
+        LOG.error(msg, e);
+        throw new IOException(msg, e);
       }
     } while (stillGettingNotifications);
   }
@@ -282,7 +292,8 @@ public class ZKProcedureCoordinatorRpcs implements ProcedureCoordinatorRpcs {
         LOG.warn("Got an error notification for op:" + abortNode
             + " but we can't read the information. Killing the procedure.");
         // we got a remote exception, but we can't describe it
-        ee = new ForeignException(coordName, "Data in abort node is illegally formatted.  ignoring content.");
+        ee = new ForeignException(coordName,
+          "Data in abort node is illegally formatted.  ignoring content.");
       } else {
 
         data = Arrays.copyOfRange(data, ProtobufUtil.lengthOfPBMagic(), data.length);

http://git-wip-us.apache.org/repos/asf/hbase/blob/86b505c2/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureUtil.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureUtil.java
index 8171218..56983ff 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureUtil.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureUtil.java
@@ -275,6 +275,10 @@ public abstract class ZKProcedureUtil
     // harder to figure out how to keep an procedure going and the subject of HBASE-5487.
     ZKUtil.deleteChildrenRecursivelyMultiOrSequential(watcher, true, acquiredZnode, reachedZnode,
       abortZnode);
+
+    if (LOG.isTraceEnabled()) {
+      logZKTree(this.baseZNode);
+    }
   }
 
   public void clearZNodes(String procedureName) throws KeeperException {
@@ -291,5 +295,9 @@ public abstract class ZKProcedureUtil
 
     ZKUtil.deleteNodeRecursivelyMultiOrSequential(watcher, true, acquiredBarrierNode,
       reachedBarrierNode, abortZNode);
+
+    if (LOG.isTraceEnabled()) {
+      logZKTree(this.baseZNode);
+    }
   }
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/86b505c2/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/FlushSnapshotSubprocedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/FlushSnapshotSubprocedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/FlushSnapshotSubprocedure.java
index 998c1fb..f083601 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/FlushSnapshotSubprocedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/FlushSnapshotSubprocedure.java
@@ -139,6 +139,7 @@ public class FlushSnapshotSubprocedure extends Subprocedure {
     try {
       taskManager.waitForOutstandingTasks();
     } catch (InterruptedException e) {
+      LOG.error("got interrupted exception for " + getMemberName());
       throw new ForeignException(getMemberName(), e);
     }
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/86b505c2/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java
index f04feb1..5181f03 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java
@@ -175,7 +175,7 @@ public class RegionServerSnapshotManager extends RegionServerProcedureManager {
     // will hang and fail.
 
     LOG.debug("Launching subprocedure for snapshot " + snapshot.getName() + " from table "
-        + snapshot.getTable());
+        + snapshot.getTable() + " type " + snapshot.getType());
     ForeignExceptionDispatcher exnDispatcher = new ForeignExceptionDispatcher(snapshot.getName());
     Configuration conf = rss.getConfiguration();
     long timeoutMillis = conf.getLong(SNAPSHOT_TIMEOUT_MILLIS_KEY,