You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by mb...@apache.org on 2016/01/28 19:11:54 UTC

[4/6] hbase git commit: HBASE-15019 Replication stuck when HDFS is restarted.

HBASE-15019 Replication stuck when HDFS is restarted.


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/60c6b6df
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/60c6b6df
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/60c6b6df

Branch: refs/heads/0.98
Commit: 60c6b6df104030995754bb1470a0d5d3e20cf220
Parents: 444debd
Author: Matteo Bertozzi <ma...@cloudera.com>
Authored: Thu Jan 28 10:04:20 2016 -0800
Committer: Matteo Bertozzi <ma...@cloudera.com>
Committed: Thu Jan 28 10:04:20 2016 -0800

----------------------------------------------------------------------
 .../hbase/regionserver/wal/HLogFactory.java     | 11 +++--
 .../regionserver/ReplicationSource.java         | 30 +++++++++++--
 .../hbase/util/LeaseNotRecoveredException.java  | 47 ++++++++++++++++++++
 3 files changed, 81 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/60c6b6df/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogFactory.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogFactory.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogFactory.java
index e6107bf..6999f8e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogFactory.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogFactory.java
@@ -37,6 +37,7 @@ import org.apache.hadoop.hbase.regionserver.wal.HLog.Reader;
 import org.apache.hadoop.hbase.regionserver.wal.HLog.Writer;
 import org.apache.hadoop.hbase.util.CancelableProgressable;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.LeaseNotRecoveredException;
 
 @InterfaceAudience.Private
 public class HLogFactory {
@@ -46,12 +47,12 @@ public class HLogFactory {
         final Configuration conf) throws IOException {
       return new FSHLog(fs, root, logName, conf);
     }
-    
+
     public static HLog createHLog(final FileSystem fs, final Path root, final String logName,
         final String oldLogName, final Configuration conf) throws IOException {
       return new FSHLog(fs, root, logName, oldLogName, conf);
 }
-    
+
     public static HLog createHLog(final FileSystem fs, final Path root, final String logName,
         final Configuration conf, final List<WALActionsListener> listeners,
         final String prefix) throws IOException {
@@ -61,7 +62,7 @@ public class HLogFactory {
     public static HLog createMetaHLog(final FileSystem fs, final Path root, final String logName,
         final Configuration conf, final List<WALActionsListener> listeners,
         final String prefix) throws IOException {
-      return new FSHLog(fs, root, logName, HConstants.HREGION_OLDLOGDIR_NAME, 
+      return new FSHLog(fs, root, logName, HConstants.HREGION_OLDLOGDIR_NAME,
             conf, listeners, false, prefix, true);
     }
 
@@ -162,8 +163,10 @@ public class HLogFactory {
                   throw iioe;
                 }
               }
+              throw new LeaseNotRecoveredException(e);
+            } else {
+              throw e;
             }
-            throw e;
           }
         }
       } catch (IOException ie) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/60c6b6df/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java
index 97993bb..b7330aa 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java
@@ -50,7 +50,10 @@ import org.apache.hadoop.hbase.replication.ReplicationQueueInfo;
 import org.apache.hadoop.hbase.replication.ReplicationQueues;
 import org.apache.hadoop.hbase.replication.SystemTableWALEntryFilter;
 import org.apache.hadoop.hbase.replication.WALEntryFilter;
+import org.apache.hadoop.hbase.util.CancelableProgressable;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.LeaseNotRecoveredException;
 import org.apache.hadoop.hbase.util.Threads;
 import com.google.common.collect.Lists;
 import com.google.common.util.concurrent.ListenableFuture;
@@ -577,6 +580,11 @@ public class ReplicationSource extends Thread
           // TODO What happens the log is missing in both places?
         }
       }
+    } catch (LeaseNotRecoveredException lnre) {
+      // HBASE-15019 the WAL was not closed due to some hiccup.
+      LOG.warn(peerClusterZnode + " Try to recover the WAL lease " + currentPath, lnre);
+      recoverLease(conf, currentPath);
+      this.reader = null;
     } catch (IOException ioe) {
       if (ioe instanceof EOFException && isCurrentLogEmpty()) return true;
       LOG.warn(this.peerClusterZnode + " Got: ", ioe);
@@ -596,6 +604,22 @@ public class ReplicationSource extends Thread
     return true;
   }
 
+  private void recoverLease(final Configuration conf, final Path path) {
+    try {
+      final FileSystem dfs = FSUtils.getCurrentFileSystem(conf);
+      FSUtils fsUtils = FSUtils.getInstance(dfs, conf);
+      fsUtils.recoverFileLease(dfs, path, conf, new CancelableProgressable() {
+        @Override
+        public boolean progress() {
+          LOG.debug("recover WAL lease: " + path);
+          return isActive();
+        }
+      });
+    } catch (IOException e) {
+      LOG.warn("unable to recover lease for WAL: " + path, e);
+    }
+  }
+
   /*
    * Checks whether the current log file is empty, and it is not a recovered queue. This is to
    * handle scenario when in an idle cluster, there is no entry in the current log and we keep on
@@ -845,9 +869,9 @@ public class ReplicationSource extends Thread
      * @param p path to split
      * @return start time
      */
-    private long getTS(Path p) {
-      String[] parts = p.getName().split("\\.");
-      return Long.parseLong(parts[parts.length-1]);
+    private static long getTS(Path p) {
+      int tsIndex = p.getName().lastIndexOf('.') + 1;
+      return Long.parseLong(p.getName().substring(tsIndex));
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/60c6b6df/hbase-server/src/main/java/org/apache/hadoop/hbase/util/LeaseNotRecoveredException.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/LeaseNotRecoveredException.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/LeaseNotRecoveredException.java
new file mode 100644
index 0000000..6a72e42
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/LeaseNotRecoveredException.java
@@ -0,0 +1,47 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.HBaseIOException;
+
+/**
+ * Thrown when a file can't be opened even though the lease on it
+ * was expected to have been recovered.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Stable
+public class LeaseNotRecoveredException extends HBaseIOException {
+  public LeaseNotRecoveredException() {
+    super();
+  }
+
+  public LeaseNotRecoveredException(String message) {
+    super(message);
+  }
+
+  public LeaseNotRecoveredException(String message, Throwable cause) {
+      super(message, cause);
+  }
+
+  public LeaseNotRecoveredException(Throwable cause) {
+      super(cause);
+  }
+}