You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ap...@apache.org on 2019/01/22 02:46:34 UTC

[hbase] branch branch-1 updated: HBASE-21561 Backport HBASE-21413 (Empty meta log doesn't get split when restart whole cluster) to branch-1

This is an automated email from the ASF dual-hosted git repository.

apurtell pushed a commit to branch branch-1
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-1 by this push:
     new acdd23e  HBASE-21561 Backport HBASE-21413 (Empty meta log doesn't get split when restart whole cluster) to branch-1
acdd23e is described below

commit acdd23ec4cf1621ca3b0e8d6a59af9502acd7ace
Author: xcang <xc...@salesforce.com>
AuthorDate: Mon Jan 21 17:40:59 2019 -0800

    HBASE-21561 Backport HBASE-21413 (Empty meta log doesn't get split when restart whole cluster) to branch-1
    
    Signed-off-by: Andrew Purtell <ap...@apache.org>
---
 .../hadoop/hbase/master/MasterFileSystem.java      | 39 ++++++++++
 .../master/procedure/ServerCrashProcedure.java     |  3 +
 .../hadoop/hbase/wal/DefaultWALProvider.java       |  9 +++
 .../hbase/regionserver/TestCleanupMetaWAL.java     | 90 ++++++++++++++++++++++
 4 files changed, 141 insertions(+)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
index c1bd930..28e5801 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
@@ -652,4 +652,43 @@ public class MasterFileSystem {
   public void logFileSystemState(Log log) throws IOException {
     FSUtils.logFileSystemState(fs, rootdir, log);
   }
+
+  /**
+   * For meta region open and closed normally on a server, it may leave some meta
+   * WAL in the server's wal dir. Since meta region is no longer on this server,
+   * the SCP won't split those meta wals, just leaving them there. So deleting
+   * the wal dir will fail since the dir is not empty. We can safely archive those
+   * meta logs and then delete the dir. Best effort: failures are only logged.
+   * @param serverName the server to archive meta log
+   */
+  public void archiveMetaLog(final ServerName serverName) {
+    try {
+      Path logDir = new Path(this.rootdir,
+          DefaultWALProvider.getWALDirectoryName(serverName.toString()));
+      Path splitDir = logDir.suffix(DefaultWALProvider.SPLITTING_EXT);
+      if (fs.exists(splitDir)) { // nothing to do when there is no splitting dir
+        FileStatus[] logfiles = FSUtils.listStatus(fs, splitDir, META_FILTER);
+        if (logfiles != null) {
+          for (FileStatus status : logfiles) {
+            if (!status.isDir()) { // only plain files are moved to the archive dir
+              Path newPath = DefaultWALProvider.getWALArchivePath(this.oldLogDir,
+                  status.getPath());
+              if (!FSUtils.renameAndSetModifyTime(fs, status.getPath(), newPath)) {
+                LOG.warn("Unable to move  " + status.getPath() + " to " + newPath);
+              } else {
+                LOG.debug("Archived meta log " + status.getPath() + " to " + newPath);
+              }
+            }
+          }
+        }
+        if (!fs.delete(splitDir, false)) { // presumably non-recursive (false) - confirm
+          LOG.warn("Unable to delete log dir. Ignoring. " + splitDir);
+        }
+      }
+    } catch (IOException ie) { // logged and swallowed; never propagated to the caller
+      LOG.warn("Failed archiving meta log for server " + serverName, ie);
+    }
+  }
+
+
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
index 1fbc428..b6e7a7c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
@@ -438,6 +438,9 @@ implements ServerProcedureInterface {
     AssignmentManager am = env.getMasterServices().getAssignmentManager();
     // TODO: For Matteo. Below BLOCKs!!!! Redo so can relinquish executor while it is running.
     mfs.splitLog(this.serverName);
+    if (!carryingMeta) {
+      mfs.archiveMetaLog(this.serverName);
+    }
     am.getRegionStates().logSplit(this.serverName);
   }
 
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/DefaultWALProvider.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/DefaultWALProvider.java
index af5893d..505f8b0 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/DefaultWALProvider.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/DefaultWALProvider.java
@@ -407,4 +407,13 @@ public class DefaultWALProvider implements WALProvider {
     return name.substring(0, endIndex);
   }
 
+  /**
+   * Public so other classes (WALSplitter, MasterFileSystem#archiveMetaLog) can use it.
+   * @return archived location of WAL file p: {@code archiveDir} joined with p's file name
+   */
+  public static Path getWALArchivePath(Path archiveDir, Path p) {
+    return new Path(archiveDir, p.getName());
+  }
+
+
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCleanupMetaWAL.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCleanupMetaWAL.java
new file mode 100644
index 0000000..f3d56f5
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCleanupMetaWAL.java
@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import java.util.List;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.ProcedureInfo;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.master.MasterFileSystem;
+import org.apache.hadoop.hbase.protobuf.generated.ProcedureProtos;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.wal.DefaultWALProvider;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.junit.Assert.fail;
+
+@Category(MediumTests.class)
+public class TestCleanupMetaWAL { // no splitting WAL dir may remain after the old meta host dies
+  private static final Logger LOG = LoggerFactory.getLogger(TestCleanupMetaWAL.class);
+
+  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+
+  @BeforeClass
+  public static void before() throws Exception {
+    TEST_UTIL.startMiniCluster(2); // presumably 2 region servers so meta can move - confirm
+  }
+
+  @AfterClass
+  public static void after() throws Exception {
+    TEST_UTIL.shutdownMiniZKCluster(); // NOTE(review): looks like shutdownMiniCluster() - confirm
+  }
+
+  @Test
+  public void testCleanupMetaWAL() throws Exception {
+    TEST_UTIL.createTable(TableName.valueOf("test"), "cf");
+    HRegionServer serverWithMeta = TEST_UTIL.getMiniHBaseCluster()
+        .getRegionServer(TEST_UTIL.getMiniHBaseCluster().getServerWithMeta());
+    TEST_UTIL.getHBaseAdmin() // null destination: presumably the master picks the target
+        .move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(), null);
+    TEST_UTIL.getMiniHBaseCluster().killRegionServer(serverWithMeta.getServerName());
+    int count = 0;
+    boolean scpFinished = false;
+    while(count < 25 && !scpFinished) { // poll at most 25 times, sleeping 1s per iteration
+      List<ProcedureInfo> procs = TEST_UTIL.getMiniHBaseCluster().getMaster().listProcedures();
+      for(ProcedureInfo pi : procs) {
+        if(pi.getProcName().startsWith("ServerCrashProcedure") && pi.getProcState() ==
+            ProcedureProtos.ProcedureState.FINISHED){
+          LOG.info("SCP is finished: " + pi.getProcName());
+          scpFinished = true;
+          break; // leaves only the for loop; the sleep below still runs once more
+        }
+      }
+      Thread.sleep(1000);
+      count++;
+    }
+    // NOTE(review): scpFinished is never asserted; a timed-out poll still reaches the check below.
+    MasterFileSystem fs = TEST_UTIL.getMiniHBaseCluster().getMaster().getMasterFileSystem();
+    Path walPath = new Path(fs.getWALRootDir(), HConstants.HREGION_LOGDIR_NAME);
+    for (FileStatus status : FSUtils.listStatus(fs.getFileSystem(), walPath)) {
+      if (status.getPath().toString().contains(DefaultWALProvider.SPLITTING_EXT)) {
+        fail("Should not have splitting wal dir here:" + status); // leftover dir = regression
+      }
+    }
+  }
+}
\ No newline at end of file