You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by te...@apache.org on 2017/06/22 09:10:39 UTC

hbase git commit: HBASE-18167 OfflineMetaRepair tool may cause HMaster to abort always - revert wrong version of patch

Repository: hbase
Updated Branches:
  refs/heads/branch-1 940f4107b -> 3d9c54442


HBASE-18167 OfflineMetaRepair tool may cause HMaster to abort always - revert wrong version of patch


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/3d9c5444
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/3d9c5444
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/3d9c5444

Branch: refs/heads/branch-1
Commit: 3d9c54442865521ab7c7f923e0c475a7ced95035
Parents: 940f410
Author: tedyu <yu...@gmail.com>
Authored: Thu Jun 22 02:10:25 2017 -0700
Committer: tedyu <yu...@gmail.com>
Committed: Thu Jun 22 02:10:25 2017 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hbase/MetaTableAccessor.java  | 29 --------
 .../hadoop/hbase/master/AssignmentManager.java  | 73 +++-----------------
 .../util/hbck/TestOfflineMetaRebuildBase.java   | 66 +-----------------
 3 files changed, 11 insertions(+), 157 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/3d9c5444/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
----------------------------------------------------------------------
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
index 04ab430..2bbae15 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
@@ -25,7 +25,6 @@ import java.io.InterruptedIOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.NavigableMap;
@@ -576,34 +575,6 @@ public class MetaTableAccessor {
     return hris;
   }
 
-  /**
-   * Retrieve server names from meta table.
-   * @param connection connection we're using
-   * @return List of region servers.
-   * @throws IOException
-   */
-  public static Set<ServerName> getServerNames(Connection connection) throws IOException {
-    final Set<ServerName> serverNames = new HashSet<ServerName>();
-    // Fill the above serverNames set with server entries from hbase:meta
-    CollectingVisitor<Result> v = new CollectingVisitor<Result>() {
-      @Override
-          void add(Result r) {
-        if (r == null || r.isEmpty()) return;
-        RegionLocations locations = getRegionLocations(r);
-        if (locations == null) return;
-        for (HRegionLocation loc : locations.getRegionLocations()) {
-          if (loc != null) {
-            if (loc.getServerName() != null) {
-              serverNames.add(loc.getServerName());
-            }
-          }
-        }
-      }
-    };
-    fullScan(connection, v);
-    return serverNames;
-  }
-
   public static void fullScanMetaAndPrint(Connection connection)
     throws IOException {
     Visitor v = new Visitor() {

http://git-wip-us.apache.org/repos/asf/hbase/blob/3d9c5444/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
index 7927745..0a28967 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
@@ -634,10 +634,17 @@ public class AssignmentManager extends ZooKeeperListener {
       }
     }
 
+    Set<TableName> disabledOrDisablingOrEnabling = null;
     Map<HRegionInfo, ServerName> allRegions = null;
+
     if (!failover) {
-      // Retrieve user regions except tables region that are in disabled/disabling/enabling states.
-      allRegions = getUserRegionsToAssign();
+      disabledOrDisablingOrEnabling = tableStateManager.getTablesInStates(
+        ZooKeeperProtos.Table.State.DISABLED, ZooKeeperProtos.Table.State.DISABLING,
+        ZooKeeperProtos.Table.State.ENABLING);
+
+      // Clean re/start, mark all user regions closed before reassignment
+      allRegions = regionStates.closeAllUserRegions(
+        disabledOrDisablingOrEnabling);
     }
 
     // Now region states are restored
@@ -649,15 +656,6 @@ public class AssignmentManager extends ZooKeeperListener {
       // Process list of dead servers and regions in RIT.
       // See HBASE-4580 for more information.
       processDeadServersAndRecoverLostRegions(deadServers);
-
-      // Handle the scenario when meta is rebuilt by the OfflineMetaRepair tool.
-      // In this scenario, meta will have only info:regioninfo entries (won't contain info:server)
-      // which leads SSH to skip holding region assignment.
-      if (MetaTableAccessor.getServerNames(server.getConnection()).isEmpty()) {
-        // Need to assign the user region as a fresh startup, otherwise user region assignment will
-        // never happen
-        assignRegionsOnSSHCompletion();
-      }
     }
 
     if (!failover && useZKForAssignment) {
@@ -687,59 +685,6 @@ public class AssignmentManager extends ZooKeeperListener {
     return failover;
   }
 
-  /*
-   * At cluster clean re/start, mark all user regions closed except those of tables that are
-   * excluded, such as disabled/disabling/enabling tables. All user regions and their previous
-   * locations are returned.
-   */
-  private Map<HRegionInfo, ServerName> getUserRegionsToAssign()
-      throws InterruptedIOException, CoordinatedStateException {
-    Set<TableName> disabledOrDisablingOrEnabling =
-        tableStateManager.getTablesInStates(ZooKeeperProtos.Table.State.DISABLED,
-          ZooKeeperProtos.Table.State.DISABLING, ZooKeeperProtos.Table.State.ENABLING);
-
-    // Clean re/start, mark all user regions closed before reassignment
-    return regionStates.closeAllUserRegions(disabledOrDisablingOrEnabling);
-  }
-
-  /*
-   * Wait for SSH completion and assign user region which are not in disabled/disabling/enabling
-   * table states.
-   */
-  private void assignRegionsOnSSHCompletion() {
-    LOG.info("Meta is rebuild by OfflineMetaRepair tool, assigning all user regions.");
-    Thread regionAssignerThread = new Thread("RegionAssignerOnMetaRebuild") {
-      public void run() {
-        long sshTimeout =
-            server.getConfiguration().getLong("hbase.master.initializationmonitor.timeout", 900000);
-        long startTime = EnvironmentEdgeManager.currentTime();
-        // Wait until all dead server processing is done.
-        while (serverManager.areDeadServersInProgress()) {
-          if (EnvironmentEdgeManager.currentTime() - startTime > sshTimeout) {
-            LOG.warn(
-              "Couldn't assign the regions as SSH was not finished within the specified time in hbase.master.initializationmonitor.timeout parameter.");
-            return;
-          }
-          try {
-            Thread.sleep(100);
-          } catch (InterruptedException e) {
-            LOG.warn("RegionAssignerOnMetaRebuild got interrupted.", e);
-            break;
-          }
-        }
-        LOG.info("SSH has been completed for all dead servers, assigning the user regions.");
-        try {
-          // Assign the regions
-          assignAllUserRegions(getUserRegionsToAssign());
-        } catch (CoordinatedStateException | IOException | InterruptedException e) {
-          LOG.error("Exception occured while assigning user regions.", e);
-        }
-      };
-    };
-    regionAssignerThread.setDaemon(true);
-    regionAssignerThread.start();
-  }
-
   /**
    * If region is up in zk in transition, then do fixup and block and wait until
    * the region is assigned and out of transition.  Used on startup for

http://git-wip-us.apache.org/repos/asf/hbase/blob/3d9c5444/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/TestOfflineMetaRebuildBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/TestOfflineMetaRebuildBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/TestOfflineMetaRebuildBase.java
index 4c5d306..b31e20e 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/TestOfflineMetaRebuildBase.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/TestOfflineMetaRebuildBase.java
@@ -25,7 +25,6 @@ import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
 import java.util.Arrays;
-import java.util.List;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -34,17 +33,13 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.NamespaceDescriptor;
-import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.HBaseFsck;
 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
-import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
@@ -82,63 +77,6 @@ public class TestOfflineMetaRebuildBase extends OfflineMetaRebuildTestCore {
     // bring up the minicluster
     TEST_UTIL.startMiniZKCluster();
     TEST_UTIL.restartHBaseCluster(3);
-    validateMetaAndUserTableRows(1, 5);
-  }
-
-  @Test(timeout = 300000)
-  public void testHMasterStartupOnMetaRebuild() throws Exception {
-    // shutdown the minicluster
-    TEST_UTIL.shutdownMiniHBaseCluster();
-
-    // Assign meta in master and restart Hbase
-    TEST_UTIL.getConfiguration().set("hbase.balancer.tablesOnMaster", "hbase:meta");
-    // Set namespace initialization timeout
-    TEST_UTIL.getConfiguration().set("hbase.master.namespace.init.timeout", "150000");
-    TEST_UTIL.restartHBaseCluster(3);
-    TEST_UTIL.getMiniHBaseCluster().waitForActiveAndReadyMaster();
-
-    // Create namespace
-    TEST_UTIL.getHBaseAdmin().createNamespace(NamespaceDescriptor.create("ns1").build());
-    TEST_UTIL.getHBaseAdmin().createNamespace(NamespaceDescriptor.create("ns2").build());
-    // Create tables
-    TEST_UTIL.createTable(TableName.valueOf("ns1:testHMasterStartupOnMetaRebuild"),
-      Bytes.toBytes("cf1"));
-    TEST_UTIL.createTable(TableName.valueOf("ns2:testHMasterStartupOnMetaRebuild"),
-      Bytes.toBytes("cf1"));
-    // Flush meta
-    TEST_UTIL.flush(TableName.META_TABLE_NAME);
-
-    // HMaster graceful shutdown
-    TEST_UTIL.getHBaseCluster().getMaster().shutdown();
-
-    // Kill region servers
-    List<RegionServerThread> regionServerThreads =
-        TEST_UTIL.getHBaseCluster().getRegionServerThreads();
-    for (RegionServerThread regionServerThread : regionServerThreads) {
-      TEST_UTIL.getHBaseCluster()
-          .killRegionServer(regionServerThread.getRegionServer().getServerName());
-    }
-
-    // rebuild meta table from scratch
-    HBaseFsck fsck = new HBaseFsck(conf);
-    assertTrue(fsck.rebuildMeta(false));
-
-    // bring up the minicluster
-    TEST_UTIL.restartHBaseCluster(3);
-    validateMetaAndUserTableRows(3, 7);
-
-    // Remove table and namespace
-    TEST_UTIL.deleteTable("ns1:testHMasterStartupOnMetaRebuild");
-    TEST_UTIL.deleteTable("ns2:testHMasterStartupOnMetaRebuild");
-    TEST_UTIL.getHBaseAdmin().deleteNamespace("ns1");
-    TEST_UTIL.getHBaseAdmin().deleteNamespace("ns2");
-  }
-
-  /*
-   * Validate meta table region count and user table rows.
-   */
-  private void validateMetaAndUserTableRows(int totalTableCount, int totalRegionCount)
-      throws Exception {
     try (Connection connection = ConnectionFactory.createConnection(TEST_UTIL.getConfiguration())) {
       Admin admin = connection.getAdmin();
       admin.enableTable(table);
@@ -147,10 +85,10 @@ public class TestOfflineMetaRebuildBase extends OfflineMetaRebuildTestCore {
       LOG.info("No more RIT in ZK, now doing final test verification");
 
       // everything is good again.
-      assertEquals(totalRegionCount, scanMeta());
+      assertEquals(5, scanMeta());
       HTableDescriptor[] htbls = admin.listTables();
       LOG.info("Tables present after restart: " + Arrays.toString(htbls));
-      assertEquals(totalTableCount, htbls.length);
+      assertEquals(1, htbls.length);
     }
 
     assertErrors(doFsck(conf, false), new ERROR_CODE[] {});