You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2011/10/06 22:10:08 UTC

svn commit: r1179809 - in /hbase/trunk: ./ src/main/java/org/apache/hadoop/hbase/master/ src/test/java/org/apache/hadoop/hbase/master/

Author: stack
Date: Thu Oct  6 20:10:08 2011
New Revision: 1179809

URL: http://svn.apache.org/viewvc?rev=1179809&view=rev
Log:
HBASE-4402 Retaining locality after restart broken

Modified:
    hbase/trunk/CHANGES.txt
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/DefaultLoadBalancer.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestDefaultLoadBalancer.java

Modified: hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/trunk/CHANGES.txt?rev=1179809&r1=1179808&r2=1179809&view=diff
==============================================================================
--- hbase/trunk/CHANGES.txt (original)
+++ hbase/trunk/CHANGES.txt Thu Oct  6 20:10:08 2011
@@ -341,6 +341,7 @@ Release 0.92.0 - Unreleased
                (Kay Kay)
    HBASE-4481  TestMergeTool failed in 0.92 build 20
    HBASE-4386  Fix a potential NPE in TaskMonitor (todd)
+   HBASE-4402  Retaining locality after restart broken
 
   TESTS
    HBASE-4450  test for number of blocks read: to serve as baseline for expected

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1179809&r1=1179808&r2=1179809&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Thu Oct  6 20:10:08 2011
@@ -620,7 +620,7 @@ public class AssignmentManager extends Z
           (System.currentTimeMillis() - 15000);
       LOG.debug("Handling transition=" + data.getEventType() +
         ", server=" + data.getOrigin() + ", region=" +
-          prettyPrintedRegionName +
+          (prettyPrintedRegionName == null? "null": prettyPrintedRegionName)  +
           (lateEvent? ", which is more than 15 seconds late" : ""));
       RegionState regionState = regionsInTransition.get(encodedName);
       switch (data.getEventType()) {

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/DefaultLoadBalancer.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/DefaultLoadBalancer.java?rev=1179809&r1=1179808&r2=1179809&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/DefaultLoadBalancer.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/DefaultLoadBalancer.java Thu Oct  6 20:10:08 2011
@@ -31,6 +31,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.NavigableMap;
 import java.util.Random;
+import java.util.Set;
 import java.util.TreeMap;
 
 import org.apache.commons.logging.Log;
@@ -46,7 +47,10 @@ import org.apache.hadoop.hbase.TableExis
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.util.Bytes;
 
+import com.google.common.base.Joiner;
+import com.google.common.collect.ArrayListMultimap;
 import com.google.common.collect.MinMaxPriorityQueue;
+import com.google.common.collect.Sets;
 
 /**
  * Makes decisions about the placement and movement of Regions across
@@ -576,20 +580,71 @@ public class DefaultLoadBalancer impleme
    */
   public Map<ServerName, List<HRegionInfo>> retainAssignment(
       Map<HRegionInfo, ServerName> regions, List<ServerName> servers) {
+    // Group all of the old assignments by their hostname.
+    // We can't group directly by ServerName since the servers all have
+    // new start-codes.
+    
+    // Group the servers by their hostname. It's possible we have multiple
+    // servers on the same host on different ports.
+    ArrayListMultimap<String, ServerName> serversByHostname =
+        ArrayListMultimap.create();
+    for (ServerName server : servers) {
+      serversByHostname.put(server.getHostname(), server);
+    }
+    
+    // Now come up with new assignments
     Map<ServerName, List<HRegionInfo>> assignments =
       new TreeMap<ServerName, List<HRegionInfo>>();
+    
     for (ServerName server : servers) {
       assignments.put(server, new ArrayList<HRegionInfo>());
     }
-    for (Map.Entry<HRegionInfo, ServerName> region : regions.entrySet()) {
-      ServerName sn = region.getValue();
-      if (sn != null && servers.contains(sn)) {
-        assignments.get(sn).add(region.getKey());
+    
+    // Collection of the hostnames that used to have regions
+    // assigned, but for which we no longer have any RS running
+    // after the cluster restart.
+    Set<String> oldHostsNoLongerPresent = Sets.newTreeSet();
+    
+    int numRandomAssignments = 0;
+    int numRetainedAssigments = 0;
+    for (Map.Entry<HRegionInfo, ServerName> entry : regions.entrySet()) {
+      HRegionInfo region = entry.getKey();
+      ServerName oldServerName = entry.getValue();
+      List<ServerName> localServers = new ArrayList<ServerName>();
+      if (oldServerName != null) {
+        localServers = serversByHostname.get(oldServerName.getHostname());
+      }
+      if (localServers.isEmpty()) {
+        // No servers on the new cluster match up with this hostname,
+        // assign randomly.
+        ServerName randomServer = servers.get(RANDOM.nextInt(servers.size()));
+        assignments.get(randomServer).add(region);
+        numRandomAssignments++;
+        if (oldServerName != null) oldHostsNoLongerPresent.add(oldServerName.getHostname());
+      } else if (localServers.size() == 1) {
+        // the usual case - one new server on same host
+        assignments.get(localServers.get(0)).add(region);
+        numRetainedAssigments++;
       } else {
-        int size = assignments.size();
-        assignments.get(servers.get(RANDOM.nextInt(size))).add(region.getKey());
+        // multiple new servers in the cluster on this same host
+        int size = localServers.size();
+        ServerName target = localServers.get(RANDOM.nextInt(size));
+        assignments.get(target).add(region);
+        numRetainedAssigments++;
       }
     }
+    
+    String randomAssignMsg = "";
+    if (numRandomAssignments > 0) {
+      randomAssignMsg = numRandomAssignments + " regions were assigned " +
+      		"to random hosts, since the old hosts for these regions are no " +
+      		"longer present in the cluster. These hosts were:\n  " +
+          Joiner.on("\n  ").join(oldHostsNoLongerPresent);
+    }
+    
+    LOG.info("Reassigned " + regions.size() + " regions. " +
+        numRetainedAssigments + " retained the pre-restart assignment. " +
+        randomAssignMsg);
     return assignments;
   }
 

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestDefaultLoadBalancer.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestDefaultLoadBalancer.java?rev=1179809&r1=1179808&r2=1179809&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestDefaultLoadBalancer.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestDefaultLoadBalancer.java Thu Oct  6 20:10:08 2011
@@ -277,7 +277,12 @@ public class TestDefaultLoadBalancer {
     Map<HRegionInfo, ServerName> existing =
       new TreeMap<HRegionInfo, ServerName>();
     for (int i = 0; i < regions.size(); i++) {
-      existing.put(regions.get(i), servers.get(i % servers.size()).getServerName());
+      ServerName sn = servers.get(i % servers.size()).getServerName();
+      // The old server would have had same host and port, but different
+      // start code!
+      ServerName snWithOldStartCode =
+        new ServerName(sn.getHostname(), sn.getPort(), sn.getStartcode() - 10);
+      existing.put(regions.get(i), snWithOldStartCode);
     }
     List<ServerName> listOfServerNames = getListOfServerNames(servers);
     Map<ServerName, List<HRegionInfo>> assignment =
@@ -296,9 +301,9 @@ public class TestDefaultLoadBalancer {
     // Remove two of the servers that were previously there
     List<ServerAndLoad> servers3 =
       new ArrayList<ServerAndLoad>(servers);
-    servers3.remove(servers3.size()-1);
-    servers3.remove(servers3.size()-2);
-    listOfServerNames = getListOfServerNames(servers2);
+    servers3.remove(0);
+    servers3.remove(0);
+    listOfServerNames = getListOfServerNames(servers3);
     assignment = loadBalancer.retainAssignment(existing, listOfServerNames);
     assertRetainedAssignment(existing, listOfServerNames, assignment);
   }
@@ -338,13 +343,20 @@ public class TestDefaultLoadBalancer {
     assertEquals(existing.size(), assignedRegions.size());
 
     // Verify condition 2, if server had existing assignment, must have same
-    Set<ServerName> onlineAddresses = new TreeSet<ServerName>();
-    for (ServerName s : servers) onlineAddresses.add(s);
+    Set<String> onlineHostNames = new TreeSet<String>();
+    for (ServerName s : servers) {
+      onlineHostNames.add(s.getHostname());
+    }
+    
     for (Map.Entry<ServerName, List<HRegionInfo>> a : assignment.entrySet()) {
+      ServerName assignedTo = a.getKey();
       for (HRegionInfo r : a.getValue()) {
         ServerName address = existing.get(r);
-        if (address != null && onlineAddresses.contains(address)) {
-          assertTrue(a.getKey().equals(address));
+        if (address != null && onlineHostNames.contains(address.getHostname())) {
+          // this region was prevously assigned somewhere, and that
+          // host is still around, then it should be re-assigned on the
+          // same host
+          assertEquals(address.getHostname(), assignedTo.getHostname());
         }
       }
     }
@@ -470,7 +482,7 @@ public class TestDefaultLoadBalancer {
       ServerName sn = this.serverQueue.poll();
       return new ServerAndLoad(sn, numRegionsPerServer);
     }
-    String host = "127.0.0.1";
+    String host = "server" + rand.nextInt(100000);
     int port = rand.nextInt(60000);
     long startCode = rand.nextLong();
     ServerName sn = new ServerName(host, port, startCode);