You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2011/10/06 22:08:17 UTC
svn commit: r1179806 - in /hbase/branches/0.92: ./
src/main/java/org/apache/hadoop/hbase/master/
src/test/java/org/apache/hadoop/hbase/master/
Author: stack
Date: Thu Oct 6 20:08:16 2011
New Revision: 1179806
URL: http://svn.apache.org/viewvc?rev=1179806&view=rev
Log:
HBASE-4402 Retaining locality after restart broken
Modified:
hbase/branches/0.92/CHANGES.txt
hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/DefaultLoadBalancer.java
hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/master/TestDefaultLoadBalancer.java
Modified: hbase/branches/0.92/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/CHANGES.txt?rev=1179806&r1=1179805&r2=1179806&view=diff
==============================================================================
--- hbase/branches/0.92/CHANGES.txt (original)
+++ hbase/branches/0.92/CHANGES.txt Thu Oct 6 20:08:16 2011
@@ -324,6 +324,7 @@ Release 0.92.0 - Unreleased
(Kay Kay)
HBASE-4481 TestMergeTool failed in 0.92 build 20
HBASE-4386 Fix a potential NPE in TaskMonitor (todd)
+ HBASE-4402 Retaining locality after restart broken
TESTS
HBASE-4492 TestRollingRestart fails intermittently
Modified: hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1179806&r1=1179805&r2=1179806&view=diff
==============================================================================
--- hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Thu Oct 6 20:08:16 2011
@@ -620,7 +620,7 @@ public class AssignmentManager extends Z
(System.currentTimeMillis() - 15000);
LOG.debug("Handling transition=" + data.getEventType() +
", server=" + data.getOrigin() + ", region=" +
- prettyPrintedRegionName +
+ (prettyPrintedRegionName == null? "null": prettyPrintedRegionName) +
(lateEvent? ", which is more than 15 seconds late" : ""));
RegionState regionState = regionsInTransition.get(encodedName);
switch (data.getEventType()) {
Modified: hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/DefaultLoadBalancer.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/DefaultLoadBalancer.java?rev=1179806&r1=1179805&r2=1179806&view=diff
==============================================================================
--- hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/DefaultLoadBalancer.java (original)
+++ hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/DefaultLoadBalancer.java Thu Oct 6 20:08:16 2011
@@ -31,6 +31,7 @@ import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Random;
+import java.util.Set;
import java.util.TreeMap;
import org.apache.commons.logging.Log;
@@ -46,7 +47,10 @@ import org.apache.hadoop.hbase.TableExis
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.util.Bytes;
+import com.google.common.base.Joiner;
+import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.MinMaxPriorityQueue;
+import com.google.common.collect.Sets;
/**
* Makes decisions about the placement and movement of Regions across
@@ -576,20 +580,71 @@ public class DefaultLoadBalancer impleme
*/
public Map<ServerName, List<HRegionInfo>> retainAssignment(
Map<HRegionInfo, ServerName> regions, List<ServerName> servers) {
+ // Group all of the old assignments by their hostname.
+ // We can't group directly by ServerName since the servers all have
+ // new start-codes.
+
+ // Group the servers by their hostname. It's possible we have multiple
+ // servers on the same host on different ports.
+ ArrayListMultimap<String, ServerName> serversByHostname =
+ ArrayListMultimap.create();
+ for (ServerName server : servers) {
+ serversByHostname.put(server.getHostname(), server);
+ }
+
+ // Now come up with new assignments
Map<ServerName, List<HRegionInfo>> assignments =
new TreeMap<ServerName, List<HRegionInfo>>();
+
for (ServerName server : servers) {
assignments.put(server, new ArrayList<HRegionInfo>());
}
- for (Map.Entry<HRegionInfo, ServerName> region : regions.entrySet()) {
- ServerName sn = region.getValue();
- if (sn != null && servers.contains(sn)) {
- assignments.get(sn).add(region.getKey());
+
+ // Collection of the hostnames that used to have regions
+ // assigned, but for which we no longer have any RS running
+ // after the cluster restart.
+ Set<String> oldHostsNoLongerPresent = Sets.newTreeSet();
+
+ int numRandomAssignments = 0;
+ int numRetainedAssigments = 0;
+ for (Map.Entry<HRegionInfo, ServerName> entry : regions.entrySet()) {
+ HRegionInfo region = entry.getKey();
+ ServerName oldServerName = entry.getValue();
+ List<ServerName> localServers = new ArrayList<ServerName>();
+ if (oldServerName != null) {
+ localServers = serversByHostname.get(oldServerName.getHostname());
+ }
+ if (localServers.isEmpty()) {
+ // No servers on the new cluster match up with this hostname,
+ // assign randomly.
+ ServerName randomServer = servers.get(RANDOM.nextInt(servers.size()));
+ assignments.get(randomServer).add(region);
+ numRandomAssignments++;
+ if (oldServerName != null) oldHostsNoLongerPresent.add(oldServerName.getHostname());
+ } else if (localServers.size() == 1) {
+ // the usual case - one new server on same host
+ assignments.get(localServers.get(0)).add(region);
+ numRetainedAssigments++;
} else {
- int size = assignments.size();
- assignments.get(servers.get(RANDOM.nextInt(size))).add(region.getKey());
+ // multiple new servers in the cluster on this same host
+ int size = localServers.size();
+ ServerName target = localServers.get(RANDOM.nextInt(size));
+ assignments.get(target).add(region);
+ numRetainedAssigments++;
}
}
+
+ String randomAssignMsg = "";
+ if (numRandomAssignments > 0) {
+ randomAssignMsg = numRandomAssignments + " regions were assigned " +
+ "to random hosts, since the old hosts for these regions are no " +
+ "longer present in the cluster. These hosts were:\n " +
+ Joiner.on("\n ").join(oldHostsNoLongerPresent);
+ }
+
+ LOG.info("Reassigned " + regions.size() + " regions. " +
+ numRetainedAssigments + " retained the pre-restart assignment. " +
+ randomAssignMsg);
return assignments;
}
Modified: hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/master/TestDefaultLoadBalancer.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/master/TestDefaultLoadBalancer.java?rev=1179806&r1=1179805&r2=1179806&view=diff
==============================================================================
--- hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/master/TestDefaultLoadBalancer.java (original)
+++ hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/master/TestDefaultLoadBalancer.java Thu Oct 6 20:08:16 2011
@@ -277,7 +277,12 @@ public class TestDefaultLoadBalancer {
Map<HRegionInfo, ServerName> existing =
new TreeMap<HRegionInfo, ServerName>();
for (int i = 0; i < regions.size(); i++) {
- existing.put(regions.get(i), servers.get(i % servers.size()).getServerName());
+ ServerName sn = servers.get(i % servers.size()).getServerName();
+ // The old server would have had same host and port, but different
+ // start code!
+ ServerName snWithOldStartCode =
+ new ServerName(sn.getHostname(), sn.getPort(), sn.getStartcode() - 10);
+ existing.put(regions.get(i), snWithOldStartCode);
}
List<ServerName> listOfServerNames = getListOfServerNames(servers);
Map<ServerName, List<HRegionInfo>> assignment =
@@ -296,9 +301,9 @@ public class TestDefaultLoadBalancer {
// Remove two of the servers that were previously there
List<ServerAndLoad> servers3 =
new ArrayList<ServerAndLoad>(servers);
- servers3.remove(servers3.size()-1);
- servers3.remove(servers3.size()-2);
- listOfServerNames = getListOfServerNames(servers2);
+ servers3.remove(0);
+ servers3.remove(0);
+ listOfServerNames = getListOfServerNames(servers3);
assignment = loadBalancer.retainAssignment(existing, listOfServerNames);
assertRetainedAssignment(existing, listOfServerNames, assignment);
}
@@ -338,13 +343,20 @@ public class TestDefaultLoadBalancer {
assertEquals(existing.size(), assignedRegions.size());
// Verify condition 2, if server had existing assignment, must have same
- Set<ServerName> onlineAddresses = new TreeSet<ServerName>();
- for (ServerName s : servers) onlineAddresses.add(s);
+ Set<String> onlineHostNames = new TreeSet<String>();
+ for (ServerName s : servers) {
+ onlineHostNames.add(s.getHostname());
+ }
+
for (Map.Entry<ServerName, List<HRegionInfo>> a : assignment.entrySet()) {
+ ServerName assignedTo = a.getKey();
for (HRegionInfo r : a.getValue()) {
ServerName address = existing.get(r);
- if (address != null && onlineAddresses.contains(address)) {
- assertTrue(a.getKey().equals(address));
+ if (address != null && onlineHostNames.contains(address.getHostname())) {
+ // this region was prevously assigned somewhere, and that
+ // host is still around, then it should be re-assigned on the
+ // same host
+ assertEquals(address.getHostname(), assignedTo.getHostname());
}
}
}
@@ -470,7 +482,7 @@ public class TestDefaultLoadBalancer {
ServerName sn = this.serverQueue.poll();
return new ServerAndLoad(sn, numRegionsPerServer);
}
- String host = "127.0.0.1";
+ String host = "server" + rand.nextInt(100000);
int port = rand.nextInt(60000);
long startCode = rand.nextLong();
ServerName sn = new ServerName(host, port, startCode);