You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by mb...@apache.org on 2012/09/08 14:51:34 UTC
svn commit: r1382294 - in /hbase/branches/0.89-fb/src:
main/java/org/apache/hadoop/hbase/master/
main/java/org/apache/hadoop/hbase/util/ test/java/org/apache/hadoop/hbase/
test/java/org/apache/hadoop/hbase/util/
Author: mbautin
Date: Sat Sep 8 12:51:34 2012
New Revision: 1382294
URL: http://svn.apache.org/viewvc?rev=1382294&view=rev
Log:
[HBASE-6741] [0.89-fb] Detect duplicate assignments more accurately.
Author: aaiyer
Summary:
We have seen cases where multiple notifications from ZK about a
region being OPENED at a particular RS result in the master
believing that the region is duplicately assigned -- even though
both notifications mention the same RS (in the data).
There are two ways to fix this:
- detect and fix why we are seeing multiple notifications, and/or
- fix the duplicate assignment logic to handle multiple notifications
from the same server.
This diff does the latter. Whenever a region is opened, we cache the location
where the region was opened. If we get a second notification for the region
being opened at the same server, then we will not consider it to be a
duplicate assignment.
Test Plan:
Add a test using injection handler framework to duplicate the
region opened events.
Reviewers: kannan, kranganathan, mbautin
Reviewed By: mbautin
CC: hbase-eng@, pkhemani
Differential Revision: https://phabricator.fb.com/D565749
Added:
hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/DuplicateZKNotificationInjectionHandler.java
Modified:
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/InjectionEvent.java
hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java?rev=1382294&r1=1382293&r2=1382294&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java Sat Sep 8 12:51:34 2012
@@ -33,6 +33,7 @@ import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
@@ -114,6 +115,19 @@ public class RegionManager {
final SortedMap<String, RegionState> regionsInTransition =
Collections.synchronizedSortedMap(new TreeMap<String, RegionState>());
+ /** Serves as a cache for locating where a particular region is open.
+ * Currently being used to distinguish legitimate duplicate assignments from
+ * spurious ones, that may seem to occur if a ZK notification is received
+ * twice.
+ *
+ * maps regionName --> serverName
+ *
+ * Note: This is a temporary hack. Should be safe to remove once we get
+ * rid of duplicate notifications from ZK.
+ *
+ * NOTE(review): this change only shows entries being added or overwritten;
+ * confirm whether entries for regions that go away are removed elsewhere.
+ */
+ final ConcurrentMap<String, String> regionLocationHintToDetectDupAssignment =
+ new ConcurrentHashMap<String, String>();
+
// regions in transition are also recorded in ZK using the zk wrapper
final ZooKeeperWrapper zkWrapper;
@@ -1057,8 +1071,8 @@ public class RegionManager {
int prefixlen = META_REGION_PREFIX.length;
if (row.length > prefixlen &&
Bytes.compareTo(META_REGION_PREFIX, 0, prefixlen, row, 0, prefixlen) == 0) {
- return new MetaRegion(this.master.getRegionManager().getRootRegionLocation(),
- HRegionInfo.ROOT_REGIONINFO);
+ return new MetaRegion(this.master.getRegionManager().getRootRegionLocation(),
+ HRegionInfo.ROOT_REGIONINFO);
}
return this.onlineMetaRegions.floorEntry(row).getValue();
}
@@ -1442,12 +1456,24 @@ public class RegionManager {
if (s != null) {
s.setOpen();
this.master.getMetrics().incRegionsOpened();
+ this.regionLocationHintToDetectDupAssignment.put(regionName, s.serverName);
}
}
}
/**
+ * Tells whether the location hint recorded for a region names the given
+ * server, i.e. whether the region's most recently recorded open happened
+ * there.
+ * @param regionName name of the region to look up
+ * @param serverName name of the server to compare the hint against
+ * @return true if regionName was last opened at serverName; false if no
+ *         location is recorded for the region or it names another server
+ */
+ public boolean lastOpenedAt(String regionName, String serverName) {
+   final String lastServer =
+       this.regionLocationHintToDetectDupAssignment.get(regionName);
+   if (lastServer == null) {
+     // nothing recorded for this region yet
+     return false;
+   }
+   return lastServer.equals(serverName);
+ }
+
+ /**
* @param regionName
* @return true if region is marked to be offlined.
*/
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java?rev=1382294&r1=1382293&r2=1382294&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java Sat Sep 8 12:51:34 2012
@@ -745,6 +745,15 @@ public class ServerManager {
// A duplicate report from the correct server
return;
}
+
+ // Do not consider this a duplicate assignment if we are getting a notification
+ // for the same server twice
+ if (regionManager.lastOpenedAt(region.getRegionNameAsString(),
+ serverInfo.getServerName())) {
+ LOG.warn("Multiple REGION_OPENED notifications for region: " + region + " opened on " +
+ serverInfo);
+ return;
+ }
duplicateAssignment = true;
}
}
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java?rev=1382294&r1=1382293&r2=1382294&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java Sat Sep 8 12:51:34 2012
@@ -34,6 +34,8 @@ import org.apache.hadoop.hbase.util.Para
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.hbase.zookeeper.ZNodeEventData;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper;
+import org.apache.hadoop.hbase.util.InjectionEvent;
+import org.apache.hadoop.hbase.util.InjectionHandler;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
@@ -167,8 +169,8 @@ public class ZKUnassignedWatcher impleme
handleRegionStateInZK(eventType, zNodePath, data, true);
}
- void handleRegionStateInZK(EventType eventType, String zNodePath, byte[] data, boolean canDefer)
- throws IOException {
+ public void handleRegionStateInZK(EventType eventType, String zNodePath,
+ byte[] data, boolean canDefer) throws IOException {
// a null value is set when a node is created, we don't need to handle this
if(data == null) {
return;
@@ -214,6 +216,12 @@ public class ZKUnassignedWatcher impleme
rsEvent == HBaseEventType.RS2ZK_REGION_OPENING) {
new MasterOpenRegionHandler(rsEvent, serverManager, serverName, region,
data).submit();
+
+ // For testing purposes
+ if (rsEvent == HBaseEventType.RS2ZK_REGION_OPENED) {
+ InjectionHandler.processEvent(InjectionEvent.ZKUNASSIGNEDWATCHER_REGION_OPENED,
+ this, eventType, zNodePath, data);
+ }
}
}
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/InjectionEvent.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/InjectionEvent.java?rev=1382294&r1=1382293&r2=1382294&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/InjectionEvent.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/InjectionEvent.java Sat Sep 8 12:51:34 2012
@@ -31,5 +31,6 @@ public enum InjectionEvent {
HMASTER_DELETE_TABLE,
HMASTER_ALTER_TABLE,
HMASTER_ENABLE_TABLE,
- HMASTER_DISABLE_TABLE
+ HMASTER_DISABLE_TABLE,
+ ZKUNASSIGNEDWATCHER_REGION_OPENED
}
Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java?rev=1382294&r1=1382293&r2=1382294&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java (original)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java Sat Sep 8 12:51:34 2012
@@ -32,6 +32,9 @@ import org.apache.hadoop.hbase.client.Pu
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.DuplicateZKNotificationInjectionHandler;
+import org.apache.hadoop.hbase.util.InjectionEvent;
+import org.apache.hadoop.hbase.util.InjectionHandler;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
/**
@@ -116,6 +119,26 @@ public class TestRegionRebalancing exten
assertRegionsAreBalanced();
}
+ /**
+ * Runs the same scenario as testRebalancing, but with an injection
+ * handler installed that duplicates region-opened notifications with
+ * probability 0.05, to check that duplicated notifications are tolerated.
+ *
+ * @throws IOException
+ */
+ public void testRebalancingWithDuplicateNotification() throws IOException {
+   final DuplicateZKNotificationInjectionHandler dupHandler =
+       new DuplicateZKNotificationInjectionHandler();
+   dupHandler.duplicateEvent(InjectionEvent.ZKUNASSIGNEDWATCHER_REGION_OPENED);
+   dupHandler.setProbability(0.05);
+   InjectionHandler.set(dupHandler);
+
+   testRebalancing();
+
+   // the run only proves something if at least one event was duplicated
+   final long duplicated = dupHandler.getDuplicatedEventCnt();
+   assertTrue(duplicated > 0);
+ }
+
private void checkingServerStatus() {
List<HRegionServer> servers = getOnlineRegionServers();
double avg = cluster.getMaster().getAverageLoad();
Added: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/DuplicateZKNotificationInjectionHandler.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/DuplicateZKNotificationInjectionHandler.java?rev=1382294&view=auto
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/DuplicateZKNotificationInjectionHandler.java (added)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/DuplicateZKNotificationInjectionHandler.java Sat Sep 8 12:51:34 2012
@@ -0,0 +1,100 @@
+/**
+ * Copyright The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Random;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.executor.HBaseEventHandler.HBaseEventType;
+import org.apache.hadoop.hbase.master.ServerManager;
+import org.apache.hadoop.hbase.master.ZKUnassignedWatcher;
+import org.apache.zookeeper.Watcher.Event.EventType;
+
+/**
+ * Test-only injection handler that replays ZK notifications for the events
+ * registered through {@link #duplicateEvent(InjectionEvent)}, each with the
+ * probability set through {@link #setProbability(double)}.
+ *
+ * The event arguments are expected in the order passed by
+ * ZKUnassignedWatcher: the watcher itself, the ZK event type, the znode
+ * path and the znode data.
+ *
+ * NOTE(review): the counter and the probability are plain fields; if events
+ * are delivered on a different thread than the one reading the counter,
+ * confirm that visibility is guaranteed by other synchronization.
+ */
+public class DuplicateZKNotificationInjectionHandler extends InjectionHandler {
+
+  private static final Log LOG =
+      LogFactory.getLog(DuplicateZKNotificationInjectionHandler.class);
+
+  /** Events that are candidates for duplication. */
+  private final Collection<InjectionEvent> events;
+
+  /** Chance that a registered event is duplicated; stays 0.0 until set. */
+  private double probability;
+  private final Random rand;
+  private long duplicatedEventCnt;
+
+  /** @return the probability with which a registered event is duplicated */
+  public double getProbability() {
+    return probability;
+  }
+
+  /**
+   * @param d probability that a registered event is duplicated; it is
+   *          compared against a draw from {@link Random#nextDouble()}
+   */
+  public void setProbability(double d) {
+    this.probability = d;
+  }
+
+  /** Creates a handler with a fixed seed, so that runs are repeatable. */
+  public DuplicateZKNotificationInjectionHandler() {
+    this(23434234);
+  }
+
+  /**
+   * @param seed seed for the random generator that decides which events
+   *             get duplicated
+   */
+  public DuplicateZKNotificationInjectionHandler(long seed) {
+    events = new ArrayList<InjectionEvent>();
+    LOG.info("Using DuplicateZKNotificationInjectionHandler with seed " + seed);
+    rand = new Random(seed);
+    duplicatedEventCnt = 0;
+  }
+
+  /**
+   * Replays the notification behind a registered event with the configured
+   * probability; events that were not registered are only logged.
+   *
+   * @param event the injection event being processed
+   * @param args  for registered events: the ZKUnassignedWatcher, the ZK
+   *              EventType, the znode path (String) and the data (byte[])
+   */
+  @Override
+  protected void _processEvent(InjectionEvent event, Object... args) {
+    if (!events.contains(event)) {
+      LOG.warn("Unexpected event " + event + " for " + describe(args));
+      return;
+    }
+    if (rand.nextDouble() < probability) {
+      // let us duplicate the processing
+      duplicatedEventCnt++;
+      LOG.info("Duplicating event " + event + " for " + describe(args));
+      ZKUnassignedWatcher zk = (ZKUnassignedWatcher) args[0];
+      EventType eventType = (EventType) args[1];
+      String path = (String) args[2];
+      byte[] data = (byte[]) args[3];
+      try {
+        // NOTE(review): the replayed call presumably reaches this handler
+        // again, so a duplicate may itself be duplicated - confirm.
+        zk.handleRegionStateInZK(eventType, path, data, true);
+      } catch (IOException e) {
+        // the logger already records the stack trace
+        LOG.error("Caught exception handling ZK Event", e);
+      }
+    } else {
+      LOG.debug("Not duplicating event " + event + " for " + describe(args));
+    }
+  }
+
+  /**
+   * Renders the event arguments for log messages. Concatenating the array
+   * itself would only print its type and identity hash.
+   */
+  private static String describe(Object[] args) {
+    return java.util.Arrays.toString(args);
+  }
+
+  /** @return how many events have been duplicated so far */
+  public long getDuplicatedEventCnt() {
+    return duplicatedEventCnt;
+  }
+
+  /**
+   * Registers an event type as a candidate for duplication.
+   *
+   * @param event the event whose notifications may be replayed
+   */
+  public void duplicateEvent(InjectionEvent event) {
+    events.add(event);
+  }
+
+}