You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by mb...@apache.org on 2012/09/08 14:51:34 UTC

svn commit: r1382294 - in /hbase/branches/0.89-fb/src: main/java/org/apache/hadoop/hbase/master/ main/java/org/apache/hadoop/hbase/util/ test/java/org/apache/hadoop/hbase/ test/java/org/apache/hadoop/hbase/util/

Author: mbautin
Date: Sat Sep  8 12:51:34 2012
New Revision: 1382294

URL: http://svn.apache.org/viewvc?rev=1382294&view=rev
Log:
[HBASE-6741] [0.89-fb] Detect duplicate assignments more accurately.

Author: aaiyer

Summary:
We have seen cases where multiple notifications from ZK about a
region being OPENED at a particular RS result in the master
believing that the region is duplicately assigned -- even though
all of the notifications name the same RS (in the data).

There are two ways to fix this:
 - detect and fix why we are seeing multiple notifications, and/or
 - fix the duplicate assignment logic to handle multiple notifications
 from the same server.

 This diff does the latter. Whenever a region is opened, we cache the server
on which it was opened. If we then get a second OPENED notification for the
same region naming the same server, we do not treat it as a duplicate
assignment.

Test Plan:
Add a test that uses the injection handler framework to duplicate
region-opened events.

Reviewers: kannan, kranganathan, mbautin

Reviewed By: mbautin

CC: hbase-eng@, pkhemani

Differential Revision: https://phabricator.fb.com/D565749

Added:
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/DuplicateZKNotificationInjectionHandler.java
Modified:
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/InjectionEvent.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java?rev=1382294&r1=1382293&r2=1382294&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java Sat Sep  8 12:51:34 2012
@@ -33,6 +33,7 @@ import java.util.Set;
 import java.util.SortedMap;
 import java.util.TreeMap;
 import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.ConcurrentSkipListMap;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicReference;
@@ -114,6 +115,19 @@ public class RegionManager {
    final SortedMap<String, RegionState> regionsInTransition =
     Collections.synchronizedSortedMap(new TreeMap<String, RegionState>());
    
+   /** Serves as a cache for locating where a particular region is open.
+    * Currently being used to distinguish legitimate duplicate assignments from
+    * spurious ones, that may seem to occur if a ZK notification is received
+    * twice.
+    *
+    * maps regionName --> serverName
+    *
+    * Note: This is a temporary hack. Should be safe to remove once we get
+    * rid of duplicate notifications from ZK.
+    */
+   final ConcurrentMap<String, String> regionLocationHintToDetectDupAssignment =
+       new ConcurrentHashMap<String, String>();
+
    // regions in transition are also recorded in ZK using the zk wrapper
    final ZooKeeperWrapper zkWrapper;
 
@@ -1057,8 +1071,8 @@ public class RegionManager {
     int prefixlen = META_REGION_PREFIX.length;
     if (row.length > prefixlen &&
      Bytes.compareTo(META_REGION_PREFIX, 0, prefixlen, row, 0, prefixlen) == 0) {
-    	return new MetaRegion(this.master.getRegionManager().getRootRegionLocation(),
-    	  HRegionInfo.ROOT_REGIONINFO);
+      return new MetaRegion(this.master.getRegionManager().getRootRegionLocation(),
+        HRegionInfo.ROOT_REGIONINFO);
     }
     return this.onlineMetaRegions.floorEntry(row).getValue();
   }
@@ -1442,12 +1456,24 @@ public class RegionManager {
       if (s != null) {
         s.setOpen();
         this.master.getMetrics().incRegionsOpened();
+        this.regionLocationHintToDetectDupAssignment.put(regionName, s.serverName);
       }
     }
 
   }
 
   /**
+   * Check if the region was last opened at the particular server
+   * @param regionName
+   * @param serverName
+   * @return true if regionName was last opened at serverName
+   */
+  public boolean lastOpenedAt(String regionName, String serverName) {
+    String openAt = this.regionLocationHintToDetectDupAssignment.get(regionName);
+    return openAt != null && openAt.equals(serverName);
+  }
+
+  /**
    * @param regionName
    * @return true if region is marked to be offlined.
    */

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java?rev=1382294&r1=1382293&r2=1382294&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java Sat Sep  8 12:51:34 2012
@@ -745,6 +745,15 @@ public class ServerManager {
             // A duplicate report from the correct server
             return;
           }
+
+          // Do not consider this a duplicate assignment if we are getting a notification
+          // for the same server twice
+          if (regionManager.lastOpenedAt(region.getRegionNameAsString(),
+              serverInfo.getServerName())) {
+            LOG.warn("Multiple REGION_OPENED notifications for region: " + region + " opened on " +
+              serverInfo);
+            return;
+          }
           duplicateAssignment = true;
         }
       }

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java?rev=1382294&r1=1382293&r2=1382294&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java Sat Sep  8 12:51:34 2012
@@ -34,6 +34,8 @@ import org.apache.hadoop.hbase.util.Para
 import org.apache.hadoop.hbase.util.Writables;
 import org.apache.hadoop.hbase.zookeeper.ZNodeEventData;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper;
+import org.apache.hadoop.hbase.util.InjectionEvent;
+import org.apache.hadoop.hbase.util.InjectionHandler;
 import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.WatchedEvent;
 import org.apache.zookeeper.Watcher;
@@ -167,8 +169,8 @@ public class ZKUnassignedWatcher impleme
     handleRegionStateInZK(eventType, zNodePath, data, true);
   }
 
-  void handleRegionStateInZK(EventType eventType, String zNodePath, byte[] data, boolean canDefer)
-      throws IOException {
+  public void handleRegionStateInZK(EventType eventType, String zNodePath,
+      byte[] data, boolean canDefer) throws IOException {
     // a null value is set when a node is created, we don't need to handle this
     if(data == null) {
       return;
@@ -214,6 +216,12 @@ public class ZKUnassignedWatcher impleme
             rsEvent == HBaseEventType.RS2ZK_REGION_OPENING) {
       new MasterOpenRegionHandler(rsEvent, serverManager, serverName, region,
           data).submit();
+
+      // For testing purposes
+      if (rsEvent == HBaseEventType.RS2ZK_REGION_OPENED) {
+        InjectionHandler.processEvent(InjectionEvent.ZKUNASSIGNEDWATCHER_REGION_OPENED,
+            this, eventType, zNodePath, data);
+      }
     }
   }
 

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/InjectionEvent.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/InjectionEvent.java?rev=1382294&r1=1382293&r2=1382294&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/InjectionEvent.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/InjectionEvent.java Sat Sep  8 12:51:34 2012
@@ -31,5 +31,6 @@ public enum InjectionEvent {
   HMASTER_DELETE_TABLE,
   HMASTER_ALTER_TABLE,
   HMASTER_ENABLE_TABLE,
-  HMASTER_DISABLE_TABLE
+  HMASTER_DISABLE_TABLE,
+  ZKUNASSIGNEDWATCHER_REGION_OPENED
 }

Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java?rev=1382294&r1=1382293&r2=1382294&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java (original)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java Sat Sep  8 12:51:34 2012
@@ -32,6 +32,9 @@ import org.apache.hadoop.hbase.client.Pu
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.DuplicateZKNotificationInjectionHandler;
+import org.apache.hadoop.hbase.util.InjectionEvent;
+import org.apache.hadoop.hbase.util.InjectionHandler;
 import org.apache.hadoop.hbase.util.JVMClusterUtil;
 
 /**
@@ -116,6 +119,26 @@ public class TestRegionRebalancing exten
     assertRegionsAreBalanced();
   }
 
+  /**
+   * Make sure we can handle duplicate notifications for region
+   * being opened. Same as testRebalancing -- but we will duplicate
+   * some of the notifications.
+   *
+   * @throws IOException
+   */
+  public void testRebalancingWithDuplicateNotification() throws IOException {
+    DuplicateZKNotificationInjectionHandler duplicator =
+      new DuplicateZKNotificationInjectionHandler();
+    duplicator.setProbability(0.05);
+    duplicator.duplicateEvent(InjectionEvent.ZKUNASSIGNEDWATCHER_REGION_OPENED);
+    InjectionHandler.set(duplicator);
+
+    testRebalancing();
+
+    // make sure that some events did get duplicated.
+    assertTrue(duplicator.getDuplicatedEventCnt() > 0);
+  }
+
   private void checkingServerStatus() {
     List<HRegionServer> servers = getOnlineRegionServers();
     double avg = cluster.getMaster().getAverageLoad();

Added: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/DuplicateZKNotificationInjectionHandler.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/DuplicateZKNotificationInjectionHandler.java?rev=1382294&view=auto
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/DuplicateZKNotificationInjectionHandler.java (added)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/DuplicateZKNotificationInjectionHandler.java Sat Sep  8 12:51:34 2012
@@ -0,0 +1,100 @@
+/**
+ * Copyright The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Random;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.executor.HBaseEventHandler.HBaseEventType;
+import org.apache.hadoop.hbase.master.ServerManager;
+import org.apache.hadoop.hbase.master.ZKUnassignedWatcher;
+import org.apache.zookeeper.Watcher.Event.EventType;
+
+/**
+ * Duplicates the ZKNotifications for Region opened events,
+ * with a given probability.
+ */
+public class DuplicateZKNotificationInjectionHandler extends InjectionHandler {
+
+  private static final Log LOG =
+    LogFactory.getLog(DuplicateZKNotificationInjectionHandler.class);
+  private Collection<InjectionEvent> events;
+
+  private double probability;
+  private Random rand;
+  private long duplicatedEventCnt;
+
+  public double getProbability() {
+    return probability;
+  }
+
+  public void setProbability(double d) {
+    this.probability = d;
+  }
+
+  public DuplicateZKNotificationInjectionHandler() {
+    this(23434234);
+  }
+
+  public DuplicateZKNotificationInjectionHandler(long seed) {
+    events = new ArrayList<InjectionEvent>();
+    LOG.info("Using DuplicateZKNotificationInjectionHandler  with seed " + seed);
+    rand = new Random(seed);
+    duplicatedEventCnt = 0;
+  }
+
+  @Override
+  protected void _processEvent(InjectionEvent event, Object... args) {
+    if (events.contains(event)) {
+      if (rand.nextDouble() < probability) {
+        // let us duplicate the processing
+        duplicatedEventCnt++;
+        LOG.info("Duplicating event " + event + " for " + args);
+        ZKUnassignedWatcher zk = (ZKUnassignedWatcher) args[0];
+        EventType eventType = (EventType) args[1];
+        String path = (String) args[2];
+        byte [] data = (byte[]) args[3];
+        try {
+          zk.handleRegionStateInZK(eventType, path, data, true);
+        } catch (IOException e) {
+          LOG.error("Caught exception handling ZK Event", e);
+          e.printStackTrace();
+        }
+      } else {
+        LOG.debug("Not duplicating event " + event + " for " + args);
+      }
+    } else {
+        LOG.warn("Unexpected event " + event + " for " + args);
+    }
+  }
+
+  public long getDuplicatedEventCnt() {
+    return duplicatedEventCnt;
+  }
+
+  public void duplicateEvent(InjectionEvent event) {
+    events.add(event);
+  }
+
+}