You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/11 04:02:59 UTC
svn commit: r1181373 - in
/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase: client/ master/
regionserver/ replication/ zookeeper/
Author: nspiegelberg
Date: Tue Oct 11 02:02:57 2011
New Revision: 1181373
URL: http://svn.apache.org/viewvc?rev=1181373&view=rev
Log:
Cleanup ZK usage in internal hbase branch
Summary:
We need to abort when we get session expiration.
This is a first cut; we still need to go back over the list of what we wanted
to fix. Not meant for commit yet.
I still need to add the fix where we delete the unassigned node after the
close of a disabled region.
Test Plan:
I think this breaks unit tests.
DiffCamp Revision: 152879
Reviewed By: kannan
Commenters: kranganathan
CC: jgray, kannan, kranganathan, hbase@lists
Revert Plan:
OK
Modified:
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ZKMasterAddressWatcher.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/replication/ReplicationZookeeperWrapper.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java?rev=1181373&r1=1181372&r2=1181373&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java Tue Oct 11 02:02:57 2011
@@ -380,16 +380,18 @@ public class HConnectionManager {
tries++) {
try {
- masterLocation = zk.readMasterAddressOrThrow();
+ masterLocation = zk.readMasterAddress(zk);
- HMasterInterface tryMaster = (HMasterInterface)HBaseRPC.getProxy(
- HMasterInterface.class, HBaseRPCProtocolVersion.versionID,
- masterLocation.getInetSocketAddress(), this.conf);
-
- if (tryMaster.isMasterRunning()) {
- this.master = tryMaster;
- this.masterLock.notifyAll();
- break;
+ if (masterLocation != null) {
+ HMasterInterface tryMaster = (HMasterInterface)HBaseRPC.getProxy(
+ HMasterInterface.class, HBaseRPCProtocolVersion.versionID,
+ masterLocation.getInetSocketAddress(), this.conf);
+
+ if (tryMaster.isMasterRunning()) {
+ this.master = tryMaster;
+ this.masterLock.notifyAll();
+ break;
+ }
}
} catch (IOException e) {
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1181373&r1=1181372&r2=1181373&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Tue Oct 11 02:02:57 2011
@@ -66,13 +66,12 @@ import org.apache.hadoop.hbase.RemoteExc
import org.apache.hadoop.hbase.TableExistsException;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
-import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.MetaScanner;
-import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
+import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.ServerConnection;
import org.apache.hadoop.hbase.client.ServerConnectionManager;
-import org.apache.hadoop.hbase.executor.HBaseEventHandler;
+import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
import org.apache.hadoop.hbase.executor.HBaseExecutorService;
import org.apache.hadoop.hbase.executor.HBaseEventHandler.HBaseEventType;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
@@ -101,8 +100,6 @@ import org.apache.hadoop.ipc.RemoteExcep
import org.apache.hadoop.net.DNS;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
-import org.apache.zookeeper.Watcher.Event.EventType;
-import org.apache.zookeeper.Watcher.Event.KeeperState;
import com.google.common.collect.Lists;
@@ -1182,41 +1179,7 @@ public class HMaster extends Thread impl
*/
@Override
public void process(WatchedEvent event) {
- LOG.debug("Event " + event.getType() +
- " with state " + event.getState() +
- " with path " + event.getPath());
- // Master should kill itself if its session expired or if its
- // znode was deleted manually (usually for testing purposes)
- if(event.getState() == KeeperState.Expired ||
- (event.getType().equals(EventType.NodeDeleted) &&
- event.getPath().equals(this.zooKeeperWrapper.getMasterElectionZNode())) &&
- !shutdownRequested.get()) {
-
- LOG.info("Master lost its znode, trying to get a new one");
-
- // Can we still be the master? If not, goodbye
-
- zooKeeperWrapper.close();
- try {
- zooKeeperWrapper =
- ZooKeeperWrapper.createInstance(conf, HMaster.class.getName());
- zooKeeperWrapper.registerListener(this);
- this.zkMasterAddressWatcher.setZookeeper(zooKeeperWrapper);
- if(!this.zkMasterAddressWatcher.
- writeAddressToZooKeeper(this.address,false)) {
- throw new Exception("Another Master is currently active");
- }
-
- // we are a failed over master, reset the fact that we started the
- // cluster
- resetClusterStartup();
- // Verify the cluster to see if anything happened while we were away
- joinCluster();
- } catch (Exception e) {
- LOG.error("Killing master because of", e);
- System.exit(1);
- }
- }
+ // no-op now
}
private static void printUsageAndExit() {
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java?rev=1181373&r1=1181372&r2=1181373&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java Tue Oct 11 02:02:57 2011
@@ -950,6 +950,7 @@ public class RegionManager {
public void removeRegion(HRegionInfo info) {
synchronized (this.regionsInTransition) {
this.regionsInTransition.remove(info.getRegionNameAsString());
+ zkWrapper.deleteUnassignedRegion(info.getEncodedName());
}
}
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ZKMasterAddressWatcher.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ZKMasterAddressWatcher.java?rev=1181373&r1=1181372&r2=1181373&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ZKMasterAddressWatcher.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ZKMasterAddressWatcher.java Tue Oct 11 02:02:57 2011
@@ -61,20 +61,27 @@ class ZKMasterAddressWatcher implements
@Override
public synchronized void process (WatchedEvent event) {
EventType type = event.getType();
- LOG.debug(("Got event " + type + " with path " + event.getPath()));
if (type.equals(EventType.NodeDeleted)) {
if (event.getPath().equals(this.zookeeper.clusterStateZNode)) {
LOG.info("Cluster shutdown while waiting, shutting down" +
" this master.");
this.requestShutdown.set(true);
- } else {
- LOG.debug("Master address ZNode deleted, notifying waiting masters");
+ } else if (event.getPath().equals(this.zookeeper.masterElectionZNode)){
+ LOG.info("Master address ZNode deleted, notifying waiting masters");
notifyAll();
}
- } else if(type.equals(EventType.NodeCreated) &&
- event.getPath().equals(this.zookeeper.clusterStateZNode)) {
- LOG.debug("Resetting watch on cluster state node.");
- this.zookeeper.setClusterStateWatch();
+ } else if(type.equals(EventType.NodeCreated)) {
+ if (event.getPath().equals(this.zookeeper.clusterStateZNode)) {
+ LOG.info("Resetting watch on cluster state node.");
+ this.zookeeper.setClusterStateWatch();
+ } else if (event.getPath().equals(this.zookeeper.masterElectionZNode)) {
+ LOG.info("Master address ZNode created, check exists and reset watch");
+ if (!zookeeper.exists(zookeeper.masterElectionZNode, true)) {
+ LOG.debug("Got NodeCreated for master node but it does not exist now" +
+ ", notifying");
+ notifyAll();
+ }
+ }
}
}
@@ -83,7 +90,7 @@ class ZKMasterAddressWatcher implements
* blocks until the master address ZNode gets deleted.
*/
public synchronized void waitForMasterAddressAvailability() {
- while (zookeeper.readMasterAddress(this) != null) {
+ while (zookeeper.readMasterAddress(zookeeper) != null) {
try {
LOG.debug("Waiting for master address ZNode to be deleted " +
"(Also watching cluster state node)");
@@ -112,7 +119,7 @@ class ZKMasterAddressWatcher implements
this.zookeeper.setClusterState(true);
this.zookeeper.setClusterStateWatch();
// Watch our own node
- this.zookeeper.readMasterAddress(this);
+ this.zookeeper.readMasterAddress(zookeeper);
return true;
}
} while(retry);
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java?rev=1181373&r1=1181372&r2=1181373&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java Tue Oct 11 02:02:57 2011
@@ -92,10 +92,6 @@ public class ZKUnassignedWatcher impleme
@Override
public synchronized void process(WatchedEvent event) {
EventType type = event.getType();
- LOG.debug("ZK-EVENT-PROCESS: Got zkEvent " + type +
- " state:" + event.getState() +
- " path:" + event.getPath());
-
// Handle the ignored events
if(type.equals(EventType.None) ||
type.equals(EventType.NodeDeleted)) {
@@ -109,6 +105,9 @@ public class ZKUnassignedWatcher impleme
return;
}
+ LOG.debug("ZK-EVENT-PROCESS: Got zkEvent " + type +
+ " path:" + event.getPath());
+
try
{
/*
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=1181373&r1=1181372&r2=1181373&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Tue Oct 11 02:02:57 2011
@@ -112,6 +112,7 @@ import org.apache.hadoop.io.Writable;
import org.apache.hadoop.net.DNS;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.StringUtils;
+import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.Watcher.Event.EventType;
@@ -322,18 +323,22 @@ public class HRegionServer implements HR
"hbase-958 debugging");
}
reinitializeThreads();
- reinitializeZooKeeper();
+ initializeZooKeeper();
int nbBlocks = conf.getInt("hbase.regionserver.nbreservationblocks", 4);
for(int i = 0; i < nbBlocks; i++) {
reservedSpace.add(new byte[HConstants.DEFAULT_SIZE_RESERVATION_BLOCK]);
}
}
- private void reinitializeZooKeeper() throws IOException {
+ private void initializeZooKeeper() throws IOException {
zooKeeperWrapper =
ZooKeeperWrapper.createInstance(conf, serverInfo.getServerName());
zooKeeperWrapper.registerListener(this);
- watchMasterAddress();
+ try {
+ zooKeeperWrapper.watchMasterAddress(zooKeeperWrapper);
+ } catch (KeeperException e) {
+ throw new IOException(e);
+ }
}
private void reinitializeThreads() {
@@ -371,8 +376,6 @@ public class HRegionServer implements HR
public void process(WatchedEvent event) {
EventType type = event.getType();
KeeperState state = event.getState();
- LOG.info("Got ZooKeeper event, state: " + state + ", type: " +
- type + ", path: " + event.getPath());
// Ignore events if we're shutting down.
if (this.stopRequested.get()) {
@@ -380,48 +383,34 @@ public class HRegionServer implements HR
return;
}
- if (state == KeeperState.Expired) {
- LOG.error("ZooKeeper session expired");
- boolean restart =
- this.conf.getBoolean("hbase.regionserver.restart.on.zk.expire", false);
- if (restart) {
- restart();
- } else {
- abort("ZooKeeper session expired");
- }
- } else if (type == EventType.NodeDeleted) {
- watchMasterAddress();
- } else if (type == EventType.NodeCreated) {
- getMaster();
+ if (!event.getPath().equals(zooKeeperWrapper.masterElectionZNode)) {
+ return;
+ }
- // ZooKeeper watches are one time only, so we need to re-register our watch.
- watchMasterAddress();
+ try {
+ if (type == EventType.NodeDeleted) {
+ handleMasterNodeDeleted();
+ } else if (type == EventType.NodeCreated) {
+ handleMasterNodeCreated();
+ }
+ } catch(KeeperException ke) {
+ LOG.error("KeeperException handling master failover", ke);
+ abort("ZooKeeper exception handling master failover");
}
}
- private void watchMasterAddress() {
- while (!stopRequested.get() && !zooKeeperWrapper.watchMasterAddress(this)) {
- LOG.warn("Unable to set watcher on ZooKeeper master address. Retrying.");
- sleeper.sleep();
+ private void handleMasterNodeDeleted() throws KeeperException {
+ if(zooKeeperWrapper.watchMasterAddress(zooKeeperWrapper)) {
+ handleMasterNodeCreated();
}
}
- private void restart() {
- abort("Restarting region server");
- Threads.shutdown(regionServerThread);
- boolean done = false;
- while (!done) {
- try {
- reinitialize();
- done = true;
- } catch (IOException e) {
- LOG.debug("Error trying to reinitialize ZooKeeper", e);
- }
+ private void handleMasterNodeCreated() throws KeeperException {
+ if(!zooKeeperWrapper.watchMasterAddress(zooKeeperWrapper)) {
+ handleMasterNodeDeleted();
+ } else {
+ getMaster();
}
- Thread t = new Thread(this);
- String name = regionServerThread.getName();
- t.setName(name);
- t.start();
}
/** @return ZooKeeperWrapper used by RegionServer. */
@@ -1204,11 +1193,11 @@ public class HRegionServer implements HR
return false;
}
try {
- masterAddress = zooKeeperWrapper.readMasterAddressOrThrow();
- } catch (IOException e) {
- LOG.warn("Unable to read master address from ZooKeeper. Retrying." +
- " Error was:", e);
- sleeper.sleep();
+ masterAddress = zooKeeperWrapper.readAddressOrThrow(
+ zooKeeperWrapper.masterElectionZNode, zooKeeperWrapper);
+ } catch (KeeperException e) {
+ LOG.warn("Unable to read master address from ZooKeeper.", e);
+ abort("Unable to read master address in ZK");
}
}
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/replication/ReplicationZookeeperWrapper.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/replication/ReplicationZookeeperWrapper.java?rev=1181373&r1=1181372&r2=1181373&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/replication/ReplicationZookeeperWrapper.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/replication/ReplicationZookeeperWrapper.java Tue Oct 11 02:02:57 2011
@@ -483,8 +483,8 @@ public class ReplicationZookeeperWrapper
@Override
public void process(WatchedEvent watchedEvent) {
Event.EventType type = watchedEvent.getType();
- LOG.info("Got event " + type + " with path " + watchedEvent.getPath());
if (type.equals(Event.EventType.NodeDataChanged)) {
+ LOG.info("Got event " + type + " with path " + watchedEvent.getPath());
setReplicating();
}
}
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java?rev=1181373&r1=1181372&r2=1181373&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java Tue Oct 11 02:02:57 2011
@@ -55,6 +55,8 @@ import org.apache.zookeeper.KeeperExcept
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooKeeper;
+import org.apache.zookeeper.Watcher.Event.EventType;
+import org.apache.zookeeper.Watcher.Event.KeeperState;
import org.apache.zookeeper.ZooDefs.Ids;
import org.apache.zookeeper.ZooKeeper.States;
import org.apache.zookeeper.data.Stat;
@@ -127,7 +129,7 @@ public class ZooKeeperWrapper implements
/*
* ZNode used for election of the primary master when there are secondaries.
*/
- private final String masterElectionZNode;
+ public final String masterElectionZNode;
/*
* State of the cluster - if up and running or shutting down
*/
@@ -191,7 +193,6 @@ public class ZooKeeperWrapper implements
HConstants.ZOOKEEPER_CONFIG_NAME);
}
sessionTimeout = conf.getInt("zookeeper.session.timeout", 60 * 1000);
- reconnectToZk();
parentZNode = conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT, HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
@@ -206,26 +207,47 @@ public class ZooKeeperWrapper implements
rgnsInTransitZNode = getZNode(parentZNode, regionsInTransitZNodeName);
masterElectionZNode = getZNode(parentZNode, masterAddressZNodeName);
clusterStateZNode = getZNode(parentZNode, stateZNodeName);
+
+ connectToZk();
}
- public void reconnectToZk() throws IOException {
+ public void connectToZk() throws IOException {
try {
- LOG.info("Reconnecting to zookeeper");
+ LOG.info("Connecting to zookeeper");
if(zooKeeper != null) {
zooKeeper.close();
- LOG.debug("<" + instanceName + ">" + "Closed existing zookeeper client");
+ LOG.error("<" + instanceName + ">" + " Closed existing zookeeper client");
}
zooKeeper = new ZooKeeper(quorumServers, sessionTimeout, this);
- LOG.debug("<" + instanceName + ">" + "Connected to zookeeper again");
+ LOG.debug("<" + instanceName + ">" + " Connected to zookeeper");
+ // Ensure we are actually connected
+ ensureZkAvailable(10);
} catch (IOException e) {
- LOG.error("<" + instanceName + ">" + "Failed to create ZooKeeper object: " + e);
+ LOG.error("<" + instanceName + "> " + "Failed to create ZooKeeper object: " + e);
throw new IOException(e);
} catch (InterruptedException e) {
- LOG.error("<" + instanceName + ">" + "Error closing ZK connection: " + e);
+ LOG.error("<" + instanceName + " >" + "Error closing ZK connection: " + e);
throw new IOException(e);
}
}
+ private void ensureZkAvailable(int maxRetries)
+ throws IOException, InterruptedException {
+ while (maxRetries-- > 0) {
+ try {
+ zooKeeper.exists(parentZNode, false);
+ return;
+ } catch(KeeperException.ConnectionLossException cle) {
+ LOG.info("Received ZK ConnectionLossException, ZK not done initializing"
+ + ", retrying connect to ZK after 1 second sleep");
+ Thread.sleep(1000);
+ } catch(KeeperException ke) {
+ LOG.error("Received abnormal ZK exception, aborting", ke);
+ throw new IOException(ke);
+ }
+ }
+ }
+
public synchronized void registerListener(Watcher watcher) {
listeners.add(watcher);
}
@@ -240,11 +262,27 @@ public class ZooKeeperWrapper implements
*/
@Override
public synchronized void process(WatchedEvent event) {
+ LOG.debug("<" + instanceName + "> Received ZK WatchedEvent: " +
+ "[path=" + event.getPath() + "] " +
+ "[state=" + event.getState().toString() + "] " +
+ "[type=" + event.getType().toString() + "]");
+ if (event.getType() == EventType.None) {
+ if (event.getState() == KeeperState.Expired) {
+ LOG.error("ZooKeeper Session Expiration, aborting server");
+ abort();
+ } else if (event.getState() == KeeperState.Disconnected) {
+ LOG.warn("Disconnected from ZooKeeper");
+ } else if (event.getState() == KeeperState.SyncConnected) {
+ LOG.info("Reconnected to ZooKeeper");
+ }
+ return;
+ }
for(Watcher w : listeners) {
try {
w.process(event);
} catch (Throwable t) {
- LOG.error("<"+instanceName+">" + "ZK updates listener threw an exception in process()", t);
+ LOG.error("<"+instanceName+">" + " Sub-ZK Watcher threw an exception " +
+ "in process()", t);
}
}
}
@@ -386,26 +424,20 @@ public class ZooKeeperWrapper implements
return res.toArray(new String[res.size()]);
}
+ /**
+ * Check if the specified znode exists. Set a watch if boolean is true,
+ * whether or not the node exists.
+ * @param znode
+ * @param watch
+ * @return
+ */
public boolean exists(String znode, boolean watch) {
try {
- return zooKeeper.exists(getZNode(parentZNode, znode), watch?this:null) != null;
- } catch (KeeperException.SessionExpiredException e) {
- // if the session has expired try to reconnect to ZK, then perform query
- try {
- // TODO: ZK-REFACTOR: We should not reconnect - we should just quit and restart.
- reconnectToZk();
- return zooKeeper.exists(getZNode(parentZNode, znode), watch?this:null) != null;
- } catch (IOException e1) {
- LOG.error("Error reconnecting to zookeeper", e1);
- throw new RuntimeException("Error reconnecting to zookeeper", e1);
- } catch (KeeperException e1) {
- LOG.error("Error reading after reconnecting to zookeeper", e1);
- throw new RuntimeException("Error reading after reconnecting to zookeeper", e1);
- } catch (InterruptedException e1) {
- LOG.error("Error reading after reconnecting to zookeeper", e1);
- throw new RuntimeException("Error reading after reconnecting to zookeeper", e1);
- }
+ return zooKeeper.exists(getZNode(parentZNode, znode), watch ? this : null)
+ != null;
} catch (KeeperException e) {
+ LOG.error("Received KeeperException on exists() call, aborting", e);
+ abort();
return false;
} catch (InterruptedException e) {
return false;
@@ -455,15 +487,6 @@ public class ZooKeeperWrapper implements
}
/**
- * Read address of master server.
- * @return HServerAddress of master server.
- * @throws IOException if there's a problem reading the ZNode.
- */
- public HServerAddress readMasterAddressOrThrow() throws IOException {
- return readAddressOrThrow(masterElectionZNode, null);
- }
-
- /**
* Read master address and set a watch on it.
* @param watcher Watcher to set on master address ZNode if not null.
* @return HServerAddress of master or null if there was a problem reading the
@@ -521,23 +544,26 @@ public class ZooKeeperWrapper implements
}
/**
- * Set a watcher on the master address ZNode. The watcher will be set unless
- * an exception occurs with ZooKeeper.
+ * Set a watcher on the master address ZNode whether or not the node currently
+ * exists. The watcher will always be set unless this method throws an
+ * exception. Method will return true if node existed when watch was set,
+ * false if not.
* @param watcher Watcher to set on master address ZNode.
- * @return true if watcher was set, false otherwise.
+ * @return true if node exists when watch set, false if not
*/
- public boolean watchMasterAddress(Watcher watcher) {
+ public boolean watchMasterAddress(Watcher watcher)
+ throws KeeperException {
try {
- zooKeeper.exists(masterElectionZNode, watcher);
+ Stat s = zooKeeper.exists(masterElectionZNode, watcher);
+ LOG.debug("<" + instanceName + ">" + " Set watcher on master address ZNode " + masterElectionZNode);
+ return s != null;
} catch (KeeperException e) {
- LOG.warn("<" + instanceName + ">" + "Failed to set watcher on ZNode " + masterElectionZNode, e);
- return false;
+ LOG.warn("<" + instanceName + ">" + " Failed to set watcher on ZNode " + masterElectionZNode, e);
+ throw e;
} catch (InterruptedException e) {
- LOG.warn("<" + instanceName + ">" + "Failed to set watcher on ZNode " + masterElectionZNode, e);
+ LOG.warn("<" + instanceName + ">" + " Failed to set watcher on ZNode " + masterElectionZNode, e);
return false;
}
- LOG.debug("<" + instanceName + ">" + "Set watcher on master address ZNode " + masterElectionZNode);
- return true;
}
/**
@@ -551,20 +577,30 @@ public class ZooKeeperWrapper implements
try {
LOG.debug("<" + instanceName + ">" + "Trying to read " + znode);
return readAddressOrThrow(znode, watcher);
- } catch (IOException e) {
+ } catch (KeeperException e) {
LOG.debug("<" + instanceName + ">" + "Failed to read " + e.getMessage());
return null;
}
}
- private HServerAddress readAddressOrThrow(String znode, Watcher watcher) throws IOException {
+ /**
+ * Reads the specified address from the specified zk node, setting the
+ * specified watcher. Returns null if the node does not exist.
+ * @param znode
+ * @param watcher
+ * @return
+ * @throws KeeperException
+ */
+ public HServerAddress readAddressOrThrow(String znode, Watcher watcher)
+ throws KeeperException {
byte[] data;
try {
data = zooKeeper.getData(znode, watcher, null);
} catch (InterruptedException e) {
- throw new IOException(e);
- } catch (KeeperException e) {
- throw new IOException(e);
+ // This should not happen
+ return null;
+ } catch (KeeperException.NoNodeException e) {
+ return null;
}
String addressString = Bytes.toString(data);
@@ -1110,18 +1146,21 @@ public class ZooKeeperWrapper implements
// create the znode
zooKeeper.create(fullyQualifiedZNodeName, data, Ids.OPEN_ACL_UNSAFE, createMode);
LOG.debug("<" + instanceName + ">" + "Created ZNode " + fullyQualifiedZNodeName + " in ZooKeeper");
- // watch the znode for deletion, data change, creation of children
- if(watch) {
- watchZNode(zNodeName);
- }
- return fullyQualifiedZNodeName;
+ } catch (KeeperException.NodeExistsException nee) {
+ LOG.debug("<" + instanceName + "> " + "ZNode " + fullyQualifiedZNodeName + " already exists, still setting watch");
} catch (InterruptedException e) {
LOG.warn("<" + instanceName + ">" + "Failed to create ZNode " + fullyQualifiedZNodeName + " in ZooKeeper", e);
+ return null;
} catch (KeeperException e) {
- LOG.warn("<" + instanceName + ">" + "Failed to create ZNode " + fullyQualifiedZNodeName + " in ZooKeeper", e);
+ LOG.error("<" + instanceName + ">" + "Failed to create ZNode " + fullyQualifiedZNodeName + " in ZooKeeper", e);
+ return null;
}
+ // watch the znode for deletion, data change, creation of children
+ if(watch) {
+ watchZNode(zNodeName);
+ }
- return null;
+ return fullyQualifiedZNodeName;
}
public byte[] readZNode(String znodeName, Stat stat) throws IOException {
@@ -1281,29 +1320,11 @@ public class ZooKeeperWrapper implements
unassignedZNodesWatched.remove(znode);
deleteZNode(znode);
}
- } catch (KeeperException.SessionExpiredException e) {
- LOG.error("Zookeeper session has expired", e);
- // if the session has expired try to reconnect to ZK, then perform query
- try {
- // TODO: ZK-REFACTOR: should just quit on reconnect??
- reconnectToZk();
- synchronized(unassignedZNodesWatched) {
- unassignedZNodesWatched.remove(znode);
- deleteZNode(znode);
- }
- } catch (IOException e1) {
- LOG.error("Error reconnecting to zookeeper", e1);
- throw new RuntimeException("Error reconnecting to zookeeper", e1);
- } catch (KeeperException.SessionExpiredException e1) {
- LOG.error("Error reading after reconnecting to zookeeper", e1);
- throw new RuntimeException("Error reading after reconnecting to zookeeper", e1);
- } catch (KeeperException e1) {
- LOG.error("Error reading after reconnecting to zookeeper", e1);
- } catch (InterruptedException e1) {
- LOG.error("Error reading after reconnecting to zookeeper", e1);
- }
+ } catch (KeeperException.NoNodeException e) {
+ LOG.warn("Attempted to delete an unassigned region node but it DNE");
} catch (KeeperException e) {
LOG.error("Error deleting region " + regionName, e);
+ abort();
} catch (InterruptedException e) {
LOG.error("Error deleting region " + regionName, e);
}
@@ -1358,4 +1379,9 @@ public class ZooKeeperWrapper implements
}
}
+
+ private void abort() {
+ LOG.fatal("<" + instanceName + "> Aborting process because of fatal ZK error");
+ System.exit(1);
+ }
}