You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ap...@apache.org on 2009/05/28 07:49:27 UTC
svn commit: r779444 - in /hadoop/hbase/trunk_on_hadoop-0.18.3: ./ bin/
src/java/org/apache/hadoop/hbase/ src/java/org/apache/hadoop/hbase/client/
src/java/org/apache/hadoop/hbase/io/ src/java/org/apache/hadoop/hbase/ipc/
src/java/org/apache/hadoop/hbas...
Author: apurtell
Date: Thu May 28 05:49:26 2009
New Revision: 779444
URL: http://svn.apache.org/viewvc?rev=779444&view=rev
Log:
HBASE-1302, HBASE-1454
Added:
hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/ClusterStatus.java
Modified:
hadoop/hbase/trunk_on_hadoop-0.18.3/CHANGES.txt
hadoop/hbase/trunk_on_hadoop-0.18.3/bin/HBase.rb
hadoop/hbase/trunk_on_hadoop-0.18.3/bin/hirb.rb
hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/HServerLoad.java
hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/client/HBaseAdmin.java
hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/client/HConnection.java
hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java
hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java
hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/ipc/HBaseRPCProtocolVersion.java
hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/ipc/HMasterInterface.java
hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java
hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/master/HMaster.java
hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/master/RegionManager.java
hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/master/ServerManager.java
hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java
Modified: hadoop/hbase/trunk_on_hadoop-0.18.3/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/CHANGES.txt?rev=779444&r1=779443&r2=779444&view=diff
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/CHANGES.txt (original)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/CHANGES.txt Thu May 28 05:49:26 2009
@@ -154,6 +154,8 @@
localhost_1237525439599_56094" <- You'd have to be perverse
to recognize that as a hostname, startcode, and port
HBASE-1395 InfoServers no longer put up a UI
+ HBASE-1302 When a new master comes up, regionservers should continue with
+ their region assignments from the last master
IMPROVEMENTS
HBASE-1089 Add count of regions on filesystem to master UI; add percentage
@@ -284,6 +286,7 @@
HBASE-1430 Read the logs in batches during log splitting to avoid OOME
HBASE-1017 Region balancing does not bring newly added node within
acceptable range (Evgeny Ryabitskiy via Stack)
+ HBASE-1454 HBaseAdmin.getClusterStatus
OPTIMIZATIONS
HBASE-1412 Change values for delete column and column family in KeyValue
Modified: hadoop/hbase/trunk_on_hadoop-0.18.3/bin/HBase.rb
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/bin/HBase.rb?rev=779444&r1=779443&r2=779444&view=diff
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/bin/HBase.rb (original)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/bin/HBase.rb Thu May 28 05:49:26 2009
@@ -253,6 +253,31 @@
@admin.shutdown()
end
+ def status(format)
+ status = @admin.getClusterStatus()
+ if format != nil and format == "detailed"
+ puts("%d live servers" % [ status.getServers() ])
+ for server in status.getServerInfo()
+ puts(" %s:%d %d" % \
+ [ server.getServerAddress().getHostname(), \
+ server.getServerAddress().getPort(), server.getStartCode() ])
+ puts(" %s" % [ server.getLoad().toString() ])
+ for region in server.getLoad().getRegionsLoad()
+ puts(" %s" % [ region.getNameAsString() ])
+ puts(" %s" % [ region.toString() ])
+ end
+ end
+ puts("%d dead servers" % [ status.getDeadServers() ])
+ for server in status.getDeadServerNames()
+ puts(" %s" % [ server ])
+ end
+ else
+ puts("%d servers, %d dead, %.4f average load" % \
+ [ status.getServers(), status.getDeadServers(), \
+ status.getAverageLoad()])
+ end
+ end
+
def hcd(arg)
# Return a new HColumnDescriptor made of passed args
# TODO: This is brittle code.
Modified: hadoop/hbase/trunk_on_hadoop-0.18.3/bin/hirb.rb
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/bin/hirb.rb?rev=779444&r1=779443&r2=779444&view=diff
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/bin/hirb.rb (original)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/bin/hirb.rb Thu May 28 05:49:26 2009
@@ -237,6 +237,13 @@
hbase> scan 't1', {COLUMNS => ['c1', 'c2'], LIMIT => 10, \\
STARTROW => 'xyz'}
+ status Show cluster status. Can be 'simple' or 'detailed'. The default is
+ 'simple'. Examples:
+
+ hbase> status
+ hbase> status 'simple'
+ hbase> status 'detailed'
+
shutdown Shut down the cluster.
truncate Disables, drops and recreates the specified table.
@@ -334,7 +341,11 @@
def close_region(regionName, server = nil)
admin().close_region(regionName, server)
end
-
+
+def status(format = 'simple')
+ admin().status(format)
+end
+
# CRUD
def get(table, row, args = {})
Added: hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/ClusterStatus.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/ClusterStatus.java?rev=779444&view=auto
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/ClusterStatus.java (added)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/ClusterStatus.java Thu May 28 05:49:26 2009
@@ -0,0 +1,207 @@
+/**
+ * Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+
+import org.apache.hadoop.io.VersionedWritable;
+
+/**
+ * Status information on the HBase cluster.
+ * <p>
+ * <tt>ClusterStatus</tt> provides clients with information such as:
+ * <ul>
+ * <li>The count and names of region servers in the cluster.</li>
+ * <li>The count and names of dead region servers in the cluster.</li>
+ * <li>The average cluster load.</li>
+ * <li>The number of regions deployed on the cluster.</li>
+ * <li>The number of requests since last report.</li>
+ * <li>Detailed region server loading and resource usage information,
+ * per server and per region.</li>
+ * </ul>
+ */
+public class ClusterStatus extends VersionedWritable {
+ private static final byte VERSION = 0;
+ private Collection<HServerInfo> liveServerInfo;
+ private Collection<String> deadServers;
+
+ /**
+ * Constructor, for Writable
+ */
+ public ClusterStatus() {
+ }
+
+ /**
+ * @return the names of region servers in the cluster
+ */
+ public Collection<String> getServerNames() {
+ ArrayList<String> names = new ArrayList<String>(liveServerInfo.size());
+ for (HServerInfo server: liveServerInfo) {
+ names.add(server.getName());
+ }
+ return names;
+ }
+
+ /**
+ * @return the names of region servers on the dead list
+ */
+ public Collection<String> getDeadServerNames() {
+ return Collections.unmodifiableCollection(deadServers);
+ }
+
+ /**
+ * @return the number of region servers in the cluster
+ */
+ public int getServers() {
+ return liveServerInfo.size();
+ }
+
+ /**
+ * @return the number of dead region servers in the cluster
+ */
+ public int getDeadServers() {
+ return deadServers.size();
+ }
+
+ /**
+ * @return the average cluster load
+ */
+ public double getAverageLoad() {
+ int load = 0;
+ for (HServerInfo server: liveServerInfo) {
+ load += server.getLoad().getLoad();
+ }
+ return (double)load / (double)liveServerInfo.size();
+ }
+
+ /**
+ * @return the number of regions deployed on the cluster
+ */
+ public int getRegionsCount() {
+ int count = 0;
+ for (HServerInfo server: liveServerInfo) {
+ count += server.getLoad().getNumberOfRegions();
+ }
+ return count;
+ }
+
+ /**
+ * @return the number of requests since last report
+ */
+ public int getRequestsCount() {
+ int count = 0;
+ for (HServerInfo server: liveServerInfo) {
+ count += server.getLoad().getNumberOfRequests();
+ }
+ return count;
+ }
+
+ /**
+ * @see java.lang.Object#equals(java.lang.Object)
+ */
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (!(o instanceof ClusterStatus)) {
+ return false;
+ }
+ return (getVersion() == ((ClusterStatus)o).getVersion()) &&
+ liveServerInfo.equals(((ClusterStatus)o).liveServerInfo) &&
+ deadServers.equals(((ClusterStatus)o).deadServers);
+ }
+
+ /**
+ * @see java.lang.Object#hashCode()
+ */
+ public int hashCode() {
+ return VERSION + liveServerInfo.hashCode() + deadServers.hashCode();
+ }
+
+ /** @return the object version number */
+ public byte getVersion() {
+ return VERSION;
+ }
+
+ //
+ // Getters
+ //
+
+ /**
+ * Returns detailed region server information: A list of
+ * {@link HServerInfo}, containing server load and resource usage
+ * statistics as {@link HServerLoad}, containing per-region
+ * statistics as {@link HServerLoad.RegionLoad}.
+ * @return region server information
+ */
+ public Collection<HServerInfo> getServerInfo() {
+ return Collections.unmodifiableCollection(liveServerInfo);
+ }
+
+ //
+ // Setters
+ //
+
+ public void setServerInfo(Collection<HServerInfo> serverInfo) {
+ this.liveServerInfo = serverInfo;
+ }
+
+ public void setDeadServers(Collection<String> deadServers) {
+ this.deadServers = deadServers;
+ }
+
+ //
+ // Writable
+ //
+
+ public void write(DataOutput out) throws IOException {
+ super.write(out);
+ out.writeInt(liveServerInfo.size());
+ for (HServerInfo server: liveServerInfo) {
+ server.write(out);
+ }
+ out.writeInt(deadServers.size());
+ for (String server: deadServers) {
+ out.writeUTF(server);
+ }
+ }
+
+ public void readFields(DataInput in) throws IOException {
+ super.readFields(in);
+ int count = in.readInt();
+ liveServerInfo = new ArrayList<HServerInfo>(count);
+ for (int i = 0; i < count; i++) {
+ HServerInfo info = new HServerInfo();
+ info.readFields(in);
+ liveServerInfo.add(info);
+ }
+ count = in.readInt();
+ deadServers = new ArrayList<String>(count);
+ for (int i = 0; i < count; i++) {
+ deadServers.add(in.readUTF());
+ }
+ }
+}
Modified: hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/HServerLoad.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/HServerLoad.java?rev=779444&r1=779443&r2=779444&view=diff
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/HServerLoad.java (original)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/HServerLoad.java Thu May 28 05:49:26 2009
@@ -23,7 +23,10 @@
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Strings;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
@@ -95,6 +98,13 @@
}
/**
+ * @return the region name as a string
+ */
+ public String getNameAsString() {
+ return Bytes.toString(name);
+ }
+
+ /**
* @return the number of stores
*/
public int getStores() {
@@ -323,6 +333,13 @@
}
/**
+ * @return region load metrics
+ */
+ public Collection<RegionLoad> getRegionsLoad() {
+ return Collections.unmodifiableCollection(regionLoad);
+ }
+
+ /**
* @return Count of storefiles on this regionserver
*/
public int getStorefiles() {
Modified: hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/client/HBaseAdmin.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/client/HBaseAdmin.java?rev=779444&r1=779443&r2=779444&view=diff
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/client/HBaseAdmin.java (original)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/client/HBaseAdmin.java Thu May 28 05:49:26 2009
@@ -24,6 +24,7 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
@@ -791,6 +792,17 @@
}
}
+ /**
+ * @return cluster status
+ * @throws IOException
+ */
+ public ClusterStatus getClusterStatus() throws IOException {
+ if (this.master == null) {
+ throw new MasterNotRunningException("master has been shut down");
+ }
+ return this.master.getClusterStatus();
+ }
+
private HRegionLocation getFirstMetaServerForTable(final byte [] tableName)
throws IOException {
return connection.locateRegion(HConstants.META_TABLE_NAME,
Modified: hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/client/HConnection.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/client/HConnection.java?rev=779444&r1=779443&r2=779444&view=diff
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/client/HConnection.java (original)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/client/HConnection.java Thu May 28 05:49:26 2009
@@ -133,6 +133,17 @@
public HRegionInterface getHRegionConnection(HServerAddress regionServer)
throws IOException;
+ /**
+ * Establishes a connection to the region server at the specified address.
+ * @param regionServer - the server to connect to
+ * @param getMaster - if true, check that the master is alive before connecting
+ * @return proxy for HRegionServer
+ * @throws IOException
+ */
+ public HRegionInterface getHRegionConnection(
+ HServerAddress regionServer, boolean getMaster)
+ throws IOException;
+
/**
* Find region location hosting passed row
* @param tableName
Modified: hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java?rev=779444&r1=779443&r2=779444&view=diff
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java (original)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java Thu May 28 05:49:26 2009
@@ -116,6 +116,7 @@
}
}
+
/* Encapsulates finding the servers for an HBase instance */
private static class TableServers implements ServerConnection, HConstants, Watcher {
private static final Log LOG = LogFactory.getLog(TableServers.class);
@@ -766,9 +767,12 @@
tableLocations.put(startKey, location);
}
- public HRegionInterface getHRegionConnection(HServerAddress regionServer)
+ public HRegionInterface getHRegionConnection(
+ HServerAddress regionServer, boolean getMaster)
throws IOException {
- getMaster();
+ if(getMaster) {
+ getMaster();
+ }
HRegionInterface server;
synchronized (this.servers) {
// See if we already have a connection
@@ -787,6 +791,12 @@
}
return server;
}
+
+ public HRegionInterface getHRegionConnection(
+ HServerAddress regionServer)
+ throws IOException {
+ return getHRegionConnection(regionServer, true);
+ }
public synchronized ZooKeeperWrapper getZooKeeperWrapper() throws IOException {
if (zooKeeperWrapper == null) {
@@ -1060,4 +1070,4 @@
}
}
}
-}
\ No newline at end of file
+}
Modified: hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java?rev=779444&r1=779443&r2=779444&view=diff
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java (original)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java Thu May 28 05:49:26 2009
@@ -30,6 +30,7 @@
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HMsg;
import org.apache.hadoop.hbase.HRegionInfo;
@@ -133,6 +134,7 @@
e.printStackTrace();
}
addToMap(BatchUpdate[].class, code++);
+ addToMap(ClusterStatus.class, code++);
}
private Class<?> declaredClass;
Modified: hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/ipc/HBaseRPCProtocolVersion.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/ipc/HBaseRPCProtocolVersion.java?rev=779444&r1=779443&r2=779444&view=diff
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/ipc/HBaseRPCProtocolVersion.java (original)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/ipc/HBaseRPCProtocolVersion.java Thu May 28 05:49:26 2009
@@ -69,7 +69,9 @@
* HMasterInterface.findRootRegion. We use ZooKeeper to store root region
* location instead.</li>
* <li>Version 17: Added incrementColumnValue.</li>
+ * <li>Version 18: HBASE-1302.</li>
+ * <li>Version 19: Added getClusterStatus().</li>
* </ul>
*/
- public static final long versionID = 17L;
+ public static final long versionID = 19L;
}
Modified: hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/ipc/HMasterInterface.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/ipc/HMasterInterface.java?rev=779444&r1=779443&r2=779444&view=diff
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/ipc/HMasterInterface.java (original)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/ipc/HMasterInterface.java Thu May 28 05:49:26 2009
@@ -21,6 +21,7 @@
import java.io.IOException;
+import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.io.Writable;
@@ -116,4 +117,9 @@
* @throws IOException
*/
public void shutdown() throws IOException;
+
+ /**
+ * Return cluster status.
+ */
+ public ClusterStatus getClusterStatus();
}
Modified: hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java?rev=779444&r1=779443&r2=779444&view=diff
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java (original)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java Thu May 28 05:49:26 2009
@@ -28,6 +28,7 @@
import org.apache.hadoop.hbase.io.HbaseMapWritable;
import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HServerInfo;
import org.apache.hadoop.hbase.NotServingRegionException;
/**
@@ -306,4 +307,18 @@
*/
public long incrementColumnValue(byte [] regionName, byte [] row,
byte [] column, long amount) throws IOException;
+
+ /**
+ * Method used when a master is taking the place of another failed one.
+ * @return All regions assigned on this region server
+ * @throws IOException
+ */
+ public HRegionInfo[] getRegionsAssignment() throws IOException;
+
+ /**
+ * Method used when a master is taking the place of another failed one.
+ * @return The HSI
+ * @throws IOException
+ */
+ public HServerInfo getHServerInfo() throws IOException;
}
Modified: hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/master/HMaster.java?rev=779444&r1=779443&r2=779444&view=diff
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/master/HMaster.java Thu May 28 05:49:26 2009
@@ -25,6 +25,7 @@
import java.lang.reflect.Constructor;
import java.net.InetAddress;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
@@ -41,11 +42,13 @@
import org.apache.hadoop.dfs.FSConstants;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HMsg;
import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.HServerInfo;
import org.apache.hadoop.hbase.HServerLoad;
@@ -374,6 +377,7 @@
public void run() {
final String threadName = "HMaster";
Thread.currentThread().setName(threadName);
+ verifyClusterState();
startServiceThreads();
/* Main processing loop */
try {
@@ -504,6 +508,61 @@
}
/*
+ * Determines whether this is a fresh cluster start or a master failover.
+ * On failover, every region server listed in ZooKeeper is contacted, recorded
+ * with the server manager, and asked for its current region assignments.
+ */
+ private void verifyClusterState() {
+ try {
+ LOG.debug("Checking cluster state...");
+ HServerAddress rootLocation = zooKeeperWrapper.readRootRegionLocation();
+ List<HServerAddress> addresses = zooKeeperWrapper.scanRSDirectory();
+
+ // Check if this is a fresh start of the cluster
+ if(addresses.size() == 0) {
+ LOG.debug("This is a fresh start, proceeding with normal startup");
+ return;
+ }
+ LOG.info("This is a failover, ZK inspection begins...");
+ boolean isRootRegionAssigned = false;
+ Map<byte[], HRegionInfo> assignedRegions =
+ new HashMap<byte[], HRegionInfo>();
+ // This is a failover case. We must:
+ // - contact every region server to add them to the regionservers list
+ // - get their current regions assignment
+ for (HServerAddress address : addresses) {
+ HRegionInterface hri =
+ this.connection.getHRegionConnection(address, false);
+ HServerInfo info = hri.getHServerInfo();
+ LOG.debug("Inspection found server " + info.getName());
+ serverManager.recordNewServer(info);
+ HRegionInfo[] regions = hri.getRegionsAssignment();
+ for (HRegionInfo region : regions) {
+ if(region.isRootRegion()) {
+ connection.setRootRegionLocation(
+ new HRegionLocation(region, rootLocation));
+ regionManager.setRootRegionLocation(rootLocation);
+ // Undo the unassign work in the RegionManager constructor
+ regionManager.removeRegion(region);
+ isRootRegionAssigned = true;
+ }
+ else if(region.isMetaRegion()) {
+ MetaRegion m =
+ new MetaRegion(new HServerAddress(address),
+ region.getRegionName(), region.getStartKey());
+ regionManager.addMetaRegionToScan(m);
+ }
+ assignedRegions.put(region.getRegionName(), region);
+ }
+ }
+ LOG.info("Inspection found " + assignedRegions.size() + " regions, " +
+ (isRootRegionAssigned ? "with -ROOT-" : "but -ROOT- was MIA"));
+ } catch(IOException ex) {
+ LOG.error("Failed to verify cluster state, continuing with startup", ex);
+ }
+ }
+
+ /*
* Start up all services. If any of these threads gets an unhandled exception
* then they just die with a logged message. This should be fine because
* in general, we do not expect the master to get such unhandled exceptions
@@ -912,6 +971,16 @@
}
/**
+ * @return cluster status
+ */
+ public ClusterStatus getClusterStatus() {
+ ClusterStatus status = new ClusterStatus();
+ status.setServerInfo(serverManager.serversToServerInfo.values());
+ status.setDeadServers(serverManager.deadServers);
+ return status;
+ }
+
+ /**
* @return Server metrics
*/
public MasterMetrics getMetrics() {
Modified: hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/master/RegionManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/master/RegionManager.java?rev=779444&r1=779443&r2=779444&view=diff
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/master/RegionManager.java (original)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/master/RegionManager.java Thu May 28 05:49:26 2009
@@ -554,6 +554,7 @@
} catch(Exception iex) {
LOG.warn("meta scanner", iex);
}
+ zooKeeperWrapper.clearRSDirectory();
zooKeeperWrapper.close();
}
Modified: hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/master/ServerManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/master/ServerManager.java?rev=779444&r1=779443&r2=779444&view=diff
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/master/ServerManager.java (original)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/master/ServerManager.java Thu May 28 05:49:26 2009
@@ -161,8 +161,6 @@
LOG.debug("deadServers.contains: " + deadServers.contains(serverName));
throw new Leases.LeaseStillHeldException(serverName);
}
- Watcher watcher = new ServerExpirer(serverName, info.getServerAddress());
- zooKeeperWrapper.updateRSLocationGetWatch(info, watcher);
LOG.info("Received start message from: " + serverName);
// Go on to process the regionserver registration.
@@ -198,9 +196,21 @@
LOG.error("Insertion into toDoQueue was interrupted", e);
}
}
- // record new server
- load = new HServerLoad();
+ recordNewServer(info);
+ }
+
+ /**
+ * Adds the HSI to the RS list
+ * @param info The region server information
+ */
+ public void recordNewServer(HServerInfo info) {
+ HServerLoad load = new HServerLoad();
+ String serverName = HServerInfo.getServerName(info);
info.setLoad(load);
+ // We must set this watcher here because it can be set on a fresh start
+ // or on a failover
+ Watcher watcher = new ServerExpirer(serverName, info.getServerAddress());
+ zooKeeperWrapper.updateRSLocationGetWatch(info, watcher);
serversToServerInfo.put(serverName, info);
serverAddressToServerInfo.put(info.getServerAddress(), info);
serversToLoad.put(serverName, load);
Modified: hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=779444&r1=779443&r2=779444&view=diff
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Thu May 28 05:49:26 2009
@@ -323,15 +323,6 @@
private void reinitializeZooKeeper() throws IOException {
zooKeeperWrapper = new ZooKeeperWrapper(conf);
watchMasterAddress();
-
- boolean startCodeOk = false;
- while(!startCodeOk) {
- serverInfo.setStartCode(System.currentTimeMillis());
- startCodeOk = zooKeeperWrapper.writeRSLocation(serverInfo);
- if(!startCodeOk) {
- LOG.debug("Start code already taken, trying another one");
- }
- }
}
private void reinitializeThreads() {
@@ -384,6 +375,8 @@
if (state == KeeperState.Expired) {
LOG.error("ZooKeeper session expired");
restart();
+ } else if (type == EventType.NodeDeleted) {
+ watchMasterAddress();
} else if (type == EventType.NodeCreated) {
getMaster();
@@ -1298,7 +1291,8 @@
// should retry indefinitely.
master = (HMasterRegionInterface)HBaseRPC.waitForProxy(
HMasterRegionInterface.class, HBaseRPCProtocolVersion.versionID,
- masterAddress.getInetSocketAddress(), this.conf, -1);
+ masterAddress.getInetSocketAddress(),
+ this.conf, -1);
} catch (IOException e) {
LOG.warn("Unable to connect to master. Retrying. Error was:", e);
sleeper.sleep();
@@ -1329,6 +1323,14 @@
if (LOG.isDebugEnabled())
LOG.debug("sending initial server load: " + hsl);
lastMsg = System.currentTimeMillis();
+ boolean startCodeOk = false;
+ while(!startCodeOk) {
+ serverInfo.setStartCode(System.currentTimeMillis());
+ startCodeOk = zooKeeperWrapper.writeRSLocation(serverInfo);
+ if(!startCodeOk) {
+ LOG.debug("Start code already taken, trying another one");
+ }
+ }
result = this.hbaseMaster.regionServerStartup(serverInfo);
break;
} catch (Leases.LeaseStillHeldException e) {
@@ -2450,7 +2452,20 @@
checkFileSystem();
throw e;
}
-
-
+ }
+
+ /** {@inheritDoc} */
+ public HRegionInfo[] getRegionsAssignment() throws IOException {
+ HRegionInfo[] regions = new HRegionInfo[onlineRegions.size()];
+ Iterator<HRegion> ite = onlineRegions.values().iterator();
+ for(int i = 0; ite.hasNext(); i++) {
+ regions[i] = ite.next().getRegionInfo();
+ }
+ return regions;
+ }
+
+ /** {@inheritDoc} */
+ public HServerInfo getHServerInfo() throws IOException {
+ return serverInfo;
}
}
Modified: hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java?rev=779444&r1=779443&r2=779444&view=diff
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java (original)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java Thu May 28 05:49:26 2009
@@ -462,12 +462,12 @@
*/
public boolean writeRSLocation(HServerInfo info) {
ensureExists(rsZNode);
- byte[] data = Bytes.toBytes(info.getServerAddress().getBindAddress());
+ byte[] data = Bytes.toBytes(info.getServerAddress().toString());
String znode = joinPath(rsZNode, Long.toString(info.getStartCode()));
try {
zooKeeper.create(znode, data, Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
LOG.debug("Created ZNode " + znode
- + " with data " + info.getServerAddress().getBindAddress());
+ + " with data " + info.getServerAddress().toString());
return true;
} catch (KeeperException e) {
LOG.warn("Failed to create " + znode + " znode in ZooKeeper: " + e);
@@ -484,12 +484,12 @@
* @return true if the update is done, false if it failed
*/
public boolean updateRSLocationGetWatch(HServerInfo info, Watcher watcher) {
- byte[] data = Bytes.toBytes(info.getServerAddress().getBindAddress());
- String znode = rsZNode + "/" + info.getStartCode();
+ byte[] data = Bytes.toBytes(info.getServerAddress().toString());
+ String znode = rsZNode + ZNODE_PATH_SEPARATOR + info.getStartCode();
try {
zooKeeper.setData(znode, data, -1);
LOG.debug("Updated ZNode " + znode
- + " with data " + info.getServerAddress().getBindAddress());
+ + " with data " + info.getServerAddress().toString());
zooKeeper.getData(znode, watcher, null);
return true;
} catch (KeeperException e) {
@@ -501,6 +501,43 @@
return false;
}
+ /**
+ * Scans the region servers directory
+ * @return A list of server addresses
+ */
+ public List<HServerAddress> scanRSDirectory() {
+ List<HServerAddress> addresses = new ArrayList<HServerAddress>();
+ try {
+ List<String> nodes = zooKeeper.getChildren(rsZNode, false);
+ for (String node : nodes) {
+ addresses.add(readAddress(rsZNode + ZNODE_PATH_SEPARATOR + node, null));
+ }
+ } catch (KeeperException e) {
+ LOG.warn("Failed to read " + rsZNode + " znode in ZooKeeper: " + e);
+ } catch (InterruptedException e) {
+ LOG.warn("Failed to read " + rsZNode + " znode in ZooKeeper: " + e);
+ }
+ return addresses;
+ }
+
+ /**
+ * Method used to make sure the region server directory is empty.
+ * Children come back as bare names, so each is joined to rsZNode to delete it.
+ */
+ public void clearRSDirectory() {
+ try {
+ List<String> nodes = zooKeeper.getChildren(rsZNode, false);
+ for (String node : nodes) {
+ LOG.debug("Deleting node: " + node);
+ zooKeeper.delete(rsZNode + ZNODE_PATH_SEPARATOR + node, -1);
+ }
+ } catch (KeeperException e) {
+ LOG.warn("Failed to delete " + rsZNode + " znode in ZooKeeper: " + e);
+ } catch (InterruptedException e) {
+ LOG.warn("Failed to delete " + rsZNode + " znode in ZooKeeper: " + e);
+ }
+ }
+
private boolean checkExistenceOf(String path) {
Stat stat = null;
try {