You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by dd...@apache.org on 2014/08/05 03:00:36 UTC

git commit: HBASE-10674. HBCK should be updated to do replica related checks

Repository: hbase
Updated Branches:
  refs/heads/master af4e0a78d -> 856275214


HBASE-10674. HBCK should be updated to do replica related checks


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/85627521
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/85627521
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/85627521

Branch: refs/heads/master
Commit: 8562752143f57744ac522ace1a2b196c45b428d4
Parents: af4e0a7
Author: Devaraj Das <dd...@apache.org>
Authored: Mon Aug 4 18:00:29 2014 -0700
Committer: Devaraj Das <dd...@apache.org>
Committed: Mon Aug 4 18:00:29 2014 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hbase/HRegionInfo.java    |   2 +-
 .../org/apache/hadoop/hbase/util/HBaseFsck.java | 211 +++++++++++++++----
 .../hadoop/hbase/util/HBaseFsckRepair.java      |  24 ++-
 .../apache/hadoop/hbase/util/TestHBaseFsck.java | 187 +++++++++++++++-
 4 files changed, 367 insertions(+), 57 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/85627521/hbase-client/src/main/java/org/apache/hadoop/hbase/HRegionInfo.java
----------------------------------------------------------------------
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/HRegionInfo.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/HRegionInfo.java
index 557f22e..5518f3a 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/HRegionInfo.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/HRegionInfo.java
@@ -145,7 +145,7 @@ public class HRegionInfo implements Comparable<HRegionInfo> {
   public static final byte REPLICA_ID_DELIMITER = (byte)'_';
 
   private static final int MAX_REPLICA_ID = 0xFFFF;
-  static final int DEFAULT_REPLICA_ID = 0;
+  public static final int DEFAULT_REPLICA_ID = 0;
   /**
    * Does region name contain its encoded name?
    * @param regionName region name

http://git-wip-us.apache.org/repos/asf/hbase/blob/85627521/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
index 3450516..e5365da 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
@@ -68,6 +68,7 @@ import org.apache.hadoop.hbase.HRegionLocation;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.MasterNotRunningException;
+import org.apache.hadoop.hbase.RegionLocations;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
@@ -1560,9 +1561,23 @@ public class HBaseFsck extends Configured {
    */
   private void checkAndFixConsistency()
   throws IOException, KeeperException, InterruptedException {
+	  // Divide the checks in two phases. One for default/primary replicas and another
+	  // for the non-primary ones. Keeps code cleaner this way.
     for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
-      checkRegionConsistency(e.getKey(), e.getValue());
+      if (e.getValue().getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
+        checkRegionConsistency(e.getKey(), e.getValue());
+      }
+    }
+    boolean prevHdfsCheck = shouldCheckHdfs();
+    setCheckHdfs(false); //replicas don't have any hdfs data
+    // Run a pass over the replicas and fix any assignment issues that exist on the currently
+    // deployed/undeployed replicas.
+    for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
+      if (e.getValue().getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
+        checkRegionConsistency(e.getKey(), e.getValue());
+      }
     }
+    setCheckHdfs(prevHdfsCheck);
   }
 
   private void preCheckPermission() throws IOException, AccessControlException {
@@ -1664,6 +1679,27 @@ public class HBaseFsck extends Configured {
   }
 
   private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {
+    undeployRegionsForHbi(hi);
+    // undeploy replicas of the region (but only if the method is invoked for the primary)
+    if (hi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
+      return;
+    }
+    int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();
+    for (int i = 1; i < numReplicas; i++) {
+      if (hi.getPrimaryHRIForDeployedReplica() == null) continue;
+      HRegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(
+          hi.getPrimaryHRIForDeployedReplica(), i);
+      HbckInfo h = regionInfoMap.get(hri.getEncodedName());
+      if (h != null) {
+        undeployRegionsForHbi(h);
+        //set skip checks; we undeployed it, and we don't want to evaluate this anymore
+        //in consistency checks
+        h.setSkipChecks(true);
+      }
+    }
+  }
+
+  private void undeployRegionsForHbi(HbckInfo hi) throws IOException, InterruptedException {
     for (OnlineEntry rse : hi.deployedEntries) {
       LOG.debug("Undeploy region "  + rse.hri + " from " + rse.hsa);
       try {
@@ -1699,27 +1735,41 @@ public class HBaseFsck extends Configured {
     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
+    // also get the locations of the replicas to close if the primary region is being closed
+    if (hi.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
+      int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();
+      for (int i = 0; i < numReplicas; i++) {
+        get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(i));
+        get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(i));
+      }
+    }
     Result r = meta.get(get);
-    ServerName serverName = HRegionInfo.getServerName(r);
-    if (serverName == null) {
-      errors.reportError("Unable to close region "
-          + hi.getRegionNameAsString() +  " because meta does not "
-          + "have handle to reach it.");
+    RegionLocations rl = MetaTableAccessor.getRegionLocations(r);
+    if (rl == null) {
+      LOG.warn("Unable to close region " + hi.getRegionNameAsString() +
+          " since meta does not have handle to reach it");
       return;
     }
-
-    HRegionInfo hri = HRegionInfo.getHRegionInfo(r);
-    if (hri == null) {
-      LOG.warn("Unable to close region " + hi.getRegionNameAsString()
-          + " because hbase:meta had invalid or missing "
-          + HConstants.CATALOG_FAMILY_STR + ":"
-          + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
-          + " qualifier value.");
-      return;
+    for (HRegionLocation h : rl.getRegionLocations()) {
+      ServerName serverName = h.getServerName();
+      if (serverName == null) {
+        errors.reportError("Unable to close region "
+            + hi.getRegionNameAsString() +  " because meta does not "
+            + "have handle to reach it.");
+        continue;
+      }
+      HRegionInfo hri = h.getRegionInfo();
+      if (hri == null) {
+        LOG.warn("Unable to close region " + hi.getRegionNameAsString()
+            + " because hbase:meta had invalid or missing "
+            + HConstants.CATALOG_FAMILY_STR + ":"
+            + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
+            + " qualifier value.");
+        continue;
+      }
+      // close the region -- close files and remove assignment
+      HBaseFsckRepair.closeRegionSilentlyAndWait(admin, serverName, hri);
     }
-
-    // close the region -- close files and remove assignment
-    HBaseFsckRepair.closeRegionSilentlyAndWait(admin, serverName, hri);
   }
 
   private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
@@ -1735,6 +1785,23 @@ public class HBaseFsck extends Configured {
       }
       HBaseFsckRepair.fixUnassigned(admin, hri);
       HBaseFsckRepair.waitUntilAssigned(admin, hri);
+
+      // also assign replicas if needed (do it only when this call operates on a primary replica)
+      if (hbi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) return;
+      int replicationCount = admin.getTableDescriptor(hri.getTable()).getRegionReplication();
+      for (int i = 1; i < replicationCount; i++) {
+        hri = RegionReplicaUtil.getRegionInfoForReplica(hri, i);
+        HbckInfo h = regionInfoMap.get(hri.getEncodedName());
+        if (h != null) {
+          undeployRegions(h);
+          //set skip checks; we undeploy & deploy it; we don't want to evaluate this hbi anymore
+          //in consistency checks
+          h.setSkipChecks(true);
+        }
+        HBaseFsckRepair.fixUnassigned(admin, hri);
+        HBaseFsckRepair.waitUntilAssigned(admin, hri);
+      }
+
     }
   }
 
@@ -1743,8 +1810,9 @@ public class HBaseFsck extends Configured {
    */
   private void checkRegionConsistency(final String key, final HbckInfo hbi)
   throws IOException, KeeperException, InterruptedException {
-    String descriptiveName = hbi.toString();
 
+	if (hbi.isSkipChecks()) return;
+	String descriptiveName = hbi.toString();
     boolean inMeta = hbi.metaEntry != null;
     // In case not checking HDFS, assume the region is on HDFS
     boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
@@ -1764,7 +1832,6 @@ public class HBaseFsck extends Configured {
     if (hbi.containsOnlyHdfsEdits()) {
       return;
     }
-    if (hbi.isSkipChecks()) return;
     if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
       return;
     } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
@@ -1809,7 +1876,9 @@ public class HBaseFsck extends Configured {
         }
 
         LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
-        HBaseFsckRepair.fixMetaHoleOnline(getConf(), hbi.getHdfsHRI());
+        int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
+        HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
+            admin.getClusterStatus().getServers(), numReplicas);
 
         tryAssignmentRepair(hbi, "Trying to reassign region...");
       }
@@ -1818,15 +1887,25 @@ public class HBaseFsck extends Configured {
       errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
           + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
       debugLsr(hbi.getHdfsRegionDir());
-      if (shouldFixMeta()) {
+      if (hbi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
+        // for replicas, this means that we should undeploy the region (we would have
+        // gone over the primaries and fixed meta holes in first phase under
+        // checkAndFixConsistency; we shouldn't get the condition !inMeta at
+        // this stage unless unwanted replica)
+        if (shouldFixAssignments()) {
+          undeployRegionsForHbi(hbi);
+        }
+      }
+      if (shouldFixMeta() && hbi.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
         if (!hbi.isHdfsRegioninfoPresent()) {
           LOG.error("This should have been repaired in table integrity repair phase");
           return;
         }
 
         LOG.info("Patching hbase:meta with with .regioninfo: " + hbi.getHdfsHRI());
-        HBaseFsckRepair.fixMetaHoleOnline(getConf(), hbi.getHdfsHRI());
-
+        int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
+        HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
+            admin.getClusterStatus().getServers(), numReplicas);
         tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
       }
 
@@ -2146,7 +2225,8 @@ public class HBaseFsck extends Configured {
     public void addRegionInfo(HbckInfo hir) {
       if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
         // end key is absolute end key, just add it.
-        sc.add(hir);
+        // ignore replicas other than primary for these checks
+        if (hir.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
         return;
       }
 
@@ -2163,7 +2243,8 @@ public class HBaseFsck extends Configured {
       }
 
       // main case, add to split calculator
-      sc.add(hir);
+      // ignore replicas other than primary for these checks
+      if (hir.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
     }
 
     public void addServer(ServerName server) {
@@ -2538,8 +2619,10 @@ public class HBaseFsck extends Configured {
           ArrayList<HbckInfo> subRange = new ArrayList<HbckInfo>(ranges);
           //  this dumb and n^2 but this shouldn't happen often
           for (HbckInfo r1 : ranges) {
+            if (r1.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) continue;
             subRange.remove(r1);
             for (HbckInfo r2 : subRange) {
+              if (r2.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) continue;
               if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey())==0) {
                 handler.handleDuplicateStartKeys(r1,r2);
               } else {
@@ -2843,33 +2926,49 @@ public class HBaseFsck extends Configured {
 
           // record the latest modification of this META record
           long ts =  Collections.max(result.listCells(), comp).getTimestamp();
-          Pair<HRegionInfo, ServerName> pair = HRegionInfo.getHRegionInfoAndServerName(result);
-          if (pair == null || pair.getFirst() == null) {
+          RegionLocations rl = MetaTableAccessor.getRegionLocations(result);
+          if (rl == null) {
             emptyRegionInfoQualifiers.add(result);
             errors.reportError(ERROR_CODE.EMPTY_META_CELL,
               "Empty REGIONINFO_QUALIFIER found in hbase:meta");
             return true;
           }
           ServerName sn = null;
-          if (pair.getSecond() != null) {
-            sn = pair.getSecond();
+          if (rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID) == null ||
+              rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID).getRegionInfo() == null) {
+            emptyRegionInfoQualifiers.add(result);
+            errors.reportError(ERROR_CODE.EMPTY_META_CELL,
+              "Empty REGIONINFO_QUALIFIER found in hbase:meta");
+            return true;
           }
-          HRegionInfo hri = pair.getFirst();
+          HRegionInfo hri = rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID).getRegionInfo();
           if (!(isTableIncluded(hri.getTable())
               || hri.isMetaRegion())) {
             return true;
           }
           PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(result);
-          MetaEntry m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
-          HbckInfo previous = regionInfoMap.get(hri.getEncodedName());
-          if (previous == null) {
-            regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m));
-          } else if (previous.metaEntry == null) {
-            previous.metaEntry = m;
-          } else {
-            throw new IOException("Two entries in hbase:meta are same " + previous);
+          for (HRegionLocation h : rl.getRegionLocations()) {
+            if (h == null || h.getRegionInfo() == null) {
+              continue;
+            }
+            sn = h.getServerName();
+            hri = h.getRegionInfo();
+
+            MetaEntry m = null;
+            if (hri.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
+              m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
+            } else {
+              m = new MetaEntry(hri, sn, ts, null, null);
+            }
+            HbckInfo previous = regionInfoMap.get(hri.getEncodedName());
+            if (previous == null) {
+              regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m));
+            } else if (previous.metaEntry == null) {
+              previous.metaEntry = m;
+            } else {
+              throw new IOException("Two entries in hbase:meta are same " + previous);
+            }
           }
-
           PairOfSameType<HRegionInfo> mergeRegions = HRegionInfo.getMergeRegions(result);
           for (HRegionInfo mergeRegion : new HRegionInfo[] {
               mergeRegions.getFirst(), mergeRegions.getSecond() }) {
@@ -2987,17 +3086,28 @@ public class HBaseFsck extends Configured {
     private List<ServerName> deployedOn = Lists.newArrayList(); // info on RS's
     private boolean skipChecks = false; // whether to skip further checks to this region info.
     private boolean isMerged = false;// whether this region has already been merged into another one
+    private int deployedReplicaId = HRegionInfo.DEFAULT_REPLICA_ID;
+    private HRegionInfo primaryHRIForDeployedReplica = null;
 
     HbckInfo(MetaEntry metaEntry) {
       this.metaEntry = metaEntry;
     }
 
+    public int getReplicaId() {
+      if (metaEntry != null) return metaEntry.getReplicaId();
+      return deployedReplicaId;
+    }
+
     public synchronized void addServer(HRegionInfo hri, ServerName server) {
       OnlineEntry rse = new OnlineEntry() ;
       rse.hri = hri;
       rse.hsa = server;
       this.deployedEntries.add(rse);
       this.deployedOn.add(server);
+      // save the replicaId that we see deployed in the cluster
+      this.deployedReplicaId = hri.getReplicaId();
+      this.primaryHRIForDeployedReplica =
+          RegionReplicaUtil.getRegionInfoForDefaultReplica(hri);
     }
 
     @Override
@@ -3007,6 +3117,7 @@ public class HBaseFsck extends Configured {
       sb.append((metaEntry != null)? metaEntry.getRegionNameAsString() : "null");
       sb.append( ", hdfs => " + getHdfsRegionDir());
       sb.append( ", deployed => " + Joiner.on(", ").join(deployedEntries));
+      sb.append( ", replicaId => " + getReplicaId());
       sb.append(" }");
       return sb.toString();
     }
@@ -3044,8 +3155,10 @@ public class HBaseFsck extends Configured {
         Path tableDir = this.hdfsEntry.hdfsRegionDir.getParent();
         return FSUtils.getTableName(tableDir);
       } else {
-        // Currently no code exercises this path, but we could add one for
-        // getting table name from OnlineEntry
+        // return the info from the first online/deployed hri
+        for (OnlineEntry e : deployedEntries) {
+          return e.hri.getTable();
+        }
         return null;
       }
     }
@@ -3057,6 +3170,11 @@ public class HBaseFsck extends Configured {
         if (hdfsEntry.hri != null) {
           return hdfsEntry.hri.getRegionNameAsString();
         }
+      } else {
+        // return the info from the first online/deployed hri
+        for (OnlineEntry e : deployedEntries) {
+          return e.hri.getRegionNameAsString();
+        }
       }
       return null;
     }
@@ -3067,10 +3185,18 @@ public class HBaseFsck extends Configured {
       } else if (hdfsEntry != null) {
         return hdfsEntry.hri.getRegionName();
       } else {
+        // return the info from the first online/deployed hri
+        for (OnlineEntry e : deployedEntries) {
+          return e.hri.getRegionName();
+        }
         return null;
       }
     }
 
+    public HRegionInfo getPrimaryHRIForDeployedReplica() {
+      return primaryHRIForDeployedReplica;
+    }
+
     Path getHdfsRegionDir() {
       if (hdfsEntry == null) {
         return null;
@@ -3399,7 +3525,6 @@ public class HBaseFsck extends Configured {
         // check to see if the existence of this region matches the region in META
         for (HRegionInfo r:regions) {
           HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
-          if (!RegionReplicaUtil.isDefaultReplica(r)) hbi.setSkipChecks(true);
           hbi.addServer(r, rsinfo);
         }
       } catch (IOException e) {          // unable to connect to the region server.

http://git-wip-us.apache.org/repos/asf/hbase/blob/85627521/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java
index 641fd36..84ffec8 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java
@@ -19,8 +19,10 @@
 package org.apache.hadoop.hbase.util;
 
 import java.io.IOException;
+import java.util.Collection;
 import java.util.List;
 import java.util.Map;
+import java.util.Random;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -37,6 +39,7 @@ import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.HBaseAdmin;
 import org.apache.hadoop.hbase.client.HConnection;
 import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.master.RegionState;
 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService;
@@ -171,12 +174,25 @@ public class HBaseFsckRepair {
   }
 
   /**
-   * Puts the specified HRegionInfo into META.
+   * Puts the specified HRegionInfo into META with replica related columns
    */
-  public static void fixMetaHoleOnline(Configuration conf,
-      HRegionInfo hri) throws IOException {
+  public static void fixMetaHoleOnlineAndAddReplicas(Configuration conf,
+      HRegionInfo hri, Collection<ServerName> servers, int numReplicas) throws IOException {
     HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
-    MetaTableAccessor.addRegionToMeta(meta, hri);
+    Put put = MetaTableAccessor.makePutFromRegionInfo(hri);
+    if (numReplicas > 1) {
+      Random r = new Random();
+      ServerName[] serversArr = servers.toArray(new ServerName[servers.size()]);
+      for (int i = 1; i < numReplicas; i++) {
+        ServerName sn = serversArr[r.nextInt(serversArr.length)];
+        // the column added here is just to make sure the master is able to
+        // see the additional replicas when it is asked to assign. The
+        // final value of these columns will be different and will be updated
+        // by the actual regionservers that start hosting the respective replicas
+        MetaTableAccessor.addLocation(put, sn, sn.getStartcode(), i);
+      }
+    }
+    meta.put(put);
     meta.close();
   }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/85627521/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
index fc4c3d0..e0ee8fc 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
@@ -30,12 +30,17 @@ import static org.junit.Assert.fail;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.Random;
 import java.util.Map.Entry;
+import java.util.NavigableMap;
+import java.util.Set;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.ScheduledThreadPoolExecutor;
@@ -59,6 +64,7 @@ import org.apache.hadoop.hbase.HRegionLocation;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.LargeTests;
 import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.RegionLocations;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.MetaTableAccessor;
@@ -71,13 +77,17 @@ import org.apache.hadoop.hbase.client.HConnection;
 import org.apache.hadoop.hbase.client.HConnectionManager;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.MetaScanner;
+import org.apache.hadoop.hbase.client.Mutation;
 import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RegionReplicaUtil;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.RowMutations;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.io.hfile.TestHFile;
 import org.apache.hadoop.hbase.master.AssignmentManager;
 import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.RegionState;
 import org.apache.hadoop.hbase.master.RegionStates;
 import org.apache.hadoop.hbase.master.TableLockManager;
 import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
@@ -283,7 +293,7 @@ public class TestHBaseFsck {
   private void deleteRegion(Configuration conf, final HTableDescriptor htd,
       byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
       boolean hdfs) throws IOException, InterruptedException {
-    deleteRegion(conf, htd, startKey, endKey, unassign, metaRow, hdfs, false);
+    deleteRegion(conf, htd, startKey, endKey, unassign, metaRow, hdfs, false, HRegionInfo.DEFAULT_REPLICA_ID);
   }
 
   /**
@@ -292,10 +302,12 @@ public class TestHBaseFsck {
    * @param metaRow  if true remove region's row from META
    * @param hdfs if true remove region's dir in HDFS
    * @param regionInfoOnly if true remove a region dir's .regioninfo file
+   * @param replicaId replica id
    */
   private void deleteRegion(Configuration conf, final HTableDescriptor htd,
       byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
-      boolean hdfs, boolean regionInfoOnly) throws IOException, InterruptedException {
+      boolean hdfs, boolean regionInfoOnly, int replicaId)
+          throws IOException, InterruptedException {
     LOG.info("** Before delete:");
     dumpMeta(htd.getTableName());
 
@@ -304,7 +316,8 @@ public class TestHBaseFsck {
       HRegionInfo hri = e.getKey();
       ServerName hsa = e.getValue();
       if (Bytes.compareTo(hri.getStartKey(), startKey) == 0
-          && Bytes.compareTo(hri.getEndKey(), endKey) == 0) {
+          && Bytes.compareTo(hri.getEndKey(), endKey) == 0
+          && hri.getReplicaId() == replicaId) {
 
         LOG.info("RegionName: " +hri.getRegionNameAsString());
         byte[] deleteRow = hri.getRegionName();
@@ -573,16 +586,99 @@ public class TestHBaseFsck {
   @Test
   public void testHbckWithRegionReplica() throws Exception {
     TableName table =
-        TableName.valueOf("tableWithReplica");
+        TableName.valueOf("testHbckWithRegionReplica");
     try {
       setupTableWithRegionReplica(table, 2);
+      TEST_UTIL.getHBaseAdmin().flush(table.getName());
+      assertNoErrors(doFsck(conf, false));
+    } finally {
+      deleteTable(table);
+    }
+  }
+
+  @Test
+  public void testHbckWithFewerReplica() throws Exception {
+    TableName table =
+        TableName.valueOf("testHbckWithFewerReplica");
+    try {
+      setupTableWithRegionReplica(table, 2);
+      TEST_UTIL.getHBaseAdmin().flush(table.getName());
       assertNoErrors(doFsck(conf, false));
       assertEquals(ROWKEYS.length, countRows());
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
+          Bytes.toBytes("C"), true, false, false, false, 1); // unassign one replica
+      // check that problem exists
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new ERROR_CODE[]{ERROR_CODE.NOT_DEPLOYED});
+      // fix the problem
+      hbck = doFsck(conf, true);
+      // run hbck again to make sure we don't see any errors
+      hbck = doFsck(conf, false);
+      assertErrors(hbck, new ERROR_CODE[]{});
     } finally {
       deleteTable(table);
     }
   }
 
+  @Test
+  public void testHbckWithExcessReplica() throws Exception {
+    TableName table =
+        TableName.valueOf("testHbckWithExcessReplica");
+    try {
+      setupTableWithRegionReplica(table, 2);
+      TEST_UTIL.getHBaseAdmin().flush(table.getName());
+      assertNoErrors(doFsck(conf, false));
+      assertEquals(ROWKEYS.length, countRows());
+      // the next few lines inject a location in meta for a replica, and then
+      // asks the master to assign the replica (the meta needs to be injected
+      // for the master to treat the request for assignment as valid; the master
+      // checks the region is valid either from its memory or meta)
+      HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
+      List<HRegionInfo> regions = TEST_UTIL.getHBaseAdmin().getTableRegions(table);
+      byte[] startKey = Bytes.toBytes("B");
+      byte[] endKey = Bytes.toBytes("C");
+      byte[] metaKey = null;
+      HRegionInfo newHri = null;
+      for (HRegionInfo h : regions) {
+        if (Bytes.compareTo(h.getStartKey(), startKey) == 0  &&
+            Bytes.compareTo(h.getEndKey(), endKey) == 0 &&
+            h.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
+          metaKey = h.getRegionName();
+          //create a hri with replicaId as 2 (since we already have replicas with replicaid 0 and 1)
+          newHri = RegionReplicaUtil.getRegionInfoForReplica(h, 2);
+          break;
+        }
+      }
+      Put put = new Put(metaKey);
+      ServerName sn = TEST_UTIL.getHBaseAdmin().getClusterStatus().getServers()
+          .toArray(new ServerName[0])[0];
+      //add a location with replicaId as 2 (since we already have replicas with replicaid 0 and 1)
+      MetaTableAccessor.addLocation(put, sn, sn.getStartcode(), 2);
+      meta.put(put);
+      meta.flushCommits();
+      // assign the new replica
+      HBaseFsckRepair.fixUnassigned(TEST_UTIL.getHBaseAdmin(), newHri);
+      HBaseFsckRepair.waitUntilAssigned(TEST_UTIL.getHBaseAdmin(), newHri);
+      // now reset the meta row to its original value
+      Delete delete = new Delete(metaKey);
+      delete.deleteColumns(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(2));
+      delete.deleteColumns(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(2));
+      delete.deleteColumns(HConstants.CATALOG_FAMILY, MetaTableAccessor.getSeqNumColumn(2));
+      meta.delete(delete);
+      meta.flushCommits();
+      meta.close();
+      // check that problem exists
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new ERROR_CODE[]{ERROR_CODE.NOT_IN_META});
+      // fix the problem
+      hbck = doFsck(conf, true);
+      // run hbck again to make sure we don't see any errors
+      hbck = doFsck(conf, false);
+      assertErrors(hbck, new ERROR_CODE[]{});
+    } finally {
+      deleteTable(table);
+    }
+  }
   /**
    * Get region info from local cluster.
    */
@@ -860,7 +956,7 @@ public class TestHBaseFsck {
       // Mess it up by creating an overlap in the metadata
       TEST_UTIL.getHBaseAdmin().disableTable(table);
       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
-          Bytes.toBytes("B"), true, true, false, true);
+          Bytes.toBytes("B"), true, true, false, true, HRegionInfo.DEFAULT_REPLICA_ID);
       TEST_UTIL.getHBaseAdmin().enableTable(table);
 
       HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
@@ -981,7 +1077,7 @@ public class TestHBaseFsck {
       // Mess it up by leaving a hole in the meta data
       TEST_UTIL.getHBaseAdmin().disableTable(table);
       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
-          Bytes.toBytes("C"), true, true, false, true);
+          Bytes.toBytes("C"), true, true, false, true, HRegionInfo.DEFAULT_REPLICA_ID);
       TEST_UTIL.getHBaseAdmin().enableTable(table);
 
       HBaseFsck hbck = doFsck(conf, false);
@@ -1110,6 +1206,79 @@ public class TestHBaseFsck {
   }
 
  /**
+   * This creates and fixes a bad table with a region that is in meta but has
+   * no deployment or data in HDFS. The table has region_replication set to 2,
+   * so hbck must re-create and re-deploy both the primary and the secondary
+   * replica of the damaged region, and unassign the stale old replicas.
+   */
+  @Test
+  public void testNotInHdfsWithReplicas() throws Exception {
+    TableName table =
+        TableName.valueOf("tableNotInHdfs");
+    HBaseAdmin admin = new HBaseAdmin(conf);
+    try {
+      HRegionInfo[] oldHris = new HRegionInfo[2];
+      setupTableWithRegionReplica(table, 2);
+      assertEquals(ROWKEYS.length, countRows());
+      NavigableMap<HRegionInfo, ServerName> map = MetaScanner.allTableRegions(conf, null,
+          tbl.getName(), false);
+      int i = 0;
+      // store the HRIs of the regions we will mess up (both replicas of the
+      // region whose start key begins with "B")
+      for (Map.Entry<HRegionInfo, ServerName> m : map.entrySet()) {
+        if (m.getKey().getStartKey().length > 0 &&
+            m.getKey().getStartKey()[0] == Bytes.toBytes("B")[0]) {
+          LOG.debug("Initially server hosting " + m.getKey() + " is " + m.getValue());
+          oldHris[i++] = m.getKey();
+        }
+      }
+      // make sure data in regions is flushed out to HDFS before we mess with it
+      TEST_UTIL.getHBaseAdmin().flush(table.getName());
+
+      // Mess it up by leaving a hole in the hdfs data: remove the region files
+      // but keep the meta row and leave the region assigned
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
+          Bytes.toBytes("C"), false, false, true); // don't rm meta
+
+      // check that hbck reports the missing HDFS data
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
+
+      // fix hole
+      doFsck(conf, true);
+
+      // check that hole fixed
+      assertNoErrors(doFsck(conf,false));
+      // rows that lived in the removed region are gone (presumably 2 rows fell
+      // in [B,C) given the -2 below -- grounded only by this assertion)
+      assertEquals(ROWKEYS.length - 2, countRows());
+
+      // the following code checks whether the old primary/secondary has
+      // been unassigned and the new primary/secondary has been assigned
+      i = 0;
+      HRegionInfo[] newHris = new HRegionInfo[2];
+      // get all table's regions from meta
+      map = MetaScanner.allTableRegions(conf, null, tbl.getName(), false);
+      // get the HRIs of the new regions (hbck created new regions for fixing the hdfs mess-up)
+      for (Map.Entry<HRegionInfo, ServerName> m : map.entrySet()) {
+        if (m.getKey().getStartKey().length > 0 &&
+            m.getKey().getStartKey()[0] == Bytes.toBytes("B")[0]) {
+          newHris[i++] = m.getKey();
+        }
+      }
+      // get all the online regions in the regionservers
+      Collection<ServerName> servers = admin.getClusterStatus().getServers();
+      Set<HRegionInfo> onlineRegions = new HashSet<HRegionInfo>();
+      for (ServerName s : servers) {
+        List<HRegionInfo> list = admin.getOnlineRegions(s);
+        onlineRegions.addAll(list);
+      }
+      // the new HRIs must be a subset of the online regions
+      assertTrue(onlineRegions.containsAll(Arrays.asList(newHris)));
+      // the old HRIs must not be part of the set (removeAll would return true if
+      // the set changed, i.e. if any stale old replica were still online)
+      assertFalse(onlineRegions.removeAll(Arrays.asList(oldHris)));
+    } finally {
+      deleteTable(table);
+      admin.close();
+    }
+  }
+
+
+  /**
    * This creates entries in hbase:meta with no hdfs data.  This should cleanly
    * remove the table.
    */
@@ -1586,7 +1755,7 @@ public class TestHBaseFsck {
 
       // Mess it up by closing a region
       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
-        Bytes.toBytes("B"), true, false, false, false);
+        Bytes.toBytes("B"), true, false, false, false, HRegionInfo.DEFAULT_REPLICA_ID);
 
       // verify there is no other errors
       HBaseFsck hbck = doFsck(conf, false);
@@ -1636,7 +1805,7 @@ public class TestHBaseFsck {
 
       // Mess it up by deleting a region from the metadata
       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
-        Bytes.toBytes("B"), false, true, false, false);
+        Bytes.toBytes("B"), false, true, false, false, HRegionInfo.DEFAULT_REPLICA_ID);
 
       // verify there is no other errors
       HBaseFsck hbck = doFsck(conf, false);
@@ -1691,7 +1860,7 @@ public class TestHBaseFsck {
       // Mess it up by creating an overlap in the metadata
       TEST_UTIL.getHBaseAdmin().disableTable(table);
       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
-        Bytes.toBytes("B"), true, true, false, true);
+        Bytes.toBytes("B"), true, true, false, true, HRegionInfo.DEFAULT_REPLICA_ID);
       TEST_UTIL.getHBaseAdmin().enableTable(table);
 
       HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),