You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2019/10/30 20:11:20 UTC

[hbase] branch branch-2 updated: HBASE-23192 CatalogJanitor consistencyCheck does not log problematic row on exception (#734)

This is an automated email from the ASF dual-hosted git repository.

stack pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2 by this push:
     new 77de8e2  HBASE-23192 CatalogJanitor consistencyCheck does not log problematic row on exception (#734)
77de8e2 is described below

commit 77de8e21e48c41dfe39409f78e0083bd3d324e7a
Author: Michael Stack <sa...@users.noreply.github.com>
AuthorDate: Wed Oct 30 13:10:19 2019 -0700

    HBASE-23192 CatalogJanitor consistencyCheck does not log problematic row on exception (#734)
    
    Logs the row and the failure if the consistency check fails during the
    CatalogJanitor scan. Adds a few more null checks. Edits the top line of
    the 'HBCK Report' page.
    
    Signed-off-by: Reid Chan <re...@apache.org>
---
 .../org/apache/hadoop/hbase/master/CatalogJanitor.java   | 13 ++++++++++++-
 .../src/main/resources/hbase-webapps/master/hbck.jsp     |  2 +-
 .../hadoop/hbase/master/TestCatalogJanitorCluster.java   | 16 +++++++++++++++-
 3 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
index a9becdf..e14ca8b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
@@ -571,7 +571,12 @@ public class CatalogJanitor extends ScheduledChore {
         return true;
       }
       this.report.count++;
-      RegionInfo regionInfo = metaTableConsistencyCheck(r);
+      RegionInfo regionInfo = null;
+      try {
+        regionInfo = metaTableConsistencyCheck(r);
+      } catch(Throwable t) {
+        LOG.warn("Failed consistency check on {}", Bytes.toStringBinary(r.getRow()), t);
+      }
       if (regionInfo != null) {
         LOG.trace(regionInfo.toString());
         if (regionInfo.isSplitParent()) { // splitParent means split and offline.
@@ -695,8 +700,14 @@ public class CatalogJanitor extends ScheduledChore {
       if (locations == null) {
         return;
       }
+      if (locations.getRegionLocations() == null) {
+        return;
+      }
       // Check referenced servers are known/online.
       for (HRegionLocation location: locations.getRegionLocations()) {
+        if (location == null) {
+          continue;
+        }
         ServerName sn = location.getServerName();
         if (sn == null) {
           continue;
diff --git a/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp b/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp
index 7b4e164..27cb121 100644
--- a/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp
+++ b/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp
@@ -78,7 +78,7 @@
 
   <div class="row">
     <div class="page-header">
-  <p><span>This page displays two reports. Only the report titles show if reports are empty.</span></p>
+  <p><span>This page displays two reports: the 'HBCK Chore Report' and the 'CatalogJanitor Consistency Issues' report. Only titles show if there are no problems to report. Note some conditions are transitory as regions migrate.</span></p>
     </div>
   </div>
   <div class="row">
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorCluster.java
index 2bbdaea..5d5e81d 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorCluster.java
@@ -43,9 +43,12 @@ import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 @Category({MasterTests.class, LargeTests.class})
 public class TestCatalogJanitorCluster {
+  private static final Logger LOG = LoggerFactory.getLogger(TestCatalogJanitorCluster.class);
   @ClassRule
   public static final HBaseClassTestRule CLASS_RULE =
       HBaseClassTestRule.forClass(TestCatalogJanitorCluster.class);
@@ -130,7 +133,18 @@ public class TestCatalogJanitorCluster {
     report = janitor.getLastReport();
     assertFalse(report.isEmpty());
     assertEquals(1, report.getUnknownServers().size());
-    // Finally, make an empty regioninfo in t1.
+    // Test what happens if we overwrite a row's info:server cell with an empty value. This
+    // should not kill CJ, and we should log the row that had the problem. HBASE-23192. Just
+    // make sure we don't break if this happens.
+    LOG.info("Make null info:server");
+    Put emptyInfoServerPut = new Put(t1Ri1.getRegionName());
+    emptyInfoServerPut.addColumn(MetaTableAccessor.getCatalogFamily(),
+        MetaTableAccessor.getServerColumn(0), Bytes.toBytes(""));
+    MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(emptyInfoServerPut));
+    gc = janitor.scan();
+    report = janitor.getLastReport();
+    assertEquals(0, report.getUnknownServers().size());
+    // Make an empty regioninfo in t1.
     RegionInfo t1Ri2 = t1Ris.get(2);
     Put pEmptyRI = new Put(t1Ri2.getRegionName());
     pEmptyRI.addColumn(MetaTableAccessor.getCatalogFamily(),