You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by zg...@apache.org on 2019/08/05 07:21:40 UTC

[hbase] branch branch-2.2 updated: HBASE-22741 Show catalogjanitor consistency complaints in new 'HBCK Report' page Signed-off-by: huzheng Signed-off-by: Guanghao Zhang

This is an automated email from the ASF dual-hosted git repository.

zghao pushed a commit to branch branch-2.2
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2.2 by this push:
     new 2052058  HBASE-22741 Show catalogjanitor consistency complaints in new 'HBCK Report' page Signed-off-by: huzheng <op...@gmail.com> Signed-off-by: Guanghao Zhang <zg...@apache.org>
2052058 is described below

commit 2052058c7c27d46e509011773a07a8c9873f98e7
Author: stack <st...@apache.org>
AuthorDate: Mon Jul 29 17:10:58 2019 -0700

    HBASE-22741 Show catalogjanitor consistency complaints in new 'HBCK Report' page
    Signed-off-by: huzheng <op...@gmail.com>
    Signed-off-by: Guanghao Zhang <zg...@apache.org>
---
 .../apache/hadoop/hbase/master/CatalogJanitor.java |  91 +++++++++----
 .../main/resources/hbase-webapps/master/hbck.jsp   | 145 +++++++++++++++++++--
 .../hbase/master/TestCatalogJanitorCluster.java    |  22 ++--
 3 files changed, 210 insertions(+), 48 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
index 37108d5..e6564a5 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
@@ -22,7 +22,6 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.Comparator;
-import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
@@ -30,6 +29,7 @@ import java.util.Properties;
 import java.util.TreeMap;
 import java.util.concurrent.atomic.AtomicBoolean;
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseConfiguration;
@@ -43,8 +43,11 @@ import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
 import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.client.TableDescriptor;
 import org.apache.hadoop.hbase.client.TableState;
 import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
@@ -240,7 +243,7 @@ public class CatalogJanitor extends ScheduledChore {
    * @return Returns last published Report that comes of last successful scan
    *   of hbase:meta.
    */
-  Report getLastReport() {
+  public Report getLastReport() {
     return this.lastReport;
   }
 
@@ -444,29 +447,49 @@ public class CatalogJanitor extends ScheduledChore {
   }
 
   /**
-   * Report made by {@link ReportMakingVisitor}.
+   * Report made by ReportMakingVisitor
    */
-  static class Report {
+  public static class Report {
     private final long now = EnvironmentEdgeManager.currentTime();
 
     // Keep Map of found split parents. These are candidates for cleanup.
     // Use a comparator that has split parents come before its daughters.
     final Map<RegionInfo, Result> splitParents = new TreeMap<>(new SplitParentFirstComparator());
     final Map<RegionInfo, Result> mergedRegions = new TreeMap<>(RegionInfo.COMPARATOR);
-
-    final List<Pair<MetaRow, MetaRow>> holes = new ArrayList<>();
-    final List<Pair<MetaRow, MetaRow>> overlaps = new ArrayList<>();
-    final Map<ServerName, RegionInfo> unknownServers = new HashMap<ServerName, RegionInfo>();
-    final List<byte []> emptyRegionInfo = new ArrayList<>();
     int count = 0;
 
+    private final List<Pair<MetaRow, MetaRow>> holes = new ArrayList<>();
+    private final List<Pair<MetaRow, MetaRow>> overlaps = new ArrayList<>();
+    private final List<Pair<MetaRow, ServerName>> unknownServers = new ArrayList<>();
+    private final List<byte []> emptyRegionInfo = new ArrayList<>();
+
     @VisibleForTesting
     Report() {}
 
+    public long getCreateTime() {
+      return this.now;
+    }
+
+    public List<Pair<MetaRow, MetaRow>> getHoles() {
+      return this.holes;
+    }
+
+    public List<Pair<MetaRow, MetaRow>> getOverlaps() {
+      return this.overlaps;
+    }
+
+    public List<Pair<MetaRow, ServerName>> getUnknownServers() {
+      return unknownServers;
+    }
+
+    public List<byte[]> getEmptyRegionInfo() {
+      return emptyRegionInfo;
+    }
+
     /**
      * @return True if an 'empty' lastReport -- no problems found.
      */
-    boolean isEmpty() {
+    public boolean isEmpty() {
       return this.holes.isEmpty() && this.overlaps.isEmpty() && this.unknownServers.isEmpty() &&
           this.emptyRegionInfo.isEmpty();
     }
@@ -478,28 +501,28 @@ public class CatalogJanitor extends ScheduledChore {
         if (sb.length() > 0) {
           sb.append(", ");
         }
-        sb.append("hole=" + Bytes.toString(p.getFirst().metaRow) + "/" +
-            Bytes.toString(p.getSecond().metaRow));
+        sb.append("hole=" + Bytes.toStringBinary(p.getFirst().metaRow) + "/" +
+            Bytes.toStringBinary(p.getSecond().metaRow));
       }
       for (Pair<MetaRow, MetaRow> p: this.overlaps) {
         if (sb.length() > 0) {
           sb.append(", ");
         }
-        sb.append("overlap=").append(Bytes.toString(p.getFirst().metaRow)).append("/").
-            append(Bytes.toString(p.getSecond().metaRow));
+        sb.append("overlap=").append(Bytes.toStringBinary(p.getFirst().metaRow)).append("/").
+            append(Bytes.toStringBinary(p.getSecond().metaRow));
       }
       for (byte [] r: this.emptyRegionInfo) {
         if (sb.length() > 0) {
           sb.append(", ");
         }
-        sb.append("empty=").append(Bytes.toString(r));
+        sb.append("empty=").append(Bytes.toStringBinary(r));
       }
-      for (Map.Entry<ServerName, RegionInfo> e: this.unknownServers.entrySet()) {
+      for (Pair<MetaRow, ServerName> p: this.unknownServers) {
         if (sb.length() > 0) {
           sb.append(", ");
         }
-        sb.append("unknown_server=").append(e.getKey()).append("/").
-            append(e.getValue().getRegionNameAsString());
+        sb.append("unknown_server=").append(p.getSecond()).append("/").
+            append(Bytes.toStringBinary(p.getFirst().metaRow));
       }
       return sb.toString();
     }
@@ -508,7 +531,7 @@ public class CatalogJanitor extends ScheduledChore {
   /**
    * Simple datastructure to hold a MetaRow content.
    */
-  static class MetaRow {
+  public static class MetaRow {
     /**
      * A marker for use in case where there is a hole at the very
      * first row in hbase:meta. Should never happen.
@@ -519,17 +542,25 @@ public class CatalogJanitor extends ScheduledChore {
     /**
      * Row from hbase:meta table.
      */
-    final byte [] metaRow;
+    private final byte [] metaRow;
 
     /**
      * The decoded RegionInfo gotten from hbase:meta.
      */
-    final RegionInfo regionInfo;
+    private final RegionInfo regionInfo;
 
     MetaRow(byte [] metaRow, RegionInfo regionInfo) {
       this.metaRow = metaRow;
       this.regionInfo = regionInfo;
     }
+
+    public RegionInfo getRegionInfo() {
+      return regionInfo;
+    }
+
+    public byte[] getMetaRow() {
+      return metaRow;
+    }
   }
 
   /**
@@ -608,13 +639,14 @@ public class CatalogJanitor extends ScheduledChore {
             MetaTableAccessor.getRegionInfoColumn());
       } else {
         ri = locations.getDefaultRegionLocation().getRegion();
-        checkServer(locations);
+        checkServer(metaTableRow.getRow(), locations);
       }
 
       if (ri == null) {
         this.report.emptyRegionInfo.add(metaTableRow.getRow());
         return ri;
       }
+
       MetaRow mrri = new MetaRow(metaTableRow.getRow(), ri);
       // If table is disabled, skip integrity check.
       if (!isTableDisabled(ri)) {
@@ -673,7 +705,7 @@ public class CatalogJanitor extends ScheduledChore {
     /**
      * Run through referenced servers and save off unknown and the dead.
      */
-    private void checkServer(RegionLocations locations) {
+    private void checkServer(byte [] metaTableRow, RegionLocations locations) {
       if (this.services == null) {
         // Can't do this test if no services.
         return;
@@ -691,7 +723,8 @@ public class CatalogJanitor extends ScheduledChore {
             isServerKnownAndOnline(sn);
         switch (state) {
           case UNKNOWN:
-            this.report.unknownServers.put(sn, location.getRegion());
+            this.report.unknownServers.add(
+                new Pair(new MetaRow(metaTableRow, location.getRegion()), sn));
             break;
 
           default:
@@ -736,20 +769,22 @@ public class CatalogJanitor extends ScheduledChore {
   public static void main(String [] args) throws IOException {
     checkLog4jProperties();
     ReportMakingVisitor visitor = new ReportMakingVisitor(null);
-    try (Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create())) {
+    Configuration configuration = HBaseConfiguration.create();
+    configuration.setBoolean("hbase.defaults.for.version.skip", true);
+    try (Connection connection = ConnectionFactory.createConnection(configuration)) {
       /* Used to generate an overlap.
-      Get g = new Get(Bytes.toBytes("t2,40,1563939166317.5a8be963741d27e9649e5c67a34259d9."));
+      */
+      Get g = new Get(Bytes.toBytes("t2,40,1564119846424.1db8c57d64e0733e0f027aaeae7a0bf0."));
       g.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
       try (Table t = connection.getTable(TableName.META_TABLE_NAME)) {
         Result r = t.get(g);
         byte [] row = g.getRow();
-        row[row.length - 3] <<= ((byte)row[row.length -3]);
+        row[row.length - 2] <<= ((byte)row[row.length - 2]);
         Put p = new Put(g.getRow());
         p.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
             r.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER));
         t.put(p);
       }
-      */
       MetaTableAccessor.scanMetaForTableRegions(connection, visitor, null);
       Report report = visitor.getReport();
       LOG.info(report != null? report.toString(): "empty");
diff --git a/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp b/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp
index 6138d35..fc212e8 100644
--- a/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp
+++ b/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp
@@ -18,15 +18,23 @@
  */
 --%>
 <%@ page contentType="text/html;charset=UTF-8"
+         import="java.time.Instant"
+         import="java.time.ZoneId"
          import="java.util.Date"
          import="java.util.List"
          import="java.util.Map"
          import="java.util.stream.Collectors"
+         import="java.time.ZonedDateTime"
+         import="java.time.format.DateTimeFormatter"
 %>
 <%@ page import="org.apache.hadoop.hbase.master.HbckChore" %>
 <%@ page import="org.apache.hadoop.hbase.master.HMaster" %>
 <%@ page import="org.apache.hadoop.hbase.ServerName" %>
+<%@ page import="org.apache.hadoop.hbase.util.Bytes" %>
 <%@ page import="org.apache.hadoop.hbase.util.Pair" %>
+<%@ page import="org.apache.hadoop.hbase.master.CatalogJanitor" %>
+<%@ page import="org.apache.hadoop.hbase.master.CatalogJanitor.Report" %>
+<%@ page import="org.apache.hadoop.hbase.master.CatalogJanitor.MetaRow" %>
 <%
   HMaster master = (HMaster) getServletContext().getAttribute(HMaster.MASTER);
   pageContext.setAttribute("pageTitle", "HBase Master HBCK Report: " + master.getServerName());
@@ -43,6 +51,14 @@
     startTimestamp = hbckChore.getCheckingStartTimestamp();
     endTimestamp = hbckChore.getCheckingEndTimestamp();
   }
+  ZonedDateTime zdt = ZonedDateTime.ofInstant(Instant.ofEpochMilli(startTimestamp),
+    ZoneId.systemDefault());
+  String iso8601start = startTimestamp == 0? "-1": zdt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME);
+  zdt = ZonedDateTime.ofInstant(Instant.ofEpochMilli(endTimestamp),
+    ZoneId.systemDefault());
+  String iso8601end = startTimestamp == 0? "-1": zdt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME);
+  CatalogJanitor cj = master.getCatalogJanitor();
+  CatalogJanitor.Report report = cj == null? null: cj.getLastReport();
 %>
 <jsp:include page="header.jsp">
   <jsp:param name="pageTitle" value="${pageTitle}"/>
@@ -61,29 +77,32 @@
 
   <div class="row">
     <div class="page-header">
-      <h1>HBCK Report</h1>
+      <h1>HBCK Chore Report</h1>
       <p>
-        <span>Checking started at <%= new Date(startTimestamp) %> and generated report at <%= new Date(endTimestamp) %></span>
+        <span>Checking started at <%= iso8601start %> and generated report at <%= iso8601end %>. Execute 'hbck_chore_run' in hbase shell to generate a new sub-report.</span>
       </p>
     </div>
   </div>
 
+
   <div class="row">
     <div class="page-header">
       <h2>Inconsistent Regions</h2>
+    </div>
+  </div>
+
+  <% if (inconsistentRegions != null && inconsistentRegions.size() > 0) { %>
       <p>
         <span>
-        There are three case: 1. Master thought this region opened, but no regionserver reported it.
-        2. Master thought this region opened on Server1, but regionserver reported Server2.
-        3. More than one regionservers reported opened this region.
+        There are three cases: 1. Master thought this region opened, but no regionserver reported it (Fix: use assigns
+        command); 2. Master thought this region opened on Server1, but regionserver reported Server2 (Fix:
+        check whether the server still exists. If not, schedule an SCP for it. If it does, restart Server2 and Server1);
+        3. More than one regionserver reported this region as open (Fix: restart the RegionServers).
         Notice: the reported online regionservers may be not right when there are regions in transition.
         Please check them in regionserver's web UI.
         </span>
       </p>
-    </div>
-  </div>
 
-  <% if (inconsistentRegions != null && inconsistentRegions.size() > 0) { %>
   <table class="table table-striped">
     <tr>
       <th>Region</th>
@@ -147,7 +166,115 @@
   </table>
   <% } %>
 
+  <div class="row inner_header">
+    <div class="page-header">
+      <h1>CatalogJanitor <em>hbase:meta</em> Consistency Issues</h1>
+    </div>
+  </div>
+  <% if (report != null && !report.isEmpty()) {
+    zdt = ZonedDateTime.ofInstant(Instant.ofEpochMilli(report.getCreateTime()),
+      ZoneId.systemDefault());
+    String iso8601reportTime = zdt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME);
+    zdt = ZonedDateTime.ofInstant(Instant.ofEpochMilli(System.currentTimeMillis()),
+      ZoneId.systemDefault());
+    String iso8601Now = zdt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME);
+  %>
+  <p>Report created: <%= iso8601reportTime %> (now=<%= iso8601Now %>). Run <i>catalogjanitor_run</i> in hbase shell to generate a new sub-report.</p>
+      <% if (!report.getHoles().isEmpty()) { %>
+          <div class="row inner_header">
+            <div class="page-header">
+              <h2>Holes</h2>
+            </div>
+          </div>
+          <table class="table table-striped">
+            <tr>
+              <th>Row before hole</th>
+              <th>RegionInfo</th>
+              <th>Row after hole</th>
+              <th>RegionInfo</th>
+            </tr>
+            <% for (Pair<MetaRow, MetaRow> p : report.getHoles()) { %>
+            <tr>
+              <td><%= Bytes.toStringBinary(p.getFirst().getMetaRow()) %></td>
+              <td><%= p.getFirst().getRegionInfo() %></td>
+              <td><%= Bytes.toStringBinary(p.getSecond().getMetaRow()) %></td>
+              <td><%= p.getSecond().getRegionInfo() %></td>
+            </tr>
+            <% } %>
+
+            <p><%= report.getHoles().size() %> hole(s).</p>
+          </table>
+      <% } %>
+      <% if (!report.getOverlaps().isEmpty()) { %>
+            <div class="row inner_header">
+              <div class="page-header">
+                <h2>Overlaps</h2>
+              </div>
+            </div>
+            <table class="table table-striped">
+              <tr>
+                <th>Row</th>
+                <th>RegionInfo</th>
+                <th>Other Row</th>
+                <th>Other RegionInfo</th>
+              </tr>
+              <% for (Pair<MetaRow, MetaRow> p : report.getOverlaps()) { %>
+              <tr>
+                <td><%= Bytes.toStringBinary(p.getFirst().getMetaRow()) %></td>
+                <td><%= p.getFirst().getRegionInfo() %></td>
+                <td><%= Bytes.toStringBinary(p.getSecond().getMetaRow()) %></td>
+                <td><%= p.getSecond().getRegionInfo() %></td>
+              </tr>
+              <% } %>
+
+              <p><%= report.getOverlaps().size() %> overlap(s).</p>
+            </table>
+      <% } %>
+      <% if (!report.getUnknownServers().isEmpty()) { %>
+            <div class="row inner_header">
+              <div class="page-header">
+                <h2>Unknown Servers</h2>
+              </div>
+            </div>
+            <table class="table table-striped">
+              <tr>
+                <th>Row</th>
+                <th>ServerName</th>
+                <th>RegionInfo</th>
+              </tr>
+              <% for (Pair<MetaRow, ServerName> p: report.getUnknownServers()) { %>
+              <tr>
+                <td><%= Bytes.toStringBinary(p.getFirst().getMetaRow()) %></td>
+                <td><%= p.getSecond() %></td>
+                <td><%= p.getFirst().getRegionInfo() %></td>
+              </tr>
+              <% } %>
+
+              <p><%= report.getUnknownServers().size() %> unknown server(s).</p>
+            </table>
+      <% } %>
+      <% if (!report.getEmptyRegionInfo().isEmpty()) { %>
+            <div class="row inner_header">
+              <div class="page-header">
+                <h2>Empty <em>info:regioninfo</em></h2>
+              </div>
+            </div>
+            <table class="table table-striped">
+              <tr>
+                <th>Row</th>
+              </tr>
+              <% for (byte [] row: report.getEmptyRegionInfo()) { %>
+              <tr>
+                <td><%= Bytes.toStringBinary(row) %></td>
+              </tr>
+              <% } %>
+
+              <p><%= report.getEmptyRegionInfo().size() %> emptyRegionInfo(s).</p>
+            </table>
+      <% } %>
+  <% } %>
+
   <% } %>
 </div>
 
-<jsp:include page="footer.jsp"/>
\ No newline at end of file
+<jsp:include page="footer.jsp"/>
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorCluster.java
index d9fb073..090690b 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorCluster.java
@@ -92,11 +92,11 @@ public class TestCatalogJanitorCluster {
     gc = janitor.scan();
     report = janitor.getLastReport();
     assertFalse(report.isEmpty());
-    assertEquals(1, report.holes.size());
-    assertTrue(report.holes.get(0).getFirst().regionInfo.getTable().equals(T1));
-    assertTrue(report.holes.get(0).getFirst().regionInfo.isLast());
-    assertTrue(report.holes.get(0).getSecond().regionInfo.getTable().equals(T2));
-    assertEquals(0, report.overlaps.size());
+    assertEquals(1, report.getHoles().size());
+    assertTrue(report.getHoles().get(0).getFirst().getRegionInfo().getTable().equals(T1));
+    assertTrue(report.getHoles().get(0).getFirst().getRegionInfo().isLast());
+    assertTrue(report.getHoles().get(0).getSecond().getRegionInfo().getTable().equals(T2));
+    assertEquals(0, report.getOverlaps().size());
     // Next, add overlaps to first row in t3
     List<RegionInfo> t3Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T3);
     RegionInfo ri = t3Ris.get(0);
@@ -113,12 +113,12 @@ public class TestCatalogJanitorCluster {
     report = janitor.getLastReport();
     assertFalse(report.isEmpty());
     // We added two overlaps so total three.
-    assertEquals(3, report.overlaps.size());
+    assertEquals(3, report.getOverlaps().size());
     // Assert hole is still there.
-    assertEquals(1, report.holes.size());
+    assertEquals(1, report.getHoles().size());
     // Assert other attributes are empty still.
-    assertTrue(report.emptyRegionInfo.isEmpty());
-    assertTrue(report.unknownServers.isEmpty());
+    assertTrue(report.getEmptyRegionInfo().isEmpty());
+    assertTrue(report.getUnknownServers().isEmpty());
     // Now make bad server in t1.
     List<RegionInfo> t1Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T1);
     RegionInfo t1Ri1 = t1Ris.get(1);
@@ -129,7 +129,7 @@ public class TestCatalogJanitorCluster {
     gc = janitor.scan();
     report = janitor.getLastReport();
     assertFalse(report.isEmpty());
-    assertEquals(1, report.unknownServers.size());
+    assertEquals(1, report.getUnknownServers().size());
     // Finally, make an empty regioninfo in t1.
     RegionInfo t1Ri2 = t1Ris.get(2);
     Put pEmptyRI = new Put(t1Ri2.getRegionName());
@@ -138,7 +138,7 @@ public class TestCatalogJanitorCluster {
     MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(pEmptyRI));
     gc = janitor.scan();
     report = janitor.getLastReport();
-    assertEquals(1, report.emptyRegionInfo.size());
+    assertEquals(1, report.getEmptyRegionInfo().size());
   }
 
   /**