You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by zg...@apache.org on 2019/08/05 07:21:40 UTC
[hbase] branch branch-2.2 updated: HBASE-22741 Show catalogjanitor
consistency complaints in new 'HBCK Report' page Signed-off-by: huzheng
Signed-off-by: Guanghao Zhang
This is an automated email from the ASF dual-hosted git repository.
zghao pushed a commit to branch branch-2.2
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2.2 by this push:
new 2052058 HBASE-22741 Show catalogjanitor consistency complaints in new 'HBCK Report' page Signed-off-by: huzheng <op...@gmail.com> Signed-off-by: Guanghao Zhang <zg...@apache.org>
2052058 is described below
commit 2052058c7c27d46e509011773a07a8c9873f98e7
Author: stack <st...@apache.org>
AuthorDate: Mon Jul 29 17:10:58 2019 -0700
HBASE-22741 Show catalogjanitor consistency complaints in new 'HBCK Report' page
Signed-off-by: huzheng <op...@gmail.com>
Signed-off-by: Guanghao Zhang <zg...@apache.org>
---
.../apache/hadoop/hbase/master/CatalogJanitor.java | 91 +++++++++----
.../main/resources/hbase-webapps/master/hbck.jsp | 145 +++++++++++++++++++--
.../hbase/master/TestCatalogJanitorCluster.java | 22 ++--
3 files changed, 210 insertions(+), 48 deletions(-)
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
index 37108d5..e6564a5 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
@@ -22,7 +22,6 @@ import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Comparator;
-import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
@@ -30,6 +29,7 @@ import java.util.Properties;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicBoolean;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
@@ -43,8 +43,11 @@ import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableState;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
@@ -240,7 +243,7 @@ public class CatalogJanitor extends ScheduledChore {
* @return Returns last published Report that comes of last successful scan
* of hbase:meta.
*/
- Report getLastReport() {
+ public Report getLastReport() {
return this.lastReport;
}
@@ -444,29 +447,49 @@ public class CatalogJanitor extends ScheduledChore {
}
/**
- * Report made by {@link ReportMakingVisitor}.
+ * Report made by ReportMakingVisitor
*/
- static class Report {
+ public static class Report {
private final long now = EnvironmentEdgeManager.currentTime();
// Keep Map of found split parents. These are candidates for cleanup.
// Use a comparator that has split parents come before its daughters.
final Map<RegionInfo, Result> splitParents = new TreeMap<>(new SplitParentFirstComparator());
final Map<RegionInfo, Result> mergedRegions = new TreeMap<>(RegionInfo.COMPARATOR);
-
- final List<Pair<MetaRow, MetaRow>> holes = new ArrayList<>();
- final List<Pair<MetaRow, MetaRow>> overlaps = new ArrayList<>();
- final Map<ServerName, RegionInfo> unknownServers = new HashMap<ServerName, RegionInfo>();
- final List<byte []> emptyRegionInfo = new ArrayList<>();
int count = 0;
+ private final List<Pair<MetaRow, MetaRow>> holes = new ArrayList<>();
+ private final List<Pair<MetaRow, MetaRow>> overlaps = new ArrayList<>();
+ private final List<Pair<MetaRow, ServerName>> unknownServers = new ArrayList<>();
+ private final List<byte []> emptyRegionInfo = new ArrayList<>();
+
@VisibleForTesting
Report() {}
+ public long getCreateTime() {
+ return this.now;
+ }
+
+ public List<Pair<MetaRow, MetaRow>> getHoles() {
+ return this.holes;
+ }
+
+ public List<Pair<MetaRow, MetaRow>> getOverlaps() {
+ return this.overlaps;
+ }
+
+ public List<Pair<MetaRow, ServerName>> getUnknownServers() {
+ return unknownServers;
+ }
+
+ public List<byte[]> getEmptyRegionInfo() {
+ return emptyRegionInfo;
+ }
+
/**
* @return True if an 'empty' lastReport -- no problems found.
*/
- boolean isEmpty() {
+ public boolean isEmpty() {
return this.holes.isEmpty() && this.overlaps.isEmpty() && this.unknownServers.isEmpty() &&
this.emptyRegionInfo.isEmpty();
}
@@ -478,28 +501,28 @@ public class CatalogJanitor extends ScheduledChore {
if (sb.length() > 0) {
sb.append(", ");
}
- sb.append("hole=" + Bytes.toString(p.getFirst().metaRow) + "/" +
- Bytes.toString(p.getSecond().metaRow));
+ sb.append("hole=" + Bytes.toStringBinary(p.getFirst().metaRow) + "/" +
+ Bytes.toStringBinary(p.getSecond().metaRow));
}
for (Pair<MetaRow, MetaRow> p: this.overlaps) {
if (sb.length() > 0) {
sb.append(", ");
}
- sb.append("overlap=").append(Bytes.toString(p.getFirst().metaRow)).append("/").
- append(Bytes.toString(p.getSecond().metaRow));
+ sb.append("overlap=").append(Bytes.toStringBinary(p.getFirst().metaRow)).append("/").
+ append(Bytes.toStringBinary(p.getSecond().metaRow));
}
for (byte [] r: this.emptyRegionInfo) {
if (sb.length() > 0) {
sb.append(", ");
}
- sb.append("empty=").append(Bytes.toString(r));
+ sb.append("empty=").append(Bytes.toStringBinary(r));
}
- for (Map.Entry<ServerName, RegionInfo> e: this.unknownServers.entrySet()) {
+ for (Pair<MetaRow, ServerName> p: this.unknownServers) {
if (sb.length() > 0) {
sb.append(", ");
}
- sb.append("unknown_server=").append(e.getKey()).append("/").
- append(e.getValue().getRegionNameAsString());
+ sb.append("unknown_server=").append(p.getSecond()).append("/").
+ append(Bytes.toStringBinary(p.getFirst().metaRow));
}
return sb.toString();
}
@@ -508,7 +531,7 @@ public class CatalogJanitor extends ScheduledChore {
/**
* Simple datastructure to hold a MetaRow content.
*/
- static class MetaRow {
+ public static class MetaRow {
/**
* A marker for use in case where there is a hole at the very
* first row in hbase:meta. Should never happen.
@@ -519,17 +542,25 @@ public class CatalogJanitor extends ScheduledChore {
/**
* Row from hbase:meta table.
*/
- final byte [] metaRow;
+ private final byte [] metaRow;
/**
* The decoded RegionInfo gotten from hbase:meta.
*/
- final RegionInfo regionInfo;
+ private final RegionInfo regionInfo;
MetaRow(byte [] metaRow, RegionInfo regionInfo) {
this.metaRow = metaRow;
this.regionInfo = regionInfo;
}
+
+ public RegionInfo getRegionInfo() {
+ return regionInfo;
+ }
+
+ public byte[] getMetaRow() {
+ return metaRow;
+ }
}
/**
@@ -608,13 +639,14 @@ public class CatalogJanitor extends ScheduledChore {
MetaTableAccessor.getRegionInfoColumn());
} else {
ri = locations.getDefaultRegionLocation().getRegion();
- checkServer(locations);
+ checkServer(metaTableRow.getRow(), locations);
}
if (ri == null) {
this.report.emptyRegionInfo.add(metaTableRow.getRow());
return ri;
}
+
MetaRow mrri = new MetaRow(metaTableRow.getRow(), ri);
// If table is disabled, skip integrity check.
if (!isTableDisabled(ri)) {
@@ -673,7 +705,7 @@ public class CatalogJanitor extends ScheduledChore {
/**
* Run through referenced servers and save off unknown and the dead.
*/
- private void checkServer(RegionLocations locations) {
+ private void checkServer(byte [] metaTableRow, RegionLocations locations) {
if (this.services == null) {
// Can't do this test if no services.
return;
@@ -691,7 +723,8 @@ public class CatalogJanitor extends ScheduledChore {
isServerKnownAndOnline(sn);
switch (state) {
case UNKNOWN:
- this.report.unknownServers.put(sn, location.getRegion());
+ this.report.unknownServers.add( // Keep the hbase:meta row alongside the unknown server.
+ new Pair<>(new MetaRow(metaTableRow, location.getRegion()), sn));
break;
default:
@@ -736,20 +769,22 @@ public class CatalogJanitor extends ScheduledChore {
public static void main(String [] args) throws IOException {
checkLog4jProperties();
ReportMakingVisitor visitor = new ReportMakingVisitor(null);
- try (Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create())) {
+ Configuration configuration = HBaseConfiguration.create();
+ configuration.setBoolean("hbase.defaults.for.version.skip", true);
+ try (Connection connection = ConnectionFactory.createConnection(configuration)) {
/* Used to generate an overlap.
- Get g = new Get(Bytes.toBytes("t2,40,1563939166317.5a8be963741d27e9649e5c67a34259d9."));
+ Get g = new Get(Bytes.toBytes("t2,40,1564119846424.1db8c57d64e0733e0f027aaeae7a0bf0."));
g.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
try (Table t = connection.getTable(TableName.META_TABLE_NAME)) {
Result r = t.get(g);
byte [] row = g.getRow();
- row[row.length - 3] <<= ((byte)row[row.length -3]);
+ row[row.length - 2] <<= ((byte)row[row.length - 2]);
Put p = new Put(g.getRow());
p.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
r.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER));
t.put(p);
}
*/
MetaTableAccessor.scanMetaForTableRegions(connection, visitor, null);
Report report = visitor.getReport();
LOG.info(report != null? report.toString(): "empty");
diff --git a/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp b/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp
index 6138d35..fc212e8 100644
--- a/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp
+++ b/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp
@@ -18,15 +18,23 @@
*/
--%>
<%@ page contentType="text/html;charset=UTF-8"
+ import="java.time.Instant"
+ import="java.time.ZoneId"
import="java.util.Date"
import="java.util.List"
import="java.util.Map"
import="java.util.stream.Collectors"
+ import="java.time.ZonedDateTime"
+ import="java.time.format.DateTimeFormatter"
%>
<%@ page import="org.apache.hadoop.hbase.master.HbckChore" %>
<%@ page import="org.apache.hadoop.hbase.master.HMaster" %>
<%@ page import="org.apache.hadoop.hbase.ServerName" %>
+<%@ page import="org.apache.hadoop.hbase.util.Bytes" %>
<%@ page import="org.apache.hadoop.hbase.util.Pair" %>
+<%@ page import="org.apache.hadoop.hbase.master.CatalogJanitor" %>
+<%@ page import="org.apache.hadoop.hbase.master.CatalogJanitor.Report" %>
+<%@ page import="org.apache.hadoop.hbase.master.CatalogJanitor.MetaRow" %>
<%
HMaster master = (HMaster) getServletContext().getAttribute(HMaster.MASTER);
pageContext.setAttribute("pageTitle", "HBase Master HBCK Report: " + master.getServerName());
@@ -43,6 +51,14 @@
startTimestamp = hbckChore.getCheckingStartTimestamp();
endTimestamp = hbckChore.getCheckingEndTimestamp();
}
+ ZonedDateTime zdt = ZonedDateTime.ofInstant(Instant.ofEpochMilli(startTimestamp),
+ ZoneId.systemDefault()); // A timestamp of 0 is rendered as "-1" rather than as the 1970 epoch.
+ String iso8601start = startTimestamp == 0? "-1": zdt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME);
+ zdt = ZonedDateTime.ofInstant(Instant.ofEpochMilli(endTimestamp),
+ ZoneId.systemDefault()); // The end time is guarded by its own value, not by the start time.
+ String iso8601end = endTimestamp == 0? "-1": zdt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME);
+ CatalogJanitor cj = master.getCatalogJanitor();
+ CatalogJanitor.Report report = cj == null? null: cj.getLastReport();
%>
<jsp:include page="header.jsp">
<jsp:param name="pageTitle" value="${pageTitle}"/>
@@ -61,29 +77,32 @@
<div class="row">
<div class="page-header">
- <h1>HBCK Report</h1>
+ <h1>HBCK Chore Report</h1>
<p>
- <span>Checking started at <%= new Date(startTimestamp) %> and generated report at <%= new Date(endTimestamp) %></span>
+ <span>Checking started at <%= iso8601start %> and generated report at <%= iso8601end %>. Execute 'hbck_chore_run' in hbase shell to generate a new sub-report.</span>
</p>
</div>
</div>
+
<div class="row">
<div class="page-header">
<h2>Inconsistent Regions</h2>
+ </div>
+ </div>
+
+ <% if (inconsistentRegions != null && inconsistentRegions.size() > 0) { %>
<p>
<span>
- There are three case: 1. Master thought this region opened, but no regionserver reported it.
- 2. Master thought this region opened on Server1, but regionserver reported Server2.
- 3. More than one regionservers reported opened this region.
+ There are three cases: 1. Master thought this region opened, but no regionserver reported it (Fix: use the assigns
+ command); 2. Master thought this region opened on Server1, but regionserver reported Server2 (Fix:
+ check whether the server still exists. If not, schedule an SCP for it. If it does, restart Server2 and Server1);
+ 3. More than one regionserver reported this region as opened (Fix: restart the RegionServers).
Notice: the reported online regionservers may be not right when there are regions in transition.
Please check them in regionserver's web UI.
</span>
</p>
- </div>
- </div>
- <% if (inconsistentRegions != null && inconsistentRegions.size() > 0) { %>
<table class="table table-striped">
<tr>
<th>Region</th>
@@ -147,7 +166,115 @@
</table>
<% } %>
+ <div class="row inner_header">
+ <div class="page-header">
+ <h1>CatalogJanitor <em>hbase:meta</em> Consistency Issues</h1>
+ </div>
+ </div>
+ <% if (report != null && !report.isEmpty()) {
+ zdt = ZonedDateTime.ofInstant(Instant.ofEpochMilli(report.getCreateTime()),
+ ZoneId.systemDefault());
+ String iso8601reportTime = zdt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME);
+ zdt = ZonedDateTime.ofInstant(Instant.ofEpochMilli(System.currentTimeMillis()),
+ ZoneId.systemDefault());
+ String iso8601Now = zdt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME);
+ %>
+ <p>Report created: <%= iso8601reportTime %> (now=<%= iso8601Now %>). Run <i>catalogjanitor_run</i> in hbase shell to generate a new sub-report.</p>
+ <% if (!report.getHoles().isEmpty()) { %>
+ <div class="row inner_header">
+ <div class="page-header">
+ <h2>Holes</h2>
+ </div>
+ </div>
+ <table class="table table-striped">
+ <tr>
+ <th>Row before hole</th>
+ <th>RegionInfo</th>
+ <th>Row after hole</th>
+ <th>RegionInfo</th>
+ </tr>
+ <% for (Pair<MetaRow, MetaRow> p : report.getHoles()) { %>
+ <tr>
+ <td><%= Bytes.toStringBinary(p.getFirst().getMetaRow()) %></td>
+ <td><%= p.getFirst().getRegionInfo() %></td>
+ <td><%= Bytes.toStringBinary(p.getSecond().getMetaRow()) %></td>
+ <td><%= p.getSecond().getRegionInfo() %></td>
+ </tr>
+ <% } %>
+ </table> <%-- Close the table before the summary: a paragraph is not valid content directly inside a table. --%>
+
+ <p><%= report.getHoles().size() %> hole(s).</p>
+ <% } %>
+ <% if (!report.getOverlaps().isEmpty()) { %>
+ <div class="row inner_header">
+ <div class="page-header">
+ <h2>Overlaps</h2>
+ </div>
+ </div>
+ <table class="table table-striped">
+ <tr>
+ <th>Row</th>
+ <th>RegionInfo</th>
+ <th>Other Row</th>
+ <th>Other RegionInfo</th>
+ </tr>
+ <% for (Pair<MetaRow, MetaRow> p : report.getOverlaps()) { %>
+ <tr>
+ <td><%= Bytes.toStringBinary(p.getFirst().getMetaRow()) %></td>
+ <td><%= p.getFirst().getRegionInfo() %></td>
+ <td><%= Bytes.toStringBinary(p.getSecond().getMetaRow()) %></td>
+ <td><%= p.getSecond().getRegionInfo() %></td>
+ </tr>
+ <% } %>
+ </table> <%-- Close the table before the summary: a paragraph is not valid content directly inside a table. --%>
+
+ <p><%= report.getOverlaps().size() %> overlap(s).</p>
+ <% } %>
+ <% if (!report.getUnknownServers().isEmpty()) { %>
+ <div class="row inner_header">
+ <div class="page-header">
+ <h2>Unknown Servers</h2>
+ </div>
+ </div>
+ <table class="table table-striped">
+ <tr>
+ <th>Row</th>
+ <th>ServerName</th>
+ <th>RegionInfo</th>
+ </tr>
+ <% for (Pair<MetaRow, ServerName> p: report.getUnknownServers()) { %>
+ <tr>
+ <td><%= Bytes.toStringBinary(p.getFirst().getMetaRow()) %></td>
+ <td><%= p.getSecond() %></td>
+ <td><%= p.getFirst().getRegionInfo() %></td>
+ </tr>
+ <% } %>
+ </table> <%-- Close the table before the summary: a paragraph is not valid content directly inside a table. --%>
+
+ <p><%= report.getUnknownServers().size() %> unknown server(s).</p>
+ <% } %>
+ <% if (!report.getEmptyRegionInfo().isEmpty()) { %>
+ <div class="row inner_header">
+ <div class="page-header">
+ <h2>Empty <em>info:regioninfo</em></h2>
+ </div>
+ </div>
+ <table class="table table-striped">
+ <tr>
+ <th>Row</th>
+ </tr>
+ <% for (byte [] row: report.getEmptyRegionInfo()) { %>
+ <tr>
+ <td><%= Bytes.toStringBinary(row) %></td>
+ </tr>
+ <% } %>
+ </table> <%-- Close the table before the summary: a paragraph is not valid content directly inside a table. --%>
+
+ <p><%= report.getEmptyRegionInfo().size() %> emptyRegionInfo(s).</p>
+ <% } %>
+ <% } %>
+
<% } %>
</div>
-<jsp:include page="footer.jsp"/>
\ No newline at end of file
+<jsp:include page="footer.jsp"/>
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorCluster.java
index d9fb073..090690b 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorCluster.java
@@ -92,11 +92,11 @@ public class TestCatalogJanitorCluster {
gc = janitor.scan();
report = janitor.getLastReport();
assertFalse(report.isEmpty());
- assertEquals(1, report.holes.size());
- assertTrue(report.holes.get(0).getFirst().regionInfo.getTable().equals(T1));
- assertTrue(report.holes.get(0).getFirst().regionInfo.isLast());
- assertTrue(report.holes.get(0).getSecond().regionInfo.getTable().equals(T2));
- assertEquals(0, report.overlaps.size());
+ assertEquals(1, report.getHoles().size());
+ assertTrue(report.getHoles().get(0).getFirst().getRegionInfo().getTable().equals(T1));
+ assertTrue(report.getHoles().get(0).getFirst().getRegionInfo().isLast());
+ assertTrue(report.getHoles().get(0).getSecond().getRegionInfo().getTable().equals(T2));
+ assertEquals(0, report.getOverlaps().size());
// Next, add overlaps to first row in t3
List<RegionInfo> t3Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T3);
RegionInfo ri = t3Ris.get(0);
@@ -113,12 +113,12 @@ public class TestCatalogJanitorCluster {
report = janitor.getLastReport();
assertFalse(report.isEmpty());
// We added two overlaps so total three.
- assertEquals(3, report.overlaps.size());
+ assertEquals(3, report.getOverlaps().size());
// Assert hole is still there.
- assertEquals(1, report.holes.size());
+ assertEquals(1, report.getHoles().size());
// Assert other attributes are empty still.
- assertTrue(report.emptyRegionInfo.isEmpty());
- assertTrue(report.unknownServers.isEmpty());
+ assertTrue(report.getEmptyRegionInfo().isEmpty());
+ assertTrue(report.getUnknownServers().isEmpty());
// Now make bad server in t1.
List<RegionInfo> t1Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T1);
RegionInfo t1Ri1 = t1Ris.get(1);
@@ -129,7 +129,7 @@ public class TestCatalogJanitorCluster {
gc = janitor.scan();
report = janitor.getLastReport();
assertFalse(report.isEmpty());
- assertEquals(1, report.unknownServers.size());
+ assertEquals(1, report.getUnknownServers().size());
// Finally, make an empty regioninfo in t1.
RegionInfo t1Ri2 = t1Ris.get(2);
Put pEmptyRI = new Put(t1Ri2.getRegionName());
@@ -138,7 +138,7 @@ public class TestCatalogJanitorCluster {
MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(pEmptyRI));
gc = janitor.scan();
report = janitor.getLastReport();
- assertEquals(1, report.emptyRegionInfo.size());
+ assertEquals(1, report.getEmptyRegionInfo().size());
}
/**