You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ap...@apache.org on 2019/10/29 20:15:48 UTC
[hbase] branch branch-1 updated: HBASE-23213 Backport HBASE-22460
to branch-1 (#761)
This is an automated email from the ASF dual-hosted git repository.
apurtell pushed a commit to branch branch-1
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-1 by this push:
new 5e414f2 HBASE-23213 Backport HBASE-22460 to branch-1 (#761)
5e414f2 is described below
commit 5e414f2d4690b2a474fbc50599a8a64a782571ab
Author: Viraj Jasani <vi...@gmail.com>
AuthorDate: Wed Oct 30 01:45:39 2019 +0530
HBASE-23213 Backport HBASE-22460 to branch-1 (#761)
Signed-off-by: Andrew Purtell <ap...@apache.org>
---
.../org/apache/hadoop/hbase/ClusterStatus.java | 7 +
.../java/org/apache/hadoop/hbase/RegionLoad.java | 9 +
.../java/org/apache/hadoop/hbase/HConstants.java | 7 +
hbase-common/src/main/resources/hbase-default.xml | 29 +++
.../regionserver/MetricsRegionServerSource.java | 1 +
.../hbase/regionserver/MetricsRegionWrapper.java | 6 +
.../regionserver/MetricsRegionSourceImpl.java | 4 +
.../regionserver/TestMetricsRegionSourceImpl.java | 5 +
.../protobuf/generated/ClusterStatusProtos.java | 206 +++++++++++++++++----
.../src/main/protobuf/ClusterStatus.proto | 6 +
.../hadoop/hbase/master/AssignmentManager.java | 10 +
.../org/apache/hadoop/hbase/master/HMaster.java | 58 ++++++
.../hadoop/hbase/master/RegionsRecoveryChore.java | 183 ++++++++++++++++++
.../hadoop/hbase/regionserver/HRegionServer.java | 11 ++
.../apache/hadoop/hbase/regionserver/HStore.java | 18 ++
.../regionserver/MetricsRegionWrapperImpl.java | 14 +-
.../regionserver/MetricsRegionWrapperStub.java | 5 +
src/main/asciidoc/_chapters/hbase-default.adoc | 40 ++++
src/main/asciidoc/_chapters/ops_mgt.adoc | 23 +++
19 files changed, 607 insertions(+), 35 deletions(-)
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/ClusterStatus.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/ClusterStatus.java
index e2f10d3..ed859ee 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/ClusterStatus.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/ClusterStatus.java
@@ -166,6 +166,13 @@ public class ClusterStatus extends VersionedWritable {
}
/**
+ * @return map of the names of region servers on the live list with associated ServerLoad
+ */
+ public Map<ServerName, ServerLoad> getLiveServersLoad() {
+ return Collections.unmodifiableMap(liveServers);
+ }
+
+ /**
* @return the average cluster load
*/
public double getAverageLoad() {
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/RegionLoad.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/RegionLoad.java
index 158105b..24c33a6 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/RegionLoad.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/RegionLoad.java
@@ -195,6 +195,14 @@ public class RegionLoad {
}
/**
+ * @return the max reference count for any store file among all stores files
+ * of this region
+ */
+ public int getMaxStoreFileRefCount() {
+ return regionLoadPB.getMaxStoreFileRefCount();
+ }
+
+ /**
* @see java.lang.Object#toString()
*/
@Override
@@ -204,6 +212,7 @@ public class RegionLoad {
sb = Strings.appendKeyValue(sb, "numberOfStorefiles",
this.getStorefiles());
sb = Strings.appendKeyValue(sb, "storeRefCount", this.getStoreRefCount());
+ sb = Strings.appendKeyValue(sb, "maxStoreFileRefCount", this.getMaxStoreFileRefCount());
sb = Strings.appendKeyValue(sb, "storefileUncompressedSizeMB",
this.getStoreUncompressedSizeMB());
sb = Strings.appendKeyValue(sb, "lastMajorCompactionTimestamp",
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
index 6e66f57..59d4fb3 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
@@ -1341,6 +1341,13 @@ public final class HConstants {
// User defined Default TTL config key
public static final String DEFAULT_SNAPSHOT_TTL_CONFIG_KEY = "hbase.master.snapshot.ttl";
+ // Regions Recovery based on high storeFileRefCount threshold value
+ public static final String STORE_FILE_REF_COUNT_THRESHOLD =
+ "hbase.regions.recovery.store.file.ref.count";
+
+ // default -1 indicates there is no threshold on high storeRefCount
+ public static final int DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD = -1;
+
/**
* Configurations for master executor services.
*/
diff --git a/hbase-common/src/main/resources/hbase-default.xml b/hbase-common/src/main/resources/hbase-default.xml
index 3f409d7..169c65c 100644
--- a/hbase-common/src/main/resources/hbase-default.xml
+++ b/hbase-common/src/main/resources/hbase-default.xml
@@ -1643,4 +1643,33 @@ possible configurations would overwhelm and obscure the important.
automatically deleted until it is manually deleted
</description>
</property>
+ <property>
+ <name>hbase.master.regions.recovery.check.interval</name>
+ <value>1200000</value>
+ <description>
+ Regions Recovery Chore interval in milliseconds.
+ This chore keeps running at this interval to
+ find all regions with configurable max store file ref count
+ and reopens them.
+ </description>
+ </property>
+ <property>
+ <name>hbase.regions.recovery.store.file.ref.count</name>
+ <value>-1</value>
+ <description>
+ Very large ref count on a file indicates
+ that it is a ref leak on that object. Such files
+ can not be removed even after it is invalidated
+ via compaction. Only way to recover in such
+ scenario is to reopen the region which can
+ release all resources, like the refcount, leases, etc.
+ This config represents Store files Ref Count threshold
+ value considered for reopening regions.
+ Any region with store files ref count > this value
+ would be eligible for reopening by master.
+ Default value -1 indicates this feature is turned off.
+ Only positive integer value should be provided to enable
+ this feature.
+ </description>
+ </property>
</configuration>
diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java
index 635ba70..a5564ce 100644
--- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java
+++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java
@@ -231,6 +231,7 @@ public interface MetricsRegionServerSource extends BaseSource, JvmPauseMonitorSo
String STOREFILE_COUNT_DESC = "Number of Store Files";
String STORE_REF_COUNT = "storeRefCount";
String STORE_REF_COUNT_DESC = "Store reference count";
+ String MAX_STORE_FILE_REF_COUNT = "maxStoreFileRefCount";
String MEMSTORE_SIZE = "memStoreSize";
String MEMSTORE_SIZE_DESC = "Size of the memstore";
String STOREFILE_SIZE = "storeFileSize";
diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapper.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapper.java
index b519e57..b92ad55 100644
--- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapper.java
+++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapper.java
@@ -147,4 +147,10 @@ public interface MetricsRegionWrapper {
* @return the number of references active on the store
*/
long getStoreRefCount();
+
+ /**
+ * @return the max reference count for any store file among all stores files
+ * of this region
+ */
+ int getMaxStoreFileRefCount();
}
diff --git a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionSourceImpl.java b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionSourceImpl.java
index 7da7686..578ff0d 100644
--- a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionSourceImpl.java
+++ b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionSourceImpl.java
@@ -218,6 +218,10 @@ public class MetricsRegionSourceImpl implements MetricsRegionSource {
MetricsRegionServerSource.STORE_REF_COUNT),
this.regionWrapper.getStoreRefCount());
mrb.addGauge(Interns.info(
+ regionNamePrefix + MetricsRegionServerSource.MAX_STORE_FILE_REF_COUNT,
+ MetricsRegionServerSource.MAX_STORE_FILE_REF_COUNT),
+ this.regionWrapper.getMaxStoreFileRefCount());
+ mrb.addGauge(Interns.info(
regionNamePrefix + MetricsRegionServerSource.MEMSTORE_SIZE,
MetricsRegionServerSource.MEMSTORE_SIZE_DESC),
this.regionWrapper.getMemstoreSize());
diff --git a/hbase-hadoop2-compat/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionSourceImpl.java b/hbase-hadoop2-compat/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionSourceImpl.java
index 043ff3d..5a4caa7 100644
--- a/hbase-hadoop2-compat/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionSourceImpl.java
+++ b/hbase-hadoop2-compat/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionSourceImpl.java
@@ -97,6 +97,11 @@ public class TestMetricsRegionSourceImpl {
}
@Override
+ public int getMaxStoreFileRefCount() {
+ return 0;
+ }
+
+ @Override
public long getMemstoreSize() {
return 0;
}
diff --git a/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClusterStatusProtos.java b/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClusterStatusProtos.java
index 735ef98..aac9ab8 100644
--- a/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClusterStatusProtos.java
+++ b/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClusterStatusProtos.java
@@ -3631,6 +3631,28 @@ public final class ClusterStatusProtos {
* </pre>
*/
int getStoreRefCount();
+
+ // optional int32 max_store_file_ref_count = 22 [default = 0];
+ /**
+ * <code>optional int32 max_store_file_ref_count = 22 [default = 0];</code>
+ *
+ * <pre>
+ **
+ * The max number of references active on single store file among all store files
+ * that belong to given region
+ * </pre>
+ */
+ boolean hasMaxStoreFileRefCount();
+ /**
+ * <code>optional int32 max_store_file_ref_count = 22 [default = 0];</code>
+ *
+ * <pre>
+ **
+ * The max number of references active on single store file among all store files
+ * that belong to given region
+ * </pre>
+ */
+ int getMaxStoreFileRefCount();
}
/**
* Protobuf type {@code hbase.pb.RegionLoad}
@@ -3789,6 +3811,11 @@ public final class ClusterStatusProtos {
storeRefCount_ = input.readInt32();
break;
}
+ case 176: {
+ bitField0_ |= 0x00040000;
+ maxStoreFileRefCount_ = input.readInt32();
+ break;
+ }
}
}
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
@@ -4330,6 +4357,34 @@ public final class ClusterStatusProtos {
return storeRefCount_;
}
+ // optional int32 max_store_file_ref_count = 22 [default = 0];
+ public static final int MAX_STORE_FILE_REF_COUNT_FIELD_NUMBER = 22;
+ private int maxStoreFileRefCount_;
+ /**
+ * <code>optional int32 max_store_file_ref_count = 22 [default = 0];</code>
+ *
+ * <pre>
+ **
+ * The max number of references active on single store file among all store files
+ * that belong to given region
+ * </pre>
+ */
+ public boolean hasMaxStoreFileRefCount() {
+ return ((bitField0_ & 0x00040000) == 0x00040000);
+ }
+ /**
+ * <code>optional int32 max_store_file_ref_count = 22 [default = 0];</code>
+ *
+ * <pre>
+ **
+ * The max number of references active on single store file among all store files
+ * that belong to given region
+ * </pre>
+ */
+ public int getMaxStoreFileRefCount() {
+ return maxStoreFileRefCount_;
+ }
+
private void initFields() {
regionSpecifier_ = org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionSpecifier.getDefaultInstance();
stores_ = 0;
@@ -4350,6 +4405,7 @@ public final class ClusterStatusProtos {
lastMajorCompactionTs_ = 0L;
storeCompleteSequenceId_ = java.util.Collections.emptyList();
storeRefCount_ = 0;
+ maxStoreFileRefCount_ = 0;
}
private byte memoizedIsInitialized = -1;
public final boolean isInitialized() {
@@ -4434,6 +4490,9 @@ public final class ClusterStatusProtos {
if (((bitField0_ & 0x00020000) == 0x00020000)) {
output.writeInt32(21, storeRefCount_);
}
+ if (((bitField0_ & 0x00040000) == 0x00040000)) {
+ output.writeInt32(22, maxStoreFileRefCount_);
+ }
getUnknownFields().writeTo(output);
}
@@ -4519,6 +4578,10 @@ public final class ClusterStatusProtos {
size += com.google.protobuf.CodedOutputStream
.computeInt32Size(21, storeRefCount_);
}
+ if (((bitField0_ & 0x00040000) == 0x00040000)) {
+ size += com.google.protobuf.CodedOutputStream
+ .computeInt32Size(22, maxStoreFileRefCount_);
+ }
size += getUnknownFields().getSerializedSize();
memoizedSerializedSize = size;
return size;
@@ -4633,6 +4696,11 @@ public final class ClusterStatusProtos {
result = result && (getStoreRefCount()
== other.getStoreRefCount());
}
+ result = result && (hasMaxStoreFileRefCount() == other.hasMaxStoreFileRefCount());
+ if (hasMaxStoreFileRefCount()) {
+ result = result && (getMaxStoreFileRefCount()
+ == other.getMaxStoreFileRefCount());
+ }
result = result &&
getUnknownFields().equals(other.getUnknownFields());
return result;
@@ -4723,6 +4791,10 @@ public final class ClusterStatusProtos {
hash = (37 * hash) + STORE_REF_COUNT_FIELD_NUMBER;
hash = (53 * hash) + getStoreRefCount();
}
+ if (hasMaxStoreFileRefCount()) {
+ hash = (37 * hash) + MAX_STORE_FILE_REF_COUNT_FIELD_NUMBER;
+ hash = (53 * hash) + getMaxStoreFileRefCount();
+ }
hash = (29 * hash) + getUnknownFields().hashCode();
memoizedHashCode = hash;
return hash;
@@ -4880,6 +4952,8 @@ public final class ClusterStatusProtos {
}
storeRefCount_ = 0;
bitField0_ = (bitField0_ & ~0x00040000);
+ maxStoreFileRefCount_ = 0;
+ bitField0_ = (bitField0_ & ~0x00080000);
return this;
}
@@ -4993,6 +5067,10 @@ public final class ClusterStatusProtos {
to_bitField0_ |= 0x00020000;
}
result.storeRefCount_ = storeRefCount_;
+ if (((from_bitField0_ & 0x00080000) == 0x00080000)) {
+ to_bitField0_ |= 0x00040000;
+ }
+ result.maxStoreFileRefCount_ = maxStoreFileRefCount_;
result.bitField0_ = to_bitField0_;
onBuilt();
return result;
@@ -5089,6 +5167,9 @@ public final class ClusterStatusProtos {
if (other.hasStoreRefCount()) {
setStoreRefCount(other.getStoreRefCount());
}
+ if (other.hasMaxStoreFileRefCount()) {
+ setMaxStoreFileRefCount(other.getMaxStoreFileRefCount());
+ }
this.mergeUnknownFields(other.getUnknownFields());
return this;
}
@@ -6428,6 +6509,63 @@ public final class ClusterStatusProtos {
return this;
}
+ // optional int32 max_store_file_ref_count = 22 [default = 0];
+ private int maxStoreFileRefCount_ ;
+ /**
+ * <code>optional int32 max_store_file_ref_count = 22 [default = 0];</code>
+ *
+ * <pre>
+ **
+ * The max number of references active on single store file among all store files
+ * that belong to given region
+ * </pre>
+ */
+ public boolean hasMaxStoreFileRefCount() {
+ return ((bitField0_ & 0x00080000) == 0x00080000);
+ }
+ /**
+ * <code>optional int32 max_store_file_ref_count = 22 [default = 0];</code>
+ *
+ * <pre>
+ **
+ * The max number of references active on single store file among all store files
+ * that belong to given region
+ * </pre>
+ */
+ public int getMaxStoreFileRefCount() {
+ return maxStoreFileRefCount_;
+ }
+ /**
+ * <code>optional int32 max_store_file_ref_count = 22 [default = 0];</code>
+ *
+ * <pre>
+ **
+ * The max number of references active on single store file among all store files
+ * that belong to given region
+ * </pre>
+ */
+ public Builder setMaxStoreFileRefCount(int value) {
+ bitField0_ |= 0x00080000;
+ maxStoreFileRefCount_ = value;
+ onChanged();
+ return this;
+ }
+ /**
+ * <code>optional int32 max_store_file_ref_count = 22 [default = 0];</code>
+ *
+ * <pre>
+ **
+ * The max number of references active on single store file among all store files
+ * that belong to given region
+ * </pre>
+ */
+ public Builder clearMaxStoreFileRefCount() {
+ bitField0_ = (bitField0_ & ~0x00080000);
+ maxStoreFileRefCount_ = 0;
+ onChanged();
+ return this;
+ }
+
// @@protoc_insertion_point(builder_scope:hbase.pb.RegionLoad)
}
@@ -14845,7 +14983,7 @@ public final class ClusterStatusProtos {
"e\030\001 \002(\014\022\023\n\013sequence_id\030\002 \002(\004\"p\n\026RegionSt" +
"oreSequenceIds\022 \n\030last_flushed_sequence_" +
"id\030\001 \002(\004\0224\n\021store_sequence_id\030\002 \003(\0132\031.hb" +
- "ase.pb.StoreSequenceId\"\360\004\n\nRegionLoad\0223\n" +
+ "ase.pb.StoreSequenceId\"\225\005\n\nRegionLoad\0223\n" +
"\020region_specifier\030\001 \002(\0132\031.hbase.pb.Regio" +
"nSpecifier\022\016\n\006stores\030\002 \001(\r\022\022\n\nstorefiles",
"\030\003 \001(\r\022\"\n\032store_uncompressed_size_MB\030\004 \001" +
@@ -14861,38 +14999,38 @@ public final class ClusterStatusProtos {
"\002\022#\n\030last_major_compaction_ts\030\021 \001(\004:\0010\022=" +
"\n\032store_complete_sequence_id\030\022 \003(\0132\031.hba" +
"se.pb.StoreSequenceId\022\032\n\017store_ref_count" +
- "\030\025 \001(\005:\0010\"T\n\023ReplicationLoadSink\022\032\n\022ageO" +
- "fLastAppliedOp\030\001 \002(\004\022!\n\031timeStampsOfLast" +
- "AppliedOp\030\002 \002(\004\"\225\001\n\025ReplicationLoadSourc" +
- "e\022\016\n\006peerID\030\001 \002(\t\022\032\n\022ageOfLastShippedOp\030" +
- "\002 \002(\004\022\026\n\016sizeOfLogQueue\030\003 \002(\r\022 \n\030timeSta" +
- "mpOfLastShippedOp\030\004 \002(\004\022\026\n\016replicationLa" +
- "g\030\005 \002(\004\"\212\003\n\nServerLoad\022\032\n\022number_of_requ",
- "ests\030\001 \001(\004\022 \n\030total_number_of_requests\030\002" +
- " \001(\004\022\024\n\014used_heap_MB\030\003 \001(\r\022\023\n\013max_heap_M" +
- "B\030\004 \001(\r\022*\n\014region_loads\030\005 \003(\0132\024.hbase.pb" +
- ".RegionLoad\022+\n\014coprocessors\030\006 \003(\0132\025.hbas" +
- "e.pb.Coprocessor\022\031\n\021report_start_time\030\007 " +
- "\001(\004\022\027\n\017report_end_time\030\010 \001(\004\022\030\n\020info_ser" +
- "ver_port\030\t \001(\r\0227\n\016replLoadSource\030\n \003(\0132\037" +
- ".hbase.pb.ReplicationLoadSource\0223\n\014replL" +
- "oadSink\030\013 \001(\0132\035.hbase.pb.ReplicationLoad" +
- "Sink\"a\n\016LiveServerInfo\022$\n\006server\030\001 \002(\0132\024",
- ".hbase.pb.ServerName\022)\n\013server_load\030\002 \002(" +
- "\0132\024.hbase.pb.ServerLoad\"\250\003\n\rClusterStatu" +
- "s\0228\n\rhbase_version\030\001 \001(\0132!.hbase.pb.HBas" +
- "eVersionFileContent\022.\n\014live_servers\030\002 \003(" +
- "\0132\030.hbase.pb.LiveServerInfo\022*\n\014dead_serv" +
- "ers\030\003 \003(\0132\024.hbase.pb.ServerName\022;\n\025regio" +
- "ns_in_transition\030\004 \003(\0132\034.hbase.pb.Region" +
- "InTransition\022\'\n\ncluster_id\030\005 \001(\0132\023.hbase" +
- ".pb.ClusterId\0222\n\023master_coprocessors\030\006 \003" +
- "(\0132\025.hbase.pb.Coprocessor\022$\n\006master\030\007 \001(",
- "\0132\024.hbase.pb.ServerName\022,\n\016backup_master" +
- "s\030\010 \003(\0132\024.hbase.pb.ServerName\022\023\n\013balance" +
- "r_on\030\t \001(\010BF\n*org.apache.hadoop.hbase.pr" +
- "otobuf.generatedB\023ClusterStatusProtosH\001\240" +
- "\001\001"
+ "\030\025 \001(\005:\0010\022#\n\030max_store_file_ref_count\030\026 " +
+ "\001(\005:\0010\"T\n\023ReplicationLoadSink\022\032\n\022ageOfLa" +
+ "stAppliedOp\030\001 \002(\004\022!\n\031timeStampsOfLastApp" +
+ "liedOp\030\002 \002(\004\"\225\001\n\025ReplicationLoadSource\022\016" +
+ "\n\006peerID\030\001 \002(\t\022\032\n\022ageOfLastShippedOp\030\002 \002" +
+ "(\004\022\026\n\016sizeOfLogQueue\030\003 \002(\r\022 \n\030timeStampO" +
+ "fLastShippedOp\030\004 \002(\004\022\026\n\016replicationLag\030\005",
+ " \002(\004\"\212\003\n\nServerLoad\022\032\n\022number_of_request" +
+ "s\030\001 \001(\004\022 \n\030total_number_of_requests\030\002 \001(" +
+ "\004\022\024\n\014used_heap_MB\030\003 \001(\r\022\023\n\013max_heap_MB\030\004" +
+ " \001(\r\022*\n\014region_loads\030\005 \003(\0132\024.hbase.pb.Re" +
+ "gionLoad\022+\n\014coprocessors\030\006 \003(\0132\025.hbase.p" +
+ "b.Coprocessor\022\031\n\021report_start_time\030\007 \001(\004" +
+ "\022\027\n\017report_end_time\030\010 \001(\004\022\030\n\020info_server" +
+ "_port\030\t \001(\r\0227\n\016replLoadSource\030\n \003(\0132\037.hb" +
+ "ase.pb.ReplicationLoadSource\0223\n\014replLoad" +
+ "Sink\030\013 \001(\0132\035.hbase.pb.ReplicationLoadSin",
+ "k\"a\n\016LiveServerInfo\022$\n\006server\030\001 \002(\0132\024.hb" +
+ "ase.pb.ServerName\022)\n\013server_load\030\002 \002(\0132\024" +
+ ".hbase.pb.ServerLoad\"\250\003\n\rClusterStatus\0228" +
+ "\n\rhbase_version\030\001 \001(\0132!.hbase.pb.HBaseVe" +
+ "rsionFileContent\022.\n\014live_servers\030\002 \003(\0132\030" +
+ ".hbase.pb.LiveServerInfo\022*\n\014dead_servers" +
+ "\030\003 \003(\0132\024.hbase.pb.ServerName\022;\n\025regions_" +
+ "in_transition\030\004 \003(\0132\034.hbase.pb.RegionInT" +
+ "ransition\022\'\n\ncluster_id\030\005 \001(\0132\023.hbase.pb" +
+ ".ClusterId\0222\n\023master_coprocessors\030\006 \003(\0132",
+ "\025.hbase.pb.Coprocessor\022$\n\006master\030\007 \001(\0132\024" +
+ ".hbase.pb.ServerName\022,\n\016backup_masters\030\010" +
+ " \003(\0132\024.hbase.pb.ServerName\022\023\n\013balancer_o" +
+ "n\030\t \001(\010BF\n*org.apache.hadoop.hbase.proto" +
+ "buf.generatedB\023ClusterStatusProtosH\001\240\001\001"
};
com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
@@ -14928,7 +15066,7 @@ public final class ClusterStatusProtos {
internal_static_hbase_pb_RegionLoad_fieldAccessorTable = new
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_hbase_pb_RegionLoad_descriptor,
- new java.lang.String[] { "RegionSpecifier", "Stores", "Storefiles", "StoreUncompressedSizeMB", "StorefileSizeMB", "MemstoreSizeMB", "StorefileIndexSizeMB", "ReadRequestsCount", "WriteRequestsCount", "TotalCompactingKVs", "CurrentCompactedKVs", "RootIndexSizeKB", "TotalStaticIndexSizeKB", "TotalStaticBloomSizeKB", "CompleteSequenceId", "DataLocality", "LastMajorCompactionTs", "StoreCompleteSequenceId", "StoreRefCount", });
+ new java.lang.String[] { "RegionSpecifier", "Stores", "Storefiles", "StoreUncompressedSizeMB", "StorefileSizeMB", "MemstoreSizeMB", "StorefileIndexSizeMB", "ReadRequestsCount", "WriteRequestsCount", "TotalCompactingKVs", "CurrentCompactedKVs", "RootIndexSizeKB", "TotalStaticIndexSizeKB", "TotalStaticBloomSizeKB", "CompleteSequenceId", "DataLocality", "LastMajorCompactionTs", "StoreCompleteSequenceId", "StoreRefCount", "MaxStoreFileRefCount", });
internal_static_hbase_pb_ReplicationLoadSink_descriptor =
getDescriptor().getMessageTypes().get(5);
internal_static_hbase_pb_ReplicationLoadSink_fieldAccessorTable = new
diff --git a/hbase-protocol/src/main/protobuf/ClusterStatus.proto b/hbase-protocol/src/main/protobuf/ClusterStatus.proto
index 0762d33..8e4f5fe 100644
--- a/hbase-protocol/src/main/protobuf/ClusterStatus.proto
+++ b/hbase-protocol/src/main/protobuf/ClusterStatus.proto
@@ -145,6 +145,12 @@ message RegionLoad {
/** the number of references active on the store */
optional int32 store_ref_count = 21 [ default = 0 ];
+
+ /**
+ * The max number of references active on single store file among all store files
+ * that belong to given region
+ */
+ optional int32 max_store_file_ref_count = 22 [default = 0];
}
/* Server-level protobufs */
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
index ef49f99..5386f6e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
@@ -843,6 +843,16 @@ public class AssignmentManager extends ZooKeeperListener {
}
/**
+ * Retrieve HRegionInfo for given region name
+ *
+ * @param regionName Region name in byte[]
+ * @return HRegionInfo
+ */
+ public HRegionInfo getRegionInfo(final byte[] regionName) {
+ return regionStates.getRegionInfo(regionName);
+ }
+
+ /**
* This call is invoked only (1) master assign meta;
* (2) during failover mode startup, zk assignment node processing.
* The locker is set in the caller. It returns true if the region
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index d560a37..c6bbf24 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -117,6 +117,7 @@ import org.apache.hadoop.hbase.master.procedure.DeleteNamespaceProcedure;
import org.apache.hadoop.hbase.master.procedure.DeleteTableProcedure;
import org.apache.hadoop.hbase.master.procedure.DisableTableProcedure;
import org.apache.hadoop.hbase.master.procedure.EnableTableProcedure;
+import org.apache.hadoop.hbase.master.procedure.MasterDDLOperationHelper;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler.ProcedureEvent;
@@ -302,6 +303,8 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
// manager of assignment nodes in zookeeper
AssignmentManager assignmentManager;
+ private RegionsRecoveryChore regionsRecoveryChore = null;
+
// buffer for "fatal error" notices from region servers
// in the cluster. This is only used for assisting
// operations/debugging.
@@ -1261,6 +1264,20 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
getMasterFileSystem().getFileSystem(), archiveDir, cleanerPool, params);
getChoreService().scheduleChore(hfileCleaner);
+ // Regions Reopen based on very high storeFileRefCount is considered enabled
+ // only if hbase.regions.recovery.store.file.ref.count has value > 0
+ final int maxStoreFileRefCount = conf.getInt(
+ HConstants.STORE_FILE_REF_COUNT_THRESHOLD,
+ HConstants.DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD);
+ if (maxStoreFileRefCount > 0) {
+ this.regionsRecoveryChore = new RegionsRecoveryChore(this, conf, this);
+ getChoreService().scheduleChore(this.regionsRecoveryChore);
+ } else {
+ LOG.info("Reopening regions with very high storeFileRefCount is disabled. "
+ + "Provide threshold value > 0 for " + HConstants.STORE_FILE_REF_COUNT_THRESHOLD
+ + " to enable it.\"");
+ }
+
final boolean isSnapshotChoreEnabled = this.snapshotCleanupTracker
.isSnapshotCleanupEnabled();
this.snapshotCleanerChore = new SnapshotCleanerChore(this, conf, getSnapshotManager());
@@ -1409,6 +1426,7 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
choreService.cancelChore(this.replicationZKLockCleanerChore);
choreService.cancelChore(this.replicationZKNodeCleanerChore);
choreService.cancelChore(this.snapshotCleanerChore);
+ choreService.cancelChore(this.regionsRecoveryChore);
}
}
@@ -3263,6 +3281,46 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
}
}
+ /**
+ * Reopen regions provided in the argument
+ *
+ * @param tableName The current table name
+ * @param hRegionInfos List of HRegionInfo of the regions to reopen
+ * @param nonceGroup Identifier for the source of the request, a client or process
+ * @param nonce A unique identifier for this operation from the client or process identified by
+ * <code>nonceGroup</code> (the source must ensure each operation gets a unique id).
+ * @return procedure Id
+ * @throws IOException if reopening region fails while running procedure
+ */
+ long reopenRegions(final TableName tableName, final List<HRegionInfo> hRegionInfos,
+ final long nonceGroup, final long nonce)
+ throws IOException {
+
+ return MasterProcedureUtil
+ .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
+
+ @Override
+ protected void run() throws IOException {
+ boolean areAllRegionsReopened = MasterDDLOperationHelper.reOpenAllRegions(
+ procedureExecutor.getEnvironment(), tableName, hRegionInfos);
+ if (areAllRegionsReopened) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("All required regions reopened for table: " + tableName);
+ }
+ } else {
+ LOG.warn("Error while reopening regions of table: " + tableName);
+ }
+ }
+
+ @Override
+ protected String getDescription() {
+ return "ReopenTableRegionsProcedure";
+ }
+
+ });
+
+ }
+
@Override
public long getLastMajorCompactionTimestamp(TableName table) throws IOException {
return getClusterStatusWithoutCoprocessor().getLastMajorCompactionTsForTable(table);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryChore.java
new file mode 100644
index 0000000..78d4b78
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryChore.java
@@ -0,0 +1,183 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.master;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.collections.MapUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.ClusterStatus;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.RegionLoad;
+import org.apache.hadoop.hbase.ScheduledChore;
+import org.apache.hadoop.hbase.ServerLoad;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.Stoppable;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.PerClientRandomNonceGenerator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * This chore, every time it runs, will try to recover regions with high store ref count
+ * by reopening them
+ */
+@InterfaceAudience.Private
+public class RegionsRecoveryChore extends ScheduledChore {
+
+ private static final Logger LOG = LoggerFactory.getLogger(RegionsRecoveryChore.class);
+
+ private static final String REGIONS_RECOVERY_CHORE_NAME = "RegionsRecoveryChore";
+
+ private static final String REGIONS_RECOVERY_INTERVAL =
+ "hbase.master.regions.recovery.check.interval";
+
+ private static final int DEFAULT_REGIONS_RECOVERY_INTERVAL = 1200 * 1000; // Default 20 mins
+
+ private static final String ERROR_REOPEN_REGIONS_MSG =
+ "Error reopening regions with high storeRefCount. ";
+
+ private final HMaster hMaster;
+ private final int storeFileRefCountThreshold;
+
+ private static final PerClientRandomNonceGenerator NONCE_GENERATOR =
+ new PerClientRandomNonceGenerator();
+
+ /**
+ * Construct RegionsRecoveryChore with provided params
+ *
+ * @param stopper When {@link Stoppable#isStopped()} is true, this chore will cancel and cleanup
+ * @param configuration The configuration params to be used
+ * @param hMaster HMaster instance used to reopen the affected regions
+ */
+ RegionsRecoveryChore(final Stoppable stopper, final Configuration configuration,
+ final HMaster hMaster) {
+
+ super(REGIONS_RECOVERY_CHORE_NAME, stopper, configuration.getInt(REGIONS_RECOVERY_INTERVAL,
+ DEFAULT_REGIONS_RECOVERY_INTERVAL));
+ this.hMaster = hMaster;
+ this.storeFileRefCountThreshold = configuration.getInt(
+ HConstants.STORE_FILE_REF_COUNT_THRESHOLD,
+ HConstants.DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD);
+
+ }
+
+ @Override
+ protected void chore() {
+ if (LOG.isTraceEnabled()) {
+ LOG.trace(
+ "Starting up Regions Recovery chore for reopening regions based on storeFileRefCount...");
+ }
+ try {
+ // only if storeFileRefCountThreshold > 0, consider the feature turned on
+ if (storeFileRefCountThreshold > 0) {
+ final ClusterStatus clusterStatus = hMaster.getClusterStatus();
+ final Map<ServerName, ServerLoad> serverMetricsMap =
+ clusterStatus.getLiveServersLoad();
+ final Map<TableName, List<HRegionInfo>> tableToReopenRegionsMap =
+ getTableToRegionsByRefCount(serverMetricsMap);
+ if (MapUtils.isNotEmpty(tableToReopenRegionsMap)) {
+ for (Map.Entry<TableName, List<HRegionInfo>> tableRegionEntry :
+ tableToReopenRegionsMap.entrySet()) {
+ TableName tableName = tableRegionEntry.getKey();
+ List<HRegionInfo> hRegionInfos = tableRegionEntry.getValue();
+ try {
+ LOG.warn("Reopening regions due to high storeFileRefCount. " +
+ "TableName: {} , noOfRegions: {}", tableName, hRegionInfos.size());
+ hMaster.reopenRegions(tableName, hRegionInfos, NONCE_GENERATOR.getNonceGroup(),
+ NONCE_GENERATOR.newNonce());
+ } catch (IOException e) {
+ List<String> regionNames = new ArrayList<>();
+ for (HRegionInfo hRegionInfo : hRegionInfos) {
+ regionNames.add(hRegionInfo.getRegionNameAsString());
+ }
+ LOG.error("{} tableName: {}, regionNames: {}", ERROR_REOPEN_REGIONS_MSG,
+ tableName, regionNames, e);
+ }
+ }
+ }
+ } else {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Reopening regions with very high storeFileRefCount is disabled. " +
+ "Provide threshold value > 0 for {} to enable it.",
+ HConstants.STORE_FILE_REF_COUNT_THRESHOLD);
+ }
+ }
+ } catch (Exception e) {
+ LOG.error("Error while reopening regions based on storeRefCount threshold", e);
+ }
+ if (LOG.isTraceEnabled()) {
+ LOG.trace(
+ "Exiting Regions Recovery chore for reopening regions based on storeFileRefCount...");
+ }
+ }
+
+ private Map<TableName, List<HRegionInfo>> getTableToRegionsByRefCount(
+ final Map<ServerName, ServerLoad> serverMetricsMap) {
+
+ final Map<TableName, List<HRegionInfo>> tableToReopenRegionsMap = new HashMap<>();
+ for (ServerLoad serverLoad : serverMetricsMap.values()) {
+ Map<byte[], RegionLoad> regionLoadsMap = serverLoad.getRegionsLoad();
+ for (RegionLoad regionLoad : regionLoadsMap.values()) {
+ // For each region, each store file can have different ref counts
+ // We need to find maximum of all such ref counts and if that max count
+ // is beyond a threshold value, we should reopen the region.
+ // Here, we take max ref count of all store files and not the cumulative
+ // count of all store files
+ final int maxStoreFileRefCount = regionLoad.getMaxStoreFileRefCount();
+
+ if (maxStoreFileRefCount > storeFileRefCountThreshold) {
+ final byte[] regionName = regionLoad.getName();
+ prepareTableToReopenRegionsMap(tableToReopenRegionsMap, regionName,
+ maxStoreFileRefCount);
+ }
+ }
+ }
+ return tableToReopenRegionsMap;
+
+ }
+
+ private void prepareTableToReopenRegionsMap(
+ final Map<TableName, List<HRegionInfo>> tableToReopenRegionsMap,
+ final byte[] regionName, final int regionStoreRefCount) {
+
+ final HRegionInfo hRegionInfo = hMaster.getAssignmentManager().getRegionInfo(regionName);
+ final TableName tableName = hRegionInfo.getTable();
+ if (TableName.META_TABLE_NAME.equals(tableName)) {
+ // Do not reopen regions of meta table even if it has
+ // high store file reference count
+ return;
+ }
+ LOG.warn("Region {} for Table {} has high storeFileRefCount {}, considering it for reopen..",
+ hRegionInfo.getRegionNameAsString(), tableName, regionStoreRefCount);
+ if (!tableToReopenRegionsMap.containsKey(tableName)) {
+ tableToReopenRegionsMap.put(tableName, new ArrayList<HRegionInfo>());
+ }
+ tableToReopenRegionsMap.get(tableName).add(hRegionInfo);
+
+ }
+
+}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
index cfd57a6..2ba4429 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
@@ -1578,6 +1578,8 @@ public class HRegionServer extends HasThread implements
byte[] name = r.getRegionInfo().getRegionName();
int stores = 0;
int storefiles = 0;
+ int storeRefCount = 0;
+ int maxStoreFileRefCount = 0;
int storeUncompressedSizeMB = 0;
int storefileSizeMB = 0;
int memstoreSizeMB = (int) (r.getMemstoreSize() / 1024 / 1024);
@@ -1591,6 +1593,13 @@ public class HRegionServer extends HasThread implements
stores += storeList.size();
for (Store store : storeList) {
storefiles += store.getStorefilesCount();
+ if (store instanceof HStore) {
+ HStore hStore = (HStore) store;
+ int currentStoreRefCount = hStore.getStoreRefCount();
+ storeRefCount += currentStoreRefCount;
+ int currentMaxStoreFileRefCount = hStore.getMaxStoreFileRefCount();
+ maxStoreFileRefCount = Math.max(maxStoreFileRefCount, currentMaxStoreFileRefCount);
+ }
storeUncompressedSizeMB += (int) (store.getStoreSizeUncompressed() / 1024 / 1024);
storefileSizeMB += (int) (store.getStorefilesSize() / 1024 / 1024);
storefileIndexSizeMB += (int) (store.getStorefilesIndexSize() / 1024 / 1024);
@@ -1617,6 +1626,8 @@ public class HRegionServer extends HasThread implements
regionLoadBldr.setRegionSpecifier(regionSpecifier.build())
.setStores(stores)
.setStorefiles(storefiles)
+ .setStoreRefCount(storeRefCount)
+ .setMaxStoreFileRefCount(maxStoreFileRefCount)
.setStoreUncompressedSizeMB(storeUncompressedSizeMB)
.setStorefileSizeMB(storefileSizeMB)
.setMemstoreSizeMB(memstoreSizeMB)
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
index 41e8918..9c0897f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
@@ -2868,4 +2868,22 @@ public class HStore implements Store {
}
return refCount;
}
+
+ /**
+ * @return the maximum ref count of any single store file
+ * among all store files of this HStore
+ */
+ public int getMaxStoreFileRefCount() {
+ int maxStoreFileRefCount = 0;
+ for (StoreFile store : storeEngine.getStoreFileManager().getStorefiles()) {
+ if (store.isHFile()) {
+ StoreFile.Reader storeReader = store.getReader();
+ if (storeReader != null) {
+ maxStoreFileRefCount = Math.max(maxStoreFileRefCount, storeReader.getRefCount());
+ }
+ }
+ }
+ return maxStoreFileRefCount;
+ }
+
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapperImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapperImpl.java
index 02ab26b..9027357 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapperImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapperImpl.java
@@ -55,6 +55,7 @@ public class MetricsRegionWrapperImpl implements MetricsRegionWrapper, Closeable
private long numReferenceFiles;
private long maxFlushQueueSize;
private long maxCompactionQueueSize;
+ private int maxStoreFileRefCount;
private ScheduledFuture<?> regionMetricsUpdateTask;
@@ -124,6 +125,11 @@ public class MetricsRegionWrapperImpl implements MetricsRegionWrapper, Closeable
}
@Override
+ public int getMaxStoreFileRefCount() {
+ return maxStoreFileRefCount;
+ }
+
+ @Override
public long getReadRequestCount() {
return this.region.getReadRequestsCount();
}
@@ -216,6 +222,7 @@ public class MetricsRegionWrapperImpl implements MetricsRegionWrapper, Closeable
public void run() {
long tempNumStoreFiles = 0;
int tempStoreRefCount = 0;
+ int tempMaxStoreFileRefCount = 0;
long tempMemstoreSize = 0;
long tempStoreFileSize = 0;
long tempMaxStoreFileAge = 0;
@@ -247,13 +254,18 @@ public class MetricsRegionWrapperImpl implements MetricsRegionWrapper, Closeable
if (store instanceof HStore) {
// Cast here to avoid interface changes to Store
- tempStoreRefCount += ((HStore)store).getStoreRefCount();
+ HStore hStore = ((HStore) store);
+ tempStoreRefCount += hStore.getStoreRefCount();
+ int currentMaxStoreFileRefCount = hStore.getMaxStoreFileRefCount();
+ tempMaxStoreFileRefCount = Math.max(tempMaxStoreFileRefCount,
+ currentMaxStoreFileRefCount);
}
}
}
numStoreFiles = tempNumStoreFiles;
storeRefCount = tempStoreRefCount;
+ maxStoreFileRefCount = tempMaxStoreFileRefCount;
memstoreSize = tempMemstoreSize;
storeFileSize = tempStoreFileSize;
maxStoreFileAge = tempMaxStoreFileAge;
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapperStub.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapperStub.java
index 82ce53f..bc53162 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapperStub.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapperStub.java
@@ -66,6 +66,11 @@ public class MetricsRegionWrapperStub implements MetricsRegionWrapper {
}
@Override
+ public int getMaxStoreFileRefCount() {
+ return 0;
+ }
+
+ @Override
public long getMemstoreSize() {
return 103;
}
diff --git a/src/main/asciidoc/_chapters/hbase-default.adoc b/src/main/asciidoc/_chapters/hbase-default.adoc
index caff490..aea457a 100644
--- a/src/main/asciidoc/_chapters/hbase-default.adoc
+++ b/src/main/asciidoc/_chapters/hbase-default.adoc
@@ -2208,3 +2208,43 @@ The percent of region server RPC threads failed to abort RS.
+
.Default
`0`
+
+
+[[hbase.master.regions.recovery.check.interval]]
+*`hbase.master.regions.recovery.check.interval`*::
++
+.Description
+
 Regions Recovery Chore interval in milliseconds.
 This chore keeps running at this interval to
 find all regions whose max store file ref count exceeds
 the configured threshold, and reopens them.
+
++
+.Default
+`1200000`
+
+
+[[hbase.regions.recovery.store.file.ref.count]]
+*`hbase.regions.recovery.store.file.ref.count`*::
++
+.Description
+
+ A very large ref count on a file indicates
+ a ref leak on that object. Such files
+ cannot be removed even after they are invalidated
+ via compaction. The only way to recover in such a
+ scenario is to reopen the region, which can
+ release all resources, like the refcount, leases, etc.
+ This config represents the store file ref count threshold
+ value considered for reopening regions.
+ Any region with a store file ref count > this value
+ would be eligible for reopening by the master.
+ The default value -1 indicates this feature is turned off.
+ Only a positive integer value should be provided to enable
+ this feature.
+
++
+.Default
+`-1`
+
diff --git a/src/main/asciidoc/_chapters/ops_mgt.adoc b/src/main/asciidoc/_chapters/ops_mgt.adoc
index d62816f..97ca275 100644
--- a/src/main/asciidoc/_chapters/ops_mgt.adoc
+++ b/src/main/asciidoc/_chapters/ops_mgt.adoc
@@ -2365,3 +2365,26 @@ void rename(Admin admin, String oldTableName, String newTableName) {
admin.deleteTable(oldTableName);
}
----
+
+
+
+[[auto_reopen_regions]]
+== Auto Region Reopen
+
+We can leak store reader references if a coprocessor or core function somehow
+opens a scanner, or wraps one, and then does not take care to call close on the
+scanner or the wrapped instance. Leaked store files cannot be removed even
+after they are invalidated via compaction.
+A reasonable mitigation for a reader reference
+leak would be a fast reopen of the region on the same server.
+This will release all resources, like the refcount, leases, etc.
+The clients should gracefully ride over this like any other region in
+transition.
+By default this auto region reopen feature is disabled.
+To enable it, please provide a positive ref count threshold value for config
+`hbase.regions.recovery.store.file.ref.count`.
+
+Please refer to config descriptions for
+`hbase.master.regions.recovery.check.interval` and
+`hbase.regions.recovery.store.file.ref.count`.
+