You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ap...@apache.org on 2019/10/29 20:15:48 UTC

[hbase] branch branch-1 updated: HBASE-23213 Backport HBASE-22460 to branch-1 (#761)

This is an automated email from the ASF dual-hosted git repository.

apurtell pushed a commit to branch branch-1
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-1 by this push:
     new 5e414f2  HBASE-23213 Backport HBASE-22460 to branch-1 (#761)
5e414f2 is described below

commit 5e414f2d4690b2a474fbc50599a8a64a782571ab
Author: Viraj Jasani <vi...@gmail.com>
AuthorDate: Wed Oct 30 01:45:39 2019 +0530

    HBASE-23213 Backport HBASE-22460 to branch-1 (#761)
    
    Signed-off-by: Andrew Purtell <ap...@apache.org>
---
 .../org/apache/hadoop/hbase/ClusterStatus.java     |   7 +
 .../java/org/apache/hadoop/hbase/RegionLoad.java   |   9 +
 .../java/org/apache/hadoop/hbase/HConstants.java   |   7 +
 hbase-common/src/main/resources/hbase-default.xml  |  29 +++
 .../regionserver/MetricsRegionServerSource.java    |   1 +
 .../hbase/regionserver/MetricsRegionWrapper.java   |   6 +
 .../regionserver/MetricsRegionSourceImpl.java      |   4 +
 .../regionserver/TestMetricsRegionSourceImpl.java  |   5 +
 .../protobuf/generated/ClusterStatusProtos.java    | 206 +++++++++++++++++----
 .../src/main/protobuf/ClusterStatus.proto          |   6 +
 .../hadoop/hbase/master/AssignmentManager.java     |  10 +
 .../org/apache/hadoop/hbase/master/HMaster.java    |  58 ++++++
 .../hadoop/hbase/master/RegionsRecoveryChore.java  | 183 ++++++++++++++++++
 .../hadoop/hbase/regionserver/HRegionServer.java   |  11 ++
 .../apache/hadoop/hbase/regionserver/HStore.java   |  18 ++
 .../regionserver/MetricsRegionWrapperImpl.java     |  14 +-
 .../regionserver/MetricsRegionWrapperStub.java     |   5 +
 src/main/asciidoc/_chapters/hbase-default.adoc     |  40 ++++
 src/main/asciidoc/_chapters/ops_mgt.adoc           |  23 +++
 19 files changed, 607 insertions(+), 35 deletions(-)

diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/ClusterStatus.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/ClusterStatus.java
index e2f10d3..ed859ee 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/ClusterStatus.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/ClusterStatus.java
@@ -166,6 +166,13 @@ public class ClusterStatus extends VersionedWritable {
   }
 
   /**
+   * @return map of the names of region servers on the live list with associated ServerLoad
+   */
+  public Map<ServerName, ServerLoad> getLiveServersLoad() {
+    return Collections.unmodifiableMap(liveServers);
+  }
+
+  /**
    * @return the average cluster load
    */
   public double getAverageLoad() {
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/RegionLoad.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/RegionLoad.java
index 158105b..24c33a6 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/RegionLoad.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/RegionLoad.java
@@ -195,6 +195,14 @@ public class RegionLoad {
   }
 
   /**
+   * @return the max reference count for any store file among all store files
+   *   of this region
+   */
+  public int getMaxStoreFileRefCount() {
+    return regionLoadPB.getMaxStoreFileRefCount();
+  }
+
+  /**
    * @see java.lang.Object#toString()
    */
   @Override
@@ -204,6 +212,7 @@ public class RegionLoad {
     sb = Strings.appendKeyValue(sb, "numberOfStorefiles",
         this.getStorefiles());
     sb = Strings.appendKeyValue(sb, "storeRefCount", this.getStoreRefCount());
+    sb = Strings.appendKeyValue(sb, "maxStoreFileRefCount", this.getMaxStoreFileRefCount());
     sb = Strings.appendKeyValue(sb, "storefileUncompressedSizeMB",
       this.getStoreUncompressedSizeMB());
     sb = Strings.appendKeyValue(sb, "lastMajorCompactionTimestamp",
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
index 6e66f57..59d4fb3 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
@@ -1341,6 +1341,13 @@ public final class HConstants {
   // User defined Default TTL config key
   public static final String DEFAULT_SNAPSHOT_TTL_CONFIG_KEY = "hbase.master.snapshot.ttl";
 
+  // Regions Recovery based on high storeFileRefCount threshold value
+  public static final String STORE_FILE_REF_COUNT_THRESHOLD =
+    "hbase.regions.recovery.store.file.ref.count";
+
+  // default -1 indicates there is no threshold on high storeRefCount
+  public static final int DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD = -1;
+
   /**
    * Configurations for master executor services.
    */
diff --git a/hbase-common/src/main/resources/hbase-default.xml b/hbase-common/src/main/resources/hbase-default.xml
index 3f409d7..169c65c 100644
--- a/hbase-common/src/main/resources/hbase-default.xml
+++ b/hbase-common/src/main/resources/hbase-default.xml
@@ -1643,4 +1643,33 @@ possible configurations would overwhelm and obscure the important.
       automatically deleted until it is manually deleted
     </description>
   </property>
+  <property>
+    <name>hbase.master.regions.recovery.check.interval</name>
+    <value>1200000</value>
+    <description>
+      Regions Recovery Chore interval in milliseconds.
+      This chore keeps running at this interval to
+      find all regions whose max store file ref count
+      exceeds the configured threshold and reopen them.
+    </description>
+  </property>
+  <property>
+    <name>hbase.regions.recovery.store.file.ref.count</name>
+    <value>-1</value>
+    <description>
+      A very large ref count on a file indicates
+      a ref leak on that object. Such files
+      cannot be removed even after they are invalidated
+      via compaction. The only way to recover in such
+      a scenario is to reopen the region, which can
+      release all resources, like the refcount, leases, etc.
+      This config represents Store files Ref Count threshold
+      value considered for reopening regions.
+      Any region with store file ref count > this value
+      would be eligible for reopening by master.
+      Default value -1 indicates this feature is turned off.
+      Only a positive integer value should be provided to enable
+      this feature.
+    </description>
+  </property>
 </configuration>
diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java
index 635ba70..a5564ce 100644
--- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java
+++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java
@@ -231,6 +231,7 @@ public interface MetricsRegionServerSource extends BaseSource, JvmPauseMonitorSo
   String STOREFILE_COUNT_DESC = "Number of Store Files";
   String STORE_REF_COUNT = "storeRefCount";
   String STORE_REF_COUNT_DESC = "Store reference count";
+  String MAX_STORE_FILE_REF_COUNT = "maxStoreFileRefCount";
   String MEMSTORE_SIZE = "memStoreSize";
   String MEMSTORE_SIZE_DESC = "Size of the memstore";
   String STOREFILE_SIZE = "storeFileSize";
diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapper.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapper.java
index b519e57..b92ad55 100644
--- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapper.java
+++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapper.java
@@ -147,4 +147,10 @@ public interface MetricsRegionWrapper {
    * @return the number of references active on the store
    */
   long getStoreRefCount();
+
+  /**
+   * @return the max reference count for any store file among all store files
+   *   of this region
+   */
+  int getMaxStoreFileRefCount();
 }
diff --git a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionSourceImpl.java b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionSourceImpl.java
index 7da7686..578ff0d 100644
--- a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionSourceImpl.java
+++ b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionSourceImpl.java
@@ -218,6 +218,10 @@ public class MetricsRegionSourceImpl implements MetricsRegionSource {
               MetricsRegionServerSource.STORE_REF_COUNT),
           this.regionWrapper.getStoreRefCount());
       mrb.addGauge(Interns.info(
+        regionNamePrefix + MetricsRegionServerSource.MAX_STORE_FILE_REF_COUNT,
+        MetricsRegionServerSource.MAX_STORE_FILE_REF_COUNT),
+        this.regionWrapper.getMaxStoreFileRefCount());
+      mrb.addGauge(Interns.info(
               regionNamePrefix + MetricsRegionServerSource.MEMSTORE_SIZE,
               MetricsRegionServerSource.MEMSTORE_SIZE_DESC),
           this.regionWrapper.getMemstoreSize());
diff --git a/hbase-hadoop2-compat/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionSourceImpl.java b/hbase-hadoop2-compat/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionSourceImpl.java
index 043ff3d..5a4caa7 100644
--- a/hbase-hadoop2-compat/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionSourceImpl.java
+++ b/hbase-hadoop2-compat/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionSourceImpl.java
@@ -97,6 +97,11 @@ public class TestMetricsRegionSourceImpl {
     }
 
     @Override
+    public int getMaxStoreFileRefCount() {
+      return 0;
+    }
+
+    @Override
     public long getMemstoreSize() {
       return 0;
     }
diff --git a/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClusterStatusProtos.java b/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClusterStatusProtos.java
index 735ef98..aac9ab8 100644
--- a/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClusterStatusProtos.java
+++ b/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClusterStatusProtos.java
@@ -3631,6 +3631,28 @@ public final class ClusterStatusProtos {
      * </pre>
      */
     int getStoreRefCount();
+
+    // optional int32 max_store_file_ref_count = 22 [default = 0];
+    /**
+     * <code>optional int32 max_store_file_ref_count = 22 [default = 0];</code>
+     *
+     * <pre>
+     **
+     *  The max number of references active on single store file among all store files
+     *  that belong to given region
+     * </pre>
+     */
+    boolean hasMaxStoreFileRefCount();
+    /**
+     * <code>optional int32 max_store_file_ref_count = 22 [default = 0];</code>
+     *
+     * <pre>
+     **
+     *  The max number of references active on single store file among all store files
+     *  that belong to given region
+     * </pre>
+     */
+    int getMaxStoreFileRefCount();
   }
   /**
    * Protobuf type {@code hbase.pb.RegionLoad}
@@ -3789,6 +3811,11 @@ public final class ClusterStatusProtos {
               storeRefCount_ = input.readInt32();
               break;
             }
+            case 176: {
+              bitField0_ |= 0x00040000;
+              maxStoreFileRefCount_ = input.readInt32();
+              break;
+            }
           }
         }
       } catch (com.google.protobuf.InvalidProtocolBufferException e) {
@@ -4330,6 +4357,34 @@ public final class ClusterStatusProtos {
       return storeRefCount_;
     }
 
+    // optional int32 max_store_file_ref_count = 22 [default = 0];
+    public static final int MAX_STORE_FILE_REF_COUNT_FIELD_NUMBER = 22;
+    private int maxStoreFileRefCount_;
+    /**
+     * <code>optional int32 max_store_file_ref_count = 22 [default = 0];</code>
+     *
+     * <pre>
+     **
+     *  The max number of references active on single store file among all store files
+     *  that belong to given region
+     * </pre>
+     */
+    public boolean hasMaxStoreFileRefCount() {
+      return ((bitField0_ & 0x00040000) == 0x00040000);
+    }
+    /**
+     * <code>optional int32 max_store_file_ref_count = 22 [default = 0];</code>
+     *
+     * <pre>
+     **
+     *  The max number of references active on single store file among all store files
+     *  that belong to given region
+     * </pre>
+     */
+    public int getMaxStoreFileRefCount() {
+      return maxStoreFileRefCount_;
+    }
+
     private void initFields() {
       regionSpecifier_ = org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionSpecifier.getDefaultInstance();
       stores_ = 0;
@@ -4350,6 +4405,7 @@ public final class ClusterStatusProtos {
       lastMajorCompactionTs_ = 0L;
       storeCompleteSequenceId_ = java.util.Collections.emptyList();
       storeRefCount_ = 0;
+      maxStoreFileRefCount_ = 0;
     }
     private byte memoizedIsInitialized = -1;
     public final boolean isInitialized() {
@@ -4434,6 +4490,9 @@ public final class ClusterStatusProtos {
       if (((bitField0_ & 0x00020000) == 0x00020000)) {
         output.writeInt32(21, storeRefCount_);
       }
+      if (((bitField0_ & 0x00040000) == 0x00040000)) {
+        output.writeInt32(22, maxStoreFileRefCount_);
+      }
       getUnknownFields().writeTo(output);
     }
 
@@ -4519,6 +4578,10 @@ public final class ClusterStatusProtos {
         size += com.google.protobuf.CodedOutputStream
           .computeInt32Size(21, storeRefCount_);
       }
+      if (((bitField0_ & 0x00040000) == 0x00040000)) {
+        size += com.google.protobuf.CodedOutputStream
+          .computeInt32Size(22, maxStoreFileRefCount_);
+      }
       size += getUnknownFields().getSerializedSize();
       memoizedSerializedSize = size;
       return size;
@@ -4633,6 +4696,11 @@ public final class ClusterStatusProtos {
         result = result && (getStoreRefCount()
             == other.getStoreRefCount());
       }
+      result = result && (hasMaxStoreFileRefCount() == other.hasMaxStoreFileRefCount());
+      if (hasMaxStoreFileRefCount()) {
+        result = result && (getMaxStoreFileRefCount()
+            == other.getMaxStoreFileRefCount());
+      }
       result = result &&
           getUnknownFields().equals(other.getUnknownFields());
       return result;
@@ -4723,6 +4791,10 @@ public final class ClusterStatusProtos {
         hash = (37 * hash) + STORE_REF_COUNT_FIELD_NUMBER;
         hash = (53 * hash) + getStoreRefCount();
       }
+      if (hasMaxStoreFileRefCount()) {
+        hash = (37 * hash) + MAX_STORE_FILE_REF_COUNT_FIELD_NUMBER;
+        hash = (53 * hash) + getMaxStoreFileRefCount();
+      }
       hash = (29 * hash) + getUnknownFields().hashCode();
       memoizedHashCode = hash;
       return hash;
@@ -4880,6 +4952,8 @@ public final class ClusterStatusProtos {
         }
         storeRefCount_ = 0;
         bitField0_ = (bitField0_ & ~0x00040000);
+        maxStoreFileRefCount_ = 0;
+        bitField0_ = (bitField0_ & ~0x00080000);
         return this;
       }
 
@@ -4993,6 +5067,10 @@ public final class ClusterStatusProtos {
           to_bitField0_ |= 0x00020000;
         }
         result.storeRefCount_ = storeRefCount_;
+        if (((from_bitField0_ & 0x00080000) == 0x00080000)) {
+          to_bitField0_ |= 0x00040000;
+        }
+        result.maxStoreFileRefCount_ = maxStoreFileRefCount_;
         result.bitField0_ = to_bitField0_;
         onBuilt();
         return result;
@@ -5089,6 +5167,9 @@ public final class ClusterStatusProtos {
         if (other.hasStoreRefCount()) {
           setStoreRefCount(other.getStoreRefCount());
         }
+        if (other.hasMaxStoreFileRefCount()) {
+          setMaxStoreFileRefCount(other.getMaxStoreFileRefCount());
+        }
         this.mergeUnknownFields(other.getUnknownFields());
         return this;
       }
@@ -6428,6 +6509,63 @@ public final class ClusterStatusProtos {
         return this;
       }
 
+      // optional int32 max_store_file_ref_count = 22 [default = 0];
+      private int maxStoreFileRefCount_ ;
+      /**
+       * <code>optional int32 max_store_file_ref_count = 22 [default = 0];</code>
+       *
+       * <pre>
+       **
+       *  The max number of references active on single store file among all store files
+       *  that belong to given region
+       * </pre>
+       */
+      public boolean hasMaxStoreFileRefCount() {
+        return ((bitField0_ & 0x00080000) == 0x00080000);
+      }
+      /**
+       * <code>optional int32 max_store_file_ref_count = 22 [default = 0];</code>
+       *
+       * <pre>
+       **
+       *  The max number of references active on single store file among all store files
+       *  that belong to given region
+       * </pre>
+       */
+      public int getMaxStoreFileRefCount() {
+        return maxStoreFileRefCount_;
+      }
+      /**
+       * <code>optional int32 max_store_file_ref_count = 22 [default = 0];</code>
+       *
+       * <pre>
+       **
+       *  The max number of references active on single store file among all store files
+       *  that belong to given region
+       * </pre>
+       */
+      public Builder setMaxStoreFileRefCount(int value) {
+        bitField0_ |= 0x00080000;
+        maxStoreFileRefCount_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>optional int32 max_store_file_ref_count = 22 [default = 0];</code>
+       *
+       * <pre>
+       **
+       *  The max number of references active on single store file among all store files
+       *  that belong to given region
+       * </pre>
+       */
+      public Builder clearMaxStoreFileRefCount() {
+        bitField0_ = (bitField0_ & ~0x00080000);
+        maxStoreFileRefCount_ = 0;
+        onChanged();
+        return this;
+      }
+
       // @@protoc_insertion_point(builder_scope:hbase.pb.RegionLoad)
     }
 
@@ -14845,7 +14983,7 @@ public final class ClusterStatusProtos {
       "e\030\001 \002(\014\022\023\n\013sequence_id\030\002 \002(\004\"p\n\026RegionSt" +
       "oreSequenceIds\022 \n\030last_flushed_sequence_" +
       "id\030\001 \002(\004\0224\n\021store_sequence_id\030\002 \003(\0132\031.hb" +
-      "ase.pb.StoreSequenceId\"\360\004\n\nRegionLoad\0223\n" +
+      "ase.pb.StoreSequenceId\"\225\005\n\nRegionLoad\0223\n" +
       "\020region_specifier\030\001 \002(\0132\031.hbase.pb.Regio" +
       "nSpecifier\022\016\n\006stores\030\002 \001(\r\022\022\n\nstorefiles",
       "\030\003 \001(\r\022\"\n\032store_uncompressed_size_MB\030\004 \001" +
@@ -14861,38 +14999,38 @@ public final class ClusterStatusProtos {
       "\002\022#\n\030last_major_compaction_ts\030\021 \001(\004:\0010\022=" +
       "\n\032store_complete_sequence_id\030\022 \003(\0132\031.hba" +
       "se.pb.StoreSequenceId\022\032\n\017store_ref_count" +
-      "\030\025 \001(\005:\0010\"T\n\023ReplicationLoadSink\022\032\n\022ageO" +
-      "fLastAppliedOp\030\001 \002(\004\022!\n\031timeStampsOfLast" +
-      "AppliedOp\030\002 \002(\004\"\225\001\n\025ReplicationLoadSourc" +
-      "e\022\016\n\006peerID\030\001 \002(\t\022\032\n\022ageOfLastShippedOp\030" +
-      "\002 \002(\004\022\026\n\016sizeOfLogQueue\030\003 \002(\r\022 \n\030timeSta" +
-      "mpOfLastShippedOp\030\004 \002(\004\022\026\n\016replicationLa" +
-      "g\030\005 \002(\004\"\212\003\n\nServerLoad\022\032\n\022number_of_requ",
-      "ests\030\001 \001(\004\022 \n\030total_number_of_requests\030\002" +
-      " \001(\004\022\024\n\014used_heap_MB\030\003 \001(\r\022\023\n\013max_heap_M" +
-      "B\030\004 \001(\r\022*\n\014region_loads\030\005 \003(\0132\024.hbase.pb" +
-      ".RegionLoad\022+\n\014coprocessors\030\006 \003(\0132\025.hbas" +
-      "e.pb.Coprocessor\022\031\n\021report_start_time\030\007 " +
-      "\001(\004\022\027\n\017report_end_time\030\010 \001(\004\022\030\n\020info_ser" +
-      "ver_port\030\t \001(\r\0227\n\016replLoadSource\030\n \003(\0132\037" +
-      ".hbase.pb.ReplicationLoadSource\0223\n\014replL" +
-      "oadSink\030\013 \001(\0132\035.hbase.pb.ReplicationLoad" +
-      "Sink\"a\n\016LiveServerInfo\022$\n\006server\030\001 \002(\0132\024",
-      ".hbase.pb.ServerName\022)\n\013server_load\030\002 \002(" +
-      "\0132\024.hbase.pb.ServerLoad\"\250\003\n\rClusterStatu" +
-      "s\0228\n\rhbase_version\030\001 \001(\0132!.hbase.pb.HBas" +
-      "eVersionFileContent\022.\n\014live_servers\030\002 \003(" +
-      "\0132\030.hbase.pb.LiveServerInfo\022*\n\014dead_serv" +
-      "ers\030\003 \003(\0132\024.hbase.pb.ServerName\022;\n\025regio" +
-      "ns_in_transition\030\004 \003(\0132\034.hbase.pb.Region" +
-      "InTransition\022\'\n\ncluster_id\030\005 \001(\0132\023.hbase" +
-      ".pb.ClusterId\0222\n\023master_coprocessors\030\006 \003" +
-      "(\0132\025.hbase.pb.Coprocessor\022$\n\006master\030\007 \001(",
-      "\0132\024.hbase.pb.ServerName\022,\n\016backup_master" +
-      "s\030\010 \003(\0132\024.hbase.pb.ServerName\022\023\n\013balance" +
-      "r_on\030\t \001(\010BF\n*org.apache.hadoop.hbase.pr" +
-      "otobuf.generatedB\023ClusterStatusProtosH\001\240" +
-      "\001\001"
+      "\030\025 \001(\005:\0010\022#\n\030max_store_file_ref_count\030\026 " +
+      "\001(\005:\0010\"T\n\023ReplicationLoadSink\022\032\n\022ageOfLa" +
+      "stAppliedOp\030\001 \002(\004\022!\n\031timeStampsOfLastApp" +
+      "liedOp\030\002 \002(\004\"\225\001\n\025ReplicationLoadSource\022\016" +
+      "\n\006peerID\030\001 \002(\t\022\032\n\022ageOfLastShippedOp\030\002 \002" +
+      "(\004\022\026\n\016sizeOfLogQueue\030\003 \002(\r\022 \n\030timeStampO" +
+      "fLastShippedOp\030\004 \002(\004\022\026\n\016replicationLag\030\005",
+      " \002(\004\"\212\003\n\nServerLoad\022\032\n\022number_of_request" +
+      "s\030\001 \001(\004\022 \n\030total_number_of_requests\030\002 \001(" +
+      "\004\022\024\n\014used_heap_MB\030\003 \001(\r\022\023\n\013max_heap_MB\030\004" +
+      " \001(\r\022*\n\014region_loads\030\005 \003(\0132\024.hbase.pb.Re" +
+      "gionLoad\022+\n\014coprocessors\030\006 \003(\0132\025.hbase.p" +
+      "b.Coprocessor\022\031\n\021report_start_time\030\007 \001(\004" +
+      "\022\027\n\017report_end_time\030\010 \001(\004\022\030\n\020info_server" +
+      "_port\030\t \001(\r\0227\n\016replLoadSource\030\n \003(\0132\037.hb" +
+      "ase.pb.ReplicationLoadSource\0223\n\014replLoad" +
+      "Sink\030\013 \001(\0132\035.hbase.pb.ReplicationLoadSin",
+      "k\"a\n\016LiveServerInfo\022$\n\006server\030\001 \002(\0132\024.hb" +
+      "ase.pb.ServerName\022)\n\013server_load\030\002 \002(\0132\024" +
+      ".hbase.pb.ServerLoad\"\250\003\n\rClusterStatus\0228" +
+      "\n\rhbase_version\030\001 \001(\0132!.hbase.pb.HBaseVe" +
+      "rsionFileContent\022.\n\014live_servers\030\002 \003(\0132\030" +
+      ".hbase.pb.LiveServerInfo\022*\n\014dead_servers" +
+      "\030\003 \003(\0132\024.hbase.pb.ServerName\022;\n\025regions_" +
+      "in_transition\030\004 \003(\0132\034.hbase.pb.RegionInT" +
+      "ransition\022\'\n\ncluster_id\030\005 \001(\0132\023.hbase.pb" +
+      ".ClusterId\0222\n\023master_coprocessors\030\006 \003(\0132",
+      "\025.hbase.pb.Coprocessor\022$\n\006master\030\007 \001(\0132\024" +
+      ".hbase.pb.ServerName\022,\n\016backup_masters\030\010" +
+      " \003(\0132\024.hbase.pb.ServerName\022\023\n\013balancer_o" +
+      "n\030\t \001(\010BF\n*org.apache.hadoop.hbase.proto" +
+      "buf.generatedB\023ClusterStatusProtosH\001\240\001\001"
     };
     com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
       new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
@@ -14928,7 +15066,7 @@ public final class ClusterStatusProtos {
           internal_static_hbase_pb_RegionLoad_fieldAccessorTable = new
             com.google.protobuf.GeneratedMessage.FieldAccessorTable(
               internal_static_hbase_pb_RegionLoad_descriptor,
-              new java.lang.String[] { "RegionSpecifier", "Stores", "Storefiles", "StoreUncompressedSizeMB", "StorefileSizeMB", "MemstoreSizeMB", "StorefileIndexSizeMB", "ReadRequestsCount", "WriteRequestsCount", "TotalCompactingKVs", "CurrentCompactedKVs", "RootIndexSizeKB", "TotalStaticIndexSizeKB", "TotalStaticBloomSizeKB", "CompleteSequenceId", "DataLocality", "LastMajorCompactionTs", "StoreCompleteSequenceId", "StoreRefCount", });
+              new java.lang.String[] { "RegionSpecifier", "Stores", "Storefiles", "StoreUncompressedSizeMB", "StorefileSizeMB", "MemstoreSizeMB", "StorefileIndexSizeMB", "ReadRequestsCount", "WriteRequestsCount", "TotalCompactingKVs", "CurrentCompactedKVs", "RootIndexSizeKB", "TotalStaticIndexSizeKB", "TotalStaticBloomSizeKB", "CompleteSequenceId", "DataLocality", "LastMajorCompactionTs", "StoreCompleteSequenceId", "StoreRefCount", "MaxStoreFileRefCount", });
           internal_static_hbase_pb_ReplicationLoadSink_descriptor =
             getDescriptor().getMessageTypes().get(5);
           internal_static_hbase_pb_ReplicationLoadSink_fieldAccessorTable = new
diff --git a/hbase-protocol/src/main/protobuf/ClusterStatus.proto b/hbase-protocol/src/main/protobuf/ClusterStatus.proto
index 0762d33..8e4f5fe 100644
--- a/hbase-protocol/src/main/protobuf/ClusterStatus.proto
+++ b/hbase-protocol/src/main/protobuf/ClusterStatus.proto
@@ -145,6 +145,12 @@ message RegionLoad {
 
   /** the number of references active on the store */
   optional int32 store_ref_count = 21 [ default = 0 ];
+
+  /**
+   *  The max number of references active on a single store file among all store files
+   *  that belong to a given region
+   */
+  optional int32 max_store_file_ref_count = 22 [default = 0];
 }
 
 /* Server-level protobufs */
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
index ef49f99..5386f6e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
@@ -843,6 +843,16 @@ public class AssignmentManager extends ZooKeeperListener {
   }
 
   /**
+   * Retrieve HRegionInfo for given region name
+   *
+   * @param regionName Region name in byte[]
+   * @return HRegionInfo
+   */
+  public HRegionInfo getRegionInfo(final byte[] regionName) {
+    return regionStates.getRegionInfo(regionName);
+  }
+
+  /**
    * This call is invoked only (1) master assign meta;
    * (2) during failover mode startup, zk assignment node processing.
    * The locker is set in the caller. It returns true if the region
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index d560a37..c6bbf24 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -117,6 +117,7 @@ import org.apache.hadoop.hbase.master.procedure.DeleteNamespaceProcedure;
 import org.apache.hadoop.hbase.master.procedure.DeleteTableProcedure;
 import org.apache.hadoop.hbase.master.procedure.DisableTableProcedure;
 import org.apache.hadoop.hbase.master.procedure.EnableTableProcedure;
+import org.apache.hadoop.hbase.master.procedure.MasterDDLOperationHelper;
 import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants;
 import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
 import org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler.ProcedureEvent;
@@ -302,6 +303,8 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
   // manager of assignment nodes in zookeeper
   AssignmentManager assignmentManager;
 
+  private RegionsRecoveryChore regionsRecoveryChore = null;
+
   // buffer for "fatal error" notices from region servers
   // in the cluster. This is only used for assisting
   // operations/debugging.
@@ -1261,6 +1264,20 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
       getMasterFileSystem().getFileSystem(), archiveDir, cleanerPool, params);
     getChoreService().scheduleChore(hfileCleaner);
 
+    // Regions Reopen based on very high storeFileRefCount is considered enabled
+    // only if hbase.regions.recovery.store.file.ref.count has value > 0
+    final int maxStoreFileRefCount = conf.getInt(
+      HConstants.STORE_FILE_REF_COUNT_THRESHOLD,
+      HConstants.DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD);
+    if (maxStoreFileRefCount > 0) {
+      this.regionsRecoveryChore = new RegionsRecoveryChore(this, conf, this);
+      getChoreService().scheduleChore(this.regionsRecoveryChore);
+    } else {
+      LOG.info("Reopening regions with very high storeFileRefCount is disabled. "
+        + "Provide threshold value > 0 for " + HConstants.STORE_FILE_REF_COUNT_THRESHOLD
+        + " to enable it.");
+    }
+
     final boolean isSnapshotChoreEnabled = this.snapshotCleanupTracker
         .isSnapshotCleanupEnabled();
     this.snapshotCleanerChore = new SnapshotCleanerChore(this, conf, getSnapshotManager());
@@ -1409,6 +1426,7 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
       choreService.cancelChore(this.replicationZKLockCleanerChore);
       choreService.cancelChore(this.replicationZKNodeCleanerChore);
       choreService.cancelChore(this.snapshotCleanerChore);
+      choreService.cancelChore(this.regionsRecoveryChore);
     }
   }
 
@@ -3263,6 +3281,46 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
     }
   }
 
+  /**
+   * Reopen regions provided in the argument
+   *
+   * @param tableName The current table name
+   * @param hRegionInfos List of HRegionInfo of the regions to reopen
+   * @param nonceGroup Identifier for the source of the request, a client or process
+   * @param nonce A unique identifier for this operation from the client or process identified by
+   *   <code>nonceGroup</code> (the source must ensure each operation gets a unique id).
+   * @return procedure Id
+   * @throws IOException if reopening region fails while running procedure
+   */
+  long reopenRegions(final TableName tableName, final List<HRegionInfo> hRegionInfos,
+      final long nonceGroup, final long nonce)
+      throws IOException {
+
+    return MasterProcedureUtil
+      .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
+
+        @Override
+        protected void run() throws IOException {
+          boolean areAllRegionsReopened = MasterDDLOperationHelper.reOpenAllRegions(
+            procedureExecutor.getEnvironment(), tableName, hRegionInfos);
+          if (areAllRegionsReopened) {
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("All required regions reopened for table: " + tableName);
+            }
+          } else {
+            LOG.warn("Error while reopening regions of table: " + tableName);
+          }
+        }
+
+        @Override
+        protected String getDescription() {
+          return "ReopenTableRegionsProcedure";
+        }
+
+      });
+
+  }
+
   @Override
   public long getLastMajorCompactionTimestamp(TableName table) throws IOException {
     return getClusterStatusWithoutCoprocessor().getLastMajorCompactionTsForTable(table);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryChore.java
new file mode 100644
index 0000000..78d4b78
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryChore.java
@@ -0,0 +1,183 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.master;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.collections.MapUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.ClusterStatus;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.RegionLoad;
+import org.apache.hadoop.hbase.ScheduledChore;
+import org.apache.hadoop.hbase.ServerLoad;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.Stoppable;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.PerClientRandomNonceGenerator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * This chore, every time it runs, will try to recover regions with high store ref count
+ * by reopening them
+ */
+@InterfaceAudience.Private
+public class RegionsRecoveryChore extends ScheduledChore {
+
+  private static final Logger LOG = LoggerFactory.getLogger(RegionsRecoveryChore.class);
+
+  private static final String REGIONS_RECOVERY_CHORE_NAME = "RegionsRecoveryChore";
+
+  private static final String REGIONS_RECOVERY_INTERVAL =
+    "hbase.master.regions.recovery.check.interval";
+
+  private static final int DEFAULT_REGIONS_RECOVERY_INTERVAL = 1200 * 1000; // Default 20 min ?
+
+  private static final String ERROR_REOPEN_REIONS_MSG =
+    "Error reopening regions with high storeRefCount. ";
+
+  private final HMaster hMaster;
+  private final int storeFileRefCountThreshold;
+
+  private static final PerClientRandomNonceGenerator NONCE_GENERATOR =
+    new PerClientRandomNonceGenerator();
+
+  /**
+   * Construct RegionsRecoveryChore with provided params
+   *
+   * @param stopper When {@link Stoppable#isStopped()} is true, this chore will cancel and cleanup
+   * @param configuration The configuration params to be used
+   * @param hMaster HMaster instance to initiate RegionTableRegions
+   */
+  RegionsRecoveryChore(final Stoppable stopper, final Configuration configuration,
+      final HMaster hMaster) {
+
+    super(REGIONS_RECOVERY_CHORE_NAME, stopper, configuration.getInt(REGIONS_RECOVERY_INTERVAL,
+      DEFAULT_REGIONS_RECOVERY_INTERVAL));
+    this.hMaster = hMaster;
+    this.storeFileRefCountThreshold = configuration.getInt(
+      HConstants.STORE_FILE_REF_COUNT_THRESHOLD,
+      HConstants.DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD);
+
+  }
+
+  @Override
+  protected void chore() {
+    if (LOG.isTraceEnabled()) {
+      LOG.trace(
+        "Starting up Regions Recovery chore for reopening regions based on storeFileRefCount...");
+    }
+    try {
+      // only if storeFileRefCountThreshold > 0, consider the feature turned on
+      if (storeFileRefCountThreshold > 0) {
+        final ClusterStatus clusterStatus = hMaster.getClusterStatus();
+        final Map<ServerName, ServerLoad> serverMetricsMap =
+          clusterStatus.getLiveServersLoad();
+        final Map<TableName, List<HRegionInfo>> tableToReopenRegionsMap =
+          getTableToRegionsByRefCount(serverMetricsMap);
+        if (MapUtils.isNotEmpty(tableToReopenRegionsMap)) {
+          for (Map.Entry<TableName, List<HRegionInfo>> tableRegionEntry :
+              tableToReopenRegionsMap.entrySet()) {
+            TableName tableName = tableRegionEntry.getKey();
+            List<HRegionInfo> hRegionInfos = tableRegionEntry.getValue();
+            try {
+              LOG.warn("Reopening regions due to high storeFileRefCount. " +
+                "TableName: {} , noOfRegions: {}", tableName, hRegionInfos.size());
+              hMaster.reopenRegions(tableName, hRegionInfos, NONCE_GENERATOR.getNonceGroup(),
+                NONCE_GENERATOR.newNonce());
+            } catch (IOException e) {
+              List<String> regionNames = new ArrayList<>();
+              for (HRegionInfo hRegionInfo : hRegionInfos) {
+                regionNames.add(hRegionInfo.getRegionNameAsString());
+              }
+              LOG.error("{} tableName: {}, regionNames: {}", ERROR_REOPEN_REIONS_MSG,
+                tableName, regionNames, e);
+            }
+          }
+        }
+      } else {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Reopening regions with very high storeFileRefCount is disabled. " +
+              "Provide threshold value > 0 for {} to enable it.",
+            HConstants.STORE_FILE_REF_COUNT_THRESHOLD);
+        }
+      }
+    } catch (Exception e) {
+      LOG.error("Error while reopening regions based on storeRefCount threshold", e);
+    }
+    if (LOG.isTraceEnabled()) {
+      LOG.trace(
+        "Exiting Regions Recovery chore for reopening regions based on storeFileRefCount...");
+    }
+  }
+
+  private Map<TableName, List<HRegionInfo>> getTableToRegionsByRefCount(
+      final Map<ServerName, ServerLoad> serverMetricsMap) {
+
+    final Map<TableName, List<HRegionInfo>> tableToReopenRegionsMap = new HashMap<>();
+    for (ServerLoad serverLoad : serverMetricsMap.values()) {
+      Map<byte[], RegionLoad> regionLoadsMap = serverLoad.getRegionsLoad();
+      for (RegionLoad regionLoad : regionLoadsMap.values()) {
+        // For each region, each store file can have different ref counts
+        // We need to find maximum of all such ref counts and if that max count
+        // is beyond a threshold value, we should reopen the region.
+        // Here, we take max ref count of all store files and not the cumulative
+        // count of all store files
+        final int maxStoreFileRefCount = regionLoad.getMaxStoreFileRefCount();
+
+        if (maxStoreFileRefCount > storeFileRefCountThreshold) {
+          final byte[] regionName = regionLoad.getName();
+          prepareTableToReopenRegionsMap(tableToReopenRegionsMap, regionName,
+            maxStoreFileRefCount);
+        }
+      }
+    }
+    return tableToReopenRegionsMap;
+
+  }
+
+  private void prepareTableToReopenRegionsMap(
+      final Map<TableName, List<HRegionInfo>> tableToReopenRegionsMap,
+      final byte[] regionName, final int regionStoreRefCount) {
+
+    final HRegionInfo hRegionInfo = hMaster.getAssignmentManager().getRegionInfo(regionName);
+    final TableName tableName = hRegionInfo.getTable();
+    if (TableName.META_TABLE_NAME.equals(tableName)) {
+      // Do not reopen regions of meta table even if it has
+      // high store file reference count
+      return;
+    }
+    LOG.warn("Region {} for Table {} has high storeFileRefCount {}, considering it for reopen..",
+      hRegionInfo.getRegionNameAsString(), tableName, regionStoreRefCount);
+    if (!tableToReopenRegionsMap.containsKey(tableName)) {
+      tableToReopenRegionsMap.put(tableName, new ArrayList<HRegionInfo>());
+    }
+    tableToReopenRegionsMap.get(tableName).add(hRegionInfo);
+
+  }
+
+}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
index cfd57a6..2ba4429 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
@@ -1578,6 +1578,8 @@ public class HRegionServer extends HasThread implements
     byte[] name = r.getRegionInfo().getRegionName();
     int stores = 0;
     int storefiles = 0;
+    int storeRefCount = 0;
+    int maxStoreFileRefCount = 0;
     int storeUncompressedSizeMB = 0;
     int storefileSizeMB = 0;
     int memstoreSizeMB = (int) (r.getMemstoreSize() / 1024 / 1024);
@@ -1591,6 +1593,13 @@ public class HRegionServer extends HasThread implements
     stores += storeList.size();
     for (Store store : storeList) {
       storefiles += store.getStorefilesCount();
+      if (store instanceof HStore) {
+        HStore hStore = (HStore) store;
+        int currentStoreRefCount = hStore.getStoreRefCount();
+        storeRefCount += currentStoreRefCount;
+        int currentMaxStoreFileRefCount = hStore.getMaxStoreFileRefCount();
+        maxStoreFileRefCount = Math.max(maxStoreFileRefCount, currentMaxStoreFileRefCount);
+      }
       storeUncompressedSizeMB += (int) (store.getStoreSizeUncompressed() / 1024 / 1024);
       storefileSizeMB += (int) (store.getStorefilesSize() / 1024 / 1024);
       storefileIndexSizeMB += (int) (store.getStorefilesIndexSize() / 1024 / 1024);
@@ -1617,6 +1626,8 @@ public class HRegionServer extends HasThread implements
     regionLoadBldr.setRegionSpecifier(regionSpecifier.build())
       .setStores(stores)
       .setStorefiles(storefiles)
+      .setStoreRefCount(storeRefCount)
+      .setMaxStoreFileRefCount(maxStoreFileRefCount)
       .setStoreUncompressedSizeMB(storeUncompressedSizeMB)
       .setStorefileSizeMB(storefileSizeMB)
       .setMemstoreSizeMB(memstoreSizeMB)
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
index 41e8918..9c0897f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
@@ -2868,4 +2868,22 @@ public class HStore implements Store {
     }
     return refCount;
   }
+
+  /**
+   * Computes the largest reader ref count held by any single HFile of this
+   * HStore (the per-file maximum, not the sum across files).
+   *
+   * @return the maximum store file reader ref count, or 0 when no HFile
+   *   currently has an open reader
+   */
+  public int getMaxStoreFileRefCount() {
+    int maxRefCount = 0;
+    for (StoreFile storeFile : storeEngine.getStoreFileManager().getStorefiles()) {
+      // Only consider actual HFiles; skip files without an open reader.
+      if (storeFile.isHFile()) {
+        StoreFile.Reader reader = storeFile.getReader();
+        maxRefCount = reader == null ? maxRefCount : Math.max(maxRefCount, reader.getRefCount());
+      }
+    }
+    return maxRefCount;
+  }
+
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapperImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapperImpl.java
index 02ab26b..9027357 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapperImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapperImpl.java
@@ -55,6 +55,7 @@ public class MetricsRegionWrapperImpl implements MetricsRegionWrapper, Closeable
   private long numReferenceFiles;
   private long maxFlushQueueSize;
   private long maxCompactionQueueSize;
+  private int maxStoreFileRefCount;
 
   private ScheduledFuture<?> regionMetricsUpdateTask;
 
@@ -124,6 +125,11 @@ public class MetricsRegionWrapperImpl implements MetricsRegionWrapper, Closeable
   }
 
   @Override
+  /** Returns the last computed max store file ref count (refreshed by the metrics update task). */
+  public int getMaxStoreFileRefCount() {
+    return maxStoreFileRefCount;
+  }
+
+  @Override
   public long getReadRequestCount() {
     return this.region.getReadRequestsCount();
   }
@@ -216,6 +222,7 @@ public class MetricsRegionWrapperImpl implements MetricsRegionWrapper, Closeable
     public void run() {
       long tempNumStoreFiles = 0;
       int tempStoreRefCount = 0;
+      int tempMaxStoreFileRefCount = 0;
       long tempMemstoreSize = 0;
       long tempStoreFileSize = 0;
       long tempMaxStoreFileAge = 0;
@@ -247,13 +254,18 @@ public class MetricsRegionWrapperImpl implements MetricsRegionWrapper, Closeable
 
           if (store instanceof HStore) {
             // Cast here to avoid interface changes to Store
-            tempStoreRefCount += ((HStore)store).getStoreRefCount();
+            HStore hStore = ((HStore) store);
+            tempStoreRefCount += hStore.getStoreRefCount();
+            int currentMaxStoreFileRefCount = hStore.getMaxStoreFileRefCount();
+            tempMaxStoreFileRefCount = Math.max(tempMaxStoreFileRefCount,
+              currentMaxStoreFileRefCount);
           }
         }
       }
 
       numStoreFiles = tempNumStoreFiles;
       storeRefCount = tempStoreRefCount;
+      maxStoreFileRefCount = tempMaxStoreFileRefCount;
       memstoreSize = tempMemstoreSize;
       storeFileSize = tempStoreFileSize;
       maxStoreFileAge = tempMaxStoreFileAge;
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapperStub.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapperStub.java
index 82ce53f..bc53162 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapperStub.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapperStub.java
@@ -66,6 +66,11 @@ public class MetricsRegionWrapperStub implements MetricsRegionWrapper {
   }
 
   @Override
+  /** Fixed stub value (0) used by unit tests. */
+  public int getMaxStoreFileRefCount() {
+    return 0;
+  }
+
+  @Override
   public long getMemstoreSize() {
     return 103;
   }
diff --git a/src/main/asciidoc/_chapters/hbase-default.adoc b/src/main/asciidoc/_chapters/hbase-default.adoc
index caff490..aea457a 100644
--- a/src/main/asciidoc/_chapters/hbase-default.adoc
+++ b/src/main/asciidoc/_chapters/hbase-default.adoc
@@ -2208,3 +2208,43 @@ The percent of region server RPC threads failed to abort RS.
 +
 .Default
 `0`
+
+
+[[hbase.master.regions.recovery.check.interval]]
+*`hbase.master.regions.recovery.check.interval`*::
++
+.Description
+
+    Regions Recovery Chore interval in milliseconds.
+    This chore keeps running at this interval to
+    find all regions whose max store file ref count exceeds
+    the configured threshold, and reopens them.
+
++
+.Default
+`1200000`
+
+
+[[hbase.regions.recovery.store.file.ref.count]]
+*`hbase.regions.recovery.store.file.ref.count`*::
++
+.Description
+
+      A very large ref count on a file indicates
+      a ref leak on that object. Such files
+      cannot be removed even after they are invalidated
+      via compaction. The only way to recover in such a
+      scenario is to reopen the region, which can
+      release all resources, like the refcount, leases, etc.
+      This config represents Store files Ref Count threshold
+      value considered for reopening regions.
+      Any region with store files ref count > this value
+      would be eligible for reopening by master.
+      Default value -1 indicates this feature is turned off.
+      Only positive integer value should be provided to enable
+      this feature.
+
++
+.Default
+`-1`
+
diff --git a/src/main/asciidoc/_chapters/ops_mgt.adoc b/src/main/asciidoc/_chapters/ops_mgt.adoc
index d62816f..97ca275 100644
--- a/src/main/asciidoc/_chapters/ops_mgt.adoc
+++ b/src/main/asciidoc/_chapters/ops_mgt.adoc
@@ -2365,3 +2365,26 @@ void rename(Admin admin, String oldTableName, String newTableName) {
   admin.deleteTable(oldTableName);
 }
 ----
+
+
+
+[[auto_reopen_regions]]
+== Auto Region Reopen
+
+We can leak store reader references if a coprocessor or core function somehow
+opens a scanner, or wraps one, and then does not take care to call close on the
+scanner or the wrapped instance. Leaked store files cannot be removed even
+after they are invalidated via compaction.
+A reasonable mitigation for a reader reference
+leak would be a fast reopen of the region on the same server.
+This will release all resources, like the refcount, leases, etc.
+The clients should gracefully ride over this like any other region in
+transition.
+By default this auto region reopen feature is disabled.
+To enable it, please provide a positive ref count threshold value for the config
+`hbase.regions.recovery.store.file.ref.count`.
+
+Please refer to config descriptions for
+`hbase.master.regions.recovery.check.interval` and
+`hbase.regions.recovery.store.file.ref.count`.
+