You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by bb...@apache.org on 2022/07/12 02:00:10 UTC

[hbase] branch master updated: HBASE-27186 Report block cache size metrics separately for L1 and L2 (#4608)

This is an automated email from the ASF dual-hosted git repository.

bbeaudreault pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/master by this push:
     new 779c4e23169 HBASE-27186 Report block cache size metrics separately for L1 and L2 (#4608)
779c4e23169 is described below

commit 779c4e231695f0a1056505887ca768d0d9acad84
Author: Bryan Beaudreault <bb...@hubspot.com>
AuthorDate: Mon Jul 11 22:00:04 2022 -0400

    HBASE-27186 Report block cache size metrics separately for L1 and L2 (#4608)
    
    Signed-off-by: Andrew Purtell <ap...@apache.org>
---
 .../regionserver/MetricsRegionServerSource.java    | 19 +++++++++
 .../MetricsRegionServerSourceImpl.java             | 14 +++++++
 .../regionserver/MetricsRegionServerWrapper.java   | 45 +++++++++++++++++++++
 .../hadoop/hbase/io/hfile/CombinedBlockCache.java  |  4 ++
 .../MetricsRegionServerWrapperImpl.java            | 47 ++++++++++++++++++++++
 .../MetricsRegionServerWrapperStub.java            | 45 +++++++++++++++++++++
 .../regionserver/TestMetricsRegionServer.java      |  9 +++++
 src/main/asciidoc/_chapters/architecture.adoc      |  4 ++
 8 files changed, 187 insertions(+)

diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java
index 7bffc57d0c0..a271b1d2187 100644
--- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java
+++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java
@@ -316,6 +316,8 @@ public interface MetricsRegionServerSource extends BaseSource, JvmPauseMonitorSo
   String BLOCK_CACHE_FREE_DESC = "Size of the block cache that is not occupied.";
   String BLOCK_CACHE_COUNT = "blockCacheCount";
   String BLOCK_CACHE_COUNT_DESC = "Number of block in the block cache.";
+  String BLOCK_CACHE_DATA_BLOCK_COUNT = "blockCacheDataBlockCount";
+  String BLOCK_CACHE_DATA_BLOCK_COUNT_DESC = "Number of DATA block in the block cache.";
   String BLOCK_CACHE_SIZE = "blockCacheSize";
   String BLOCK_CACHE_SIZE_DESC = "Size of the block cache.";
   String BLOCK_CACHE_HIT_COUNT = "blockCacheHitCount";
@@ -365,6 +367,15 @@ public interface MetricsRegionServerSource extends BaseSource, JvmPauseMonitorSo
   String BLOCK_CACHE_GENERAL_BLOOM_META_HIT_COUNT = "blockCacheGeneralBloomMetaHitCount";
   String BLOCK_CACHE_DELETE_FAMILY_BLOOM_HIT_COUNT = "blockCacheDeleteFamilyBloomHitCount";
   String BLOCK_CACHE_TRAILER_HIT_COUNT = "blockCacheTrailerHitCount";
+  String L1_CACHE_FREE_SIZE = "l1CacheFreeSize";
+  String L1_CACHE_FREE_SIZE_DESC = "Amount of free bytes in the L1 cache";
+  String L1_CACHE_SIZE = "l1CacheSize";
+  String L1_CACHE_SIZE_DESC = "Size of the L1 cache in bytes";
+  String L1_CACHE_COUNT = "l1CacheCount";
+  String L1_CACHE_COUNT_DESC = "Count of blocks in the L1 cache";
+  String L1_CACHE_EVICTION_COUNT = "l1CacheEvictionCount";
+  String L1_CACHE_EVICTION_COUNT_DESC = "Count of blocks evicted from the L1 cache";
+
   String L1_CACHE_HIT_COUNT = "l1CacheHitCount";
   String L1_CACHE_HIT_COUNT_DESC = "L1 cache hit count.";
   String L1_CACHE_MISS_COUNT = "l1CacheMissCount";
@@ -373,6 +384,14 @@ public interface MetricsRegionServerSource extends BaseSource, JvmPauseMonitorSo
   String L1_CACHE_HIT_RATIO_DESC = "L1 cache hit ratio.";
   String L1_CACHE_MISS_RATIO = "l1CacheMissRatio";
   String L1_CACHE_MISS_RATIO_DESC = "L1 cache miss ratio.";
+  String L2_CACHE_FREE_SIZE = "l2CacheFreeSize";
+  String L2_CACHE_FREE_SIZE_DESC = "Amount of free bytes in the L2 cache";
+  String L2_CACHE_SIZE = "l2CacheSize";
+  String L2_CACHE_SIZE_DESC = "Size of the L2 cache in bytes";
+  String L2_CACHE_COUNT = "l2CacheCount";
+  String L2_CACHE_COUNT_DESC = "Count of blocks in the L2 cache";
+  String L2_CACHE_EVICTION_COUNT = "l2CacheEvictionCount";
+  String L2_CACHE_EVICTION_COUNT_DESC = "Count of blocks evicted from the L2 cache";
   String L2_CACHE_HIT_COUNT = "l2CacheHitCount";
   String L2_CACHE_HIT_COUNT_DESC = "L2 cache hit count.";
   String L2_CACHE_MISS_COUNT = "l2CacheMissCount";
diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java
index f8cebd2ec60..d264649c381 100644
--- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java
+++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java
@@ -532,11 +532,19 @@ public class MetricsRegionServerSourceImpl extends BaseSourceImpl
         rsWrap.getBlockCacheFreeSize())
       .addGauge(Interns.info(BLOCK_CACHE_COUNT, BLOCK_CACHE_COUNT_DESC),
         rsWrap.getBlockCacheCount())
+      .addGauge(Interns.info(BLOCK_CACHE_DATA_BLOCK_COUNT, BLOCK_CACHE_DATA_BLOCK_COUNT_DESC),
+        rsWrap.getBlockCacheDataBlockCount())
       .addGauge(Interns.info(BLOCK_CACHE_SIZE, BLOCK_CACHE_SIZE_DESC), rsWrap.getBlockCacheSize())
       .addGauge(Interns.info(BLOCK_CACHE_HIT_PERCENT, BLOCK_CACHE_HIT_PERCENT_DESC),
         rsWrap.getBlockCacheHitPercent())
       .addGauge(Interns.info(BLOCK_CACHE_EXPRESS_HIT_PERCENT, BLOCK_CACHE_EXPRESS_HIT_PERCENT_DESC),
         rsWrap.getBlockCacheHitCachingPercent())
+      .addGauge(Interns.info(L1_CACHE_SIZE, L1_CACHE_SIZE_DESC), rsWrap.getL1CacheSize())
+      .addGauge(Interns.info(L1_CACHE_FREE_SIZE, L1_CACHE_FREE_SIZE_DESC),
+        rsWrap.getL1CacheFreeSize())
+      .addGauge(Interns.info(L1_CACHE_COUNT, L1_CACHE_COUNT_DESC), rsWrap.getL1CacheCount())
+      .addCounter(Interns.info(L1_CACHE_EVICTION_COUNT, L1_CACHE_EVICTION_COUNT_DESC),
+        rsWrap.getL1CacheEvictedCount())
       .addGauge(Interns.info(L1_CACHE_HIT_COUNT, L1_CACHE_HIT_COUNT_DESC),
         rsWrap.getL1CacheHitCount())
       .addGauge(Interns.info(L1_CACHE_MISS_COUNT, L1_CACHE_MISS_COUNT_DESC),
@@ -545,6 +553,12 @@ public class MetricsRegionServerSourceImpl extends BaseSourceImpl
         rsWrap.getL1CacheHitRatio())
       .addGauge(Interns.info(L1_CACHE_MISS_RATIO, L1_CACHE_MISS_RATIO_DESC),
         rsWrap.getL1CacheMissRatio())
+      .addGauge(Interns.info(L2_CACHE_SIZE, L2_CACHE_SIZE_DESC), rsWrap.getL2CacheSize())
+      .addGauge(Interns.info(L2_CACHE_FREE_SIZE, L2_CACHE_FREE_SIZE_DESC),
+        rsWrap.getL2CacheFreeSize())
+      .addGauge(Interns.info(L2_CACHE_COUNT, L2_CACHE_COUNT_DESC), rsWrap.getL2CacheCount())
+      .addCounter(Interns.info(L2_CACHE_EVICTION_COUNT, L2_CACHE_EVICTION_COUNT_DESC),
+        rsWrap.getL2CacheEvictedCount())
       .addGauge(Interns.info(L2_CACHE_HIT_COUNT, L2_CACHE_HIT_COUNT_DESC),
         rsWrap.getL2CacheHitCount())
       .addGauge(Interns.info(L2_CACHE_MISS_COUNT, L2_CACHE_MISS_COUNT_DESC),
diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java
index c654ba844e3..d4f33737c44 100644
--- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java
+++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java
@@ -267,6 +267,11 @@ public interface MetricsRegionServerWrapper {
    */
   long getBlockCacheCount();
 
+  /**
+   * Get the number of DATA blocks in the block cache.
+   */
+  long getBlockCacheDataBlockCount();
+
   /**
    * Get the total size (in bytes) of the block cache.
    */
@@ -317,6 +322,26 @@ public interface MetricsRegionServerWrapper {
    */
   long getBlockCacheFailedInsertions();
 
+  /**
+   * Cache size (bytes) of L1 cache
+   */
+  long getL1CacheSize();
+
+  /**
+   * Free cache size (bytes) of L1 cache
+   */
+  long getL1CacheFreeSize();
+
+  /**
+   * Number of blocks in L1 cache
+   */
+  long getL1CacheCount();
+
+  /**
+   * Number of blocks evicted from L1 cache
+   */
+  long getL1CacheEvictedCount();
+
   /**
    * Hit count of L1 cache.
    */
@@ -337,6 +362,26 @@ public interface MetricsRegionServerWrapper {
    */
   double getL1CacheMissRatio();
 
+  /**
+   * Cache size (bytes) of L2 cache
+   */
+  long getL2CacheSize();
+
+  /**
+   * Free cache size (bytes) of L2 cache
+   */
+  long getL2CacheFreeSize();
+
+  /**
+   * Number of blocks in L2 cache
+   */
+  long getL2CacheCount();
+
+  /**
+   * Number of blocks evicted from L2 cache
+   */
+  long getL2CacheEvictedCount();
+
   /**
    * Hit count of L2 cache.
    */
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
index 69a70600a6c..6cd40b0858f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
@@ -390,4 +390,8 @@ public class CombinedBlockCache implements ResizableBlockCache, HeapSize {
   public FirstLevelBlockCache getFirstLevelCache() {
     return l1Cache;
   }
+
+  public BlockCache getSecondLevelCache() {
+    return l2Cache;
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java
index 8793f495cb7..ae5e904955e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java
@@ -66,6 +66,8 @@ class MetricsRegionServerWrapperImpl implements MetricsRegionServerWrapper {
   private final ByteBuffAllocator allocator;
 
   private BlockCache blockCache;
+  private BlockCache l1Cache = null;
+  private BlockCache l2Cache = null;
   private MobFileCache mobFileCache;
   private CacheStats cacheStats;
   private CacheStats l1Stats = null;
@@ -173,6 +175,14 @@ class MetricsRegionServerWrapperImpl implements MetricsRegionServerWrapper {
         l1Stats = this.cacheStats;
       }
     }
+    if (this.blockCache != null) {
+      if (this.blockCache instanceof CombinedBlockCache) {
+        l1Cache = ((CombinedBlockCache) this.blockCache).getFirstLevelCache();
+        l2Cache = ((CombinedBlockCache) this.blockCache).getSecondLevelCache();
+      } else {
+        l1Cache = this.blockCache;
+      }
+    }
   }
 
   /**
@@ -276,6 +286,11 @@ class MetricsRegionServerWrapperImpl implements MetricsRegionServerWrapper {
     return this.blockCache != null ? this.blockCache.getBlockCount() : 0L;
   }
 
+  @Override
+  public long getBlockCacheDataBlockCount() {
+    return this.blockCache != null ? this.blockCache.getDataBlockCount() : 0L;
+  }
+
   @Override
   public long getMemStoreLimit() {
     return this.regionServer.getRegionServerAccounting().getGlobalMemStoreLimit();
@@ -354,6 +369,38 @@ class MetricsRegionServerWrapperImpl implements MetricsRegionServerWrapper {
     return this.cacheStats != null ? this.cacheStats.getFailedInserts() : 0L;
   }
 
+  public long getL1CacheSize() {
+    return this.l1Cache != null ? this.l1Cache.getCurrentSize() : 0L;
+  }
+
+  public long getL1CacheFreeSize() {
+    return this.l1Cache != null ? this.l1Cache.getFreeSize() : 0L;
+  }
+
+  public long getL1CacheCount() {
+    return this.l1Cache != null ? this.l1Cache.getBlockCount() : 0L;
+  }
+
+  public long getL1CacheEvictedCount() {
+    return this.l1Stats != null ? this.l1Stats.getEvictedCount() : 0L;
+  }
+
+  public long getL2CacheSize() {
+    return this.l2Cache != null ? this.l2Cache.getCurrentSize() : 0L;
+  }
+
+  public long getL2CacheFreeSize() {
+    return this.l2Cache != null ? this.l2Cache.getFreeSize() : 0L;
+  }
+
+  public long getL2CacheCount() {
+    return this.l2Cache != null ? this.l2Cache.getBlockCount() : 0L;
+  }
+
+  public long getL2CacheEvictedCount() {
+    return this.l2Stats != null ? this.l2Stats.getEvictedCount() : 0L;
+  }
+
   @Override
   public long getL1CacheHitCount() {
     return this.l1Stats != null ? this.l1Stats.getHitCount() : 0L;
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java
index e451683a367..d604cf00d49 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java
@@ -292,6 +292,11 @@ public class MetricsRegionServerWrapperStub implements MetricsRegionServerWrappe
     return 414;
   }
 
+  @Override
+  public long getBlockCacheDataBlockCount() {
+    return 300;
+  }
+
   @Override
   public long getBlockCacheSize() {
     return 415;
@@ -342,6 +347,26 @@ public class MetricsRegionServerWrapperStub implements MetricsRegionServerWrappe
     return 36;
   }
 
+  @Override
+  public long getL1CacheSize() {
+    return 123;
+  }
+
+  @Override
+  public long getL1CacheFreeSize() {
+    return 100;
+  }
+
+  @Override
+  public long getL1CacheCount() {
+    return 50;
+  }
+
+  @Override
+  public long getL1CacheEvictedCount() {
+    return 1000;
+  }
+
   @Override
   public long getL1CacheHitCount() {
     return 200;
@@ -362,6 +387,26 @@ public class MetricsRegionServerWrapperStub implements MetricsRegionServerWrappe
     return 20;
   }
 
+  @Override
+  public long getL2CacheSize() {
+    return 456;
+  }
+
+  @Override
+  public long getL2CacheFreeSize() {
+    return 200;
+  }
+
+  @Override
+  public long getL2CacheCount() {
+    return 75;
+  }
+
+  @Override
+  public long getL2CacheEvictedCount() {
+    return 2000;
+  }
+
   @Override
   public long getL2CacheHitCount() {
     return 800;
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionServer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionServer.java
index 70e37bc5408..a44f3c6b62e 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionServer.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionServer.java
@@ -104,6 +104,7 @@ public class TestMetricsRegionServer {
     HELPER.assertGauge("flushQueueLength", 412, serverSource);
     HELPER.assertGauge("blockCacheFreeSize", 413, serverSource);
     HELPER.assertGauge("blockCacheCount", 414, serverSource);
+    HELPER.assertGauge("blockCacheDataBlockCount", 300, serverSource);
     HELPER.assertGauge("blockCacheSize", 415, serverSource);
     HELPER.assertCounter("blockCacheHitCount", 416, serverSource);
     HELPER.assertCounter("blockCacheMissCount", 417, serverSource);
@@ -111,10 +112,18 @@ public class TestMetricsRegionServer {
     HELPER.assertGauge("blockCacheCountHitPercent", 98, serverSource);
     HELPER.assertGauge("blockCacheExpressHitPercent", 97, serverSource);
     HELPER.assertCounter("blockCacheFailedInsertionCount", 36, serverSource);
+    HELPER.assertGauge("l1CacheFreeSize", 100, serverSource);
+    HELPER.assertGauge("l1CacheSize", 123, serverSource);
+    HELPER.assertGauge("l1CacheCount", 50, serverSource);
+    HELPER.assertCounter("l1CacheEvictionCount", 1000, serverSource);
     HELPER.assertGauge("l1CacheHitCount", 200, serverSource);
     HELPER.assertGauge("l1CacheMissCount", 100, serverSource);
     HELPER.assertGauge("l1CacheHitRatio", 80, serverSource);
     HELPER.assertGauge("l1CacheMissRatio", 20, serverSource);
+    HELPER.assertGauge("l2CacheFreeSize", 200, serverSource);
+    HELPER.assertGauge("l2CacheSize", 456, serverSource);
+    HELPER.assertGauge("l2CacheCount", 75, serverSource);
+    HELPER.assertCounter("l2CacheEvictionCount", 2000, serverSource);
     HELPER.assertGauge("l2CacheHitCount", 800, serverSource);
     HELPER.assertGauge("l2CacheMissCount", 200, serverSource);
     HELPER.assertGauge("l2CacheHitRatio", 90, serverSource);
diff --git a/src/main/asciidoc/_chapters/architecture.adoc b/src/main/asciidoc/_chapters/architecture.adoc
index 91192f9614c..23d069c1d91 100644
--- a/src/main/asciidoc/_chapters/architecture.adoc
+++ b/src/main/asciidoc/_chapters/architecture.adoc
@@ -946,6 +946,10 @@ Bloom Filters::
 Currently the recommended way to measure HFile indexes and bloom filters sizes is to look at the region server web UI and checkout the relevant metrics.
 For keys, sampling can be done by using the HFile command line tool and look for the average key size metric.
 Since HBase 0.98.3, you can view details on BlockCache stats and metrics in a special Block Cache section in the UI.
+As of HBase 2.4.14, you can estimate how much of the block cache is occupied by HFile indexes and bloom filters, as opposed to DATA blocks, using the blockCacheCount and blockCacheDataBlockCount metrics in JMX. The
+formula `(blockCacheCount - blockCacheDataBlockCount) * blockSize` gives an estimate of the space used by non-DATA blocks, which is useful when planning to enable the BucketCache. You
+should make sure the post-BucketCache configuration gives the on-heap LRU cache enough memory to hold at least as many non-DATA blocks as were cached before the BucketCache was enabled.
+Once BucketCache is enabled, L1 metrics such as l1CacheSize, l1CacheCount, and l1CacheEvictionCount can help you further tune the size.
 
 It's generally bad to use block caching when the WSS doesn't fit in memory.
 This is the case when you have for example 40GB available across all your region servers' block caches but you need to process 1TB of data.