You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ta...@apache.org on 2021/09/22 16:30:27 UTC

[hbase] branch master updated: HBASE-26274 Create an option to reintroduce BlockCache to mapreduce job (#3684)

This is an automated email from the ASF dual-hosted git repository.

taklwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/master by this push:
     new d956828  HBASE-26274 Create an option to reintroduce BlockCache to mapreduce job (#3684)
d956828 is described below

commit d956828aaca89c396d66e8bf2e9f0a4d3e0de8bc
Author: Tak Lon (Stephen) Wu <ta...@apache.org>
AuthorDate: Wed Sep 22 09:17:18 2021 -0700

    HBASE-26274 Create an option to reintroduce BlockCache to mapreduce job (#3684)
    
    Introduce `hfile.onheap.block.cache.fixed.size`,
    which is disabled by default. When ClientSideRegionScanner
    is used, it is enabled with a fixed size for caching
    INDEX/LEAF_INDEX blocks, so that a client, e.g. a
    snapshot scanner, that scans the entire HFile
    does not need to seek/reseek to the index
    blocks multiple times.
    
    Signed-off-by: Josh Elser <el...@apache.org>
---
 .../java/org/apache/hadoop/hbase/HConstants.java   |  11 ++
 .../hbase/client/ClientSideRegionScanner.java      |  15 +++
 .../apache/hadoop/hbase/io/hfile/BlockCache.java   |   9 ++
 .../hadoop/hbase/io/hfile/BlockCacheFactory.java   |   4 +-
 .../hadoop/hbase/io/hfile/CombinedBlockCache.java  |   5 -
 .../hbase/io/hfile/IndexOnlyLruBlockCache.java     |  49 +++++++++
 .../hadoop/hbase/io/util/MemorySizeUtil.java       |   8 +-
 .../hbase/client/TestClientSideRegionScanner.java  | 115 +++++++++++++++++++++
 .../hadoop/hbase/io/hfile/TestCacheConfig.java     |  30 ++++++
 9 files changed, 238 insertions(+), 8 deletions(-)

diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
index 10a38f6..6464158 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
@@ -1049,6 +1049,17 @@ public final class HConstants {
 
   public static final float HFILE_BLOCK_CACHE_SIZE_DEFAULT = 0.4f;
 
+  /**
+   * Configuration key for a fixed size, in bytes, of the on-heap block cache. By default (0) it
+   * does nothing; it should be set explicitly by the user or only within ClientSideRegionScanner.
+   * If set to a positive value below the percentage-based cache size, it overrides the max size.
+   */
+  public static final String HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY =
+    "hfile.onheap.block.cache.fixed.size";
+  public static final long HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_DEFAULT = 0L;
+  public static final long HBASE_CLIENT_SCANNER_ONHEAP_BLOCK_CACHE_FIXED_SIZE_DEFAULT =
+    32 * 1024 * 1024L;
+
   /*
     * Minimum percentage of free heap necessary for a successful cluster startup.
     */
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/client/ClientSideRegionScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/client/ClientSideRegionScanner.java
index 1e2b699..1feafc1 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/client/ClientSideRegionScanner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/client/ClientSideRegionScanner.java
@@ -25,8 +25,10 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.PrivateCellUtil;
 import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
+import org.apache.hadoop.hbase.io.hfile.BlockCacheFactory;
 import org.apache.hadoop.hbase.mob.MobFileCache;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.RegionScanner;
@@ -60,6 +62,15 @@ public class ClientSideRegionScanner extends AbstractClientScanner {
     region = HRegion.newHRegion(CommonFSUtils.getTableDir(rootDir, htd.getTableName()), null, fs,
       conf, hri, htd, null);
     region.setRestoredRegion(true);
+    // A non-RS process does not have a block cache, and this is a client side scanner, so
+    // create one for MapReduce jobs to cache the INDEX blocks by selecting
+    // IndexOnlyLruBlockCache and (if unset) setting HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY.
+    conf.set(BlockCacheFactory.BLOCKCACHE_POLICY_KEY, "IndexOnlyLRU");
+    conf.setIfUnset(HConstants.HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY,
+        String.valueOf(HConstants.HBASE_CLIENT_SCANNER_ONHEAP_BLOCK_CACHE_FIXED_SIZE_DEFAULT));
+    // don't allow L2 bucket cache for non RS process to avoid unexpected disk usage.
+    conf.unset(HConstants.BUCKET_CACHE_IOENGINE_KEY);
+    region.setBlockCache(BlockCacheFactory.createBlockCache(conf));
     // we won't initialize the MobFileCache when not running in RS process. so provided an
     // initialized cache. Consider the case: an CF was set from an mob to non-mob. if we only
     // initialize cache for MOB region, NPE from HMobStore will still happen. So Initialize the
@@ -122,6 +133,10 @@ public class ClientSideRegionScanner extends AbstractClientScanner {
     }
   }
 
+  HRegion getRegion() {
+    return region;
+  }
+
   @Override
   public boolean renewLease() {
     throw new UnsupportedOperationException();
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
index f54edae..6f32d62 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
@@ -147,4 +147,13 @@ public interface BlockCache extends Iterable<CachedBlock> {
    * @return The list of sub blockcaches that make up this one; returns null if no sub caches.
    */
   BlockCache [] getBlockCaches();
+
+  /**
+   * Check if block type is meta or index block
+   * @param blockType block type of a given HFile block
+   * @return true if block type is non-data block
+   */
+  default boolean isMetaBlock(BlockType blockType) {
+    return blockType != null && blockType.getCategory() != BlockType.BlockCategory.DATA;
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheFactory.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheFactory.java
index 90dd833..12c769e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheFactory.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheFactory.java
@@ -43,7 +43,7 @@ public final class BlockCacheFactory {
    */
 
   /**
-   * Configuration key to cache block policy (Lru, TinyLfu).
+   * Configuration key for the block cache policy (Lru, TinyLfu, AdaptiveLRU, IndexOnlyLRU).
    */
   public static final String BLOCKCACHE_POLICY_KEY = "hfile.block.cache.policy";
   public static final String BLOCKCACHE_POLICY_DEFAULT = "LRU";
@@ -129,6 +129,8 @@ public final class BlockCacheFactory {
         StringUtils.byteDesc(cacheSize) + ", blockSize=" + StringUtils.byteDesc(blockSize));
     if (policy.equalsIgnoreCase("LRU")) {
       return new LruBlockCache(cacheSize, blockSize, true, c);
+    } else if (policy.equalsIgnoreCase("IndexOnlyLRU")) {
+      return new IndexOnlyLruBlockCache(cacheSize, blockSize, true, c);
     } else if (policy.equalsIgnoreCase("TinyLFU")) {
       return new TinyLfuBlockCache(cacheSize, blockSize, ForkJoinPool.commonPool(), c);
     } else if (policy.equalsIgnoreCase("AdaptiveLRU")) {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
index 571ae07..dc4f697 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
@@ -22,7 +22,6 @@ import java.util.Iterator;
 
 import org.apache.yetus.audience.InterfaceAudience;
 import org.apache.hadoop.hbase.io.HeapSize;
-import org.apache.hadoop.hbase.io.hfile.BlockType.BlockCategory;
 import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
 
 /**
@@ -71,10 +70,6 @@ public class CombinedBlockCache implements ResizableBlockCache, HeapSize {
     cacheBlock(cacheKey, buf, false);
   }
 
-  private boolean isMetaBlock(BlockType blockType) {
-    return blockType.getCategory() != BlockCategory.DATA;
-  }
-
   @Override
   public Cacheable getBlock(BlockCacheKey cacheKey, boolean caching,
       boolean repeat, boolean updateCacheMetrics) {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/IndexOnlyLruBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/IndexOnlyLruBlockCache.java
new file mode 100644
index 0000000..50b195d
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/IndexOnlyLruBlockCache.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * An on-heap block cache that extends LruBlockCache and only caches non-data (e.g. index) blocks.
+ * This block cache should only be used by
+ * {@link org.apache.hadoop.hbase.client.ClientSideRegionScanner}, which is normally used by a
+ * client residing outside of the region server, e.g. a container of a MapReduce job.
+ **/
+@InterfaceAudience.Private
+public class IndexOnlyLruBlockCache extends LruBlockCache {
+
+  public IndexOnlyLruBlockCache(long maxSize, long blockSize, boolean evictionThread,
+    Configuration conf) {
+    super(maxSize, blockSize, evictionThread, conf);
+  }
+
+  /**
+   * Cache only index block with the specified name and buffer
+   * @param cacheKey block's cache key
+   * @param buf      block buffer
+   * @param inMemory if block is in-memory
+   */
+  @Override
+  public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf, boolean inMemory) {
+    if (isMetaBlock(buf.getBlockType())) {
+      super.cacheBlock(cacheKey, buf, inMemory);
+    }
+  }
+}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/util/MemorySizeUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/util/MemorySizeUtil.java
index 9104980..b1f298e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/util/MemorySizeUtil.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/util/MemorySizeUtil.java
@@ -228,9 +228,13 @@ public class MemorySizeUtil {
     if (usage != null) {
       max = usage.getMax();
     }
-
+    float onHeapCacheFixedSize = (float) conf
+      .getLong(HConstants.HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY,
+        HConstants.HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_DEFAULT) / max;
     // Calculate the amount of heap to give the heap.
-    return (long) (max * cachePercentage);
+    return (onHeapCacheFixedSize > 0 && onHeapCacheFixedSize < cachePercentage) ?
+      (long) (max * onHeapCacheFixedSize) :
+      (long) (max * cachePercentage);
   }
 
   /**
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestClientSideRegionScanner.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestClientSideRegionScanner.java
new file mode 100644
index 0000000..859e36f
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestClientSideRegionScanner.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.client;
+
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import java.io.IOException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtil;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.io.hfile.BlockCache;
+import org.apache.hadoop.hbase.io.hfile.IndexOnlyLruBlockCache;
+import org.apache.hadoop.hbase.testclassification.ClientTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category({ SmallTests.class, ClientTests.class })
+public class TestClientSideRegionScanner {
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+    HBaseClassTestRule.forClass(TestClientSideRegionScanner.class);
+
+  private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
+
+  private Configuration conf;
+  private Path rootDir;
+  private FileSystem fs;
+  private TableDescriptor htd;
+  private RegionInfo hri;
+  private Scan scan;
+
+  @BeforeClass
+  public static void setUpBeforeClass() throws Exception {
+    TEST_UTIL.startMiniCluster(1);
+  }
+
+  @AfterClass
+  public static void tearDownAfterClass() throws Exception {
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  @Before
+  public void setup() throws IOException {
+    conf = TEST_UTIL.getConfiguration();
+    rootDir = TEST_UTIL.getDefaultRootDirPath();
+    fs = TEST_UTIL.getTestFileSystem();
+    htd = TEST_UTIL.getAdmin().getDescriptor(TableName.META_TABLE_NAME);
+    hri = TEST_UTIL.getAdmin().getRegions(TableName.META_TABLE_NAME).get(0);
+    scan = new Scan();
+  }
+
+  @Test
+  public void testDefaultBlockCache() throws IOException {
+    Configuration copyConf = new Configuration(conf);
+    ClientSideRegionScanner clientSideRegionScanner =
+      new ClientSideRegionScanner(copyConf, fs, rootDir, htd, hri, scan, null);
+
+    BlockCache blockCache = clientSideRegionScanner.getRegion().getBlockCache();
+    assertNotNull(blockCache);
+    assertTrue(blockCache instanceof IndexOnlyLruBlockCache);
+    assertTrue(HConstants.HBASE_CLIENT_SCANNER_ONHEAP_BLOCK_CACHE_FIXED_SIZE_DEFAULT == blockCache
+      .getMaxSize());
+  }
+
+  @Test
+  public void testConfiguredBlockCache() throws IOException {
+    Configuration copyConf = new Configuration(conf);
+    // tiny 1MB fixed cache size
+    long blockCacheFixedSize = 1024 * 1024L;
+    copyConf.setLong(HConstants.HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY, blockCacheFixedSize);
+    ClientSideRegionScanner clientSideRegionScanner =
+      new ClientSideRegionScanner(copyConf, fs, rootDir, htd, hri, scan, null);
+
+    BlockCache blockCache = clientSideRegionScanner.getRegion().getBlockCache();
+    assertNotNull(blockCache);
+    assertTrue(blockCache instanceof IndexOnlyLruBlockCache);
+    assertTrue(blockCacheFixedSize == blockCache.getMaxSize());
+  }
+
+  @Test
+  public void testNoBlockCache() throws IOException {
+    Configuration copyConf = new Configuration(conf);
+    copyConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
+    ClientSideRegionScanner clientSideRegionScanner =
+      new ClientSideRegionScanner(copyConf, fs, rootDir, htd, hri, scan, null);
+
+    BlockCache blockCache = clientSideRegionScanner.getRegion().getBlockCache();
+    assertNull(blockCache);
+  }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheConfig.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheConfig.java
index 65fc3af..0ec596e6 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheConfig.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheConfig.java
@@ -374,4 +374,34 @@ public class TestCacheConfig {
     } catch (IllegalArgumentException e) {
     }
   }
+
+  @Test
+  public void testIndexOnlyLruBlockCache() {
+    CacheConfig cc = new CacheConfig(this.conf);
+    conf.set(BlockCacheFactory.BLOCKCACHE_POLICY_KEY, "IndexOnlyLRU");
+    BlockCache blockCache = BlockCacheFactory.createBlockCache(this.conf);
+    assertTrue(blockCache instanceof IndexOnlyLruBlockCache);
+    // reject data block
+    long initialBlockCount = blockCache.getBlockCount();
+    BlockCacheKey bck = new BlockCacheKey("bck", 0);
+    Cacheable c = new DataCacheEntry();
+    blockCache.cacheBlock(bck, c, true);
+    // accept index block
+    Cacheable indexCacheEntry = new IndexCacheEntry();
+    blockCache.cacheBlock(bck, indexCacheEntry, true);
+    assertEquals(initialBlockCount + 1, blockCache.getBlockCount());
+  }
+
+  @Test
+  public void testGetOnHeapCacheSize() {
+    Configuration copyConf = new Configuration(conf);
+    long fixedSize = 1024 * 1024L;
+    long onHeapCacheSize = MemorySizeUtil.getOnHeapCacheSize(copyConf);
+    assertEquals(null, copyConf.get(HConstants.HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY));
+    assertTrue(onHeapCacheSize > 0 && onHeapCacheSize != fixedSize);
+    // when HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY is set, the cache uses that fixed size
+    copyConf.setLong(HConstants.HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY, fixedSize);
+    onHeapCacheSize = MemorySizeUtil.getOnHeapCacheSize(copyConf);
+    assertEquals(fixedSize, onHeapCacheSize);
+  }
 }