You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ta...@apache.org on 2021/09/22 16:30:27 UTC
[hbase] branch master updated: HBASE-26274 Create an option to
reintroduce BlockCache to mapreduce job (#3684)
This is an automated email from the ASF dual-hosted git repository.
taklwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/master by this push:
new d956828 HBASE-26274 Create an option to reintroduce BlockCache to mapreduce job (#3684)
d956828 is described below
commit d956828aaca89c396d66e8bf2e9f0a4d3e0de8bc
Author: Tak Lon (Stephen) Wu <ta...@apache.org>
AuthorDate: Wed Sep 22 09:17:18 2021 -0700
HBASE-26274 Create an option to reintroduce BlockCache to mapreduce job (#3684)
Introduce `hfile.onheap.block.cache.fixed.size`,
which is disabled by default. When using ClientSideRegionScanner,
it is enabled with a fixed size for caching
INDEX/LEAF_INDEX blocks, so that a client, e.g. a
snapshot scanner, that scans the entire HFile
does not need to seek/reseek to the index
blocks multiple times.
Signed-off-by: Josh Elser <el...@apache.org>
---
.../java/org/apache/hadoop/hbase/HConstants.java | 11 ++
.../hbase/client/ClientSideRegionScanner.java | 15 +++
.../apache/hadoop/hbase/io/hfile/BlockCache.java | 9 ++
.../hadoop/hbase/io/hfile/BlockCacheFactory.java | 4 +-
.../hadoop/hbase/io/hfile/CombinedBlockCache.java | 5 -
.../hbase/io/hfile/IndexOnlyLruBlockCache.java | 49 +++++++++
.../hadoop/hbase/io/util/MemorySizeUtil.java | 8 +-
.../hbase/client/TestClientSideRegionScanner.java | 115 +++++++++++++++++++++
.../hadoop/hbase/io/hfile/TestCacheConfig.java | 30 ++++++
9 files changed, 238 insertions(+), 8 deletions(-)
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
index 10a38f6..6464158 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
@@ -1049,6 +1049,17 @@ public final class HConstants {
public static final float HFILE_BLOCK_CACHE_SIZE_DEFAULT = 0.4f;
+ /**
+ * Configuration key for a fixed on-heap block cache size in bytes. Defaults to 0 (no effect); it
+ * should be set explicitly by the user or by ClientSideRegionScanner. When the value is positive
+ * and smaller than the heap-percentage based cache size, it is used as the cache size instead.
+ */
+ public static final String HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY =
+ "hfile.onheap.block.cache.fixed.size";
+ public static final long HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_DEFAULT = 0L;
+ public static final long HBASE_CLIENT_SCANNER_ONHEAP_BLOCK_CACHE_FIXED_SIZE_DEFAULT =
+ 32 * 1024 * 1024L;
+
/*
* Minimum percentage of free heap necessary for a successful cluster startup.
*/
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/client/ClientSideRegionScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/client/ClientSideRegionScanner.java
index 1e2b699..1feafc1 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/client/ClientSideRegionScanner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/client/ClientSideRegionScanner.java
@@ -25,8 +25,10 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
+import org.apache.hadoop.hbase.io.hfile.BlockCacheFactory;
import org.apache.hadoop.hbase.mob.MobFileCache;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.RegionScanner;
@@ -60,6 +62,15 @@ public class ClientSideRegionScanner extends AbstractClientScanner {
region = HRegion.newHRegion(CommonFSUtils.getTableDir(rootDir, htd.getTableName()), null, fs,
conf, hri, htd, null);
region.setRestoredRegion(true);
+ // A non-RS process does not have a block cache, and this is a client-side scanner, so
+ // create one for MapReduce jobs to cache the INDEX blocks by using IndexOnlyLruBlockCache
+ // and, if unset, defaulting HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY to a fixed size.
+ conf.set(BlockCacheFactory.BLOCKCACHE_POLICY_KEY, "IndexOnlyLRU");
+ conf.setIfUnset(HConstants.HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY,
+ String.valueOf(HConstants.HBASE_CLIENT_SCANNER_ONHEAP_BLOCK_CACHE_FIXED_SIZE_DEFAULT));
+ // don't allow L2 bucket cache for non RS process to avoid unexpected disk usage.
+ conf.unset(HConstants.BUCKET_CACHE_IOENGINE_KEY);
+ region.setBlockCache(BlockCacheFactory.createBlockCache(conf));
// we won't initialize the MobFileCache when not running in RS process. so provided an
// initialized cache. Consider the case: an CF was set from an mob to non-mob. if we only
// initialize cache for MOB region, NPE from HMobStore will still happen. So Initialize the
@@ -122,6 +133,10 @@ public class ClientSideRegionScanner extends AbstractClientScanner {
}
}
+ HRegion getRegion() {
+ return region; // package-private accessor for the region this scanner opened
+ }
+
@Override
public boolean renewLease() {
throw new UnsupportedOperationException();
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
index f54edae..6f32d62 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
@@ -147,4 +147,13 @@ public interface BlockCache extends Iterable<CachedBlock> {
* @return The list of sub blockcaches that make up this one; returns null if no sub caches.
*/
BlockCache [] getBlockCaches();
+
+ /**
+ * Check if block type is meta or index block, i.e. anything outside the DATA block category
+ * @param blockType block type of a given HFile block, may be null
+ * @return true if block type is non-null and its category is not DATA
+ */
+ default boolean isMetaBlock(BlockType blockType) {
+ return blockType != null && blockType.getCategory() != BlockType.BlockCategory.DATA;
+ }
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheFactory.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheFactory.java
index 90dd833..12c769e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheFactory.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheFactory.java
@@ -43,7 +43,7 @@ public final class BlockCacheFactory {
*/
/**
- * Configuration key to cache block policy (Lru, TinyLfu).
+ * Configuration key to cache block policy (Lru, TinyLfu, AdaptiveLRU, IndexOnlyLRU).
*/
public static final String BLOCKCACHE_POLICY_KEY = "hfile.block.cache.policy";
public static final String BLOCKCACHE_POLICY_DEFAULT = "LRU";
@@ -129,6 +129,8 @@ public final class BlockCacheFactory {
StringUtils.byteDesc(cacheSize) + ", blockSize=" + StringUtils.byteDesc(blockSize));
if (policy.equalsIgnoreCase("LRU")) {
return new LruBlockCache(cacheSize, blockSize, true, c);
+ } else if (policy.equalsIgnoreCase("IndexOnlyLRU")) {
+ return new IndexOnlyLruBlockCache(cacheSize, blockSize, true, c);
} else if (policy.equalsIgnoreCase("TinyLFU")) {
return new TinyLfuBlockCache(cacheSize, blockSize, ForkJoinPool.commonPool(), c);
} else if (policy.equalsIgnoreCase("AdaptiveLRU")) {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
index 571ae07..dc4f697 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
@@ -22,7 +22,6 @@ import java.util.Iterator;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.io.HeapSize;
-import org.apache.hadoop.hbase.io.hfile.BlockType.BlockCategory;
import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
/**
@@ -71,10 +70,6 @@ public class CombinedBlockCache implements ResizableBlockCache, HeapSize {
cacheBlock(cacheKey, buf, false);
}
- private boolean isMetaBlock(BlockType blockType) {
- return blockType.getCategory() != BlockCategory.DATA;
- }
-
@Override
public Cacheable getBlock(BlockCacheKey cacheKey, boolean caching,
boolean repeat, boolean updateCacheMetrics) {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/IndexOnlyLruBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/IndexOnlyLruBlockCache.java
new file mode 100644
index 0000000..50b195d
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/IndexOnlyLruBlockCache.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * An on-heap block cache that extends LruBlockCache and only caches non-data (index/meta) blocks.
+ * This block cache should only be used by
+ * {@link org.apache.hadoop.hbase.client.ClientSideRegionScanner}, which is normally used by a
+ * client that resides outside of the region server, e.g. a container of a map reduce job.
+ */
+@InterfaceAudience.Private
+public class IndexOnlyLruBlockCache extends LruBlockCache {
+
+ public IndexOnlyLruBlockCache(long maxSize, long blockSize, boolean evictionThread,
+ Configuration conf) {
+ super(maxSize, blockSize, evictionThread, conf);
+ }
+
+ /**
+ * Cache the block only if {@code isMetaBlock} accepts its block type; other blocks are ignored
+ * @param cacheKey block's cache key
+ * @param buf block buffer
+ * @param inMemory if block is in-memory
+ */
+ @Override
+ public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf, boolean inMemory) {
+ if (isMetaBlock(buf.getBlockType())) {
+ super.cacheBlock(cacheKey, buf, inMemory);
+ }
+ }
+}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/util/MemorySizeUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/util/MemorySizeUtil.java
index 9104980..b1f298e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/util/MemorySizeUtil.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/util/MemorySizeUtil.java
@@ -228,9 +228,13 @@ public class MemorySizeUtil {
if (usage != null) {
max = usage.getMax();
}
-
+ float onHeapCacheFixedSize = (float) conf
+ .getLong(HConstants.HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY,
+ HConstants.HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_DEFAULT) / max;
// Calculate the amount of heap to give the heap.
- return (long) (max * cachePercentage);
+ return (onHeapCacheFixedSize > 0 && onHeapCacheFixedSize < cachePercentage) ?
+ (long) (max * onHeapCacheFixedSize) :
+ (long) (max * cachePercentage);
}
/**
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestClientSideRegionScanner.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestClientSideRegionScanner.java
new file mode 100644
index 0000000..859e36f
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestClientSideRegionScanner.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.client;
+
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import java.io.IOException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtil;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.io.hfile.BlockCache;
+import org.apache.hadoop.hbase.io.hfile.IndexOnlyLruBlockCache;
+import org.apache.hadoop.hbase.testclassification.ClientTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category({ SmallTests.class, ClientTests.class })
+public class TestClientSideRegionScanner {
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestClientSideRegionScanner.class);
+
+ private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
+
+ private Configuration conf;
+ private Path rootDir;
+ private FileSystem fs;
+ private TableDescriptor htd;
+ private RegionInfo hri;
+ private Scan scan;
+
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ TEST_UTIL.startMiniCluster(1); // setup() reads the meta table's descriptor and region from it
+ }
+
+ @AfterClass
+ public static void tearDownAfterClass() throws Exception {
+ TEST_UTIL.shutdownMiniCluster();
+ }
+
+ @Before
+ public void setup() throws IOException {
+ conf = TEST_UTIL.getConfiguration();
+ rootDir = TEST_UTIL.getDefaultRootDirPath();
+ fs = TEST_UTIL.getTestFileSystem();
+ htd = TEST_UTIL.getAdmin().getDescriptor(TableName.META_TABLE_NAME);
+ hri = TEST_UTIL.getAdmin().getRegions(TableName.META_TABLE_NAME).get(0);
+ scan = new Scan();
+ }
+
+ @Test
+ public void testDefaultBlockCache() throws IOException {
+ Configuration copyConf = new Configuration(conf);
+ ClientSideRegionScanner clientSideRegionScanner =
+ new ClientSideRegionScanner(copyConf, fs, rootDir, htd, hri, scan, null);
+ // NOTE(review): none of these tests close the scanner they create; consider closing it
+ BlockCache blockCache = clientSideRegionScanner.getRegion().getBlockCache();
+ assertNotNull(blockCache);
+ assertTrue(blockCache instanceof IndexOnlyLruBlockCache);
+ assertTrue(HConstants.HBASE_CLIENT_SCANNER_ONHEAP_BLOCK_CACHE_FIXED_SIZE_DEFAULT == blockCache
+ .getMaxSize());
+ }
+
+ @Test
+ public void testConfiguredBlockCache() throws IOException {
+ Configuration copyConf = new Configuration(conf);
+ // tiny 1MB fixed cache size
+ long blockCacheFixedSize = 1024 * 1024L;
+ copyConf.setLong(HConstants.HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY, blockCacheFixedSize);
+ ClientSideRegionScanner clientSideRegionScanner =
+ new ClientSideRegionScanner(copyConf, fs, rootDir, htd, hri, scan, null);
+ // the explicitly configured fixed size is expected to become the cache's max size
+ BlockCache blockCache = clientSideRegionScanner.getRegion().getBlockCache();
+ assertNotNull(blockCache);
+ assertTrue(blockCache instanceof IndexOnlyLruBlockCache);
+ assertTrue(blockCacheFixedSize == blockCache.getMaxSize());
+ }
+
+ @Test
+ public void testNoBlockCache() throws IOException {
+ Configuration copyConf = new Configuration(conf);
+ copyConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
+ ClientSideRegionScanner clientSideRegionScanner =
+ new ClientSideRegionScanner(copyConf, fs, rootDir, htd, hri, scan, null);
+ // a zero HFILE_BLOCK_CACHE_SIZE_KEY is expected to leave the region without a block cache
+ BlockCache blockCache = clientSideRegionScanner.getRegion().getBlockCache();
+ assertNull(blockCache);
+ }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheConfig.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheConfig.java
index 65fc3af..0ec596e6 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheConfig.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheConfig.java
@@ -374,4 +374,34 @@ public class TestCacheConfig {
} catch (IllegalArgumentException e) {
}
}
+
+ @Test
+ public void testIndexOnlyLruBlockCache() {
+ CacheConfig cc = new CacheConfig(this.conf); // NOTE(review): cc is not used by this test
+ conf.set(BlockCacheFactory.BLOCKCACHE_POLICY_KEY, "IndexOnlyLRU");
+ BlockCache blockCache = BlockCacheFactory.createBlockCache(this.conf);
+ assertTrue(blockCache instanceof IndexOnlyLruBlockCache);
+ // a data block should be rejected, leaving the block count unchanged
+ long initialBlockCount = blockCache.getBlockCount();
+ BlockCacheKey bck = new BlockCacheKey("bck", 0);
+ Cacheable c = new DataCacheEntry();
+ blockCache.cacheBlock(bck, c, true);
+ // an index block should be accepted, so only this second cacheBlock call adds an entry
+ Cacheable indexCacheEntry = new IndexCacheEntry();
+ blockCache.cacheBlock(bck, indexCacheEntry, true);
+ assertEquals(initialBlockCount + 1, blockCache.getBlockCount());
+ }
+
+ @Test
+ public void testGetOnHeapCacheSize() {
+ Configuration copyConf = new Configuration(conf);
+ long fixedSize = 1024 * 1024L;
+ long onHeapCacheSize = MemorySizeUtil.getOnHeapCacheSize(copyConf);
+ assertEquals(null, copyConf.get(HConstants.HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY));
+ assertTrue(onHeapCacheSize > 0 && onHeapCacheSize != fixedSize);
+ // when HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY is set, the cache size becomes that fixed size
+ copyConf.setLong(HConstants.HFILE_ONHEAP_BLOCK_CACHE_FIXED_SIZE_KEY, fixedSize);
+ onHeapCacheSize = MemorySizeUtil.getOnHeapCacheSize(copyConf);
+ assertEquals(fixedSize, onHeapCacheSize);
+ }
}