You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by la...@apache.org on 2013/08/03 02:46:45 UTC
svn commit: r1509923 - in /hbase/branches/0.94/src/main:
java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java
resources/hbase-default.xml
Author: larsh
Date: Sat Aug 3 00:46:45 2013
New Revision: 1509923
URL: http://svn.apache.org/r1509923
Log:
HBASE-8949 hbase.mapreduce.hfileoutputformat.blocksize should configure with blocksize of a table (rajeshbabu)
Modified:
hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java
hbase/branches/0.94/src/main/resources/hbase-default.xml
Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java?rev=1509923&r1=1509922&r2=1509923&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java Sat Aug 3 00:46:45 2013
@@ -82,6 +82,7 @@ public class HFileOutputFormat extends F
private static final String BLOOM_TYPE_CONF_KEY = "hbase.hfileoutputformat.families.bloomtype";
private static final String DATABLOCK_ENCODING_CONF_KEY =
"hbase.mapreduce.hfileoutputformat.datablock.encoding";
+ private static final String BLOCK_SIZE_CONF_KEY = "hbase.mapreduce.hfileoutputformat.blocksize";
public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context)
throws IOException, InterruptedException {
@@ -93,8 +94,6 @@ public class HFileOutputFormat extends F
// These configs. are from hbase-*.xml
final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE,
HConstants.DEFAULT_MAX_FILE_SIZE);
- final int blocksize = conf.getInt("hbase.mapreduce.hfileoutputformat.blocksize",
- HFile.DEFAULT_BLOCKSIZE);
// Invented config. Add to hbase-*.xml if other than default compression.
final String defaultCompression = conf.get("hfile.compression",
Compression.Algorithm.NONE.getName());
@@ -104,7 +103,8 @@ public class HFileOutputFormat extends F
// create a map from column family to the compression algorithm
final Map<byte[], String> compressionMap = createFamilyCompressionMap(conf);
final Map<byte[], String> bloomTypeMap = createFamilyBloomMap(conf);
-
+ final Map<byte[], String> blockSizeMap = createFamilyBlockSizeMap(conf);
+
String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY);
final HFileDataBlockEncoder encoder;
if (dataBlockEncodingStr == null) {
@@ -201,9 +201,12 @@ public class HFileOutputFormat extends F
if (bloomTypeStr != null) {
bloomType = BloomType.valueOf(bloomTypeStr);
}
+ String blockSizeString = blockSizeMap.get(family);
+ int blockSize = blockSizeString == null ? HFile.DEFAULT_BLOCKSIZE
+ : Integer.parseInt(blockSizeString);
Configuration tempConf = new Configuration(conf);
tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
- wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs, blocksize)
+ wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs, blockSize)
.withOutputDir(familydir)
.withCompression(AbstractHFileWriter.compressionByName(compression))
.withBloomType(bloomType)
@@ -374,6 +377,7 @@ public class HFileOutputFormat extends F
// Set compression algorithms based on column families
configureCompression(table, conf);
configureBloomType(table, conf);
+ configureBlockSize(table, conf);
TableMapReduceUtil.addDependencyJars(job);
LOG.info("Incremental table output configured.");
@@ -417,7 +421,11 @@ public class HFileOutputFormat extends F
private static Map<byte[], String> createFamilyBloomMap(Configuration conf) {
return createFamilyConfValueMap(conf, BLOOM_TYPE_CONF_KEY);
}
-
+
+ private static Map<byte[], String> createFamilyBlockSizeMap(Configuration conf) {
+ return createFamilyConfValueMap(conf, BLOCK_SIZE_CONF_KEY);
+ }
+
/**
* Run inside the task to deserialize column family to given conf value map.
*
@@ -474,6 +482,29 @@ public class HFileOutputFormat extends F
conf.set(COMPRESSION_CONF_KEY, compressionConfigValue.toString());
}
+ private static void configureBlockSize(HTable table, Configuration conf) throws IOException {
+ StringBuilder blockSizeConfigValue = new StringBuilder();
+ HTableDescriptor tableDescriptor = table.getTableDescriptor();
+ if (tableDescriptor == null) {
+ // could happen with mock table instance
+ return;
+ }
+ Collection<HColumnDescriptor> families = tableDescriptor.getFamilies();
+ int i = 0;
+ for (HColumnDescriptor familyDescriptor : families) {
+ if (i++ > 0) {
+ blockSizeConfigValue.append('&');
+ }
+ blockSizeConfigValue.append(URLEncoder.encode(
+ familyDescriptor.getNameAsString(), "UTF-8"));
+ blockSizeConfigValue.append('=');
+ blockSizeConfigValue.append(URLEncoder.encode(
+ String.valueOf(familyDescriptor.getBlocksize()), "UTF-8"));
+ }
+ // No trailing ampersand to strip: '&' is only inserted before the second and later entries
+ conf.set(BLOCK_SIZE_CONF_KEY, blockSizeConfigValue.toString());
+ }
+
/**
* Serialize column family to bloom type map to configuration.
* Invoked while configuring the MR job for incremental load.
Modified: hbase/branches/0.94/src/main/resources/hbase-default.xml
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/resources/hbase-default.xml?rev=1509923&r1=1509922&r2=1509923&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/resources/hbase-default.xml (original)
+++ hbase/branches/0.94/src/main/resources/hbase-default.xml Sat Aug 3 00:46:45 2013
@@ -460,19 +460,6 @@
</description>
</property>
<property>
- <name>hbase.mapreduce.hfileoutputformat.blocksize</name>
- <value>65536</value>
- <description>The mapreduce HFileOutputFormat writes storefiles/hfiles.
- This is the minimum hfile blocksize to emit. Usually in hbase, writing
- hfiles, the blocksize is gotten from the table schema (HColumnDescriptor)
- but in the mapreduce outputformat context, we don't have access to the
- schema so get blocksize from Configuration. The smaller you make
- the blocksize, the bigger your index and the less you fetch on a
- random-access. Set the blocksize down if you have small cells and want
- faster random-access of individual cells.
- </description>
- </property>
- <property>
<name>hfile.block.cache.size</name>
<value>0.25</value>
<description>