You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by la...@apache.org on 2013/08/03 02:46:45 UTC

svn commit: r1509923 - in /hbase/branches/0.94/src/main: java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java resources/hbase-default.xml

Author: larsh
Date: Sat Aug  3 00:46:45 2013
New Revision: 1509923

URL: http://svn.apache.org/r1509923
Log:
HBASE-8949 hbase.mapreduce.hfileoutputformat.blocksize should configure with blocksize of a table (rajeshbabu)

Modified:
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java
    hbase/branches/0.94/src/main/resources/hbase-default.xml

Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java?rev=1509923&r1=1509922&r2=1509923&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java Sat Aug  3 00:46:45 2013
@@ -82,6 +82,7 @@ public class HFileOutputFormat extends F
   private static final String BLOOM_TYPE_CONF_KEY = "hbase.hfileoutputformat.families.bloomtype";
   private static final String DATABLOCK_ENCODING_CONF_KEY = 
      "hbase.mapreduce.hfileoutputformat.datablock.encoding";
+  private static final String BLOCK_SIZE_CONF_KEY = "hbase.mapreduce.hfileoutputformat.blocksize";
 
   public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context)
   throws IOException, InterruptedException {
@@ -93,8 +94,6 @@ public class HFileOutputFormat extends F
     // These configs. are from hbase-*.xml
     final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE,
         HConstants.DEFAULT_MAX_FILE_SIZE);
-    final int blocksize = conf.getInt("hbase.mapreduce.hfileoutputformat.blocksize",
-        HFile.DEFAULT_BLOCKSIZE);
     // Invented config.  Add to hbase-*.xml if other than default compression.
     final String defaultCompression = conf.get("hfile.compression",
         Compression.Algorithm.NONE.getName());
@@ -104,7 +103,8 @@ public class HFileOutputFormat extends F
     // create a map from column family to the compression algorithm
     final Map<byte[], String> compressionMap = createFamilyCompressionMap(conf);
     final Map<byte[], String> bloomTypeMap = createFamilyBloomMap(conf);
-    
+    final Map<byte[], String> blockSizeMap = createFamilyBlockSizeMap(conf);
+
     String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY);
     final HFileDataBlockEncoder encoder;
     if (dataBlockEncodingStr == null) {
@@ -201,9 +201,12 @@ public class HFileOutputFormat extends F
         if (bloomTypeStr != null) {
           bloomType = BloomType.valueOf(bloomTypeStr);
         }
+        String blockSizeString = blockSizeMap.get(family);
+        int blockSize = blockSizeString == null ? HFile.DEFAULT_BLOCKSIZE
+            : Integer.parseInt(blockSizeString);
         Configuration tempConf = new Configuration(conf);
         tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
-        wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs, blocksize)
+        wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs, blockSize)
             .withOutputDir(familydir)
             .withCompression(AbstractHFileWriter.compressionByName(compression))
             .withBloomType(bloomType)
@@ -374,6 +377,7 @@ public class HFileOutputFormat extends F
     // Set compression algorithms based on column families
     configureCompression(table, conf);
     configureBloomType(table, conf);
+    configureBlockSize(table, conf);
     
     TableMapReduceUtil.addDependencyJars(job);
     LOG.info("Incremental table output configured.");
@@ -417,7 +421,11 @@ public class HFileOutputFormat extends F
   private static Map<byte[], String> createFamilyBloomMap(Configuration conf) {
     return createFamilyConfValueMap(conf, BLOOM_TYPE_CONF_KEY);
   }
-  
+
+  private static Map<byte[], String> createFamilyBlockSizeMap(Configuration conf) {
+    return createFamilyConfValueMap(conf, BLOCK_SIZE_CONF_KEY);
+  }
+
   /**
    * Run inside the task to deserialize column family to given conf value map.
    * 
@@ -474,6 +482,29 @@ public class HFileOutputFormat extends F
     conf.set(COMPRESSION_CONF_KEY, compressionConfigValue.toString());
   }
   
+  private static void configureBlockSize(HTable table, Configuration conf) throws IOException {
+    StringBuilder blockSizeConfigValue = new StringBuilder();
+    HTableDescriptor tableDescriptor = table.getTableDescriptor();
+    if (tableDescriptor == null) {
+      // could happen with mock table instance
+      return;
+    }
+    Collection<HColumnDescriptor> families = tableDescriptor.getFamilies();
+    int i = 0;
+    for (HColumnDescriptor familyDescriptor : families) {
+      if (i++ > 0) {
+        blockSizeConfigValue.append('&');
+      }
+      blockSizeConfigValue.append(URLEncoder.encode(
+          familyDescriptor.getNameAsString(), "UTF-8"));
+      blockSizeConfigValue.append('=');
+      blockSizeConfigValue.append(URLEncoder.encode(
+          String.valueOf(familyDescriptor.getBlocksize()), "UTF-8"));
+    }
+    // '&' is only written between entries, so there is no trailing separator to strip
+    conf.set(BLOCK_SIZE_CONF_KEY, blockSizeConfigValue.toString());
+  }
+
   /**
    * Serialize column family to bloom type map to configuration.
    * Invoked while configuring the MR job for incremental load.

Modified: hbase/branches/0.94/src/main/resources/hbase-default.xml
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/resources/hbase-default.xml?rev=1509923&r1=1509922&r2=1509923&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/resources/hbase-default.xml (original)
+++ hbase/branches/0.94/src/main/resources/hbase-default.xml Sat Aug  3 00:46:45 2013
@@ -460,19 +460,6 @@
     </description>
   </property>
   <property>
-    <name>hbase.mapreduce.hfileoutputformat.blocksize</name>
-    <value>65536</value>
-    <description>The mapreduce HFileOutputFormat writes storefiles/hfiles.
-    This is the minimum hfile blocksize to emit.  Usually in hbase, writing
-    hfiles, the blocksize is gotten from the table schema (HColumnDescriptor)
-    but in the mapreduce outputformat context, we don't have access to the
-    schema so get blocksize from Configuration.  The smaller you make
-    the blocksize, the bigger your index and the less you fetch on a
-    random-access.  Set the blocksize down if you have small cells and want
-    faster random-access of individual cells.
-    </description>
-  </property>
-  <property>
     <name>hfile.block.cache.size</name>
     <value>0.25</value>
     <description>