svn commit: r1466412 - in /hbase/trunk/hbase-server/src:
main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat.java
Author: mbertozzi
Date: Wed Apr 10 10:08:44 2013
New Revision: 1466412
URL: http://svn.apache.org/r1466412
Log:
HBASE-8313 Add Bloom filter testing for HFileOutputFormat
Modified:
hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat.java
Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java?rev=1466412&r1=1466411&r2=1466412&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java Wed Apr 10 10:08:44 2013
@@ -99,7 +99,7 @@ public class StoreFile {
Bytes.toBytes("EXCLUDE_FROM_MINOR_COMPACTION");
/** Bloom filter Type in FileInfo */
- static final byte[] BLOOM_FILTER_TYPE_KEY =
+ public static final byte[] BLOOM_FILTER_TYPE_KEY =
Bytes.toBytes("BLOOM_FILTER_TYPE");
/** Delete Family Count in FileInfo */
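
Making BLOOM_FILTER_TYPE_KEY public lets code outside the regionserver
package, such as the test updated below, look up the bloom filter type
that StoreFile records in each HFile's file-info map. A minimal sketch of
that lookup; readBloomType is a hypothetical helper, not part of this
patch, and files written with BloomType.NONE carry no file-info entry at
all:

  import java.io.IOException;
  import java.util.Map;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
  import org.apache.hadoop.hbase.io.hfile.HFile;
  import org.apache.hadoop.hbase.regionserver.StoreFile;
  import org.apache.hadoop.hbase.util.Bytes;

  // Returns the bloom filter type recorded in the given HFile,
  // or "NONE" when the file was written without a bloom filter.
  static String readBloomType(FileSystem fs, Path hfile, Configuration conf)
      throws IOException {
    HFile.Reader reader = HFile.createReader(fs, hfile, new CacheConfig(conf));
    try {
      Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
      byte[] bloomType = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
      return bloomType == null ? "NONE" : Bytes.toString(bloomType);
    } finally {
      reader.close();
    }
  }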
Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java?rev=1466412&r1=1466411&r2=1466412&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java Wed Apr 10 10:08:44 2013
@@ -74,6 +74,7 @@ import org.apache.hadoop.hbase.io.compre
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.ChecksumUtil;
+import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.mapreduce.MapreduceTestingShim;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.RegionStates;
@@ -84,6 +85,7 @@ import org.apache.hadoop.hbase.regionser
import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.InternalScanner;
import org.apache.hadoop.hbase.regionserver.MultiVersionConsistencyControl;
+import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
@@ -2661,4 +2663,56 @@ public class HBaseTestingUtility extends
     };
   }
 
+  /**
+   * Create a set of column descriptors with the combination of compression,
+   * encoding, and bloom filter types available.
+   * @return the list of column descriptors
+   */
+  public static List<HColumnDescriptor> generateColumnDescriptors() {
+    return generateColumnDescriptors("");
+  }
+
+  /**
+   * Create a set of column descriptors with the combination of compression,
+   * encoding, and bloom filter types available.
+   * @param prefix prefix for the family names
+   * @return the list of column descriptors
+   */
+  public static List<HColumnDescriptor> generateColumnDescriptors(final String prefix) {
+    List<HColumnDescriptor> htds = new ArrayList<HColumnDescriptor>();
+    long familyId = 0;
+    for (Compression.Algorithm compressionType: getSupportedCompressionAlgorithms()) {
+      for (DataBlockEncoding encodingType: DataBlockEncoding.values()) {
+        for (BloomType bloomType: BloomType.values()) {
+          String name = String.format("%s-cf-!@#&-%d!@#", prefix, familyId);
+          HColumnDescriptor htd = new HColumnDescriptor(name);
+          htd.setCompressionType(compressionType);
+          htd.setDataBlockEncoding(encodingType);
+          htd.setBloomFilterType(bloomType);
+          htds.add(htd);
+          familyId++;
+        }
+      }
+    }
+    return htds;
+  }
+
+  /**
+   * Get the compression algorithms actually available in this environment.
+   * @return the supported compression algorithms.
+   */
+  public static Compression.Algorithm[] getSupportedCompressionAlgorithms() {
+    String[] allAlgos = HFile.getSupportedCompressionAlgorithms();
+    List<Compression.Algorithm> supportedAlgos = new ArrayList<Compression.Algorithm>();
+    for (String algoName : allAlgos) {
+      try {
+        Compression.Algorithm algo = Compression.getCompressionAlgorithmByName(algoName);
+        algo.getCompressor();
+        supportedAlgos.add(algo);
+      } catch (Throwable t) {
+        // this codec is not available in this environment; skip it
+      }
+    }
+    return supportedAlgos.toArray(new Compression.Algorithm[0]);
+  }
 }
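
The intended use of generateColumnDescriptors() is to build one table that
covers every (compression, encoding, bloom filter) combination, which is
exactly what the rewritten test below does. A minimal sketch, with a
hypothetical table name:

  HTableDescriptor htd = new HTableDescriptor("testColumnFamilySettings");
  for (HColumnDescriptor hcd : HBaseTestingUtility.generateColumnDescriptors()) {
    htd.addFamily(hcd);  // one family per combination, e.g. GZ + FAST_DIFF + ROWCOL
  }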
Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat.java?rev=1466412&r1=1466411&r2=1466412&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat.java Wed Apr 10 10:08:44 2013
@@ -31,6 +31,8 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
+import java.util.Set;
+import java.util.TreeSet;
import java.util.Random;
import java.util.concurrent.Callable;
@@ -65,6 +67,8 @@ import org.apache.hadoop.hbase.io.hfile.
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
import org.apache.hadoop.hbase.regionserver.HStore;
+import org.apache.hadoop.hbase.regionserver.BloomType;
+import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
@@ -542,30 +546,25 @@ public class TestHFileOutputFormat {
return familyToCompression;
}
+
/**
- * Test that {@link HFileOutputFormat} RecordWriter uses compression settings
- * from the column family descriptor
+ * Test that {@link HFileOutputFormat} RecordWriter uses compression and
+ * bloom filter settings from the column family descriptor
*/
@Test
- public void testColumnFamilyCompression() throws Exception {
+ public void testColumnFamilySettings() throws Exception {
Configuration conf = new Configuration(this.util.getConfiguration());
RecordWriter<ImmutableBytesWritable, KeyValue> writer = null;
TaskAttemptContext context = null;
- Path dir =
- util.getDataTestDirOnTestFS("testColumnFamilyCompression");
+ Path dir = util.getDataTestDir("testColumnFamilySettings");
+ // Setup table descriptor
HTable table = Mockito.mock(HTable.class);
-
- Map<String, Compression.Algorithm> configuredCompression =
- new HashMap<String, Compression.Algorithm>();
- Compression.Algorithm[] supportedAlgos = getSupportedCompressionAlgorithms();
-
- int familyIndex = 0;
- for (byte[] family : FAMILIES) {
- configuredCompression.put(Bytes.toString(family),
- supportedAlgos[familyIndex++ % supportedAlgos.length]);
+ HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
+ Mockito.doReturn(htd).when(table).getTableDescriptor();
+ for (HColumnDescriptor hcd: this.util.generateColumnDescriptors()) {
+ htd.addFamily(hcd);
}
- setupMockColumnFamilies(table, configuredCompression);
// set up the table to return some mock keys
setupMockStartKeys(table);
@@ -576,7 +575,7 @@ public class TestHFileOutputFormat {
// pollutes the GZip codec pool with an incompatible compressor.
conf.set("io.seqfile.compression.type", "NONE");
Job job = new Job(conf, "testLocalMRIncrementalLoad");
- job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilyCompression"));
+ job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
setupRandomGeneratorMapper(job);
HFileOutputFormat.configureIncrementalLoad(job, table);
FileOutputFormat.setOutputPath(job, dir);
@@ -585,75 +584,45 @@ public class TestHFileOutputFormat {
writer = hof.getRecordWriter(context);
// write out random rows
- writeRandomKeyValues(writer, context, ROWSPERSPLIT);
+ writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
writer.close(context);
// Make sure that a directory was created for every CF
- FileSystem fileSystem = dir.getFileSystem(conf);
+ FileSystem fs = dir.getFileSystem(conf);
// commit so that the filesystem has one directory per column family
hof.getOutputCommitter(context).commitTask(context);
hof.getOutputCommitter(context).commitJob(context);
- for (byte[] family : FAMILIES) {
- String familyStr = new String(family);
- boolean found = false;
- for (FileStatus f : fileSystem.listStatus(dir)) {
-
- if (Bytes.toString(family).equals(f.getPath().getName())) {
- // we found a matching directory
- found = true;
-
- // verify that the compression on this file matches the configured
- // compression
- Path dataFilePath = fileSystem.listStatus(f.getPath())[0].getPath();
- Reader reader = HFile.createReader(fileSystem, dataFilePath,
- new CacheConfig(conf));
- reader.loadFileInfo();
- assertEquals("Incorrect compression used for column family " + familyStr
- + "(reader: " + reader + ")",
- configuredCompression.get(familyStr), reader.getCompressionAlgorithm());
- break;
- }
- }
-
- if (!found) {
- fail("HFile for column family " + familyStr + " not found");
- }
+ FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
+ assertEquals(htd.getFamilies().size(), families.length);
+ for (FileStatus f : families) {
+ String familyStr = f.getPath().getName();
+ HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
+ // verify that the compression on this file matches the configured
+ // compression
+ Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
+ Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf));
+ Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
+
+ byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
+ if (bloomFilter == null) bloomFilter = Bytes.toBytes("NONE");
+ assertEquals("Incorrect bloom filter used for column family " + familyStr +
+ "(reader: " + reader + ")",
+ hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter)));
+ assertEquals("Incorrect compression used for column family " + familyStr +
+ "(reader: " + reader + ")", hcd.getCompression(), reader.getCompressionAlgorithm());
}
-
} finally {
dir.getFileSystem(conf).delete(dir, true);
}
}
-
- /**
- * @return
- */
- private Compression.Algorithm[] getSupportedCompressionAlgorithms() {
- String[] allAlgos = HFile.getSupportedCompressionAlgorithms();
- List<Compression.Algorithm> supportedAlgos = Lists.newArrayList();
-
- for (String algoName : allAlgos) {
- try {
- Compression.Algorithm algo = Compression.getCompressionAlgorithmByName(algoName);
- algo.getCompressor();
- supportedAlgos.add(algo);
- } catch (Throwable t) {
- // this algo is not available
- }
- }
-
- return supportedAlgos.toArray(new Compression.Algorithm[0]);
- }
-
-
/**
* Write random values to the writer assuming a table created using
* {@link #FAMILIES} as column family descriptors
*/
- private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, KeyValue> writer, TaskAttemptContext context,
- int numRows)
+ private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, KeyValue> writer,
+ TaskAttemptContext context, Set<byte[]> families, int numRows)
throws IOException, InterruptedException {
byte keyBytes[] = new byte[Bytes.SIZEOF_INT];
int valLength = 10;
@@ -669,7 +638,7 @@ public class TestHFileOutputFormat {
random.nextBytes(valBytes);
ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
- for (byte[] family : TestHFileOutputFormat.FAMILIES) {
+ for (byte[] family : families) {
KeyValue kv = new KeyValue(keyBytes, family,
PerformanceEvaluation.QUALIFIER_NAME, valBytes);
writer.write(key, kv);
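
Pieced together from the hunks above, the reworked generator reads roughly
as follows. Only the signature, the value generation, and the per-family
inner loop appear verbatim in the diff; the row-key derivation
(Bytes.putInt from the row index) is not visible and is an assumption:

  private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, KeyValue> writer,
      TaskAttemptContext context, Set<byte[]> families, int numRows)
      throws IOException, InterruptedException {
    byte keyBytes[] = new byte[Bytes.SIZEOF_INT];
    int valLength = 10;
    byte valBytes[] = new byte[valLength];
    Random random = new Random();
    for (int i = 0; i < numRows; i++) {
      Bytes.putInt(keyBytes, 0, i);  // assumed: row key is the row index
      random.nextBytes(valBytes);
      ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
      // one KeyValue per column family, so every family directory gets data
      for (byte[] family : families) {
        KeyValue kv = new KeyValue(keyBytes, family,
            PerformanceEvaluation.QUALIFIER_NAME, valBytes);
        writer.write(key, kv);
      }
    }
  }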