Posted to commits@hbase.apache.org by la...@apache.org on 2013/03/22 21:02:52 UTC

svn commit: r1459961 - in /hbase/branches/0.94/src: main/java/org/apache/hadoop/hbase/io/hfile/ main/java/org/apache/hadoop/hbase/regionserver/ test/java/org/apache/hadoop/hbase/regionserver/

Author: larsh
Date: Fri Mar 22 20:02:51 2013
New Revision: 1459961

URL: http://svn.apache.org/r1459961
Log:
HBASE-8166 Avoid writing the memstoreTS into HFiles when possible

Modified:
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV1.java
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/Compactor.java
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
    hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/regionserver/TestCacheOnWriteInSchema.java
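
The gist of HBASE-8166: each KeyValue in an HFile can carry a memstoreTS (the
MVCC readpoint at which it was written). Once every KV in a set of files is
older than the smallest read point still in use by any scanner, that per-KV
number can no longer affect visibility, so a compaction may drop it and save
space. A minimal sketch of the visibility test, using the identifiers
introduced in the diffs below (values are hypothetical):

    // Hypothetical values for illustration only.
    long smallestReadPoint = 42;  // oldest MVCC read point any open scanner may use
    long maxMVCCReadpoint  = 17;  // largest memstoreTS found in the input files
    // 17 < 42: every KV predates all active readers, so the per-KV readpoint
    // cannot change what any of them sees and may be omitted from the output.
    boolean includeMVCCReadpoint = maxMVCCReadpoint >= smallestReadPoint;  // false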

Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java?rev=1459961&r1=1459960&r2=1459961&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java Fri Mar 22 20:02:51 2013
@@ -323,6 +323,7 @@ public class HFile {
     protected KeyComparator comparator;
     protected ChecksumType checksumType = HFile.DEFAULT_CHECKSUM_TYPE;
     protected int bytesPerChecksum = DEFAULT_BYTES_PER_CHECKSUM;
+    protected boolean includeMVCCReadpoint = true;
 
     WriterFactory(Configuration conf, CacheConfig cacheConf) {
       this.conf = conf;
@@ -383,6 +384,11 @@ public class HFile {
       return this;
     }
 
+    public WriterFactory includeMVCCReadpoint(boolean includeMVCCReadpoint) {
+      this.includeMVCCReadpoint = includeMVCCReadpoint;
+      return this;
+    }
+
     public Writer create() throws IOException {
       if ((path != null ? 1 : 0) + (ostream != null ? 1 : 0) != 1) {
         throw new AssertionError("Please specify exactly one of " +
@@ -391,8 +397,8 @@ public class HFile {
       if (path != null) {
         ostream = AbstractHFileWriter.createOutputStream(conf, fs, path);
       }
-      return createWriter(fs, path, ostream, blockSize,
-          compression, encoder, comparator, checksumType, bytesPerChecksum);
+      return createWriter(fs, path, ostream, blockSize, compression, encoder, comparator,
+          checksumType, bytesPerChecksum, includeMVCCReadpoint);
     }
 
     protected abstract Writer createWriter(FileSystem fs, Path path,
@@ -400,7 +406,7 @@ public class HFile {
         Compression.Algorithm compress,
         HFileDataBlockEncoder dataBlockEncoder,
         KeyComparator comparator, ChecksumType checksumType,
-        int bytesPerChecksum) throws IOException;
+        int bytesPerChecksum, boolean includeMVCCReadpoint) throws IOException;
   }
 
   /** The configuration key for HFile version to use for new files */
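
The new setter follows the factory's existing fluent style. A hedged usage
sketch (the with* calls are the existing 0.94 builder methods; the exact
configuration shown is illustrative):

    HFile.Writer w = HFile.getWriterFactory(conf, cacheConf)
        .withPath(fs, path)
        .withBlockSize(blockSize)
        .withCompression(compression)
        .includeMVCCReadpoint(false)  // omit the per-KV memstoreTS
        .create();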

Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV1.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV1.java?rev=1459961&r1=1459960&r2=1459961&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV1.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV1.java Fri Mar 22 20:02:51 2013
@@ -92,7 +92,7 @@ public class HFileWriterV1 extends Abstr
         FSDataOutputStream ostream, int blockSize,
         Algorithm compressAlgo, HFileDataBlockEncoder dataBlockEncoder,
         KeyComparator comparator, final ChecksumType checksumType,
-        final int bytesPerChecksum) throws IOException {
+        final int bytesPerChecksum, boolean includeMVCCReadpoint) throws IOException {
       // version 1 does not implement checksums
       return new HFileWriterV1(conf, cacheConf, fs, path, ostream, blockSize,
           compressAlgo, dataBlockEncoder, comparator);

Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java?rev=1459961&r1=1459960&r2=1459961&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java Fri Mar 22 20:02:51 2013
@@ -86,7 +86,7 @@ public class HFileWriterV2 extends Abstr
   private ChecksumType checksumType = HFile.DEFAULT_CHECKSUM_TYPE;
   private int bytesPerChecksum = HFile.DEFAULT_BYTES_PER_CHECKSUM;
 
-  private final boolean includeMemstoreTS = true;
+  private final boolean includeMemstoreTS;
   private long maxMemstoreTS = 0;
 
   private int minorVersion = HFileReaderV2.MAX_MINOR_VERSION;
@@ -101,9 +101,9 @@ public class HFileWriterV2 extends Abstr
         FSDataOutputStream ostream, int blockSize,
         Compression.Algorithm compress, HFileDataBlockEncoder blockEncoder,
         final KeyComparator comparator, final ChecksumType checksumType,
-        final int bytesPerChecksum) throws IOException {
-      return new HFileWriterV2(conf, cacheConf, fs, path, ostream, blockSize,
-          compress, blockEncoder, comparator, checksumType, bytesPerChecksum);
+        final int bytesPerChecksum, boolean includeMVCCReadpoint) throws IOException {
+      return new HFileWriterV2(conf, cacheConf, fs, path, ostream, blockSize, compress,
+          blockEncoder, comparator, checksumType, bytesPerChecksum, includeMVCCReadpoint);
     }
   }
 
@@ -112,13 +112,14 @@ public class HFileWriterV2 extends Abstr
       FileSystem fs, Path path, FSDataOutputStream ostream, int blockSize,
       Compression.Algorithm compressAlgo, HFileDataBlockEncoder blockEncoder,
       final KeyComparator comparator, final ChecksumType checksumType,
-      final int bytesPerChecksum) throws IOException {
+      final int bytesPerChecksum, boolean includeMVCCReadpoint) throws IOException {
     super(cacheConf,
         ostream == null ? createOutputStream(conf, fs, path) : ostream,
         path, blockSize, compressAlgo, blockEncoder, comparator);
     SchemaMetrics.configureGlobally(conf);
     this.checksumType = checksumType;
     this.bytesPerChecksum = bytesPerChecksum;
+    this.includeMemstoreTS = includeMVCCReadpoint;
     if (!conf.getBoolean(HConstants.HBASE_CHECKSUM_VERIFICATION, false)) {
       this.minorVersion = 0;
     }
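
For context, includeMemstoreTS controls whether HFileWriterV2 appends a
vint-encoded memstoreTS after each KV it writes. Paraphrased from the 0.94
append path (illustrative, not part of this diff):

    // Per-KV record layout in an HFile v2 data block (paraphrased):
    out.writeInt(klength);               // key length
    out.writeInt(vlength);               // value length
    out.write(key, koffset, klength);    // key bytes
    out.write(value, voffset, vlength);  // value bytes
    if (this.includeMemstoreTS) {
      WritableUtils.writeVLong(out, memstoreTS);  // the bytes this change saves
    }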

Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/Compactor.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/Compactor.java?rev=1459961&r1=1459960&r2=1459961&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/Compactor.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/Compactor.java Fri Mar 22 20:02:51 2013
@@ -22,6 +22,7 @@ import java.io.InterruptedIOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
+import java.util.Map;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -32,6 +33,7 @@ import org.apache.hadoop.hbase.HConstant
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.io.hfile.Compression;
+import org.apache.hadoop.hbase.io.hfile.HFileWriterV2;
 import org.apache.hadoop.hbase.regionserver.compactions.CompactionProgress;
 import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequest;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -84,6 +86,7 @@ class Compactor extends Configured {
     // Also calculate earliest put timestamp if major compaction
     int maxKeyCount = 0;
     long earliestPutTs = HConstants.LATEST_TIMESTAMP;
+    long maxMVCCReadpoint = 0;
 
     // pull out the interesting things from the CR for ease later
     final Store store = request.getStore();
@@ -102,11 +105,17 @@ class Compactor extends Configured {
           .getBloomFilterType()) ?
           r.getFilterEntries() : r.getEntries();
       maxKeyCount += keyCount;
+      // Calculate the maximum MVCC readpoint used in any of the involved files
+      Map<byte[], byte[]> fileInfo = r.loadFileInfo();
+      byte[] tmp = fileInfo.get(HFileWriterV2.MAX_MEMSTORE_TS_KEY);
+      if (tmp != null) {
+        maxMVCCReadpoint = Math.max(maxMVCCReadpoint, Bytes.toLong(tmp));
+      }
       // For major compactions calculate the earliest put timestamp
       // of all involved storefiles. This is used to remove 
       // family delete marker during the compaction.
       if (majorCompaction) {
-        byte[] tmp = r.loadFileInfo().get(StoreFile.EARLIEST_PUT_TS);
+        tmp = fileInfo.get(StoreFile.EARLIEST_PUT_TS);
         if (tmp == null) {
           // There's a file with no information, must be an old one
           // assume we have very old puts
@@ -183,7 +192,8 @@ class Compactor extends Configured {
         do {
           hasMore = scanner.next(kvs, compactionKVMax);
           if (writer == null && !kvs.isEmpty()) {
-            writer = store.createWriterInTmp(maxKeyCount, compactionCompression, true);
+            writer = store.createWriterInTmp(maxKeyCount, compactionCompression, true,
+                maxMVCCReadpoint >= smallestReadPoint);
           }
           if (writer != null) {
             // output to writer:
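
Two details worth noting in this hunk. First, smallestReadPoint is obtained
earlier in the same method; paraphrased from 0.94:

    // The oldest MVCC read point any ongoing scanner may still be using.
    long smallestReadPoint = store.getHRegion().getSmallestReadPoint();

Second, a file whose file info lacks MAX_MEMSTORE_TS_KEY contributes nothing
to maxMVCCReadpoint, so if every input either omitted its readpoints or
predates them, maxMVCCReadpoint stays 0 and the compacted output omits them
as well.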

Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java?rev=1459961&r1=1459960&r2=1459961&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java Fri Mar 22 20:02:51 2013
@@ -901,7 +901,7 @@ public class Store extends SchemaConfigu
    */
   private StoreFile.Writer createWriterInTmp(int maxKeyCount)
   throws IOException {
-    return createWriterInTmp(maxKeyCount, this.family.getCompression(), false);
+    return createWriterInTmp(maxKeyCount, this.family.getCompression(), false, true);
   }
 
   /*
@@ -911,7 +911,7 @@ public class Store extends SchemaConfigu
    * @return Writer for a new StoreFile in the tmp dir.
    */
   public StoreFile.Writer createWriterInTmp(int maxKeyCount,
-    Compression.Algorithm compression, boolean isCompaction)
+    Compression.Algorithm compression, boolean isCompaction, boolean includeMVCCReadpoint)
   throws IOException {
     final CacheConfig writerCacheConf;
     if (isCompaction) {
@@ -931,6 +931,7 @@ public class Store extends SchemaConfigu
             .withChecksumType(checksumType)
             .withBytesPerChecksum(bytesPerChecksum)
             .withCompression(compression)
+            .includeMVCCReadpoint(includeMVCCReadpoint)
             .build();
     // The store file writer's path does not include the CF name, so we need
     // to configure the HFile writer directly.
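
Note the asymmetry: the private one-argument overload above (used when
flushing the memstore) always passes true, since freshly written KVs may
carry memstoreTS values at or above the smallest read point; only a
compaction can prove the readpoints obsolete. A hedged caller-side sketch:

    // Illustrative only; mirrors the flush and compaction call sites.
    StoreFile.Writer flushWriter = store.createWriterInTmp(
        maxKeys, store.getFamily().getCompression(), false, true);
    StoreFile.Writer compactionWriter = store.createWriterInTmp(
        maxKeys, compactionCompression, true,
        maxMVCCReadpoint >= smallestReadPoint);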

Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java?rev=1459961&r1=1459960&r2=1459961&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java Fri Mar 22 20:02:51 2013
@@ -43,7 +43,6 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HDFSBlocksDistribution;
-import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.KeyValue.KVComparator;
 import org.apache.hadoop.hbase.client.Scan;
@@ -741,6 +740,7 @@ public class StoreFile extends SchemaCon
     private Path filePath;
     private ChecksumType checksumType = HFile.DEFAULT_CHECKSUM_TYPE;
     private int bytesPerChecksum = HFile.DEFAULT_BYTES_PER_CHECKSUM;
+    private boolean includeMVCCReadpoint = true;
 
     public WriterBuilder(Configuration conf, CacheConfig cacheConf,
         FileSystem fs, int blockSize) {
@@ -826,6 +826,15 @@ public class StoreFile extends SchemaCon
     }
 
     /**
+     * @param includeMVCCReadpoint whether to write the mvcc readpoint to the file for each KV
+     * @return this (for chained invocation)
+     */
+    public WriterBuilder includeMVCCReadpoint(boolean includeMVCCReadpoint) {
+      this.includeMVCCReadpoint = includeMVCCReadpoint;
+      return this;
+    }
+
+    /**
      * Create a store file writer. Client is responsible for closing file when
      * done. If metadata, add BEFORE closing using
      * {@link Writer#appendMetadata}.
@@ -859,7 +868,7 @@ public class StoreFile extends SchemaCon
       }
       return new Writer(fs, filePath, blockSize, compressAlgo, dataBlockEncoder,
           conf, cacheConf, comparator, bloomType, maxKeyCount, checksumType,
-          bytesPerChecksum);
+          bytesPerChecksum, includeMVCCReadpoint);
     }
   }
 
@@ -998,6 +1007,7 @@ public class StoreFile extends SchemaCon
      *        for Bloom filter size in {@link HFile} format version 1.
      * @param checksumType the checksum type
      * @param bytesPerChecksum the number of bytes per checksum value
+     * @param includeMVCCReadpoint whether to write the mvcc readpoint to the file for each KV
      * @throws IOException problem writing to FS
      */
     private Writer(FileSystem fs, Path path, int blocksize,
@@ -1005,7 +1015,7 @@ public class StoreFile extends SchemaCon
         HFileDataBlockEncoder dataBlockEncoder, final Configuration conf,
         CacheConfig cacheConf,
         final KVComparator comparator, BloomType bloomType, long maxKeys,
-        final ChecksumType checksumType, final int bytesPerChecksum)
+        final ChecksumType checksumType, final int bytesPerChecksum, boolean includeMVCCReadpoint)
         throws IOException {
       this.dataBlockEncoder = dataBlockEncoder != null ?
           dataBlockEncoder : NoOpDataBlockEncoder.INSTANCE;
@@ -1017,6 +1027,7 @@ public class StoreFile extends SchemaCon
           .withComparator(comparator.getRawComparator())
           .withChecksumType(checksumType)
           .withBytesPerChecksum(bytesPerChecksum)
+          .includeMVCCReadpoint(includeMVCCReadpoint)
           .create();
 
       this.kvComparator = comparator;
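
The builder's default of true preserves prior behavior for callers that never
touch the new setter. A hedged sketch (withFilePath and build are existing
0.94 WriterBuilder methods; the values are illustrative):

    StoreFile.Writer w = new StoreFile.WriterBuilder(conf, cacheConf, fs, blockSize)
        .withFilePath(path)
        .build();  // includeMVCCReadpoint never set, so it defaults to true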

Modified: hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/regionserver/TestCacheOnWriteInSchema.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/regionserver/TestCacheOnWriteInSchema.java?rev=1459961&r1=1459960&r2=1459961&view=diff
==============================================================================
--- hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/regionserver/TestCacheOnWriteInSchema.java (original)
+++ hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/regionserver/TestCacheOnWriteInSchema.java Fri Mar 22 20:02:51 2013
@@ -181,7 +181,7 @@ public class TestCacheOnWriteInSchema {
   public void testCacheOnWriteInSchema() throws IOException {
     // Write some random data into the store
     StoreFile.Writer writer = store.createWriterInTmp(Integer.MAX_VALUE,
-        Compression.Algorithm.NONE, false);
+        Compression.Algorithm.NONE, false, true);
     writeStoreFile(writer);
     writer.close();
     // Verify the block types of interest were cached on write