Posted to commits@hbase.apache.org by st...@apache.org on 2015/04/04 00:26:40 UTC

[4/4] hbase git commit: Revert "HBASE-Squash HFileReaderV3 together with HFileReaderV2 and AbstractHFileReader; ditto for Scanners and BlockReader, etc." Revert because missing JIRA number

Revert "HBASE-Squash HFileReaderV3 together with HFileReaderV2 and AbstractHFileReader; ditto for Scanners and BlockReader, etc."
Revert because missing JIRA number

This reverts commit 691efc60f705de50055bf5c44911128648535110.


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/da619282
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/da619282
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/da619282

Branch: refs/heads/branch-1
Commit: da619282469c65dcf6bee06783c4246a24a1517c
Parents: 691efc6
Author: stack <st...@apache.org>
Authored: Fri Apr 3 15:26:13 2015 -0700
Committer: stack <st...@apache.org>
Committed: Fri Apr 3 15:26:13 2015 -0700

----------------------------------------------------------------------
 .../IntegrationTestIngestWithEncryption.java    |    8 +-
 .../hbase/io/hfile/AbstractHFileReader.java     |  352 ++++
 .../hbase/io/hfile/AbstractHFileWriter.java     |  266 +++
 .../hadoop/hbase/io/hfile/FixedFileTrailer.java |    6 +-
 .../org/apache/hadoop/hbase/io/hfile/HFile.java |   42 +-
 .../hadoop/hbase/io/hfile/HFileBlock.java       |  110 +-
 .../hadoop/hbase/io/hfile/HFileBlockIndex.java  |   12 +-
 .../hadoop/hbase/io/hfile/HFileReaderImpl.java  | 1688 ------------------
 .../hadoop/hbase/io/hfile/HFileReaderV2.java    | 1323 ++++++++++++++
 .../hadoop/hbase/io/hfile/HFileReaderV3.java    |  358 ++++
 .../hbase/io/hfile/HFileWriterFactory.java      |   40 -
 .../hadoop/hbase/io/hfile/HFileWriterImpl.java  |  641 -------
 .../hadoop/hbase/io/hfile/HFileWriterV2.java    |  424 +++++
 .../hadoop/hbase/io/hfile/HFileWriterV3.java    |  136 ++
 .../hbase/mapreduce/HFileOutputFormat2.java     |   13 +-
 .../hbase/regionserver/HRegionServer.java       |    2 -
 .../hadoop/hbase/regionserver/Region.java       |   20 +-
 .../hadoop/hbase/regionserver/StoreFile.java    |    3 +-
 .../regionserver/compactions/Compactor.java     |    4 +-
 .../hadoop/hbase/util/CompressionTest.java      |    4 +-
 .../hbase/HFilePerformanceEvaluation.java       |    4 +-
 .../hadoop/hbase/io/hfile/TestCacheOnWrite.java |   18 +-
 .../hbase/io/hfile/TestFixedFileTrailer.java    |    8 +-
 .../io/hfile/TestForceCacheImportantBlocks.java |    2 +
 .../apache/hadoop/hbase/io/hfile/TestHFile.java |    4 +-
 .../hbase/io/hfile/TestHFileBlockIndex.java     |    2 +-
 .../TestHFileInlineToRootChunkConversion.java   |    7 +-
 .../hadoop/hbase/io/hfile/TestHFileSeek.java    |    2 +-
 .../hbase/io/hfile/TestHFileWriterV2.java       |    9 +-
 .../hbase/io/hfile/TestHFileWriterV3.java       |    9 +-
 .../hfile/TestLazyDataBlockDecompression.java   |    5 +-
 .../hadoop/hbase/io/hfile/TestPrefetch.java     |    6 +-
 .../hadoop/hbase/io/hfile/TestReseekTo.java     |    2 +-
 .../hadoop/hbase/io/hfile/TestSeekTo.java       |   15 +-
 .../regionserver/DataBlockEncodingTool.java     |    7 +-
 .../regionserver/TestCacheOnWriteInSchema.java  |    3 +-
 36 files changed, 3027 insertions(+), 2528 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/da619282/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestWithEncryption.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestWithEncryption.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestWithEncryption.java
index 6464aad..dbaf5b8 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestWithEncryption.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestWithEncryption.java
@@ -24,9 +24,9 @@ import org.apache.hadoop.hbase.Waiter.Predicate;
 import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.io.crypto.KeyProviderForTesting;
 import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.io.hfile.HFileReaderV3;
+import org.apache.hadoop.hbase.io.hfile.HFileWriterV3;
 import org.apache.hadoop.hbase.testclassification.IntegrationTests;
-import org.apache.hadoop.hbase.io.hfile.HFileReaderImpl;
-import org.apache.hadoop.hbase.io.hfile.HFileWriterImpl;
 import org.apache.hadoop.hbase.wal.WAL.Reader;
 import org.apache.hadoop.hbase.wal.WALProvider.Writer;
 import org.apache.hadoop.hbase.regionserver.wal.SecureProtobufLogReader;
@@ -46,8 +46,8 @@ public class IntegrationTestIngestWithEncryption extends IntegrationTestIngest {
   static {
     // These log level changes are only useful when running on a localhost
     // cluster.
-    Logger.getLogger(HFileReaderImpl.class).setLevel(Level.TRACE);
-    Logger.getLogger(HFileWriterImpl.class).setLevel(Level.TRACE);
+    Logger.getLogger(HFileReaderV3.class).setLevel(Level.TRACE);
+    Logger.getLogger(HFileWriterV3.class).setLevel(Level.TRACE);
     Logger.getLogger(SecureProtobufLogReader.class).setLevel(Level.TRACE);
     Logger.getLogger(SecureProtobufLogWriter.class).setLevel(Level.TRACE);
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/da619282/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileReader.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileReader.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileReader.java
new file mode 100644
index 0000000..8c1e7b9
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileReader.java
@@ -0,0 +1,352 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValue.KVComparator;
+import org.apache.hadoop.hbase.fs.HFileSystem;
+import org.apache.hadoop.hbase.io.compress.Compression;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
+import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
+
+/**
+ * Common functionality needed by all versions of {@link HFile} readers.
+ */
+@InterfaceAudience.Private
+@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD")
+public abstract class AbstractHFileReader
+    implements HFile.Reader, Configurable {
+  /** Stream to read from. Does checksum verifications in file system */
+  protected FSDataInputStream istream; // UUF_UNUSED_PUBLIC_OR_PROTECTED_FIELD
+
+  /** The file system stream of the underlying {@link HFile} that
+   * does not do checksum verification in the file system */
+  protected FSDataInputStream istreamNoFsChecksum;  // UUF_UNUSED_PUBLIC_OR_PROTECTED_FIELD
+
+  /** Data block index reader keeping the root data index in memory */
+  protected HFileBlockIndex.BlockIndexReader dataBlockIndexReader;
+
+  /** Meta block index reader -- always single level */
+  protected HFileBlockIndex.BlockIndexReader metaBlockIndexReader;
+
+  protected final FixedFileTrailer trailer;
+
+  /** Filled when we read in the trailer. */
+  protected final Compression.Algorithm compressAlgo;
+
+  /**
+   * What kind of data block encoding should be used while reading, writing,
+   * and handling cache.
+   */
+  protected HFileDataBlockEncoder dataBlockEncoder =
+      NoOpDataBlockEncoder.INSTANCE;
+
+  /** Last key in the file. Filled in when we read in the file info */
+  protected byte [] lastKey = null;
+
+  /** Average key length read from file info */
+  protected int avgKeyLen = -1;
+
+  /** Average value length read from file info */
+  protected int avgValueLen = -1;
+
+  /** Key comparator */
+  protected KVComparator comparator = new KVComparator();
+
+  /** Size of this file. */
+  protected final long fileSize;
+
+  /** Block cache configuration. */
+  protected final CacheConfig cacheConf;
+
+  /** Path of file */
+  protected final Path path;
+
+  /** File name to be used for block names */
+  protected final String name;
+
+  protected FileInfo fileInfo;
+
+  /** The filesystem used for accessing data */
+  protected HFileSystem hfs;
+
+  protected Configuration conf;
+
+  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD")
+  protected AbstractHFileReader(Path path, FixedFileTrailer trailer,
+      final long fileSize, final CacheConfig cacheConf, final HFileSystem hfs,
+      final Configuration conf) {
+    this.trailer = trailer;
+    this.compressAlgo = trailer.getCompressionCodec();
+    this.cacheConf = cacheConf;
+    this.fileSize = fileSize;
+    this.path = path;
+    this.name = path.getName();
+    this.hfs = hfs; // URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD
+    this.conf = conf;
+  }
+
+  @SuppressWarnings("serial")
+  public static class BlockIndexNotLoadedException
+      extends IllegalStateException {
+    public BlockIndexNotLoadedException() {
+      // Add a message in case anyone relies on it as opposed to class name.
+      super("Block index not loaded");
+    }
+  }
+
+  protected String toStringFirstKey() {
+    return KeyValue.keyToString(getFirstKey());
+  }
+
+  protected String toStringLastKey() {
+    return KeyValue.keyToString(getLastKey());
+  }
+
+  public abstract boolean isFileInfoLoaded();
+
+  @Override
+  public String toString() {
+    return "reader=" + path.toString() +
+        (!isFileInfoLoaded()? "":
+          ", compression=" + compressAlgo.getName() +
+          ", cacheConf=" + cacheConf +
+          ", firstKey=" + toStringFirstKey() +
+          ", lastKey=" + toStringLastKey()) +
+          ", avgKeyLen=" + avgKeyLen +
+          ", avgValueLen=" + avgValueLen +
+          ", entries=" + trailer.getEntryCount() +
+          ", length=" + fileSize;
+  }
+
+  @Override
+  public long length() {
+    return fileSize;
+  }
+
+  /**
+   * Create a Scanner on this file. No seeks or reads are done on creation. Call
+   * {@link HFileScanner#seekTo(byte[])} to position and start the read. There is
+   * nothing to clean up in a Scanner. Letting go of your references to the
+   * scanner is sufficient. NOTE: Do not use this overload of getScanner for
+   * compactions.
+   *
+   * @param cacheBlocks True if we should cache blocks read in by this scanner.
+   * @param pread Use positional read rather than seek+read if true (pread is
+   *          better for random reads, seek+read is better for scanning).
+   * @return Scanner on this file.
+   */
+  @Override
+  public HFileScanner getScanner(boolean cacheBlocks, final boolean pread) {
+    return getScanner(cacheBlocks, pread, false);
+  }
+
+  /**
+   * @return the first key in the file. May be null if file has no entries. Note
+   *         that this is not the first row key, but rather the byte form of the
+   *         first KeyValue.
+   */
+  @Override
+  public byte [] getFirstKey() {
+    if (dataBlockIndexReader == null) {
+      throw new BlockIndexNotLoadedException();
+    }
+    return dataBlockIndexReader.isEmpty() ? null
+        : dataBlockIndexReader.getRootBlockKey(0);
+  }
+
+  /**
+   * TODO left from {@link HFile} version 1: move this to StoreFile after Ryan's
+   * patch goes in to eliminate {@link KeyValue} here.
+   *
+   * @return the first row key, or null if the file is empty.
+   */
+  @Override
+  public byte[] getFirstRowKey() {
+    byte[] firstKey = getFirstKey();
+    if (firstKey == null)
+      return null;
+    return KeyValue.createKeyValueFromKey(firstKey).getRow();
+  }
+
+  /**
+   * TODO left from {@link HFile} version 1: move this to StoreFile after
+   * Ryan's patch goes in to eliminate {@link KeyValue} here.
+   *
+   * @return the last row key, or null if the file is empty.
+   */
+  @Override
+  public byte[] getLastRowKey() {
+    byte[] lastKey = getLastKey();
+    if (lastKey == null)
+      return null;
+    return KeyValue.createKeyValueFromKey(lastKey).getRow();
+  }
+
+  /** @return number of KV entries in this HFile */
+  @Override
+  public long getEntries() {
+    return trailer.getEntryCount();
+  }
+
+  /** @return comparator */
+  @Override
+  public KVComparator getComparator() {
+    return comparator;
+  }
+
+  /** @return compression algorithm */
+  @Override
+  public Compression.Algorithm getCompressionAlgorithm() {
+    return compressAlgo;
+  }
+
+  /**
+   * @return the total heap size of data and meta block indexes in bytes. Does
+   *         not take into account non-root blocks of a multilevel data index.
+   */
+  public long indexSize() {
+    return (dataBlockIndexReader != null ? dataBlockIndexReader.heapSize() : 0)
+        + ((metaBlockIndexReader != null) ? metaBlockIndexReader.heapSize()
+            : 0);
+  }
+
+  @Override
+  public String getName() {
+    return name;
+  }
+
+  @Override
+  public HFileBlockIndex.BlockIndexReader getDataBlockIndexReader() {
+    return dataBlockIndexReader;
+  }
+
+  @Override
+  public FixedFileTrailer getTrailer() {
+    return trailer;
+  }
+
+  @Override
+  public FileInfo loadFileInfo() throws IOException {
+    return fileInfo;
+  }
+
+  /**
+   * An exception thrown when an operation requiring a scanner to be seeked
+   * is invoked on a scanner that is not seeked.
+   */
+  @SuppressWarnings("serial")
+  public static class NotSeekedException extends IllegalStateException {
+    public NotSeekedException() {
+      super("Not seeked to a key/value");
+    }
+  }
+
+  protected static abstract class Scanner implements HFileScanner {
+    protected ByteBuffer blockBuffer;
+
+    protected boolean cacheBlocks;
+    protected final boolean pread;
+    protected final boolean isCompaction;
+
+    protected int currKeyLen;
+    protected int currValueLen;
+    protected int currMemstoreTSLen;
+    protected long currMemstoreTS;
+
+    protected int blockFetches;
+
+    protected final HFile.Reader reader;
+
+    public Scanner(final HFile.Reader reader, final boolean cacheBlocks,
+        final boolean pread, final boolean isCompaction) {
+      this.reader = reader;
+      this.cacheBlocks = cacheBlocks;
+      this.pread = pread;
+      this.isCompaction = isCompaction;
+    }
+
+    @Override
+    public boolean isSeeked(){
+      return blockBuffer != null;
+    }
+
+    @Override
+    public String toString() {
+      return "HFileScanner for reader " + String.valueOf(getReader());
+    }
+
+    protected void assertSeeked() {
+      if (!isSeeked())
+        throw new NotSeekedException();
+    }
+
+    @Override
+    public int seekTo(byte[] key) throws IOException {
+      return seekTo(key, 0, key.length);
+    }
+    
+    @Override
+    public boolean seekBefore(byte[] key) throws IOException {
+      return seekBefore(key, 0, key.length);
+    }
+    
+    @Override
+    public int reseekTo(byte[] key) throws IOException {
+      return reseekTo(key, 0, key.length);
+    }
+
+    @Override
+    public HFile.Reader getReader() {
+      return reader;
+    }
+  }
+
+  /** For testing */
+  abstract HFileBlock.FSReader getUncachedBlockReader();
+
+  public Path getPath() {
+    return path;
+  }
+
+  @Override
+  public DataBlockEncoding getDataBlockEncoding() {
+    return dataBlockEncoder.getDataBlockEncoding();
+  }
+
+  public abstract int getMajorVersion();
+
+  @Override
+  public Configuration getConf() {
+    return conf;
+  }
+
+  @Override
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+  }
+}
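
For context only, not part of this commit: the scanner contract documented on getScanner() above (create, then seek, nothing to clean up beyond dropping references) is the usual read path against these readers. Below is a minimal sketch under that contract; the path is made up, and the calls used are the ones declared in this class, in HFile, and in HFileScanner.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.io.hfile.CacheConfig;
    import org.apache.hadoop.hbase.io.hfile.HFile;
    import org.apache.hadoop.hbase.io.hfile.HFileScanner;

    public class HFileReadSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/tmp/example.hfile");        // hypothetical path
        CacheConfig cacheConf = new CacheConfig(conf);

        HFile.Reader reader = HFile.createReader(fs, path, cacheConf, conf);
        reader.loadFileInfo();                             // fills in last key, avg lengths, etc.
        // Non-compaction scanner: cache blocks read in, use pread (better for random reads).
        HFileScanner scanner = reader.getScanner(true, true);
        if (scanner.seekTo()) {                            // position at the first key/value
          do {
            Cell cell = scanner.getKeyValue();
            System.out.println(cell);
          } while (scanner.next());
        }
        reader.close();
      }
    }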

http://git-wip-us.apache.org/repos/asf/hbase/blob/da619282/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileWriter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileWriter.java
new file mode 100644
index 0000000..52491e6
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileWriter.java
@@ -0,0 +1,266 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValue.KVComparator;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.io.compress.Compression;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
+import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * Common functionality needed by all versions of {@link HFile} writers.
+ */
+@InterfaceAudience.Private
+public abstract class AbstractHFileWriter implements HFile.Writer {
+
+  /** The Cell previously appended. Becomes the last cell in the file.*/
+  protected Cell lastCell = null;
+
+  /** FileSystem stream to write into. */
+  protected FSDataOutputStream outputStream;
+
+  /** True if we opened the <code>outputStream</code> (and so will close it). */
+  protected final boolean closeOutputStream;
+
+  /** A "file info" block: a key-value map of file-wide metadata. */
+  protected FileInfo fileInfo = new HFile.FileInfo();
+
+  /** Total # of key/value entries, i.e. how many times add() was called. */
+  protected long entryCount = 0;
+
+  /** Used for calculating the average key length. */
+  protected long totalKeyLength = 0;
+
+  /** Used for calculating the average value length. */
+  protected long totalValueLength = 0;
+
+  /** Total uncompressed bytes, maybe calculate a compression ratio later. */
+  protected long totalUncompressedBytes = 0;
+
+  /** Key comparator. Used to ensure we write in order. */
+  protected final KVComparator comparator;
+
+  /** Meta block names. */
+  protected List<byte[]> metaNames = new ArrayList<byte[]>();
+
+  /** {@link Writable}s representing meta block data. */
+  protected List<Writable> metaData = new ArrayList<Writable>();
+
+  /**
+   * First cell in a block.
+   * This reference should be short-lived since we write hfiles in a burst.
+   */
+  protected Cell firstCellInBlock = null;
+
+  /** May be null if we were passed a stream. */
+  protected final Path path;
+
+
+  /** Cache configuration for caching data on write. */
+  protected final CacheConfig cacheConf;
+
+  /**
+   * Name for this object used when logging or in toString. Is either
+   * the result of a toString on stream or else name of passed file Path.
+   */
+  protected final String name;
+
+  /**
+   * The data block encoding which will be used.
+   * {@link NoOpDataBlockEncoder#INSTANCE} if there is no encoding.
+   */
+  protected final HFileDataBlockEncoder blockEncoder;
+  
+  protected final HFileContext hFileContext;
+
+  public AbstractHFileWriter(CacheConfig cacheConf,
+      FSDataOutputStream outputStream, Path path, 
+      KVComparator comparator, HFileContext fileContext) {
+    this.outputStream = outputStream;
+    this.path = path;
+    this.name = path != null ? path.getName() : outputStream.toString();
+    this.hFileContext = fileContext;
+    DataBlockEncoding encoding = hFileContext.getDataBlockEncoding();
+    if (encoding != DataBlockEncoding.NONE) {
+      this.blockEncoder = new HFileDataBlockEncoderImpl(encoding);
+    } else {
+      this.blockEncoder = NoOpDataBlockEncoder.INSTANCE;
+    }
+    this.comparator = comparator != null ? comparator
+        : KeyValue.COMPARATOR;
+
+    closeOutputStream = path != null;
+    this.cacheConf = cacheConf;
+  }
+
+  /**
+   * Add last bits of metadata to file info before it is written out.
+   */
+  protected void finishFileInfo() throws IOException {
+    if (lastCell != null) {
+      // Make a copy. The copy is stuffed into our fileinfo map. Needs a clean
+      // byte buffer. Won't take a tuple.
+      byte [] lastKey = CellUtil.getCellKeySerializedAsKeyValueKey(this.lastCell);
+      fileInfo.append(FileInfo.LASTKEY, lastKey, false);
+    }
+
+    // Average key length.
+    int avgKeyLen =
+        entryCount == 0 ? 0 : (int) (totalKeyLength / entryCount);
+    fileInfo.append(FileInfo.AVG_KEY_LEN, Bytes.toBytes(avgKeyLen), false);
+
+    // Average value length.
+    int avgValueLen =
+        entryCount == 0 ? 0 : (int) (totalValueLength / entryCount);
+    fileInfo.append(FileInfo.AVG_VALUE_LEN, Bytes.toBytes(avgValueLen), false);
+
+    fileInfo.append(FileInfo.CREATE_TIME_TS, Bytes.toBytes(hFileContext.getFileCreateTime()),
+      false);
+  }
+
+  /**
+   * Add to the file info. All added key/value pairs can be obtained using
+   * {@link HFile.Reader#loadFileInfo()}.
+   *
+   * @param k Key
+   * @param v Value
+   * @throws IOException in case the key or the value are invalid
+   */
+  @Override
+  public void appendFileInfo(final byte[] k, final byte[] v)
+      throws IOException {
+    fileInfo.append(k, v, true);
+  }
+
+  /**
+   * Sets the file info offset in the trailer, finishes up populating fields in
+   * the file info, and writes the file info into the given data output. The
+   * reason the data output is not always {@link #outputStream} is that we store
+   * file info as a block in version 2.
+   *
+   * @param trailer fixed file trailer
+   * @param out the data output to write the file info to
+   * @throws IOException
+   */
+  protected final void writeFileInfo(FixedFileTrailer trailer, DataOutputStream out)
+  throws IOException {
+    trailer.setFileInfoOffset(outputStream.getPos());
+    finishFileInfo();
+    fileInfo.write(out);
+  }
+
+  /**
+   * Checks that the given Cell's key does not violate the key order.
+   *
+   * @param cell Cell whose key to check.
+   * @return true if the key is duplicate
+   * @throws IOException if the key or the key order is wrong
+   */
+  protected boolean checkKey(final Cell cell) throws IOException {
+    boolean isDuplicateKey = false;
+
+    if (cell == null) {
+      throw new IOException("Key cannot be null or empty");
+    }
+    if (lastCell != null) {
+      int keyComp = comparator.compareOnlyKeyPortion(lastCell, cell);
+
+      if (keyComp > 0) {
+        throw new IOException("Added a key not lexically larger than"
+            + " previous. Current cell = " + cell + ", lastCell = " + lastCell);
+      } else if (keyComp == 0) {
+        isDuplicateKey = true;
+      }
+    }
+    return isDuplicateKey;
+  }
+
+  /** Checks the given value for validity. */
+  protected void checkValue(final byte[] value, final int offset,
+      final int length) throws IOException {
+    if (value == null) {
+      throw new IOException("Value cannot be null");
+    }
+  }
+
+  /**
+   * @return Path or null if we were passed a stream rather than a Path.
+   */
+  @Override
+  public Path getPath() {
+    return path;
+  }
+
+  @Override
+  public String toString() {
+    return "writer=" + (path != null ? path.toString() : null) + ", name="
+        + name + ", compression=" + hFileContext.getCompression().getName();
+  }
+
+  /**
+   * Sets remaining trailer fields, writes the trailer to disk, and optionally
+   * closes the output stream.
+   */
+  protected void finishClose(FixedFileTrailer trailer) throws IOException {
+    trailer.setMetaIndexCount(metaNames.size());
+    trailer.setTotalUncompressedBytes(totalUncompressedBytes+ trailer.getTrailerSize());
+    trailer.setEntryCount(entryCount);
+    trailer.setCompressionCodec(hFileContext.getCompression());
+
+    trailer.serialize(outputStream);
+
+    if (closeOutputStream) {
+      outputStream.close();
+      outputStream = null;
+    }
+  }
+
+  public static Compression.Algorithm compressionByName(String algoName) {
+    if (algoName == null)
+      return HFile.DEFAULT_COMPRESSION_ALGORITHM;
+    return Compression.getCompressionAlgorithmByName(algoName);
+  }
+
+  /** A helper method to create HFile output streams in constructors */
+  protected static FSDataOutputStream createOutputStream(Configuration conf,
+      FileSystem fs, Path path, InetSocketAddress[] favoredNodes) throws IOException {
+    FsPermission perms = FSUtils.getFilePermissions(fs, conf,
+        HConstants.DATA_FILE_UMASK_KEY);
+    return FSUtils.create(fs, path, perms, favoredNodes);
+  }
+}
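
Again for context, not part of the diff: writers are obtained through HFile.getWriterFactory() and, per checkKey() above, expect cells to be appended in non-decreasing key order; appendFileInfo() metadata must be added before close(). A minimal sketch under those assumptions (the path, keys and metadata name are made up):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.KeyValue;
    import org.apache.hadoop.hbase.io.hfile.CacheConfig;
    import org.apache.hadoop.hbase.io.hfile.HFile;
    import org.apache.hadoop.hbase.io.hfile.HFileContext;
    import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
    import org.apache.hadoop.hbase.util.Bytes;

    public class HFileWriteSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/tmp/example.hfile");        // hypothetical path
        HFileContext context = new HFileContextBuilder().withBlockSize(64 * 1024).build();

        HFile.Writer writer = HFile.getWriterFactory(conf, new CacheConfig(conf))
            .withPath(fs, path)
            .withFileContext(context)
            .create();
        try {
          // checkKey() enforces that appends arrive in non-decreasing key order.
          writer.append(new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("f"),
              Bytes.toBytes("q"), Bytes.toBytes("v1")));
          writer.append(new KeyValue(Bytes.toBytes("row2"), Bytes.toBytes("f"),
              Bytes.toBytes("q"), Bytes.toBytes("v2")));
          // File-wide metadata; readable later through HFile.Reader#loadFileInfo().
          writer.appendFileInfo(Bytes.toBytes("EXAMPLE_META"), Bytes.toBytes("value"));
        } finally {
          writer.close();   // writes file info, block indexes and the fixed trailer
        }
      }
    }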

http://git-wip-us.apache.org/repos/asf/hbase/blob/da619282/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java
index 3dcfc9b..56510f0 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java
@@ -238,7 +238,7 @@ public class FixedFileTrailer {
     BlockType.TRAILER.readAndCheck(inputStream);
 
     if (majorVersion > 2
-        || (majorVersion == 2 && minorVersion >= HFileReaderImpl.PBUF_TRAILER_MINOR_VERSION)) {
+        || (majorVersion == 2 && minorVersion >= HFileReaderV2.PBUF_TRAILER_MINOR_VERSION)) {
       deserializeFromPB(inputStream);
     } else {
       deserializeFromWritable(inputStream);
@@ -611,9 +611,7 @@ public class FixedFileTrailer {
   }
 
   public byte[] getEncryptionKey() {
-    // This is a v3 feature but if reading a v2 file the encryptionKey will just be null which
-    // if fine for this feature.
-    expectAtLeastMajorVersion(2);
+    expectAtLeastMajorVersion(3);
     return encryptionKey;
   }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/da619282/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
index f7dff43..7e658e7 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
@@ -66,7 +66,6 @@ import org.apache.hadoop.hbase.util.ChecksumType;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.io.Writable;
 
-import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 
 /**
@@ -197,8 +196,6 @@ public class HFile {
 
   /** API required to write an {@link HFile} */
   public interface Writer extends Closeable {
-    /** Max memstore (mvcc) timestamp in FileInfo */
-    public static final byte [] MAX_MEMSTORE_TS_KEY = Bytes.toBytes("MAX_MEMSTORE_TS_KEY");
 
     /** Add an element to the file info map. */
     void appendFileInfo(byte[] key, byte[] value) throws IOException;
@@ -296,7 +293,7 @@ public class HFile {
             "filesystem/path or path");
       }
       if (path != null) {
-        ostream = HFileWriterImpl.createOutputStream(conf, fs, path, favoredNodes);
+        ostream = AbstractHFileWriter.createOutputStream(conf, fs, path, favoredNodes);
       }
       return createWriter(fs, path, ostream,
                    comparator, fileContext);
@@ -335,12 +332,9 @@ public class HFile {
     int version = getFormatVersion(conf);
     switch (version) {
     case 2:
-      throw new IllegalArgumentException("This should never happen. " +
-        "Did you change hfile.format.version to read v2? This version of the software writes v3" +
-        " hfiles only (but it can read v2 files without having to update hfile.format.version " +
-        "in hbase-site.xml)");
+      return new HFileWriterV2.WriterFactoryV2(conf, cacheConf);
     case 3:
-      return new HFileWriterFactory(conf, cacheConf);
+      return new HFileWriterV3.WriterFactoryV3(conf, cacheConf);
     default:
       throw new IllegalArgumentException("Cannot create writer for HFile " +
           "format version " + version);
@@ -445,18 +439,6 @@ public class HFile {
      * Return the file context of the HFile this reader belongs to
      */
     HFileContext getFileContext();
-
-    boolean shouldIncludeMemstoreTS();
-
-    boolean isDecodeMemstoreTS();
-
-    DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction);
-
-    @VisibleForTesting
-    HFileBlock.FSReader getUncachedBlockReader();
-
-    @VisibleForTesting
-    boolean prefetchComplete();
   }
 
   /**
@@ -480,10 +462,9 @@ public class HFile {
       trailer = FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size);
       switch (trailer.getMajorVersion()) {
       case 2:
-        LOG.debug("Opening HFile v2 with v3 reader");
-        // Fall through.
+        return new HFileReaderV2(path, trailer, fsdis, size, cacheConf, hfs, conf);
       case 3 :
-        return new HFileReaderImpl(path, trailer, fsdis, size, cacheConf, hfs, conf);
+        return new HFileReaderV3(path, trailer, fsdis, size, cacheConf, hfs, conf);
       default:
         throw new IllegalArgumentException("Invalid HFile version " + trailer.getMajorVersion());
       }
@@ -507,7 +488,6 @@ public class HFile {
    * @return A version specific Hfile Reader
    * @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException
    */
-  @SuppressWarnings("resource")
   public static Reader createReader(FileSystem fs, Path path,
       FSDataInputStreamWrapper fsdis, long size, CacheConfig cacheConf, Configuration conf)
       throws IOException {
@@ -873,18 +853,6 @@ public class HFile {
     }
   }
 
-
-  public static void checkHFileVersion(final Configuration c) {
-    int version = c.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION);
-    if (version < MAX_FORMAT_VERSION || version > MAX_FORMAT_VERSION) {
-      throw new IllegalArgumentException("The setting for " + FORMAT_VERSION_KEY +
-        " (in your hbase-*.xml files) is " + version + " which does not match " +
-        MAX_FORMAT_VERSION +
-        "; are you running with a configuration from an older or newer hbase install (an " +
-        "incompatible hbase-default.xml or hbase-site.xml on your CLASSPATH)?");
-    }
-  }
-
   public static void main(String[] args) throws Exception {
     // delegate to preserve old behavior
     HFilePrettyPrinter.main(args);
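
One practical effect of the HFile.java changes above: hfile.format.version=2 is again an accepted write setting, dispatching to the v2 writer factory instead of throwing (the reverted patch wrote v3 only). A small sketch of that configuration switch, for illustration only:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.io.hfile.CacheConfig;
    import org.apache.hadoop.hbase.io.hfile.HFile;

    public class WriterVersionSketch {
      public static void main(String[] args) {
        Configuration conf = HBaseConfiguration.create();
        // "hfile.format.version" = 2 selects HFileWriterV2.WriterFactoryV2 after this revert;
        // 3 selects HFileWriterV3.WriterFactoryV3, matching the restored switch above.
        conf.setInt(HFile.FORMAT_VERSION_KEY, 2);
        HFile.WriterFactory factory = HFile.getWriterFactory(conf, new CacheConfig(conf));
        System.out.println(factory.getClass().getName());
      }
    }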

http://git-wip-us.apache.org/repos/asf/hbase/blob/da619282/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
index 022e579..b8303b8 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
@@ -1257,40 +1257,13 @@ public class HFileBlock implements Cacheable {
 
     /** Get the default decoder for blocks from this file. */
     HFileBlockDecodingContext getDefaultBlockDecodingContext();
-
-    void setIncludesMemstoreTS(boolean includesMemstoreTS);
-    void setDataBlockEncoder(HFileDataBlockEncoder encoder);
   }
 
   /**
-   * We always prefetch the header of the next block, so that we know its
-   * on-disk size in advance and can read it in one operation.
+   * A common implementation of some methods of {@link FSReader} and some
+   * tools for implementing HFile format version-specific block readers.
    */
-  private static class PrefetchedHeader {
-    long offset = -1;
-    byte[] header = new byte[HConstants.HFILEBLOCK_HEADER_SIZE];
-    final ByteBuffer buf = ByteBuffer.wrap(header, 0, HConstants.HFILEBLOCK_HEADER_SIZE);
-  }
-
-  /** Reads version 2 blocks from the filesystem. */
-  static class FSReaderImpl implements FSReader {
-    /** The file system stream of the underlying {@link HFile} that
-     * does or doesn't do checksum validations in the filesystem */
-    protected FSDataInputStreamWrapper streamWrapper;
-
-    private HFileBlockDecodingContext encodedBlockDecodingCtx;
-
-    /** Default context used when BlockType != {@link BlockType#ENCODED_DATA}. */
-    private final HFileBlockDefaultDecodingContext defaultDecodingCtx;
-
-    private ThreadLocal<PrefetchedHeader> prefetchedHeaderForThread =
-        new ThreadLocal<PrefetchedHeader>() {
-      @Override
-      public PrefetchedHeader initialValue() {
-        return new PrefetchedHeader();
-      }
-    };
-
+  private abstract static class AbstractFSReader implements FSReader {
     /** Compression algorithm used by the {@link HFile} */
 
     /** The size of the file we are reading from, or -1 if unknown. */
@@ -1312,31 +1285,18 @@ public class HFileBlock implements Cacheable {
 
     protected HFileContext fileContext;
 
-    public FSReaderImpl(FSDataInputStreamWrapper stream, long fileSize, HFileSystem hfs, Path path,
-        HFileContext fileContext) throws IOException {
+    public AbstractFSReader(long fileSize, HFileSystem hfs, Path path, HFileContext fileContext)
+        throws IOException {
       this.fileSize = fileSize;
       this.hfs = hfs;
       this.path = path;
       this.fileContext = fileContext;
       this.hdrSize = headerSize(fileContext.isUseHBaseChecksum());
-
-      this.streamWrapper = stream;
-      // Older versions of HBase didn't support checksum.
-      this.streamWrapper.prepareForBlockReader(!fileContext.isUseHBaseChecksum());
-      defaultDecodingCtx = new HFileBlockDefaultDecodingContext(fileContext);
-      encodedBlockDecodingCtx = defaultDecodingCtx;
     }
 
-    /**
-     * A constructor that reads files with the latest minor version.
-     * This is used by unit tests only.
-     */
-    FSReaderImpl(FSDataInputStream istream, long fileSize, HFileContext fileContext)
-    throws IOException {
-      this(new FSDataInputStreamWrapper(istream), fileSize, null, null, fileContext);
-    }
-
-    public BlockIterator blockRange(final long startOffset, final long endOffset) {
+    @Override
+    public BlockIterator blockRange(final long startOffset,
+        final long endOffset) {
       final FSReader owner = this; // handle for inner class
       return new BlockIterator() {
         private long offset = startOffset;
@@ -1433,6 +1393,56 @@ public class HFileBlock implements Cacheable {
       return Bytes.toInt(dest, destOffset + size + BlockType.MAGIC_LENGTH) + hdrSize;
     }
 
+  }
+
+  /**
+   * We always prefetch the header of the next block, so that we know its
+   * on-disk size in advance and can read it in one operation.
+   */
+  private static class PrefetchedHeader {
+    long offset = -1;
+    byte[] header = new byte[HConstants.HFILEBLOCK_HEADER_SIZE];
+    final ByteBuffer buf = ByteBuffer.wrap(header, 0, HConstants.HFILEBLOCK_HEADER_SIZE);
+  }
+
+  /** Reads version 2 blocks from the filesystem. */
+  static class FSReaderImpl extends AbstractFSReader {
+    /** The file system stream of the underlying {@link HFile} that
+     * does or doesn't do checksum validations in the filesystem */
+    protected FSDataInputStreamWrapper streamWrapper;
+
+    private HFileBlockDecodingContext encodedBlockDecodingCtx;
+
+    /** Default context used when BlockType != {@link BlockType#ENCODED_DATA}. */
+    private final HFileBlockDefaultDecodingContext defaultDecodingCtx;
+
+    private ThreadLocal<PrefetchedHeader> prefetchedHeaderForThread =
+        new ThreadLocal<PrefetchedHeader>() {
+          @Override
+          public PrefetchedHeader initialValue() {
+            return new PrefetchedHeader();
+          }
+        };
+
+    public FSReaderImpl(FSDataInputStreamWrapper stream, long fileSize, HFileSystem hfs, Path path,
+        HFileContext fileContext) throws IOException {
+      super(fileSize, hfs, path, fileContext);
+      this.streamWrapper = stream;
+      // Older versions of HBase didn't support checksum.
+      this.streamWrapper.prepareForBlockReader(!fileContext.isUseHBaseChecksum());
+      defaultDecodingCtx = new HFileBlockDefaultDecodingContext(fileContext);
+      encodedBlockDecodingCtx = defaultDecodingCtx;
+    }
+
+    /**
+     * A constructor that reads files with the latest minor version.
+     * This is used by unit tests only.
+     */
+    FSReaderImpl(FSDataInputStream istream, long fileSize, HFileContext fileContext)
+    throws IOException {
+      this(new FSDataInputStreamWrapper(istream), fileSize, null, null, fileContext);
+    }
+
     /**
      * Reads a version 2 block (version 1 blocks not supported and not expected). Tries to do as
      * little memory allocation as possible, using the provided on-disk size.
@@ -1673,11 +1683,11 @@ public class HFileBlock implements Cacheable {
       return b;
     }
 
-    public void setIncludesMemstoreTS(boolean includesMemstoreTS) {
+    void setIncludesMemstoreTS(boolean includesMemstoreTS) {
       this.fileContext.setIncludesMvcc(includesMemstoreTS);
     }
 
-    public void setDataBlockEncoder(HFileDataBlockEncoder encoder) {
+    void setDataBlockEncoder(HFileDataBlockEncoder encoder) {
       encodedBlockDecodingCtx = encoder.newDataBlockDecodingContext(this.fileContext);
     }
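
The PrefetchedHeader comment above describes why the reader pulls in the next block's header alongside each block: the following read then already knows its on-disk size and can be issued as a single I/O. Below is a stripped-down, hypothetical illustration of that per-thread caching pattern (not HBase code; the 33-byte size is assumed here to mirror HConstants.HFILEBLOCK_HEADER_SIZE with HBase checksums enabled):

    import java.nio.ByteBuffer;

    /** Toy sketch of the prefetched-header idea: while reading block N we also read
     *  block N+1's fixed-size header, so the next read already knows how many bytes
     *  to ask for and needs only one seek+read (or pread). */
    public class PrefetchedHeaderSketch {
      private static final int HEADER_SIZE = 33;   // assumed fixed per-block header size

      private static class Prefetched {
        long offset = -1;                          // file offset the cached header belongs to
        final byte[] header = new byte[HEADER_SIZE];
      }

      private final ThreadLocal<Prefetched> prefetched = new ThreadLocal<Prefetched>() {
        @Override
        protected Prefetched initialValue() {
          return new Prefetched();
        }
      };

      /** Stash the trailing header bytes of a buffer holding block N plus N+1's header. */
      public void remember(long nextBlockOffset, ByteBuffer blockPlusNextHeader) {
        Prefetched p = prefetched.get();
        p.offset = nextBlockOffset;
        ByteBuffer dup = blockPlusNextHeader.duplicate();
        dup.position(dup.limit() - HEADER_SIZE);
        dup.get(p.header, 0, HEADER_SIZE);
      }

      /** Return the cached header for the block at {@code offset}, or null if not prefetched. */
      public byte[] headerFor(long offset) {
        Prefetched p = prefetched.get();
        return p.offset == offset ? p.header.clone() : null;
      }
    }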
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/da619282/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
index 9132d2f..d656cae 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
@@ -54,9 +54,9 @@ import org.apache.hadoop.util.StringUtils;
  * ({@link BlockIndexReader}) single-level and multi-level block indexes.
  *
  * Examples of how to use the block index writer can be found in
- * {@link org.apache.hadoop.hbase.util.CompoundBloomFilterWriter} and
- *  {@link HFileWriterImpl}. Examples of how to use the reader can be
- *  found in {@link HFileWriterImpl} and TestHFileBlockIndex.
+ * {@link org.apache.hadoop.hbase.util.CompoundBloomFilterWriter} 
+ * and {@link HFileWriterV2}. Examples of how to use the reader can be 
+ * found in {@link HFileReaderV2} and TestHFileBlockIndex.
  */
 @InterfaceAudience.Private
 public class HFileBlockIndex {
@@ -193,7 +193,7 @@ public class HFileBlockIndex {
      * Return the BlockWithScanInfo which contains the DataBlock with other scan
      * info such as nextIndexedKey. This function will only be called when the
      * HFile version is larger than 1.
-     *
+     * 
      * @param key
      *          the key we are looking for
      * @param currentBlock
@@ -494,7 +494,7 @@ public class HFileBlockIndex {
      * Performs a binary search over a non-root level index block. Utilizes the
      * secondary index, which records the offsets of (offset, onDiskSize,
      * firstKey) tuples of all entries.
-     *
+     * 
      * @param key
      *          the key we are searching for offsets to individual entries in
      *          the blockIndex buffer
@@ -641,7 +641,7 @@ public class HFileBlockIndex {
         }
       }
     }
-
+    
     /**
      * Read in the root-level index from the given input stream. Must match
      * what was written into the root level by