You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2009/02/25 06:59:27 UTC

svn commit: r747672 [4/4] - in /hadoop/hbase/trunk: conf/ src/java/org/apache/hadoop/hbase/io/ src/java/org/apache/hadoop/hbase/io/hfile/ src/java/org/apache/hadoop/hbase/regionserver/ src/test/org/apache/hadoop/hbase/ src/test/org/apache/hadoop/hbase/...

Added: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/TestHFileSeek.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/TestHFileSeek.java?rev=747672&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/TestHFileSeek.java (added)
+++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/TestHFileSeek.java Wed Feb 25 05:59:26 2009
@@ -0,0 +1,510 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Random;
+import java.util.StringTokenizer;
+
+import junit.framework.TestCase;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
+import org.apache.hadoop.hbase.io.hfile.HFile.Writer;
+import org.apache.hadoop.io.BytesWritable;
+
+/**
+ * test the performance for seek.
+ * <p>
+ * Copied from
+ * <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>.
+ * Remove after tfile is committed and use the tfile version of this class
+ * instead.</p>
+ */
+public class TestHFileSeek extends TestCase { 
+  private MyOptions options;
+  private Configuration conf;
+  private Path path;
+  private FileSystem fs;
+  private NanoTimer timer;
+  private Random rng;
+  private RandomDistribution.DiscreteRNG keyLenGen;
+  private KVGenerator kvGen;
+
+  /**
+   * Prepares the benchmark fixture: configuration, target file path, RNGs
+   * and the key/value generator.  When run via JUnit (rather than via
+   * main), no options were parsed yet, so defaults are built from an
+   * empty argv.
+   * @throws IOException if the target FileSystem cannot be resolved
+   */
+  @Override
+  public void setUp() throws IOException {
+    if (options == null) {
+      options = new MyOptions(new String[0]);
+    }
+
+    conf = new Configuration();
+    // NOTE(review): these "tfile.*" keys are kept from the TFile original
+    // this class was copied from -- confirm the hfile code actually reads
+    // them.
+    conf.setInt("tfile.fs.input.buffer.size", options.fsInputBufferSize);
+    conf.setInt("tfile.fs.output.buffer.size", options.fsOutputBufferSize);
+    path = new Path(new Path(options.rootDir), options.file);
+    fs = path.getFileSystem(conf);
+    timer = new NanoTimer(false);
+    rng = new Random(options.seed);
+    // Zipf-distributed key lengths: short keys common, long keys rare.
+    keyLenGen =
+        new RandomDistribution.Zipf(new Random(rng.nextLong()),
+            options.minKeyLen, options.maxKeyLen, 1.2);
+    // Value and word lengths are drawn uniformly from their ranges.
+    RandomDistribution.DiscreteRNG valLenGen =
+        new RandomDistribution.Flat(new Random(rng.nextLong()),
+            options.minValLength, options.maxValLength);
+    RandomDistribution.DiscreteRNG wordLenGen =
+        new RandomDistribution.Flat(new Random(rng.nextLong()),
+            options.minWordLen, options.maxWordLen);
+    kvGen =
+        new KVGenerator(rng, true, keyLenGen, valLenGen, wordLenGen,
+            options.dictSize);
+  }
+  
+  @Override
+  public void tearDown() {
+    try {
+      fs.close();
+    }
+    catch (Exception e) {
+      // Nothing
+    }
+  }
+  
+  /**
+   * Opens a fresh output stream at {@code name}, replacing any existing
+   * file first.
+   * @param name file to (re)create
+   * @param fs file system to create it on
+   * @return an open stream positioned at the start of the new file
+   * @throws IOException on file system errors
+   */
+  private static FSDataOutputStream createFSOutput(Path name, FileSystem fs)
+    throws IOException {
+    if (fs.exists(name)) {
+      fs.delete(name, true);
+    }
+    return fs.create(name);
+  }
+
+  /**
+   * Writes random key/values to {@link #path} until the file reaches the
+   * configured target size (size is polled every 1000 appends), then
+   * reports raw and on-disk write throughput.
+   * @throws IOException on write failure
+   */
+  private void createTFile() throws IOException {
+    long totalBytes = 0;
+    FSDataOutputStream fout = createFSOutput(path, fs);
+    try {
+      Writer writer =
+          new Writer(fout, options.minBlockSize, options.compress, null);
+      try {
+        BytesWritable key = new BytesWritable();
+        BytesWritable val = new BytesWritable();
+        timer.start();
+        for (long i = 0; true; ++i) {
+          if (i % 1000 == 0) { // test the size for every 1000 rows.
+            if (fs.getFileStatus(path).getLen() >= options.fileSize) {
+              break;
+            }
+          }
+          kvGen.next(key, val, false);
+          byte [] k = new byte [key.getLength()];
+          System.arraycopy(key.getBytes(), 0, k, 0, key.getLength());
+          byte [] v = new byte [val.getLength()];
+          // Bug fix: copy val.getLength() bytes, not key.getLength() -- the
+          // original left the tail of v stale/zeroed (or threw
+          // IndexOutOfBoundsException) whenever the two lengths differed.
+          System.arraycopy(val.getBytes(), 0, v, 0, val.getLength());
+          writer.append(k, v);
+          totalBytes += key.getLength();
+          totalBytes += val.getLength();
+        }
+        timer.stop();
+      }
+      finally {
+        writer.close();
+      }
+    }
+    finally {
+      fout.close();
+    }
+    double duration = (double)timer.read()/1000; // in us.
+    long fsize = fs.getFileStatus(path).getLen();
+
+    System.out.printf(
+        "time: %s...uncompressed: %.2fMB...raw thrpt: %.2fMB/s\n",
+        timer.toString(), (double) totalBytes / 1024 / 1024, totalBytes
+            / duration);
+    System.out.printf("time: %s...file size: %.2fMB...disk thrpt: %.2fMB/s\n",
+        timer.toString(), (double) fsize / 1024 / 1024, fsize / duration);
+  }
+  
+  /**
+   * Performs {@code options.seekCount} random seeks against the file
+   * written by {@link #createTFile()} and reports hit/miss counts, average
+   * seek latency and average I/O size.
+   * @throws IOException on read failure
+   */
+  public void seekTFile() throws IOException {
+    int miss = 0;
+    long totalBytes = 0;
+    FSDataInputStream fsdis = fs.open(path);
+    Reader reader =
+      new Reader(fsdis, fs.getFileStatus(path).getLen(), null);
+    reader.loadFileInfo();
+    System.out.println(reader);
+    // Sample keys between the file's first and last key so most seeks hit.
+    KeySampler kSampler =
+        new KeySampler(rng, reader.getFirstKey(), reader.getLastKey(),
+            keyLenGen);
+    HFileScanner scanner = reader.getScanner();
+    BytesWritable key = new BytesWritable();
+    // NOTE(review): val is never populated or read below -- leftover from
+    // the TFile original.
+    BytesWritable val = new BytesWritable();
+    timer.reset();
+    timer.start();
+    for (int i = 0; i < options.seekCount; ++i) {
+      kSampler.next(key);
+      byte [] k = new byte [key.getLength()];
+      System.arraycopy(key.getBytes(), 0, k, 0, key.getLength());
+      // seekTo() >= 0 means the scanner landed on or before a real entry.
+      if (scanner.seekTo(k) >= 0) {
+        ByteBuffer bbkey = scanner.getKey();
+        ByteBuffer bbval = scanner.getValue();
+        totalBytes += bbkey.limit();
+        totalBytes += bbval.limit();
+      }
+      else {
+        ++miss;
+      }
+    }
+    timer.stop();
+    // NOTE(review): if every seek misses, (seekCount - miss) is zero and
+    // the avg I/O term below divides by zero -- unlikely but possible.
+    double duration = (double) timer.read() / 1000; // in us.
+    System.out.printf(
+        "time: %s...avg seek: %s...%d hit...%d miss...avg I/O size: %.2fKB\n",
+        timer.toString(), NanoTimer.nanoTimeToString(timer.read()
+            / options.seekCount), options.seekCount - miss, miss,
+        (double) totalBytes / 1024 / (options.seekCount - miss));
+
+  }
+  
+  /** JUnit entry point: create the file, seek over it, then clean up. */
+  public void testSeeks() throws IOException {
+    if (options.doCreate()) {
+      createTFile();
+    }
+    if (options.doRead()) {
+      seekTFile();
+    }
+    // Only delete what we created; a read-only run leaves the file alone.
+    if (options.doCreate()) {
+      fs.delete(path, true);
+    }
+  }
+  
+  /**
+   * An immutable "from,to" integer pair as supplied on the command line.
+   */
+  private static class IntegerRange {
+    private final int from, to;
+
+    public IntegerRange(int from, int to) {
+      this.from = from;
+      this.to = to;
+    }
+
+    /**
+     * Parses "min,max" (blank, tab and comma all accepted as separators).
+     * @throws ParseException if there are not exactly two tokens, or a
+     *           token is not a valid integer.  (The original leaked an
+     *           unchecked NumberFormatException for the latter case.)
+     */
+    public static IntegerRange parse(String s) throws ParseException {
+      StringTokenizer st = new StringTokenizer(s, " \t,");
+      if (st.countTokens() != 2) {
+        throw new ParseException("Bad integer specification: " + s);
+      }
+      try {
+        int from = Integer.parseInt(st.nextToken());
+        int to = Integer.parseInt(st.nextToken());
+        return new IntegerRange(from, to);
+      } catch (NumberFormatException e) {
+        throw new ParseException("Bad integer specification: " + s);
+      }
+    }
+
+    /** @return lower bound of the range */
+    public int from() {
+      return from;
+    }
+
+    /** @return upper bound of the range */
+    public int to() {
+      return to;
+    }
+  }
+
+  private static class MyOptions {
+    // hard coded constants
+    int dictSize = 1000;   // words in the random-text dictionary
+    int minWordLen = 5;
+    int maxWordLen = 20;
+    // NOTE(review): the os*/fs*BufferSize{None,Lzo,Gz} fields below are not
+    // referenced anywhere in this file -- they look like leftovers from the
+    // TFile original this class was copied from.
+    int osInputBufferSize = 64 * 1024;
+    int osOutputBufferSize = 64 * 1024;
+    int fsInputBufferSizeNone = 0;
+    int fsInputBufferSizeLzo = 0;
+    int fsInputBufferSizeGz = 0;
+    int fsOutputBufferSizeNone = 1;
+    int fsOutputBufferSizeLzo = 1;
+    int fsOutputBufferSizeGz = 1;
+   
+    String rootDir =
+        System.getProperty("test.build.data", "/tmp/TestTFileSeek");
+    String file = "TestTFileSeek";
+    // String compress = "lzo"; DISABLED
+    String compress = "none";
+    int minKeyLen = 10;
+    int maxKeyLen = 50;
+    int minValLength = 1024;
+    int maxValLength = 2 * 1024;
+    int minBlockSize = 1 * 1024 * 1024;
+    int fsOutputBufferSize = 1;
+    int fsInputBufferSize = 0;
+    // Default writing 10MB.
+    long fileSize = 10 * 1024 * 1024;
+    long seekCount = 1000;
+    long seed;   // defaults to System.nanoTime() in the constructor
+
+    // Bit mask of phases to run; both by default.
+    static final int OP_CREATE = 1;
+    static final int OP_READ = 2;
+    int op = OP_CREATE | OP_READ;
+
+    // Set true only after successful parse + validation.
+    boolean proceed = false;
+
+    /**
+     * Parses command-line arguments; on a parse error a hint is printed
+     * and {@link #proceed()} is left false so the caller can bail out.
+     */
+    public MyOptions(String[] args) {
+      seed = System.nanoTime();
+      try {
+        Options opts = buildOptions();
+        CommandLineParser parser = new GnuParser();
+        processOptions(parser.parse(opts, args, true), opts);
+        validateOptions();
+      } catch (ParseException e) {
+        System.out.println(e.getMessage());
+        System.out.println("Try \"--help\" option for details.");
+        setStopProceed();
+      }
+    }
+
+    /** @return true once options have parsed and validated successfully */
+    public boolean proceed() {
+      return this.proceed;
+    }
+
+    /**
+     * Declares the supported command-line options.
+     * <p>
+     * Fixes two dead spots in the original: the "seed" option was built
+     * but never registered with the returned Options (so -S was silently
+     * ignored by the parser), and no option was registered behind the -d
+     * handling in processOptions.  Also closes the unbalanced parenthesis
+     * in the -n help text.
+     */
+    private Options buildOptions() {
+      Option compress =
+          OptionBuilder.withLongOpt("compress").withArgName("[none|lzo|gz]")
+              .hasArg().withDescription("compression scheme").create('c');
+
+      Option fileSize =
+          OptionBuilder.withLongOpt("file-size").withArgName("size-in-MB")
+              .hasArg().withDescription("target size of the file (in MB).")
+              .create('s');
+
+      Option fsInputBufferSz =
+          OptionBuilder.withLongOpt("fs-input-buffer").withArgName("size")
+              .hasArg().withDescription(
+                  "size of the file system input buffer (in bytes).").create(
+                  'i');
+
+      Option fsOutputBufferSize =
+          OptionBuilder.withLongOpt("fs-output-buffer").withArgName("size")
+              .hasArg().withDescription(
+                  "size of the file system output buffer (in bytes).").create(
+                  'o');
+
+      Option keyLen =
+          OptionBuilder
+              .withLongOpt("key-length")
+              .withArgName("min,max")
+              .hasArg()
+              .withDescription(
+                  "the length range of the key (in bytes)")
+              .create('k');
+
+      Option valueLen =
+          OptionBuilder
+              .withLongOpt("value-length")
+              .withArgName("min,max")
+              .hasArg()
+              .withDescription(
+                  "the length range of the value (in bytes)")
+              .create('v');
+
+      Option blockSz =
+          OptionBuilder.withLongOpt("block").withArgName("size-in-KB").hasArg()
+              .withDescription("minimum block size (in KB)").create('b');
+
+      Option seed =
+          OptionBuilder.withLongOpt("seed").withArgName("long-int").hasArg()
+              .withDescription("specify the seed").create('S');
+
+      Option operation =
+          OptionBuilder.withLongOpt("operation").withArgName("r|w|rw").hasArg()
+              .withDescription(
+                  "action: seek-only, create-only, seek-after-create").create(
+                  'x');
+
+      Option rootDir =
+          OptionBuilder.withLongOpt("root-dir").withArgName("path").hasArg()
+              .withDescription(
+                  "specify root directory where files will be created.")
+              .create('r');
+
+      Option file =
+          OptionBuilder.withLongOpt("file").withArgName("name").hasArg()
+              .withDescription("specify the file name to be created or read.")
+              .create('f');
+
+      Option seekCount =
+          OptionBuilder
+              .withLongOpt("seek")
+              .withArgName("count")
+              .hasArg()
+              .withDescription(
+                  "specify how many seek operations we perform (requires -x r or -x rw).")
+              .create('n');
+
+      Option dictSize =
+          OptionBuilder.withLongOpt("dict").withArgName("size").hasArg()
+              .withDescription("number of words in the random-word dictionary")
+              .create('d');
+
+      Option help =
+          OptionBuilder.withLongOpt("help").hasArg(false).withDescription(
+              "show this screen").create("h");
+
+      return new Options().addOption(compress).addOption(fileSize).addOption(
+          fsInputBufferSz).addOption(fsOutputBufferSize).addOption(keyLen)
+          .addOption(blockSz).addOption(rootDir).addOption(valueLen).addOption(
+              operation).addOption(seekCount).addOption(file).addOption(seed)
+          .addOption(dictSize).addOption(help);
+    }
+
+    /**
+     * Applies parsed command-line values onto this options instance.
+     * --help prints usage and returns with {@code proceed} still false, so
+     * the caller exits without running anything.
+     * @throws ParseException on a malformed range or unknown -x action
+     */
+    private void processOptions(CommandLine line, Options opts)
+        throws ParseException {
+      // --help -h and --version -V must be processed first.
+      if (line.hasOption('h')) {
+        HelpFormatter formatter = new HelpFormatter();
+        System.out.println("TFile and SeqFile benchmark.");
+        System.out.println();
+        formatter.printHelp(100,
+            "java ... TestTFileSeqFileComparison [options]",
+            "\nSupported options:", opts, "");
+        return;
+      }
+
+      if (line.hasOption('c')) {
+        compress = line.getOptionValue('c');
+      }
+
+      if (line.hasOption('d')) {
+        dictSize = Integer.parseInt(line.getOptionValue('d'));
+      }
+
+      if (line.hasOption('s')) {
+        fileSize = Long.parseLong(line.getOptionValue('s')) * 1024 * 1024;
+      }
+
+      if (line.hasOption('i')) {
+        fsInputBufferSize = Integer.parseInt(line.getOptionValue('i'));
+      }
+
+      if (line.hasOption('o')) {
+        fsOutputBufferSize = Integer.parseInt(line.getOptionValue('o'));
+      }
+
+      if (line.hasOption('n')) {
+        // Fix: seekCount is a long; parse it as one rather than narrowing
+        // through Integer.parseInt.
+        seekCount = Long.parseLong(line.getOptionValue('n'));
+      }
+
+      if (line.hasOption('k')) {
+        IntegerRange ir = IntegerRange.parse(line.getOptionValue('k'));
+        minKeyLen = ir.from();
+        maxKeyLen = ir.to();
+      }
+
+      if (line.hasOption('v')) {
+        IntegerRange ir = IntegerRange.parse(line.getOptionValue('v'));
+        minValLength = ir.from();
+        maxValLength = ir.to();
+      }
+
+      if (line.hasOption('b')) {
+        minBlockSize = Integer.parseInt(line.getOptionValue('b')) * 1024;
+      }
+
+      if (line.hasOption('r')) {
+        rootDir = line.getOptionValue('r');
+      }
+
+      if (line.hasOption('f')) {
+        file = line.getOptionValue('f');
+      }
+
+      if (line.hasOption('S')) {
+        seed = Long.parseLong(line.getOptionValue('S'));
+      }
+
+      if (line.hasOption('x')) {
+        String strOp = line.getOptionValue('x');
+        if (strOp.equals("r")) {
+          op = OP_READ;
+        }
+        else if (strOp.equals("w")) {
+          op = OP_CREATE;
+        }
+        else if (strOp.equals("rw")) {
+          op = OP_CREATE | OP_READ;
+        }
+        else {
+          throw new ParseException("Unknown action specifier: " + strOp);
+        }
+      }
+
+      proceed = true;
+    }
+
+    /**
+     * Sanity-checks the parsed settings.
+     * @throws ParseException if a setting is out of range
+     */
+    private void validateOptions() throws ParseException {
+      boolean knownScheme =
+          compress.equals("none") || compress.equals("lzo")
+              || compress.equals("gz");
+      if (!knownScheme) {
+        throw new ParseException("Unknown compression scheme: " + compress);
+      }
+      if (minKeyLen >= maxKeyLen) {
+        throw new ParseException(
+            "Max key length must be greater than min key length.");
+      }
+      if (minValLength >= maxValLength) {
+        throw new ParseException(
+            "Max value length must be greater than min value length.");
+      }
+      if (minWordLen >= maxWordLen) {
+        throw new ParseException(
+            "Max word length must be greater than min word length.");
+      }
+    }
+
+    /** Marks the options as unusable after a parse/validation failure. */
+    private void setStopProceed() {
+      this.proceed = false;
+    }
+
+    /** @return whether the write (create) phase should run */
+    public boolean doCreate() {
+      return (op & OP_CREATE) != 0;
+    }
+
+    /** @return whether the seek (read) phase should run */
+    public boolean doRead() {
+      return (op & OP_READ) != 0;
+    }
+  }
+  
+  /**
+   * Command-line entry point: parse options, then drive the configured
+   * create/seek phases directly (outside of JUnit).
+   */
+  public static void main(String[] argv) throws IOException {
+    MyOptions options = new MyOptions(argv);
+    if (!options.proceed) {
+      return;
+    }
+    TestHFileSeek testCase = new TestHFileSeek();
+    testCase.options = options;
+    testCase.setUp();
+    testCase.testSeeks();
+    testCase.tearDown();
+  }

Added: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/TestSeekTo.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/TestSeekTo.java?rev=747672&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/TestSeekTo.java (added)
+++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/TestSeekTo.java Wed Feb 25 05:59:26 2009
@@ -0,0 +1,146 @@
+/**
+ * Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RawLocalFileSystem;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * Test {@link HFileScanner#seekTo(byte[])} and its variants.
+ */
+public class TestSeekTo extends TestCase {
+  private static String ROOT_DIR =
+    System.getProperty("test.build.data", "/tmp/TestHFile");
+
+  private HBaseConfiguration conf;
+  private LocalFileSystem fs;
+
+  /** Builds a raw (checksum-free) local file system for the tests. */
+  @Override
+  public void setUp() {
+    conf = new HBaseConfiguration();
+    RawLocalFileSystem raw = new RawLocalFileSystem();
+    raw.setConf(conf);
+    fs = new LocalFileSystem(raw);
+  }
+  /** Creates {@code name} (replacing any existing file) and returns its stream. */
+  private FSDataOutputStream createFSOutput(Path name) throws IOException {
+    if (fs.exists(name)) {
+      fs.delete(name, true);
+    }
+    return fs.create(name);
+  }
+
+  /**
+   * Writes a five-row hfile with a tiny (40 byte) block size so that the
+   * rows span more than one block.
+   * @return path to the newly written file
+   * @throws IOException on write failure
+   */
+  Path makeNewFile() throws IOException {
+    Path ncTFile = new Path(ROOT_DIR, "basic.hfile");
+    FSDataOutputStream fout = createFSOutput(ncTFile);
+    HFile.Writer writer = new HFile.Writer(fout, 40, "none", null);
+    // 4 bytes * 3 * 2 for each key/value +
+    // 3 for keys, 15 for values = 42 (woot)
+    // NOTE(review): the arithmetic above does not obviously add up --
+    // kept verbatim from the original; the intent is just that three rows
+    // overflow the 40-byte block.
+    writer.append(Bytes.toBytes("c"), Bytes.toBytes("value"));
+    writer.append(Bytes.toBytes("e"), Bytes.toBytes("value"));
+    writer.append(Bytes.toBytes("g"), Bytes.toBytes("value"));
+    // block transition
+    writer.append(Bytes.toBytes("i"), Bytes.toBytes("value"));
+    writer.append(Bytes.toBytes("k"), Bytes.toBytes("value"));
+    writer.close();
+    fout.close();
+    return ncTFile;
+  }
+  /** seekBefore must land on the last key strictly less than the target. */
+  public void testSeekBefore() throws Exception {
+    Path p = makeNewFile();
+    HFile.Reader reader = new HFile.Reader(fs, p, null);
+    reader.loadFileInfo();
+    HFileScanner scanner = reader.getScanner();
+    // Nothing sorts before the first key "c".
+    assertFalse(scanner.seekBefore(Bytes.toBytes("a")));
+    assertFalse(scanner.seekBefore(Bytes.toBytes("c")));
+
+    assertTrue(scanner.seekBefore(Bytes.toBytes("d")));
+    assertEquals("c", scanner.getKeyString());
+    assertTrue(scanner.seekBefore(Bytes.toBytes("e")));
+    assertEquals("c", scanner.getKeyString());
+    assertTrue(scanner.seekBefore(Bytes.toBytes("f")));
+    assertEquals("e", scanner.getKeyString());
+    assertTrue(scanner.seekBefore(Bytes.toBytes("g")));
+    assertEquals("e", scanner.getKeyString());
+    assertTrue(scanner.seekBefore(Bytes.toBytes("h")));
+    assertEquals("g", scanner.getKeyString());
+    assertTrue(scanner.seekBefore(Bytes.toBytes("i")));
+    assertEquals("g", scanner.getKeyString());
+    assertTrue(scanner.seekBefore(Bytes.toBytes("j")));
+    assertEquals("i", scanner.getKeyString());
+    assertTrue(scanner.seekBefore(Bytes.toBytes("k")));
+    assertEquals("i", scanner.getKeyString());
+    assertTrue(scanner.seekBefore(Bytes.toBytes("l")));
+    assertEquals("k", scanner.getKeyString());
+  }
+  
+  /**
+   * seekTo returns -1 before the first key, and 1 when it lands on the
+   * entry preceding the target -- including across a block boundary.
+   */
+  public void testSeekTo() throws Exception {
+    Path hfilePath = makeNewFile();
+    HFile.Reader reader = new HFile.Reader(fs, hfilePath, null);
+    reader.loadFileInfo();
+    assertEquals(2, reader.blockIndex.count);
+    HFileScanner scanner = reader.getScanner();
+    // "a" lies before the start of the file.
+    assertEquals(-1, scanner.seekTo(Bytes.toBytes("a")));
+
+    assertEquals(1, scanner.seekTo(Bytes.toBytes("d")));
+    assertEquals("c", scanner.getKeyString());
+
+    // Across a block boundary now.
+    assertEquals(1, scanner.seekTo(Bytes.toBytes("h")));
+    assertEquals("g", scanner.getKeyString());
+
+    assertEquals(1, scanner.seekTo(Bytes.toBytes("l")));
+    assertEquals("k", scanner.getKeyString());
+  }
+  
+  public void testBlockContainingKey() throws Exception {
+    Path p = makeNewFile();
+    HFile.Reader reader = new HFile.Reader(fs, p, null);
+    reader.loadFileInfo();
+    System.out.println(reader.blockIndex.toString());
+    // falls before the start of the file.
+    assertEquals(-1, reader.blockIndex.blockContainingKey(Bytes.toBytes("a")));
+    assertEquals(0, reader.blockIndex.blockContainingKey(Bytes.toBytes("c")));
+    assertEquals(0, reader.blockIndex.blockContainingKey(Bytes.toBytes("d")));
+    assertEquals(0, reader.blockIndex.blockContainingKey(Bytes.toBytes("e")));
+    assertEquals(0, reader.blockIndex.blockContainingKey(Bytes.toBytes("g")));
+    assertEquals(0, reader.blockIndex.blockContainingKey(Bytes.toBytes("h")));
+    assertEquals(1, reader.blockIndex.blockContainingKey(Bytes.toBytes("i")));
+    assertEquals(1, reader.blockIndex.blockContainingKey(Bytes.toBytes("j")));
+    assertEquals(1, reader.blockIndex.blockContainingKey(Bytes.toBytes("k")));
+    assertEquals(1, reader.blockIndex.blockContainingKey(Bytes.toBytes("l")));
+
+
+    
+  }
+}
\ No newline at end of file

Added: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestStoreFile.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestStoreFile.java?rev=747672&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestStoreFile.java (added)
+++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestStoreFile.java Wed Feb 25 05:59:26 2009
@@ -0,0 +1,295 @@
+/**
+ * Copyright 2007 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestCase;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HStoreKey;
+import org.apache.hadoop.hbase.io.Reference.Range;
+import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.io.hfile.HFileScanner;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+/**
+ * Test HStoreFile
+ */
+public class TestStoreFile extends HBaseTestCase {
+  static final Log LOG = LogFactory.getLog(TestStoreFile.class);
+  private MiniDFSCluster cluster;
+  
+  /**
+   * Spins up a two-datanode mini DFS cluster and points hbase.rootdir at
+   * its home directory.
+   * @throws Exception if the cluster cannot be started
+   */
+  @Override
+  public void setUp() throws Exception {
+    try {
+      this.cluster = new MiniDFSCluster(this.conf, 2, true, (String[])null);
+      // Set the hbase.rootdir to be the home directory in mini dfs.
+      this.conf.set(HConstants.HBASE_DIR,
+        this.cluster.getFileSystem().getHomeDirectory().toString());
+    } catch (IOException e) {
+      // Clean up the half-started cluster, then fail setup immediately.
+      // The original swallowed the exception here and silently continued
+      // into super.setUp() with an unusable cluster.
+      shutdownDfs(cluster);
+      throw e;
+    }
+    super.setUp();
+  }
+  
+  /**
+   * Runs the superclass cleanup first, then tears down the mini DFS
+   * cluster started in {@link #setUp()}.
+   */
+  @Override
+  public void tearDown() throws Exception {
+    super.tearDown();
+    shutdownDfs(cluster);
+    // ReflectionUtils.printThreadInfo(new PrintWriter(System.out),
+    //  "Temporary end-of-test thread dump debugging HADOOP-2040: " + getName());
+  }
+
+  /**
+   * Write a file and then assert that we can read from top and bottom
+   * halves using two HalfMapFiles.
+   * @throws Exception on failure
+   */
+  public void testBasicHalfMapFile() throws Exception {
+    // Make up a directory hierarchy that has a regiondir and familyname.
+    Path storedir =
+      new Path(new Path(this.testDir, "regionname"), "familyname");
+    HFile.Writer writer =
+      StoreFile.getWriter(this.fs, storedir, 2 * 1024, null, null);
+    writeStoreFile(writer);
+    checkHalfHFile(new StoreFile(this.fs, writer.getPath()));
+  }
+
+  /*
+   * Writes HStoreKey and ImmutableBytes data to passed writer and
+   * then closes it.
+   * @param writer
+   * @throws IOException
+   */
+  private void writeStoreFile(final HFile.Writer writer)
+  throws IOException {
+    try {
+      // Append two-character rows ("aa", "ab", ...) in ascending order.
+      for (char first = FIRST_CHAR; first <= LAST_CHAR; first++) {
+        for (char second = FIRST_CHAR; second <= LAST_CHAR; second++) {
+          byte[] pair = new byte[] { (byte) first, (byte) second };
+          byte [] t = Bytes.toBytes(new String(pair, HConstants.UTF8_ENCODING));
+          writer.append(
+            new HStoreKey(t, t, System.currentTimeMillis()).getBytes(), t);
+        }
+      }
+    } finally {
+      writer.close();
+    }
+  }
+  
+  /**
+   * Test that our mechanism of writing store files in one region to reference
+   * store files in other regions works.
+   * @throws IOException
+   */
+  public void testReference()
+  throws IOException {
+    Path storedir = new Path(new Path(this.testDir, "regionname"), "familyname");
+    Path dir = new Path(storedir, "1234567890");
+    // Make a store file and write data to it.
+    HFile.Writer writer = StoreFile.getWriter(this.fs, dir, 8 * 1024, null, null);
+    writeStoreFile(writer);
+    StoreFile hsf = new StoreFile(this.fs, writer.getPath());
+    HFile.Reader reader = hsf.getReader();
+    // Split on a row, not in middle of row.  Midkey returned by reader
+    // may be in middle of row.  Create new one with empty column and
+    // timestamp.
+    HStoreKey hsk = HStoreKey.create(reader.midkey());
+    byte [] midkey = hsk.getRow();
+    hsk = HStoreKey.create(reader.getLastKey());
+    byte [] finalKey = hsk.getRow();
+    // Make a reference
+    Path refPath = StoreFile.split(fs, dir, hsf, reader.midkey(), Range.top);
+    StoreFile refHsf = new StoreFile(this.fs, refPath);
+    // Now confirm that I can read from the reference and that it only gets
+    // keys from top half of the file.
+    HFileScanner s = refHsf.getReader().getScanner();
+    // First iteration positions the scanner (seekTo); subsequent ones
+    // advance it (next), so hsk ends up holding the last key in the file.
+    for(boolean first = true; (!s.isSeeked() && s.seekTo()) || s.next();) {
+      ByteBuffer bb = s.getKey();
+      hsk = HStoreKey.create(bb.array(), bb.arrayOffset(), bb.limit());
+      if (first) {
+        // Very first key read through the reference must be the midkey row.
+        assertTrue(Bytes.equals(hsk.getRow(), midkey));
+        first = false;
+      }
+    }
+    // Last key read must match the last key of the original file.
+    assertTrue(Bytes.equals(hsk.getRow(), finalKey));
+  }
+
+  private void checkHalfHFile(final StoreFile f)
+  throws IOException {
+    byte [] midkey = f.getReader().midkey();
+    // Create top split.
+    Path topDir = Store.getStoreHomedir(this.testDir, 1,
+      Bytes.toBytes(f.getPath().getParent().getName()));
+    if (this.fs.exists(topDir)) {
+      this.fs.delete(topDir, true);
+    }
+    Path topPath = StoreFile.split(this.fs, topDir, f, midkey, Range.top);
+    // Create bottom split.
+    Path bottomDir = Store.getStoreHomedir(this.testDir, 2,
+      Bytes.toBytes(f.getPath().getParent().getName()));
+    if (this.fs.exists(bottomDir)) {
+      this.fs.delete(bottomDir, true);
+    }
+    Path bottomPath = StoreFile.split(this.fs, bottomDir,
+      f, midkey, Range.bottom);
+    // Make readers on top and bottom.
+    HFile.Reader top = new StoreFile(this.fs, topPath).getReader();
+    HFile.Reader bottom = new StoreFile(this.fs, bottomPath).getReader();
+    ByteBuffer previous = null;
+    LOG.info("Midkey: " + Bytes.toString(midkey));
+    byte [] midkeyBytes = new HStoreKey(midkey).getBytes();
+    ByteBuffer bbMidkeyBytes = ByteBuffer.wrap(midkeyBytes);
+    try {
+      // Now make two HalfMapFiles and assert they can read the full backing
+      // file, one from the top and the other from the bottom.
+      // Test bottom half first.
+      // Now test reading from the top.
+      boolean first = true;
+      ByteBuffer key = null;
+      HFileScanner topScanner = top.getScanner();
+      while ((!topScanner.isSeeked() && topScanner.seekTo()) ||
+          (topScanner.isSeeked() && topScanner.next())) {
+        key = topScanner.getKey();
+        
+        assertTrue(topScanner.getReader().getComparator().compare(key.array(),
+          key.arrayOffset(), key.limit(), midkeyBytes, 0, midkeyBytes.length) >= 0);
+        if (first) {
+          first = false;
+          LOG.info("First in top: " + Bytes.toString(Bytes.toBytes(key)));
+        }
+      }
+      LOG.info("Last in top: " + Bytes.toString(Bytes.toBytes(key)));
+      
+      first = true;
+      HFileScanner bottomScanner = bottom.getScanner();
+      while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) ||
+          bottomScanner.next()) {
+        previous = bottomScanner.getKey();
+        key = bottomScanner.getKey();
+        if (first) {
+          first = false;
+          LOG.info("First in bottom: " +
+            Bytes.toString(Bytes.toBytes(previous)));
+        }
+        assertTrue(key.compareTo(bbMidkeyBytes) < 0);
+      }
+      if (previous != null) {
+        LOG.info("Last in bottom: " + Bytes.toString(Bytes.toBytes(previous)));
+      }
+      // Remove references.
+      this.fs.delete(topPath, false);
+      this.fs.delete(bottomPath, false);
+
+      // Next test using a midkey that does not exist in the file.
+      // First, use a key that sorts before the first key in the file, and
+      // ensure the splits behave properly.
+      byte [] badmidkey = Bytes.toBytes("  .");
+      topPath = StoreFile.split(this.fs, topDir, f, badmidkey, Range.top);
+      bottomPath = StoreFile.split(this.fs, bottomDir, f, badmidkey,
+        Range.bottom);
+      top = new StoreFile(this.fs, topPath).getReader();
+      bottom = new StoreFile(this.fs, bottomPath).getReader();
+      bottomScanner = bottom.getScanner();
+      int count = 0;
+      while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) ||
+          bottomScanner.next()) {
+        count++;
+      }
+      // When badkey sorts before the first key, the bottom half should
+      // return no values.
+      assertTrue(count == 0);
+      // Now read from the top.
+      first = true;
+      topScanner = top.getScanner();
+      while ((!topScanner.isSeeked() && topScanner.seekTo()) ||
+          topScanner.next()) {
+        key = topScanner.getKey();
+        assertTrue(topScanner.getReader().getComparator().compare(key.array(),
+          key.arrayOffset(), key.limit(), badmidkey, 0, badmidkey.length) >= 0);
+        if (first) {
+          first = false;
+          first = false;
+          HStoreKey keyhsk = HStoreKey.create(key);
+          LOG.info("First top when key < bottom: " + keyhsk);
+          String tmp = Bytes.toString(keyhsk.getRow());
+          for (int i = 0; i < tmp.length(); i++) {
+            assertTrue(tmp.charAt(i) == 'a');
+          }
+        }
+      }
+      HStoreKey keyhsk = HStoreKey.create(key);
+      LOG.info("Last top when key < bottom: " + keyhsk);
+      String tmp = Bytes.toString(keyhsk.getRow());
+      for (int i = 0; i < tmp.length(); i++) {
+        assertTrue(tmp.charAt(i) == 'z');
+      }
+      // Remove references.
+      this.fs.delete(topPath, false);
+      this.fs.delete(bottomPath, false);
+
+      // Test when badkey is > than last key in file ('|||' > 'zz').
+      badmidkey = Bytes.toBytes("|||");
+      topPath = StoreFile.split(this.fs, topDir, f, badmidkey, Range.top);
+      bottomPath = StoreFile.split(this.fs, bottomDir, f, badmidkey,
+        Range.bottom);
+      top = new StoreFile(this.fs, topPath).getReader();
+      bottom = new StoreFile(this.fs, bottomPath).getReader();
+      first = true;
+      bottomScanner = bottom.getScanner();
+      while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) ||
+          bottomScanner.next()) {
+        key = bottomScanner.getKey();
+        if (first) {
+          first = false;
+          keyhsk = HStoreKey.create(key);
+          LOG.info("First bottom when key > top: " + keyhsk);
+          tmp = Bytes.toString(keyhsk.getRow());
+          for (int i = 0; i < tmp.length(); i++) {
+            assertTrue(tmp.charAt(i) == 'a');
+          }
+        }
+      }
+      keyhsk = HStoreKey.create(key);
+      LOG.info("Last bottom when key > top: " + keyhsk);
+      for (int i = 0; i < tmp.length(); i++) {
+        assertTrue(Bytes.toString(keyhsk.getRow()).charAt(i) == 'z');
+      }
+      count = 0;
+      topScanner = top.getScanner();
+      while ((!topScanner.isSeeked() && topScanner.seekTo()) ||
+          (topScanner.isSeeked() && topScanner.next())) {
+        count++;
+      }
+      // When badkey sorts after the last key, the top half should return
+      // no values.
+      assertTrue(count == 0);
+    } finally {
+      if (top != null) {
+        top.close();
+      }
+      if (bottom != null) {
+        bottom.close();
+      }
+      fs.delete(f.getPath(), true);
+    }
+  }
+}
\ No newline at end of file