Posted to common-commits@hadoop.apache.org by cd...@apache.org on 2009/07/11 03:22:15 UTC
svn commit: r793163 [4/4] - in /hadoop/common/branches/branch-0.20: ./
src/core/org/apache/hadoop/io/file/
src/core/org/apache/hadoop/io/file/tfile/ src/test/
src/test/org/apache/hadoop/io/file/ src/test/org/apache/hadoop/io/file/tfile/
Added: hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileNoneCodecsByteArrays.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileNoneCodecsByteArrays.java?rev=793163&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileNoneCodecsByteArrays.java (added)
+++ hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileNoneCodecsByteArrays.java Sat Jul 11 01:22:14 2009
@@ -0,0 +1,32 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package org.apache.hadoop.io.file.tfile;
+
+import java.io.IOException;
+
+public class TestTFileNoneCodecsByteArrays extends TestTFileByteArrays {
+ /**
+ * Test the no-compression codec, using the same test cases as TestTFileByteArrays.
+ */
+ @Override
+ public void setUp() throws IOException {
+ init(Compression.Algorithm.NONE.getName(), "memcmp", "TFileTestCodecsNone",
+ 24, 24);
+ super.setUp();
+ }
+}
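
The three "NoneCodecs" classes added in this commit all follow one pattern: inherit every test case from a GZ-based base class and override setUp() to re-run them with a different codec, comparator, and output file name. A hypothetical sketch of wiring up another codec the same way (class and file names invented here; the trailing (24, 24) counts are consumed by TestTFileByteArrays, which is not part of this portion of the diff):

    package org.apache.hadoop.io.file.tfile;

    import java.io.IOException;

    // Hypothetical, for illustration only; not part of this commit.
    public class TestTFileLzoCodecsByteArraysSketch extends TestTFileByteArrays {
      @Override
      public void setUp() throws IOException {
        // Same init() contract as above, with the LZO codec swapped in.
        init(Compression.Algorithm.LZO.getName(), "memcmp",
            "TFileTestCodecsLzoSketch", 24, 24);
        super.setUp();
      }
    }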
Added: hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileNoneCodecsJClassComparatorByteArrays.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileNoneCodecsJClassComparatorByteArrays.java?rev=793163&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileNoneCodecsJClassComparatorByteArrays.java (added)
+++ hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileNoneCodecsJClassComparatorByteArrays.java Sat Jul 11 01:22:14 2009
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package org.apache.hadoop.io.file.tfile;
+
+import java.io.IOException;
+
+/**
+ *
+ * Byte arrays test case class using the no-compression codec together with
+ * a user-supplied (jclass) comparator.
+ *
+ */
+
+public class TestTFileNoneCodecsJClassComparatorByteArrays extends TestTFileByteArrays {
+ /**
+ * Test the no-compression codec with a jclass comparator, using the same test cases as TestTFileByteArrays.
+ */
+ @Override
+ public void setUp() throws IOException {
+ init(Compression.Algorithm.NONE.getName(),
+ "jclass: org.apache.hadoop.io.file.tfile.MyComparator",
+ "TestTFileNoneCodecsJClassComparatorByteArrays", 24, 24);
+ super.setUp();
+ }
+}
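
The "jclass:" prefix in the comparator string tells TFile to load a named comparator class instead of using the built-in "memcmp" byte ordering; the referenced MyComparator is defined elsewhere in this commit. A minimal comparator of the shape such a name might be expected to resolve to could look like this (illustrative sketch only, not the actual MyComparator):

    import org.apache.hadoop.io.RawComparator;
    import org.apache.hadoop.io.WritableComparator;

    // Illustrative sketch: a raw comparator that reproduces memcmp ordering.
    public class ExampleRawComparator implements RawComparator<byte[]> {
      public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        return WritableComparator.compareBytes(b1, s1, l1, b2, s2, l2);
      }

      public int compare(byte[] o1, byte[] o2) {
        return compare(o1, 0, o1.length, o2, 0, o2.length);
      }
    }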
Added: hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileNoneCodecsStreams.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileNoneCodecsStreams.java?rev=793163&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileNoneCodecsStreams.java (added)
+++ hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileNoneCodecsStreams.java Sat Jul 11 01:22:14 2009
@@ -0,0 +1,32 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.io.file.tfile;
+
+import java.io.IOException;
+
+public class TestTFileNoneCodecsStreams extends TestTFileStreams {
+ /**
+ * Test the no-compression codec, using the same test cases as TestTFileStreams.
+ */
+ @Override
+ public void setUp() throws IOException {
+ init(Compression.Algorithm.NONE.getName(), "memcmp", "TFileTestCodecsNone");
+ super.setUp();
+ }
+}
Added: hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileSeek.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileSeek.java?rev=793163&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileSeek.java (added)
+++ hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileSeek.java Sat Jul 11 01:22:14 2009
@@ -0,0 +1,504 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package org.apache.hadoop.io.file.tfile;
+
+import java.io.IOException;
+import java.util.Random;
+import java.util.StringTokenizer;
+
+import junit.framework.TestCase;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.file.tfile.RandomDistribution.DiscreteRNG;
+import org.apache.hadoop.io.file.tfile.TFile.Reader;
+import org.apache.hadoop.io.file.tfile.TFile.Writer;
+import org.apache.hadoop.io.file.tfile.TFile.Reader.Scanner;
+
+/**
+ * Test the seek performance of TFile.
+ *
+ */
+public class TestTFileSeek extends TestCase {
+ private MyOptions options;
+ private Configuration conf;
+ private Path path;
+ private FileSystem fs;
+ private NanoTimer timer;
+ private Random rng;
+ private DiscreteRNG keyLenGen;
+ private KVGenerator kvGen;
+
+ @Override
+ public void setUp() throws IOException {
+ if (options == null) {
+ options = new MyOptions(new String[0]);
+ }
+
+ conf = new Configuration();
+ conf.setInt("tfile.fs.input.buffer.size", options.fsInputBufferSize);
+ conf.setInt("tfile.fs.output.buffer.size", options.fsOutputBufferSize);
+ path = new Path(new Path(options.rootDir), options.file);
+ fs = path.getFileSystem(conf);
+ timer = new NanoTimer(false);
+ rng = new Random(options.seed);
+ keyLenGen =
+ new RandomDistribution.Zipf(new Random(rng.nextLong()),
+ options.minKeyLen, options.maxKeyLen, 1.2);
+ DiscreteRNG valLenGen =
+ new RandomDistribution.Flat(new Random(rng.nextLong()),
+ options.minValLength, options.maxValLength);
+ DiscreteRNG wordLenGen =
+ new RandomDistribution.Flat(new Random(rng.nextLong()),
+ options.minWordLen, options.maxWordLen);
+ kvGen =
+ new KVGenerator(rng, true, keyLenGen, valLenGen, wordLenGen,
+ options.dictSize);
+ }
+
+ @Override
+ public void tearDown() throws IOException {
+ fs.delete(path, true);
+ }
+
+ private static FSDataOutputStream createFSOutput(Path name, FileSystem fs)
+ throws IOException {
+ if (fs.exists(name)) {
+ fs.delete(name, true);
+ }
+ FSDataOutputStream fout = fs.create(name);
+ return fout;
+ }
+
+ private void createTFile() throws IOException {
+ long totalBytes = 0;
+ FSDataOutputStream fout = createFSOutput(path, fs);
+ try {
+ Writer writer =
+ new Writer(fout, options.minBlockSize, options.compress, "memcmp",
+ conf);
+ try {
+ BytesWritable key = new BytesWritable();
+ BytesWritable val = new BytesWritable();
+ timer.start();
+ for (long i = 0; true; ++i) {
+ if (i % 1000 == 0) { // check the file size every 1000 rows.
+ if (fs.getFileStatus(path).getLen() >= options.fileSize) {
+ break;
+ }
+ }
+ kvGen.next(key, val, false);
+ writer.append(key.get(), 0, key.getSize(), val.get(), 0, val
+ .getSize());
+ totalBytes += key.getSize();
+ totalBytes += val.getSize();
+ }
+ timer.stop();
+ }
+ finally {
+ writer.close();
+ }
+ }
+ finally {
+ fout.close();
+ }
+ double duration = (double)timer.read()/1000; // in us.
+ long fsize = fs.getFileStatus(path).getLen();
+
+ System.out.printf(
+ "time: %s...uncompressed: %.2fMB...raw thrpt: %.2fMB/s\n",
+ timer.toString(), (double) totalBytes / 1024 / 1024, totalBytes
+ / duration);
+ System.out.printf("time: %s...file size: %.2fMB...disk thrpt: %.2fMB/s\n",
+ timer.toString(), (double) fsize / 1024 / 1024, fsize / duration);
+ }
+
+ public void seekTFile() throws IOException {
+ int miss = 0;
+ long totalBytes = 0;
+ FSDataInputStream fsdis = fs.open(path);
+ Reader reader =
+ new Reader(fsdis, fs.getFileStatus(path).getLen(), conf);
+ KeySampler kSampler =
+ new KeySampler(rng, reader.getFirstKey(), reader.getLastKey(),
+ keyLenGen);
+ Scanner scanner = reader.createScanner();
+ BytesWritable key = new BytesWritable();
+ BytesWritable val = new BytesWritable();
+ timer.reset();
+ timer.start();
+ for (int i = 0; i < options.seekCount; ++i) {
+ kSampler.next(key);
+ scanner.lowerBound(key.get(), 0, key.getSize());
+ if (!scanner.atEnd()) {
+ scanner.entry().get(key, val);
+ totalBytes += key.getSize();
+ totalBytes += val.getSize();
+ }
+ else {
+ ++miss;
+ }
+ }
+ timer.stop();
+ double duration = (double) timer.read() / 1000; // in us.
+ System.out.printf(
+ "time: %s...avg seek: %s...%d hit...%d miss...avg I/O size: %.2fKB\n",
+ timer.toString(), NanoTimer.nanoTimeToString(timer.read()
+ / options.seekCount), options.seekCount - miss, miss,
+ (double) totalBytes / 1024 / (options.seekCount - miss));
+
+ }
+
+ public void testSeeks() throws IOException {
+ String[] supported = TFile.getSupportedCompressionAlgorithms();
+ boolean proceed = false;
+ for (String c : supported) {
+ if (c.equals(options.compress)) {
+ proceed = true;
+ break;
+ }
+ }
+
+ if (!proceed) {
+ System.out.println("Skipped for " + options.compress);
+ return;
+ }
+
+ if (options.doCreate()) {
+ createTFile();
+ }
+
+ if (options.doRead()) {
+ seekTFile();
+ }
+ }
+
+ private static class IntegerRange {
+ private final int from, to;
+
+ public IntegerRange(int from, int to) {
+ this.from = from;
+ this.to = to;
+ }
+
+ public static IntegerRange parse(String s) throws ParseException {
+ StringTokenizer st = new StringTokenizer(s, " \t,");
+ if (st.countTokens() != 2) {
+ throw new ParseException("Bad integer specification: " + s);
+ }
+ int from = Integer.parseInt(st.nextToken());
+ int to = Integer.parseInt(st.nextToken());
+ return new IntegerRange(from, to);
+ }
+
+ public int from() {
+ return from;
+ }
+
+ public int to() {
+ return to;
+ }
+ }
+
+ private static class MyOptions {
+ // hard coded constants
+ int dictSize = 1000;
+ int minWordLen = 5;
+ int maxWordLen = 20;
+ int osInputBufferSize = 64 * 1024;
+ int osOutputBufferSize = 64 * 1024;
+ int fsInputBufferSizeNone = 0;
+ int fsInputBufferSizeLzo = 0;
+ int fsInputBufferSizeGz = 0;
+ int fsOutputBufferSizeNone = 1;
+ int fsOutputBufferSizeLzo = 1;
+ int fsOutputBufferSizeGz = 1;
+
+ String rootDir =
+ System.getProperty("test.build.data", "/tmp/tfile-test");
+ String file = "TestTFileSeek";
+ String compress = "gz";
+ int minKeyLen = 10;
+ int maxKeyLen = 50;
+ int minValLength = 100;
+ int maxValLength = 200;
+ int minBlockSize = 64 * 1024;
+ int fsOutputBufferSize = 1;
+ int fsInputBufferSize = 0;
+ long fileSize = 3 * 1024 * 1024;
+ long seekCount = 1000;
+ long seed;
+
+ static final int OP_CREATE = 1;
+ static final int OP_READ = 2;
+ int op = OP_CREATE | OP_READ;
+
+ boolean proceed = false;
+
+ public MyOptions(String[] args) {
+ seed = System.nanoTime();
+
+ try {
+ Options opts = buildOptions();
+ CommandLineParser parser = new GnuParser();
+ CommandLine line = parser.parse(opts, args, true);
+ processOptions(line, opts);
+ validateOptions();
+ }
+ catch (ParseException e) {
+ System.out.println(e.getMessage());
+ System.out.println("Try \"--help\" option for details.");
+ setStopProceed();
+ }
+ }
+
+ public boolean proceed() {
+ return proceed;
+ }
+
+ private Options buildOptions() {
+ Option compress =
+ OptionBuilder.withLongOpt("compress").withArgName("[none|lzo|gz]")
+ .hasArg().withDescription("compression scheme").create('c');
+
+ Option fileSize =
+ OptionBuilder.withLongOpt("file-size").withArgName("size-in-MB")
+ .hasArg().withDescription("target size of the file (in MB).")
+ .create('s');
+
+ Option fsInputBufferSz =
+ OptionBuilder.withLongOpt("fs-input-buffer").withArgName("size")
+ .hasArg().withDescription(
+ "size of the file system input buffer (in bytes).").create(
+ 'i');
+
+ Option fsOutputBufferSize =
+ OptionBuilder.withLongOpt("fs-output-buffer").withArgName("size")
+ .hasArg().withDescription(
+ "size of the file system output buffer (in bytes).").create(
+ 'o');
+
+ Option keyLen =
+ OptionBuilder
+ .withLongOpt("key-length")
+ .withArgName("min,max")
+ .hasArg()
+ .withDescription(
+ "the length range of the key (in bytes)")
+ .create('k');
+
+ Option valueLen =
+ OptionBuilder
+ .withLongOpt("value-length")
+ .withArgName("min,max")
+ .hasArg()
+ .withDescription(
+ "the length range of the value (in bytes)")
+ .create('v');
+
+ Option blockSz =
+ OptionBuilder.withLongOpt("block").withArgName("size-in-KB").hasArg()
+ .withDescription("minimum block size (in KB)").create('b');
+
+ Option seed =
+ OptionBuilder.withLongOpt("seed").withArgName("long-int").hasArg()
+ .withDescription("specify the seed").create('S');
+
+ Option operation =
+ OptionBuilder.withLongOpt("operation").withArgName("r|w|rw").hasArg()
+ .withDescription(
+ "action: seek-only, create-only, seek-after-create").create(
+ 'x');
+
+ Option rootDir =
+ OptionBuilder.withLongOpt("root-dir").withArgName("path").hasArg()
+ .withDescription(
+ "specify root directory where files will be created.")
+ .create('r');
+
+ Option file =
+ OptionBuilder.withLongOpt("file").withArgName("name").hasArg()
+ .withDescription("specify the file name to be created or read.")
+ .create('f');
+
+ Option seekCount =
+ OptionBuilder
+ .withLongOpt("seek")
+ .withArgName("count")
+ .hasArg()
+ .withDescription(
+ "specify how many seek operations we perform (requires -x r or -x rw.")
+ .create('n');
+
+ Option help =
+ OptionBuilder.withLongOpt("help").hasArg(false).withDescription(
+ "show this screen").create("h");
+
+ return new Options().addOption(compress).addOption(fileSize).addOption(
+ fsInputBufferSz).addOption(fsOutputBufferSize).addOption(keyLen)
+ .addOption(blockSz).addOption(rootDir).addOption(valueLen).addOption(
+ operation).addOption(seekCount).addOption(file).addOption(help);
+
+ }
+
+ private void processOptions(CommandLine line, Options opts)
+ throws ParseException {
+ // --help (-h) must be processed first.
+ if (line.hasOption('h')) {
+ HelpFormatter formatter = new HelpFormatter();
+ System.out.println("TFile and SeqFile benchmark.");
+ System.out.println();
+ formatter.printHelp(100,
+ "java ... TestTFileSeqFileComparison [options]",
+ "\nSupported options:", opts, "");
+ return;
+ }
+
+ if (line.hasOption('c')) {
+ compress = line.getOptionValue('c');
+ }
+
+ if (line.hasOption('d')) {
+ dictSize = Integer.parseInt(line.getOptionValue('d'));
+ }
+
+ if (line.hasOption('s')) {
+ fileSize = Long.parseLong(line.getOptionValue('s')) * 1024 * 1024;
+ }
+
+ if (line.hasOption('i')) {
+ fsInputBufferSize = Integer.parseInt(line.getOptionValue('i'));
+ }
+
+ if (line.hasOption('o')) {
+ fsOutputBufferSize = Integer.parseInt(line.getOptionValue('o'));
+ }
+
+ if (line.hasOption('n')) {
+ seekCount = Integer.parseInt(line.getOptionValue('n'));
+ }
+
+ if (line.hasOption('k')) {
+ IntegerRange ir = IntegerRange.parse(line.getOptionValue('k'));
+ minKeyLen = ir.from();
+ maxKeyLen = ir.to();
+ }
+
+ if (line.hasOption('v')) {
+ IntegerRange ir = IntegerRange.parse(line.getOptionValue('v'));
+ minValLength = ir.from();
+ maxValLength = ir.to();
+ }
+
+ if (line.hasOption('b')) {
+ minBlockSize = Integer.parseInt(line.getOptionValue('b')) * 1024;
+ }
+
+ if (line.hasOption('r')) {
+ rootDir = line.getOptionValue('r');
+ }
+
+ if (line.hasOption('f')) {
+ file = line.getOptionValue('f');
+ }
+
+ if (line.hasOption('S')) {
+ seed = Long.parseLong(line.getOptionValue('S'));
+ }
+
+ if (line.hasOption('x')) {
+ String strOp = line.getOptionValue('x');
+ if (strOp.equals("r")) {
+ op = OP_READ;
+ }
+ else if (strOp.equals("w")) {
+ op = OP_CREATE;
+ }
+ else if (strOp.equals("rw")) {
+ op = OP_CREATE | OP_READ;
+ }
+ else {
+ throw new ParseException("Unknown action specifier: " + strOp);
+ }
+ }
+
+ proceed = true;
+ }
+
+ private void validateOptions() throws ParseException {
+ if (!compress.equals("none") && !compress.equals("lzo")
+ && !compress.equals("gz")) {
+ throw new ParseException("Unknown compression scheme: " + compress);
+ }
+
+ if (minKeyLen >= maxKeyLen) {
+ throw new ParseException(
+ "Max key length must be greater than min key length.");
+ }
+
+ if (minValLength >= maxValLength) {
+ throw new ParseException(
+ "Max value length must be greater than min value length.");
+ }
+
+ if (minWordLen >= maxWordLen) {
+ throw new ParseException(
+ "Max word length must be greater than min word length.");
+ }
+ return;
+ }
+
+ private void setStopProceed() {
+ proceed = false;
+ }
+
+ public boolean doCreate() {
+ return (op & OP_CREATE) != 0;
+ }
+
+ public boolean doRead() {
+ return (op & OP_READ) != 0;
+ }
+ }
+
+ public static void main(String[] argv) throws IOException {
+ TestTFileSeek testCase = new TestTFileSeek();
+ MyOptions options = new MyOptions(argv);
+
+ if (options.proceed == false) {
+ return;
+ }
+
+ testCase.options = options;
+ testCase.setUp();
+ testCase.testSeeks();
+ testCase.tearDown();
+ }
+}
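
Besides running under JUnit with its defaults, TestTFileSeek exposes a main() so the benchmark can be driven from the command line with the options built above. An assumed invocation (classpath and launcher details omitted, mirroring the "java ..." placeholder in the help text) that writes a roughly 10 MB gz-compressed TFile and then times 5000 random seeks against it:

    java ... org.apache.hadoop.io.file.tfile.TestTFileSeek \
        -c gz -s 10 -n 5000 -x rw -r /tmp/tfile-test

Per the option definitions, -n only takes effect together with -x r or -x rw, and -S can be added to fix the random seed for repeatable runs.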
Added: hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileSeqFileComparison.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileSeqFileComparison.java?rev=793163&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileSeqFileComparison.java (added)
+++ hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileSeqFileComparison.java Sat Jul 11 01:22:14 2009
@@ -0,0 +1,782 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package org.apache.hadoop.io.file.tfile;
+
+import java.io.IOException;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Random;
+import java.util.StringTokenizer;
+
+import junit.framework.TestCase;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.file.tfile.TFile.Reader.Scanner.Entry;
+
+public class TestTFileSeqFileComparison extends TestCase {
+ MyOptions options;
+
+ private FileSystem fs;
+ private Configuration conf;
+ private long startTimeEpoch;
+ private long finishTimeEpoch;
+ private DateFormat formatter;
+ byte[][] dictionary;
+
+ @Override
+ public void setUp() throws IOException {
+ if (options == null) {
+ options = new MyOptions(new String[0]);
+ }
+
+ conf = new Configuration();
+ conf.setInt("tfile.fs.input.buffer.size", options.fsInputBufferSize);
+ conf.setInt("tfile.fs.output.buffer.size", options.fsOutputBufferSize);
+ Path path = new Path(options.rootDir);
+ fs = path.getFileSystem(conf);
+ formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ setUpDictionary();
+ }
+
+ private void setUpDictionary() {
+ Random rng = new Random();
+ dictionary = new byte[options.dictSize][];
+ for (int i = 0; i < options.dictSize; ++i) {
+ int len =
+ rng.nextInt(options.maxWordLen - options.minWordLen)
+ + options.minWordLen;
+ dictionary[i] = new byte[len];
+ rng.nextBytes(dictionary[i]);
+ }
+ }
+
+ @Override
+ public void tearDown() throws IOException {
+ // do nothing
+ }
+
+ public void startTime() throws IOException {
+ startTimeEpoch = System.currentTimeMillis();
+ System.out.println(formatTime() + " Started timing.");
+ }
+
+ public void stopTime() throws IOException {
+ finishTimeEpoch = System.currentTimeMillis();
+ System.out.println(formatTime() + " Stopped timing.");
+ }
+
+ public long getIntervalMillis() throws IOException {
+ return finishTimeEpoch - startTimeEpoch;
+ }
+
+ public void printlnWithTimestamp(String message) throws IOException {
+ System.out.println(formatTime() + " " + message);
+ }
+
+ /*
+ * Format epoch milliseconds as a "yyyy-MM-dd HH:mm:ss" timestamp.
+ */
+ public String formatTime(long millis) {
+ return formatter.format(millis);
+ }
+
+ public String formatTime() {
+ return formatTime(System.currentTimeMillis());
+ }
+
+ private interface KVAppendable {
+ public void append(BytesWritable key, BytesWritable value)
+ throws IOException;
+
+ public void close() throws IOException;
+ }
+
+ private interface KVReadable {
+ public byte[] getKey();
+
+ public byte[] getValue();
+
+ public int getKeyLength();
+
+ public int getValueLength();
+
+ public boolean next() throws IOException;
+
+ public void close() throws IOException;
+ }
+
+ static class TFileAppendable implements KVAppendable {
+ private FSDataOutputStream fsdos;
+ private TFile.Writer writer;
+
+ public TFileAppendable(FileSystem fs, Path path, String compress,
+ int minBlkSize, int osBufferSize, Configuration conf)
+ throws IOException {
+ this.fsdos = fs.create(path, true, osBufferSize);
+ this.writer = new TFile.Writer(fsdos, minBlkSize, compress, null, conf);
+ }
+
+ public void append(BytesWritable key, BytesWritable value)
+ throws IOException {
+ writer.append(key.get(), 0, key.getSize(), value.get(), 0, value
+ .getSize());
+ }
+
+ public void close() throws IOException {
+ writer.close();
+ fsdos.close();
+ }
+ }
+
+ static class TFileReadable implements KVReadable {
+ private FSDataInputStream fsdis;
+ private TFile.Reader reader;
+ private TFile.Reader.Scanner scanner;
+ private byte[] keyBuffer;
+ private int keyLength;
+ private byte[] valueBuffer;
+ private int valueLength;
+
+ public TFileReadable(FileSystem fs, Path path, int osBufferSize,
+ Configuration conf) throws IOException {
+ this.fsdis = fs.open(path, osBufferSize);
+ this.reader =
+ new TFile.Reader(fsdis, fs.getFileStatus(path).getLen(), conf);
+ this.scanner = reader.createScanner();
+ keyBuffer = new byte[32];
+ valueBuffer = new byte[32];
+ }
+
+ private void checkKeyBuffer(int size) {
+ if (size <= keyBuffer.length) {
+ return;
+ }
+ keyBuffer =
+ new byte[Math.max(2 * keyBuffer.length, 2 * size - keyBuffer.length)];
+ }
+
+ private void checkValueBuffer(int size) {
+ if (size <= valueBuffer.length) {
+ return;
+ }
+ valueBuffer =
+ new byte[Math.max(2 * valueBuffer.length, 2 * size
+ - valueBuffer.length)];
+ }
+
+ public byte[] getKey() {
+ return keyBuffer;
+ }
+
+ public int getKeyLength() {
+ return keyLength;
+ }
+
+ public byte[] getValue() {
+ return valueBuffer;
+ }
+
+ public int getValueLength() {
+ return valueLength;
+ }
+
+ public boolean next() throws IOException {
+ if (scanner.atEnd()) return false;
+ Entry entry = scanner.entry();
+ keyLength = entry.getKeyLength();
+ checkKeyBuffer(keyLength);
+ entry.getKey(keyBuffer);
+ valueLength = entry.getValueLength();
+ checkValueBuffer(valueLength);
+ entry.getValue(valueBuffer);
+ scanner.advance();
+ return true;
+ }
+
+ public void close() throws IOException {
+ scanner.close();
+ reader.close();
+ fsdis.close();
+ }
+ }
+
+ static class SeqFileAppendable implements KVAppendable {
+ private FSDataOutputStream fsdos;
+ private SequenceFile.Writer writer;
+
+ public SeqFileAppendable(FileSystem fs, Path path, int osBufferSize,
+ String compress, int minBlkSize) throws IOException {
+ Configuration conf = new Configuration();
+ conf.setBoolean("hadoop.native.lib", true);
+
+ CompressionCodec codec = null;
+ if ("lzo".equals(compress)) {
+ codec = Compression.Algorithm.LZO.getCodec();
+ }
+ else if ("gz".equals(compress)) {
+ codec = Compression.Algorithm.GZ.getCodec();
+ }
+ else if (!"none".equals(compress))
+ throw new IOException("Codec not supported.");
+
+ this.fsdos = fs.create(path, true, osBufferSize);
+
+ if (!"none".equals(compress)) {
+ writer =
+ SequenceFile.createWriter(conf, fsdos, BytesWritable.class,
+ BytesWritable.class, SequenceFile.CompressionType.BLOCK, codec);
+ }
+ else {
+ writer =
+ SequenceFile.createWriter(conf, fsdos, BytesWritable.class,
+ BytesWritable.class, SequenceFile.CompressionType.NONE, null);
+ }
+ }
+
+ public void append(BytesWritable key, BytesWritable value)
+ throws IOException {
+ writer.append(key, value);
+ }
+
+ public void close() throws IOException {
+ writer.close();
+ fsdos.close();
+ }
+ }
+
+ static class SeqFileReadable implements KVReadable {
+ private SequenceFile.Reader reader;
+ private BytesWritable key;
+ private BytesWritable value;
+
+ public SeqFileReadable(FileSystem fs, Path path, int osBufferSize)
+ throws IOException {
+ Configuration conf = new Configuration();
+ conf.setInt("io.file.buffer.size", osBufferSize);
+ reader = new SequenceFile.Reader(fs, path, conf);
+ key = new BytesWritable();
+ value = new BytesWritable();
+ }
+
+ public byte[] getKey() {
+ return key.get();
+ }
+
+ public int getKeyLength() {
+ return key.getSize();
+ }
+
+ public byte[] getValue() {
+ return value.get();
+ }
+
+ public int getValueLength() {
+ return value.getSize();
+ }
+
+ public boolean next() throws IOException {
+ return reader.next(key, value);
+ }
+
+ public void close() throws IOException {
+ reader.close();
+ }
+ }
+
+ private void reportStats(Path path, long totalBytes) throws IOException {
+ long duration = getIntervalMillis();
+ long fsize = fs.getFileStatus(path).getLen();
+ printlnWithTimestamp(String.format(
+ "Duration: %dms...total size: %.2fMB...raw thrpt: %.2fMB/s", duration,
+ (double) totalBytes / 1024 / 1024, (double) totalBytes / duration
+ * 1000 / 1024 / 1024));
+ printlnWithTimestamp(String.format(
+ "Compressed size: %.2fMB...compressed thrpt: %.2fMB/s.",
+ (double) fsize / 1024 / 1024, (double) fsize / duration * 1000 / 1024
+ / 1024));
+ }
+
+ private void fillBuffer(Random rng, BytesWritable bw, byte[] tmp, int len) {
+ int n = 0;
+ while (n < len) {
+ byte[] word = dictionary[rng.nextInt(dictionary.length)];
+ int l = Math.min(word.length, len - n);
+ System.arraycopy(word, 0, tmp, n, l);
+ n += l;
+ }
+ bw.set(tmp, 0, len);
+ }
+
+ private void timeWrite(Path path, KVAppendable appendable, int baseKlen,
+ int baseVlen, long fileSize) throws IOException {
+ int maxKlen = baseKlen * 2;
+ int maxVlen = baseVlen * 2;
+ BytesWritable key = new BytesWritable();
+ BytesWritable value = new BytesWritable();
+ byte[] keyBuffer = new byte[maxKlen];
+ byte[] valueBuffer = new byte[maxVlen];
+ Random rng = new Random(options.seed);
+ long totalBytes = 0;
+ printlnWithTimestamp("Start writing: " + path.getName() + "...");
+ startTime();
+
+ for (long i = 0; true; ++i) {
+ if (i % 1000 == 0) { // check the file size every 1000 rows.
+ if (fs.getFileStatus(path).getLen() >= fileSize) {
+ break;
+ }
+ }
+ int klen = rng.nextInt(baseKlen) + baseKlen;
+ int vlen = rng.nextInt(baseVlen) + baseVlen;
+ fillBuffer(rng, key, keyBuffer, klen);
+ fillBuffer(rng, value, valueBuffer, vlen);
+ key.set(keyBuffer, 0, klen);
+ value.set(valueBuffer, 0, vlen);
+ appendable.append(key, value);
+ totalBytes += klen;
+ totalBytes += vlen;
+ }
+ stopTime();
+ appendable.close();
+ reportStats(path, totalBytes);
+ }
+
+ private void timeRead(Path path, KVReadable readable) throws IOException {
+ printlnWithTimestamp("Start reading: " + path.getName() + "...");
+ long totalBytes = 0;
+ startTime();
+ for (; readable.next();) {
+ totalBytes += readable.getKeyLength();
+ totalBytes += readable.getValueLength();
+ }
+ stopTime();
+ readable.close();
+ reportStats(path, totalBytes);
+ }
+
+ private void createTFile(String parameters, String compress)
+ throws IOException {
+ System.out.println("=== TFile: Creation (" + parameters + ") === ");
+ Path path = new Path(options.rootDir, "TFile.Performance");
+ KVAppendable appendable =
+ new TFileAppendable(fs, path, compress, options.minBlockSize,
+ options.osOutputBufferSize, conf);
+ timeWrite(path, appendable, options.keyLength, options.valueLength,
+ options.fileSize);
+ }
+
+ private void readTFile(String parameters, boolean delFile) throws IOException {
+ System.out.println("=== TFile: Reading (" + parameters + ") === ");
+ {
+ Path path = new Path(options.rootDir, "TFile.Performance");
+ KVReadable readable =
+ new TFileReadable(fs, path, options.osInputBufferSize, conf);
+ timeRead(path, readable);
+ if (delFile) {
+ if (fs.exists(path)) {
+ fs.delete(path, true);
+ }
+ }
+ }
+ }
+
+ private void createSeqFile(String parameters, String compress)
+ throws IOException {
+ System.out.println("=== SeqFile: Creation (" + parameters + ") === ");
+ Path path = new Path(options.rootDir, "SeqFile.Performance");
+ KVAppendable appendable =
+ new SeqFileAppendable(fs, path, options.osOutputBufferSize, compress,
+ options.minBlockSize);
+ timeWrite(path, appendable, options.keyLength, options.valueLength,
+ options.fileSize);
+ }
+
+ private void readSeqFile(String parameters, boolean delFile)
+ throws IOException {
+ System.out.println("=== SeqFile: Reading (" + parameters + ") === ");
+ Path path = new Path(options.rootDir, "SeqFile.Performance");
+ KVReadable readable =
+ new SeqFileReadable(fs, path, options.osInputBufferSize);
+ timeRead(path, readable);
+ if (delFile) {
+ if (fs.exists(path)) {
+ fs.delete(path, true);
+ }
+ }
+ }
+
+ private void compareRun(String compress) throws IOException {
+ String[] supported = TFile.getSupportedCompressionAlgorithms();
+ boolean proceed = false;
+ for (String c : supported) {
+ if (c.equals(compress)) {
+ proceed = true;
+ break;
+ }
+ }
+
+ if (!proceed) {
+ System.out.println("Skipped for " + compress);
+ return;
+ }
+
+ options.compress = compress;
+ String parameters = parameters2String(options);
+ createSeqFile(parameters, compress);
+ readSeqFile(parameters, true);
+ createTFile(parameters, compress);
+ readTFile(parameters, true);
+ createTFile(parameters, compress);
+ readTFile(parameters, true);
+ createSeqFile(parameters, compress);
+ readSeqFile(parameters, true);
+ }
+
+ public void testRunComparisons() throws IOException {
+ String[] compresses = new String[] { "none", "lzo", "gz" };
+ for (String compress : compresses) {
+ if (compress.equals("none")) {
+ conf
+ .setInt("tfile.fs.input.buffer.size", options.fsInputBufferSizeNone);
+ conf.setInt("tfile.fs.output.buffer.size",
+ options.fsOutputBufferSizeNone);
+ }
+ else if (compress.equals("lzo")) {
+ conf.setInt("tfile.fs.input.buffer.size", options.fsInputBufferSizeLzo);
+ conf.setInt("tfile.fs.output.buffer.size",
+ options.fsOutputBufferSizeLzo);
+ }
+ else {
+ conf.setInt("tfile.fs.input.buffer.size", options.fsInputBufferSizeGz);
+ conf
+ .setInt("tfile.fs.output.buffer.size", options.fsOutputBufferSizeGz);
+ }
+ compareRun(compress);
+ }
+ }
+
+ private static String parameters2String(MyOptions options) {
+ return String
+ .format(
+ "KLEN: %d-%d... VLEN: %d-%d...MinBlkSize: %.2fKB...Target Size: %.2fMB...Compression: ...%s",
+ options.keyLength, options.keyLength * 2, options.valueLength,
+ options.valueLength * 2, (double) options.minBlockSize / 1024,
+ (double) options.fileSize / 1024 / 1024, options.compress);
+ }
+
+ private static class MyOptions {
+ String rootDir =
+ System
+ .getProperty("test.build.data", "/tmp/tfile-test");
+ String compress = "gz";
+ String format = "tfile";
+ int dictSize = 1000;
+ int minWordLen = 5;
+ int maxWordLen = 20;
+ int keyLength = 50;
+ int valueLength = 100;
+ int minBlockSize = 256 * 1024;
+ int fsOutputBufferSize = 1;
+ int fsInputBufferSize = 0;
+ // special variable only for unit testing.
+ int fsInputBufferSizeNone = 0;
+ int fsInputBufferSizeGz = 0;
+ int fsInputBufferSizeLzo = 0;
+ int fsOutputBufferSizeNone = 1;
+ int fsOutputBufferSizeGz = 1;
+ int fsOutputBufferSizeLzo = 1;
+
+ // un-exposed parameters.
+ int osInputBufferSize = 64 * 1024;
+ int osOutputBufferSize = 64 * 1024;
+
+ long fileSize = 3 * 1024 * 1024;
+ long seed;
+
+ static final int OP_CREATE = 1;
+ static final int OP_READ = 2;
+ int op = OP_READ;
+
+ boolean proceed = false;
+
+ public MyOptions(String[] args) {
+ seed = System.nanoTime();
+
+ try {
+ Options opts = buildOptions();
+ CommandLineParser parser = new GnuParser();
+ CommandLine line = parser.parse(opts, args, true);
+ processOptions(line, opts);
+ validateOptions();
+ }
+ catch (ParseException e) {
+ System.out.println(e.getMessage());
+ System.out.println("Try \"--help\" option for details.");
+ setStopProceed();
+ }
+ }
+
+ public boolean proceed() {
+ return proceed;
+ }
+
+ private Options buildOptions() {
+ Option compress =
+ OptionBuilder.withLongOpt("compress").withArgName("[none|lzo|gz]")
+ .hasArg().withDescription("compression scheme").create('c');
+
+ Option dictSize =
+ OptionBuilder.withLongOpt("dict").withArgName("size").hasArg()
+ .withDescription("number of dictionary entries").create('d');
+
+ Option fileSize =
+ OptionBuilder.withLongOpt("file-size").withArgName("size-in-MB")
+ .hasArg().withDescription("target size of the file (in MB).")
+ .create('s');
+
+ Option format =
+ OptionBuilder.withLongOpt("format").withArgName("[tfile|seqfile]")
+ .hasArg().withDescription("choose TFile or SeqFile").create('f');
+
+ Option fsInputBufferSz =
+ OptionBuilder.withLongOpt("fs-input-buffer").withArgName("size")
+ .hasArg().withDescription(
+ "size of the file system input buffer (in bytes).").create(
+ 'i');
+
+ Option fsOutputBufferSize =
+ OptionBuilder.withLongOpt("fs-output-buffer").withArgName("size")
+ .hasArg().withDescription(
+ "size of the file system output buffer (in bytes).").create(
+ 'o');
+
+ Option keyLen =
+ OptionBuilder
+ .withLongOpt("key-length")
+ .withArgName("length")
+ .hasArg()
+ .withDescription(
+ "base length of the key (in bytes), actual length varies in [base, 2*base)")
+ .create('k');
+
+ Option valueLen =
+ OptionBuilder
+ .withLongOpt("value-length")
+ .withArgName("length")
+ .hasArg()
+ .withDescription(
+ "base length of the value (in bytes), actual length varies in [base, 2*base)")
+ .create('v');
+
+ Option wordLen =
+ OptionBuilder.withLongOpt("word-length").withArgName("min,max")
+ .hasArg().withDescription(
+ "range of dictionary word length (in bytes)").create('w');
+
+ Option blockSz =
+ OptionBuilder.withLongOpt("block").withArgName("size-in-KB").hasArg()
+ .withDescription("minimum block size (in KB)").create('b');
+
+ Option seed =
+ OptionBuilder.withLongOpt("seed").withArgName("long-int").hasArg()
+ .withDescription("specify the seed").create('S');
+
+ Option operation =
+ OptionBuilder.withLongOpt("operation").withArgName("r|w|rw").hasArg()
+ .withDescription(
+ "action: read-only, create-only, read-after-create").create(
+ 'x');
+
+ Option rootDir =
+ OptionBuilder.withLongOpt("root-dir").withArgName("path").hasArg()
+ .withDescription(
+ "specify root directory where files will be created.")
+ .create('r');
+
+ Option help =
+ OptionBuilder.withLongOpt("help").hasArg(false).withDescription(
+ "show this screen").create("h");
+
+ return new Options().addOption(compress).addOption(dictSize).addOption(
+ fileSize).addOption(format).addOption(fsInputBufferSz).addOption(
+ fsOutputBufferSize).addOption(keyLen).addOption(wordLen).addOption(
+ blockSz).addOption(rootDir).addOption(valueLen).addOption(operation)
+ .addOption(help);
+
+ }
+
+ private void processOptions(CommandLine line, Options opts)
+ throws ParseException {
+ // --help (-h) must be processed first.
+ if (line.hasOption('h')) {
+ HelpFormatter formatter = new HelpFormatter();
+ System.out.println("TFile and SeqFile benchmark.");
+ System.out.println();
+ formatter.printHelp(100,
+ "java ... TestTFileSeqFileComparison [options]",
+ "\nSupported options:", opts, "");
+ return;
+ }
+
+ if (line.hasOption('c')) {
+ compress = line.getOptionValue('c');
+ }
+
+ if (line.hasOption('d')) {
+ dictSize = Integer.parseInt(line.getOptionValue('d'));
+ }
+
+ if (line.hasOption('s')) {
+ fileSize = Long.parseLong(line.getOptionValue('s')) * 1024 * 1024;
+ }
+
+ if (line.hasOption('f')) {
+ format = line.getOptionValue('f');
+ }
+
+ if (line.hasOption('i')) {
+ fsInputBufferSize = Integer.parseInt(line.getOptionValue('i'));
+ }
+
+ if (line.hasOption('o')) {
+ fsOutputBufferSize = Integer.parseInt(line.getOptionValue('o'));
+ }
+
+ if (line.hasOption('k')) {
+ keyLength = Integer.parseInt(line.getOptionValue('k'));
+ }
+
+ if (line.hasOption('v')) {
+ valueLength = Integer.parseInt(line.getOptionValue('v'));
+ }
+
+ if (line.hasOption('b')) {
+ minBlockSize = Integer.parseInt(line.getOptionValue('b')) * 1024;
+ }
+
+ if (line.hasOption('r')) {
+ rootDir = line.getOptionValue('r');
+ }
+
+ if (line.hasOption('S')) {
+ seed = Long.parseLong(line.getOptionValue('S'));
+ }
+
+ if (line.hasOption('w')) {
+ String min_max = line.getOptionValue('w');
+ StringTokenizer st = new StringTokenizer(min_max, " \t,");
+ if (st.countTokens() != 2) {
+ throw new ParseException("Bad word length specification: " + min_max);
+ }
+ minWordLen = Integer.parseInt(st.nextToken());
+ maxWordLen = Integer.parseInt(st.nextToken());
+ }
+
+ if (line.hasOption('x')) {
+ String strOp = line.getOptionValue('x');
+ if (strOp.equals("r")) {
+ op = OP_READ;
+ }
+ else if (strOp.equals("w")) {
+ op = OP_CREATE;
+ }
+ else if (strOp.equals("rw")) {
+ op = OP_CREATE | OP_READ;
+ }
+ else {
+ throw new ParseException("Unknown action specifier: " + strOp);
+ }
+ }
+
+ proceed = true;
+ }
+
+ private void validateOptions() throws ParseException {
+ if (!compress.equals("none") && !compress.equals("lzo")
+ && !compress.equals("gz")) {
+ throw new ParseException("Unknown compression scheme: " + compress);
+ }
+
+ if (!format.equals("tfile") && !format.equals("seqfile")) {
+ throw new ParseException("Unknown file format: " + format);
+ }
+
+ if (minWordLen >= maxWordLen) {
+ throw new ParseException(
+ "Max word length must be greater than min word length.");
+ }
+ return;
+ }
+
+ private void setStopProceed() {
+ proceed = false;
+ }
+
+ public boolean doCreate() {
+ return (op & OP_CREATE) != 0;
+ }
+
+ public boolean doRead() {
+ return (op & OP_READ) != 0;
+ }
+ }
+
+ public static void main(String[] args) throws IOException {
+ TestTFileSeqFileComparison testCase = new TestTFileSeqFileComparison();
+ MyOptions options = new MyOptions(args);
+ if (options.proceed == false) {
+ return;
+ }
+ testCase.options = options;
+ String parameters = parameters2String(options);
+
+ testCase.setUp();
+ if (testCase.options.format.equals("tfile")) {
+ if (options.doCreate()) {
+ testCase.createTFile(parameters, options.compress);
+ }
+ if (options.doRead()) {
+ testCase.readTFile(parameters, options.doCreate());
+ }
+ }
+ else {
+ if (options.doCreate()) {
+ testCase.createSeqFile(parameters, options.compress);
+ }
+ if (options.doRead()) {
+ testCase.readSeqFile(parameters, options.doCreate());
+ }
+ }
+ testCase.tearDown();
+ }
+}
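
TestTFileSeqFileComparison likewise doubles as a standalone benchmark: main() honors -f to pick the format and the doCreate()/doRead() flags derived from -x. An assumed invocation (classpath omitted) that writes and then reads back a 20 MB gz-compressed TFile, printing raw and compressed throughput via reportStats():

    java ... org.apache.hadoop.io.file.tfile.TestTFileSeqFileComparison \
        -f tfile -c gz -s 20 -x rw

Substituting -f seqfile runs the same workload through SequenceFile, which is what makes the two sets of throughput numbers directly comparable.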
Added: hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileSplit.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileSplit.java?rev=793163&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileSplit.java (added)
+++ hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileSplit.java Sat Jul 11 01:22:14 2009
@@ -0,0 +1,107 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.io.file.tfile;
+
+import java.io.IOException;
+
+import junit.framework.Assert;
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.file.tfile.TFile.Reader;
+import org.apache.hadoop.io.file.tfile.TFile.Writer;
+import org.apache.hadoop.io.file.tfile.TFile.Reader.Scanner;
+
+public class TestTFileSplit extends TestCase {
+ private static String ROOT =
+ System.getProperty("test.build.data", "/tmp/tfile-test");
+
+ private final static int BLOCK_SIZE = 64 * 1024;
+
+ private static final String KEY = "key";
+ private static final String VALUE = "value";
+
+ private FileSystem fs;
+ private Configuration conf;
+ private Path path;
+
+ private String comparator = "memcmp";
+ private String outputFile = "TestTFileSplit";
+
+ void createFile(int count, String compress) throws IOException {
+ conf = new Configuration();
+ path = new Path(ROOT, outputFile + "." + compress);
+ fs = path.getFileSystem(conf);
+ FSDataOutputStream out = fs.create(path);
+ Writer writer = new Writer(out, BLOCK_SIZE, compress, comparator, conf);
+
+ int nx;
+ for (nx = 0; nx < count; nx++) {
+ byte[] key = composeSortedKey(KEY, count, nx).getBytes();
+ byte[] value = (VALUE + nx).getBytes();
+ writer.append(key, value);
+ }
+ writer.close();
+ out.close();
+ }
+
+ void readFile() throws IOException {
+ long fileLength = fs.getFileStatus(path).getLen();
+ int numSplit = 10;
+ long splitSize = fileLength / numSplit + 1;
+
+ Reader reader =
+ new Reader(fs.open(path), fs.getFileStatus(path).getLen(), conf);
+ long offset = 0;
+ long rowCount = 0;
+ BytesWritable key, value;
+ for (int i = 0; i < numSplit; ++i, offset += splitSize) {
+ Scanner scanner = reader.createScanner(offset, splitSize);
+ int count = 0;
+ key = new BytesWritable();
+ value = new BytesWritable();
+ while (!scanner.atEnd()) {
+ scanner.entry().get(key, value);
+ ++count;
+ scanner.advance();
+ }
+ scanner.close();
+ Assert.assertTrue(count > 0);
+ rowCount += count;
+ }
+ Assert.assertEquals(rowCount, reader.getEntryCount());
+ reader.close();
+ }
+
+ static String composeSortedKey(String prefix, int total, int value) {
+ return String.format("%s%010d", prefix, value);
+ }
+
+ public void testSplit() throws IOException {
+ System.out.println("testSplit");
+ createFile(100000, Compression.Algorithm.NONE.getName());
+ readFile();
+ fs.delete(path, true);
+ createFile(500000, Compression.Algorithm.GZ.getName());
+ readFile();
+ fs.delete(path, true);
+ }
+}
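
The split test depends on composeSortedKey producing keys whose byte order matches numeric order, since the file is written with the "memcmp" comparator: the %010d format zero-pads the counter so that, unlike unpadded decimal strings, lexicographic and numeric comparison agree. A small sketch of the difference:

    // Zero-padded keys (as composeSortedKey builds them) sort numerically
    // under byte-wise comparison; unpadded decimal strings do not.
    String a = String.format("%s%010d", "key", 2);   // "key0000000002"
    String b = String.format("%s%010d", "key", 10);  // "key0000000010"
    assert a.compareTo(b) < 0;              // padded: 2 sorts before 10
    assert "key10".compareTo("key2") < 0;   // unpadded: 10 sorts before 2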
Added: hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileStreams.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileStreams.java?rev=793163&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileStreams.java (added)
+++ hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileStreams.java Sat Jul 11 01:22:14 2009
@@ -0,0 +1,423 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package org.apache.hadoop.io.file.tfile;
+
+import java.io.DataOutputStream;
+import java.io.EOFException;
+import java.io.IOException;
+import java.util.Random;
+
+import junit.framework.Assert;
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.WritableUtils;
+import org.apache.hadoop.io.file.tfile.TFile.Reader;
+import org.apache.hadoop.io.file.tfile.TFile.Writer;
+import org.apache.hadoop.io.file.tfile.TFile.Reader.Scanner;
+
+/**
+ *
+ * Streaming interfaces test case class using GZ compression codec, base class
+ * of none and LZO compression classes.
+ *
+ */
+
+public class TestTFileStreams extends TestCase {
+ private static String ROOT =
+ System.getProperty("test.build.data", "/tmp/tfile-test");
+
+ private final static int BLOCK_SIZE = 512;
+ private final static int K = 1024;
+ private final static int M = K * K;
+ protected boolean skip = false;
+ private FileSystem fs;
+ private Configuration conf;
+ private Path path;
+ private FSDataOutputStream out;
+ Writer writer;
+
+ private String compression = Compression.Algorithm.GZ.getName();
+ private String comparator = "memcmp";
+ private String outputFile = "TFileTestStreams";
+
+ public void init(String compression, String comparator, String outputFile) {
+ this.compression = compression;
+ this.comparator = comparator;
+ this.outputFile = outputFile;
+ }
+
+ @Override
+ public void setUp() throws IOException {
+ conf = new Configuration();
+ path = new Path(ROOT, outputFile);
+ fs = path.getFileSystem(conf);
+ out = fs.create(path);
+ writer = new Writer(out, BLOCK_SIZE, compression, comparator, conf);
+ }
+
+ @Override
+ public void tearDown() throws IOException {
+ if (!skip) {
+ try {
+ closeOutput();
+ } catch (Exception e) {
+ // no-op
+ }
+ fs.delete(path, true);
+ }
+ }
+
+ public void testNoEntry() throws IOException {
+ if (skip)
+ return;
+ closeOutput();
+ TestTFileByteArrays.readRecords(fs, path, 0, conf);
+ }
+
+ public void testOneEntryKnownLength() throws IOException {
+ if (skip)
+ return;
+ writeRecords(1, true, true);
+
+ TestTFileByteArrays.readRecords(fs, path, 1, conf);
+ }
+
+ public void testOneEntryUnknownLength() throws IOException {
+ if (skip)
+ return;
+ writeRecords(1, false, false);
+
+ // TODO: getValueLength will throw an exception here, which is inconsistent:
+ // getKeyLength returns a correct value even though the initial length is -1.
+ TestTFileByteArrays.readRecords(fs, path, 1, conf);
+ }
+
+ // known key length, unknown value length
+ public void testOneEntryMixedLengths1() throws IOException {
+ if (skip)
+ return;
+ writeRecords(1, true, false);
+
+ TestTFileByteArrays.readRecords(fs, path, 1, conf);
+ }
+
+ // unknown key length, known value length
+ public void testOneEntryMixedLengths2() throws IOException {
+ if (skip)
+ return;
+ writeRecords(1, false, true);
+
+ TestTFileByteArrays.readRecords(fs, path, 1, conf);
+ }
+
+ public void testTwoEntriesKnownLength() throws IOException {
+ if (skip)
+ return;
+ writeRecords(2, true, true);
+
+ TestTFileByteArrays.readRecords(fs, path, 2, conf);
+ }
+
+ // Negative test
+ public void testFailureAddKeyWithoutValue() throws IOException {
+ if (skip)
+ return;
+ DataOutputStream dos = writer.prepareAppendKey(-1);
+ dos.write("key0".getBytes());
+ try {
+ closeOutput();
+ fail("Cannot add only a key without a value. ");
+ }
+ catch (IllegalStateException e) {
+ // noop, expecting an exception
+ }
+ }
+
+ public void testFailureAddValueWithoutKey() throws IOException {
+ if (skip)
+ return;
+ DataOutputStream outValue = null;
+ try {
+ outValue = writer.prepareAppendValue(6);
+ outValue.write("value0".getBytes());
+ fail("Cannot add a value without adding key first. ");
+ }
+ catch (Exception e) {
+ // noop, expecting an exception
+ }
+ finally {
+ if (outValue != null) {
+ outValue.close();
+ }
+ }
+ }
+
+ public void testFailureOneEntryKnownLength() throws IOException {
+ if (skip)
+ return;
+ DataOutputStream outKey = writer.prepareAppendKey(2);
+ try {
+ outKey.write("key0".getBytes());
+ fail("Specified key length mismatched the actual key length.");
+ }
+ catch (IOException e) {
+ // noop, expecting an exception
+ }
+
+ DataOutputStream outValue = null;
+ try {
+ outValue = writer.prepareAppendValue(6);
+ outValue.write("value0".getBytes());
+ }
+ catch (Exception e) {
+ // noop, expecting an exception
+ }
+ }
+
+ public void testFailureKeyTooLong() throws IOException {
+ if (skip)
+ return;
+ DataOutputStream outKey = writer.prepareAppendKey(2);
+ try {
+ outKey.write("key0".getBytes());
+ outKey.close();
+ Assert.fail("Key is longer than requested.");
+ }
+ catch (Exception e) {
+ // noop, expecting an exception
+ }
+ finally {
+ }
+ }
+
+ public void testFailureKeyTooShort() throws IOException {
+ if (skip)
+ return;
+ DataOutputStream outKey = writer.prepareAppendKey(4);
+ outKey.write("key0".getBytes());
+ outKey.close();
+ DataOutputStream outValue = writer.prepareAppendValue(15);
+ try {
+ outValue.write("value0".getBytes());
+ outValue.close();
+ Assert.fail("Value is shorter than expected.");
+ }
+ catch (Exception e) {
+ // noop, expecting an exception
+ }
+ finally {
+ }
+ }
+
+ public void testFailureValueTooLong() throws IOException {
+ if (skip)
+ return;
+ DataOutputStream outKey = writer.prepareAppendKey(4);
+ outKey.write("key0".getBytes());
+ outKey.close();
+ DataOutputStream outValue = writer.prepareAppendValue(3);
+ try {
+ outValue.write("value0".getBytes());
+ outValue.close();
+ Assert.fail("Value is longer than expected.");
+ }
+ catch (Exception e) {
+ // noop, expecting an exception
+ }
+
+ try {
+ outKey.close();
+ outKey.close();
+ }
+ catch (Exception e) {
+ Assert.fail("Second or more close() should have no effect.");
+ }
+ }
+
+ public void testFailureValueTooShort() throws IOException {
+ if (skip)
+ return;
+ DataOutputStream outKey = writer.prepareAppendKey(8);
+ try {
+ outKey.write("key0".getBytes());
+ outKey.close();
+ Assert.fail("Key is shorter than expected.");
+ }
+ catch (Exception e) {
+ // noop, expecting an exception
+ }
+ finally {
+ }
+ }
+
+ public void testFailureCloseKeyStreamManyTimesInWriter() throws IOException {
+ if (skip)
+ return;
+ DataOutputStream outKey = writer.prepareAppendKey(4);
+ try {
+ outKey.write("key0".getBytes());
+ outKey.close();
+ }
+ catch (Exception e) {
+ // noop, expecting an exception
+ }
+ finally {
+ try {
+ outKey.close();
+ }
+ catch (Exception e) {
+ // no-op
+ }
+ }
+ outKey.close();
+ outKey.close();
+ Assert.assertTrue("Multiple close should have no effect.", true);
+ }
+
+ public void testFailureKeyLongerThan64K() throws IOException {
+ if (skip)
+ return;
+ try {
+ DataOutputStream outKey = writer.prepareAppendKey(64 * K + 1);
+ Assert.fail("Failed to handle key longer than 64K.");
+ }
+ catch (IndexOutOfBoundsException e) {
+ // noop, expecting exceptions
+ }
+ closeOutput();
+ }
+
+ public void testFailureKeyLongerThan64K_2() throws IOException {
+ if (skip)
+ return;
+ DataOutputStream outKey = writer.prepareAppendKey(-1);
+ try {
+ byte[] buf = new byte[K];
+ Random rand = new Random();
+ for (int nx = 0; nx < K + 2; nx++) {
+ rand.nextBytes(buf);
+ outKey.write(buf);
+ }
+ outKey.close();
+ Assert.fail("Failed to handle key longer than 64K.");
+ }
+ catch (EOFException e) {
+ // noop, expecting exceptions
+ }
+ finally {
+ try {
+ closeOutput();
+ }
+ catch (Exception e) {
+ // no-op
+ }
+ }
+ }
+
+ public void testFailureNegativeOffset() throws IOException {
+ if (skip)
+ return;
+ writeRecords(2, true, true);
+
+ Reader reader = new Reader(fs.open(path), fs.getFileStatus(path).getLen(), conf);
+ Scanner scanner = reader.createScanner();
+ byte[] buf = new byte[K];
+ try {
+ scanner.entry().getKey(buf, -1);
+ Assert.fail("Failed to handle key negative offset.");
+ }
+ catch (Exception e) {
+ // noop, expecting exceptions
+ }
+ finally {
+ }
+ scanner.close();
+ reader.close();
+ }
+
+ /**
+ * Verify that the compressed data size is less than raw data size.
+ *
+ * @throws IOException
+ */
+ public void testFailureCompressionNotWorking() throws IOException {
+ if (skip)
+ return;
+ long rawDataSize = writeRecords(10000, false, false, false);
+ if (!compression.equalsIgnoreCase(Compression.Algorithm.NONE.getName())) {
+ Assert.assertTrue(out.getPos() < rawDataSize);
+ }
+ closeOutput();
+ }
+
+ public void testFailureCompressionNotWorking2() throws IOException {
+ if (skip)
+ return;
+ long rawDataSize = writeRecords(10000, true, true, false);
+ if (!compression.equalsIgnoreCase(Compression.Algorithm.NONE.getName())) {
+ Assert.assertTrue(out.getPos() < rawDataSize);
+ }
+ closeOutput();
+ }
+
+ private long writeRecords(int count, boolean knownKeyLength,
+ boolean knownValueLength, boolean close) throws IOException {
+ long rawDataSize = 0;
+ for (int nx = 0; nx < count; nx++) {
+ String key = TestTFileByteArrays.composeSortedKey("key", count, nx);
+ DataOutputStream outKey =
+ writer.prepareAppendKey(knownKeyLength ? key.length() : -1);
+ outKey.write(key.getBytes());
+ outKey.close();
+ String value = "value" + nx;
+ DataOutputStream outValue =
+ writer.prepareAppendValue(knownValueLength ? value.length() : -1);
+ outValue.write(value.getBytes());
+ outValue.close();
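+ // account for the raw bytes as a VInt length prefix plus the payload,
+ // for both key and value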
+ rawDataSize +=
+ WritableUtils.getVIntSize(key.getBytes().length)
+ + key.getBytes().length
+ + WritableUtils.getVIntSize(value.getBytes().length)
+ + value.getBytes().length;
+ }
+ if (close) {
+ closeOutput();
+ }
+ return rawDataSize;
+ }
+
+ private long writeRecords(int count, boolean knownKeyLength,
+ boolean knownValueLength) throws IOException {
+ return writeRecords(count, knownKeyLength, knownValueLength, true);
+ }
+
+ private void closeOutput() throws IOException {
+ if (writer != null) {
+ writer.close();
+ writer = null;
+ }
+ if (out != null) {
+ out.close();
+ out = null;
+ }
+ }
+}
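
The failure tests above all exercise the same Writer contract:
prepareAppendKey/prepareAppendValue take the exact serialized length (or -1
when the length is not known up front), and close() on the returned stream
enforces that count. A minimal sketch of a well-formed append, assuming a
local test path (the path and variable names below are illustrative, not
part of this commit):

    Configuration conf = new Configuration();
    Path path = new Path("/tmp/tfile-sketch");            // hypothetical path
    FileSystem fs = path.getFileSystem(conf);
    FSDataOutputStream out = fs.create(path);
    Writer writer = new Writer(out, 512, Compression.Algorithm.NONE.getName(),
        "memcmp", conf);

    DataOutputStream outKey = writer.prepareAppendKey(4); // exact key length
    outKey.write("key0".getBytes());
    outKey.close();             // throws if anything but 4 bytes were written

    DataOutputStream outValue = writer.prepareAppendValue(-1); // length unknown
    outValue.write("value0".getBytes());
    outValue.close();

    writer.close();
    out.close();
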
Added: hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileUnsortedByteArrays.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileUnsortedByteArrays.java?rev=793163&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileUnsortedByteArrays.java (added)
+++ hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileUnsortedByteArrays.java Sat Jul 11 01:22:14 2009
@@ -0,0 +1,238 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package org.apache.hadoop.io.file.tfile;
+
+import java.io.IOException;
+
+import junit.framework.Assert;
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.file.tfile.TFile.Reader;
+import org.apache.hadoop.io.file.tfile.TFile.Writer;
+import org.apache.hadoop.io.file.tfile.TFile.Reader.Scanner;
+
+public class TestTFileUnsortedByteArrays extends TestCase {
+ private static String ROOT =
+ System.getProperty("test.build.data", "/tmp/tfile-test");
+
+
+ private final static int BLOCK_SIZE = 512;
+ private final static int BUF_SIZE = 64;
+
+ private FileSystem fs;
+ private Configuration conf;
+ private Path path;
+ private FSDataOutputStream out;
+ private Writer writer;
+
+ private String compression = Compression.Algorithm.GZ.getName();
+ private String outputFile = "TFileTestUnsorted";
+ /*
+ * pre-sampled numbers of records in one block, based on the
+ * generated key and value strings
+ */
+ private int records1stBlock = 4314;
+ private int records2ndBlock = 4108;
+
+ public void init(String compression, String outputFile,
+ int numRecords1stBlock, int numRecords2ndBlock) {
+ this.compression = compression;
+ this.outputFile = outputFile;
+ this.records1stBlock = numRecords1stBlock;
+ this.records2ndBlock = numRecords2ndBlock;
+ }
+
+ @Override
+ public void setUp() throws IOException {
+ conf = new Configuration();
+ path = new Path(ROOT, outputFile);
+ fs = path.getFileSystem(conf);
+ out = fs.create(path);
+ writer = new Writer(out, BLOCK_SIZE, compression, null, conf);
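+ // keys are appended deliberately out of sorted order: Z, M, N, A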
+ writer.append("keyZ".getBytes(), "valueZ".getBytes());
+ writer.append("keyM".getBytes(), "valueM".getBytes());
+ writer.append("keyN".getBytes(), "valueN".getBytes());
+ writer.append("keyA".getBytes(), "valueA".getBytes());
+ closeOutput();
+ }
+
+ @Override
+ public void tearDown() throws IOException {
+ fs.delete(path, true);
+ }
+
+ // creating a scanner with a key range must fail on an unsorted TFile
+ public void testFailureScannerWithKeys() throws IOException {
+ Reader reader =
+ new Reader(fs.open(path), fs.getFileStatus(path).getLen(), conf);
+ Assert.assertFalse(reader.isSorted());
+ Assert.assertEquals((int) reader.getEntryCount(), 4);
+
+ try {
+ Scanner scanner =
+ reader.createScanner("aaa".getBytes(), "zzz".getBytes());
+ Assert.fail("Failed to catch creating scanner with keys on unsorted file.");
+ }
+ catch (RuntimeException e) {
+ // noop, expecting an exception
+ }
+ finally {
+ reader.close();
+ }
+ }
+
+ // we still can scan records in an unsorted TFile
+ public void testScan() throws IOException {
+ Reader reader =
+ new Reader(fs.open(path), fs.getFileStatus(path).getLen(), conf);
+ Assert.assertFalse(reader.isSorted());
+ Assert.assertEquals((int) reader.getEntryCount(), 4);
+
+ Scanner scanner = reader.createScanner();
+
+ try {
+
+ // read key and value
+ byte[] kbuf = new byte[BUF_SIZE];
+ int klen = scanner.entry().getKeyLength();
+ scanner.entry().getKey(kbuf);
+ Assert.assertEquals(new String(kbuf, 0, klen), "keyZ");
+
+ byte[] vbuf = new byte[BUF_SIZE];
+ int vlen = scanner.entry().getValueLength();
+ scanner.entry().getValue(vbuf);
+ Assert.assertEquals(new String(vbuf, 0, vlen), "valueZ");
+
+ scanner.advance();
+
+ // now try get value first
+ vbuf = new byte[BUF_SIZE];
+ vlen = scanner.entry().getValueLength();
+ scanner.entry().getValue(vbuf);
+ Assert.assertEquals(new String(vbuf, 0, vlen), "valueM");
+
+ kbuf = new byte[BUF_SIZE];
+ klen = scanner.entry().getKeyLength();
+ scanner.entry().getKey(kbuf);
+ Assert.assertEquals(new String(kbuf, 0, klen), "keyM");
+ }
+ finally {
+ scanner.close();
+ reader.close();
+ }
+ }
+
+ // we still can scan records in an unsorted TFile
+ public void testScanRange() throws IOException {
+ Reader reader =
+ new Reader(fs.open(path), fs.getFileStatus(path).getLen(), conf);
+ Assert.assertFalse(reader.isSorted());
+ Assert.assertEquals((int) reader.getEntryCount(), 4);
+
+ Scanner scanner = reader.createScanner();
+
+ try {
+
+ // read key and value
+ byte[] kbuf = new byte[BUF_SIZE];
+ int klen = scanner.entry().getKeyLength();
+ scanner.entry().getKey(kbuf);
+ Assert.assertEquals(new String(kbuf, 0, klen), "keyZ");
+
+ byte[] vbuf = new byte[BUF_SIZE];
+ int vlen = scanner.entry().getValueLength();
+ scanner.entry().getValue(vbuf);
+ Assert.assertEquals(new String(vbuf, 0, vlen), "valueZ");
+
+ scanner.advance();
+
+ // now try get value first
+ vbuf = new byte[BUF_SIZE];
+ vlen = scanner.entry().getValueLength();
+ scanner.entry().getValue(vbuf);
+ Assert.assertEquals(new String(vbuf, 0, vlen), "valueM");
+
+ kbuf = new byte[BUF_SIZE];
+ klen = scanner.entry().getKeyLength();
+ scanner.entry().getKey(kbuf);
+ Assert.assertEquals(new String(kbuf, 0, klen), "keyM");
+ }
+ finally {
+ scanner.close();
+ reader.close();
+ }
+ }
+
+ public void testFailureSeek() throws IOException {
+ Reader reader =
+ new Reader(fs.open(path), fs.getFileStatus(path).getLen(), conf);
+ Scanner scanner = reader.createScanner();
+
+ try {
+ // can't find ceil
+ try {
+ scanner.lowerBound("keyN".getBytes());
+ Assert.fail("Cannot search in an unsorted TFile!");
+ }
+ catch (Exception e) {
+ // noop, expecting exceptions
+ }
+
+ // can't find higher
+ try {
+ scanner.upperBound("keyA".getBytes());
+ Assert.fail("Cannot search higher in an unsorted TFile!");
+ }
+ catch (Exception e) {
+ // noop, expecting exceptions
+ }
+
+ // can't seek
+ try {
+ scanner.seekTo("keyM".getBytes());
+ Assert.fail("Cannot seek in an unsorted TFile!");
+ }
+ catch (Exception e) {
+ // noop, expecting exceptions
+ }
+ }
+ finally {
+ scanner.close();
+ reader.close();
+ }
+ }
+
+ private void closeOutput() throws IOException {
+ if (writer != null) {
+ writer.close();
+ writer = null;
+ out.close();
+ out = null;
+ }
+ }
+}
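
As the tests above show, an unsorted TFile still supports a full sequential
scan; only key-range scanner creation and the seek operations are rejected.
A sketch of the supported read path, reusing the fs/path/conf fields from
the test above (illustrative, not part of this commit):

    Reader reader =
        new Reader(fs.open(path), fs.getFileStatus(path).getLen(), conf);
    Scanner scanner = reader.createScanner();  // no key range: valid here
    while (!scanner.atEnd()) {
      byte[] kbuf = new byte[scanner.entry().getKeyLength()];
      scanner.entry().getKey(kbuf);
      byte[] vbuf = new byte[scanner.entry().getValueLength()];
      scanner.entry().getValue(vbuf);
      scanner.advance();
    }
    scanner.close();
    reader.close();
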
Added: hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestVLong.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestVLong.java?rev=793163&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestVLong.java (added)
+++ hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestVLong.java Sat Jul 11 01:22:14 2009
@@ -0,0 +1,161 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.io.file.tfile;
+
+import java.io.IOException;
+import java.util.Random;
+
+import junit.framework.Assert;
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+public class TestVLong extends TestCase {
+ private static String ROOT =
+ System.getProperty("test.build.data", "/tmp/tfile-test");
+ private Configuration conf;
+ private FileSystem fs;
+ private Path path;
+ private String outputFile = "TestVLong";
+
+ @Override
+ public void setUp() throws IOException {
+ conf = new Configuration();
+ path = new Path(ROOT, outputFile);
+ fs = path.getFileSystem(conf);
+ if (fs.exists(path)) {
+ fs.delete(path, false);
+ }
+ }
+
+ @Override
+ public void tearDown() throws IOException {
+ if (fs.exists(path)) {
+ fs.delete(path, false);
+ }
+ }
+
+ public void testVLongByte() throws IOException {
+ FSDataOutputStream out = fs.create(path);
+ for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; ++i) {
+ Utils.writeVLong(out, i);
+ }
+ out.close();
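+ // 256 byte values are written; assuming the one-byte VLong range is
+ // [-32, 127], the 96 values in [-128, -33] take two bytes each,
+ // giving (1 << Byte.SIZE) + 96 bytes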
+ Assert.assertEquals("Incorrect encoded size", (1 << Byte.SIZE) + 96, fs
+ .getFileStatus(
+ path).getLen());
+
+ FSDataInputStream in = fs.open(path);
+ for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; ++i) {
+ long n = Utils.readVLong(in);
+ Assert.assertEquals(n, i);
+ }
+ in.close();
+ fs.delete(path, false);
+ }
+
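+ // writes every short value shifted left by 'shift' bits, reads the file
+ // back to verify round-tripping, and returns the encoded length for the
+ // size assertions below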
+ private long writeAndVerify(int shift) throws IOException {
+ FSDataOutputStream out = fs.create(path);
+ for (int i = Short.MIN_VALUE; i <= Short.MAX_VALUE; ++i) {
+ Utils.writeVLong(out, ((long) i) << shift);
+ }
+ out.close();
+ FSDataInputStream in = fs.open(path);
+ for (int i = Short.MIN_VALUE; i <= Short.MAX_VALUE; ++i) {
+ long n = Utils.readVLong(in);
+ Assert.assertEquals(n, ((long) i) << shift);
+ }
+ in.close();
+ long ret = fs.getFileStatus(path).getLen();
+ fs.delete(path, false);
+ return ret;
+ }
+
+ public void testVLongShort() throws IOException {
+ long size = writeAndVerify(0);
+ Assert.assertEquals("Incorrect encoded size", (1 << Short.SIZE) * 2
+ + ((1 << Byte.SIZE) - 40)
+ * (1 << Byte.SIZE) - 128 - 32, size);
+ }
+
+ public void testVLong3Bytes() throws IOException {
+ long size = writeAndVerify(Byte.SIZE);
+ Assert.assertEquals("Incorrect encoded size", (1 << Short.SIZE) * 3
+ + ((1 << Byte.SIZE) - 32) * (1 << Byte.SIZE) - 40 - 1, size);
+ }
+
+ public void testVLong4Bytes() throws IOException {
+ long size = writeAndVerify(Byte.SIZE * 2);
+ Assert.assertEquals("Incorrect encoded size", (1 << Short.SIZE) * 4
+ + ((1 << Byte.SIZE) - 16) * (1 << Byte.SIZE) - 32 - 2, size);
+ }
+
+ public void testVLong5Bytes() throws IOException {
+ long size = writeAndVerify(Byte.SIZE * 3);
+ Assert.assertEquals("Incorrect encoded size", (1 << Short.SIZE) * 6 - 256
+ - 16 - 3, size);
+ }
+
+ private void verifySixOrMoreBytes(int bytes) throws IOException {
+ long size = writeAndVerify(Byte.SIZE * (bytes - 2));
+ Assert.assertEquals("Incorrect encoded size", (1 << Short.SIZE)
+ * (bytes + 1) - 256 - bytes + 1, size);
+ }
+ public void testVLong6Bytes() throws IOException {
+ verifySixOrMoreBytes(6);
+ }
+
+ public void testVLong7Bytes() throws IOException {
+ verifySixOrMoreBytes(7);
+ }
+
+ public void testVLong8Bytes() throws IOException {
+ verifySixOrMoreBytes(8);
+ }
+
+ public void testVLongRandom() throws IOException {
+ int count = 1024 * 1024;
+ long[] data = new long[count];
+ Random rng = new Random();
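+ // compose a random 64-bit value from two 32-bit halves, then mask it to a
+ // random bit width so every VLong encoding length gets exercised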
+ for (int i = 0; i < data.length; ++i) {
+ int shift = rng.nextInt(Long.SIZE) + 1;
+ long mask = -1L >>> (Long.SIZE - shift); // (1L << 64) would wrap, so build the mask with >>>
+ long a = ((long) rng.nextInt()) << 32; // widen before shifting; an int << 32 is a no-op
+ long b = rng.nextInt() & 0xffffffffL; // long-typed mask keeps the low 32 bits unsigned
+ data[i] = (a + b) & mask;
+ }
+
+ FSDataOutputStream out = fs.create(path);
+ for (int i = 0; i < data.length; ++i) {
+ Utils.writeVLong(out, data[i]);
+ }
+ out.close();
+
+ FSDataInputStream in = fs.open(path);
+ for (int i = 0; i < data.length; ++i) {
+ Assert.assertEquals(Utils.readVLong(in), data[i]);
+ }
+ in.close();
+ fs.delete(path, false);
+ }
+}
Added: hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/Timer.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/Timer.java?rev=793163&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/Timer.java (added)
+++ hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/Timer.java Sat Jul 11 01:22:14 2009
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.io.file.tfile;
+
+import java.io.IOException;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+
+/**
+ * A stopwatch-style helper class to measure the time taken for some event.
+ */
+public class Timer {
+ long startTimeEpoch;
+ long finishTimeEpoch;
+ private DateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+
+ public void startTime() throws IOException {
+ startTimeEpoch = System.currentTimeMillis();
+ }
+
+ public void stopTime() throws IOException {
+ finishTimeEpoch = System.currentTimeMillis();
+ }
+
+ public long getIntervalMillis() throws IOException {
+ return finishTimeEpoch - startTimeEpoch;
+ }
+
+ public void printlnWithTimestamp(String message) throws IOException {
+ System.out.println(formatCurrentTime() + " " + message);
+ }
+
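+ // note: millis is formatted as an epoch timestamp, so for intervals the
+ // output is an offset from 1970-01-01, not a duration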
+ public String formatTime(long millis) {
+ return formatter.format(millis);
+ }
+
+ public String getIntervalString() throws IOException {
+ long time = getIntervalMillis();
+ return formatTime(time);
+ }
+
+ public String formatCurrentTime() {
+ return formatTime(System.currentTimeMillis());
+ }
+
+}
+